From 21d4774803a783e05563aaa493cc15a95911b6e9 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Thu, 7 Mar 2024 15:58:58 +0300 Subject: [PATCH] Add mypy type-checking, recode to comply (#193) --- .github/workflows/python-package.yml | 4 + README.md | 40 +- duckduckgo_search/duckduckgo_search.py | 51 ++- duckduckgo_search/duckduckgo_search_async.py | 383 ++++++++----------- duckduckgo_search/py.typed | 1 + duckduckgo_search/utils.py | 30 +- pyproject.toml | 9 +- requirements-dev.txt | 3 +- 8 files changed, 247 insertions(+), 274 deletions(-) create mode 100644 duckduckgo_search/py.typed diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index b4d8a00..a685651 100755 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -32,6 +32,10 @@ jobs: run: | ruff check . ruff format . --check --target-version py38 + - name: Mypy + run: | + python -m pip install orjson + mypy --install-types . - name: Pytest run: | pytest diff --git a/README.md b/README.md index e1d5b56..b78e14b 100755 --- a/README.md +++ b/README.md @@ -190,17 +190,11 @@ Here is an example of initializing the AsyncDDGS class: ```python3 import asyncio import logging -import sys from duckduckgo_search import AsyncDDGS -# bypass curl-cffi NotImplementedError in windows https://curl-cffi.readthedocs.io/en/latest/faq/ -if sys.platform.lower().startswith("win"): - asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) - async def aget_results(word): - addgs = AsyncDDGS(proxies=None) - results = await addgs.text(word, max_results=100) + results = await AsyncDDGS(proxies=None).text(word, max_results=100) return results async def main(): @@ -257,7 +251,7 @@ def text( timelimit: Optional[str] = None, backend: str = "api", max_results: Optional[int] = None, -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo text search generator. 
Query params: https://duckduckgo.com/params. Args: @@ -272,7 +266,7 @@ def text( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with search results, or None if there was an error. + List of dictionaries with search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -296,14 +290,14 @@ results = await AsyncDDGS().text('sun', region='wt-wt', safesearch='off', timeli ## 2. answers() - instant answers by duckduckgo.com ```python -def answers(keywords: str) -> Optional[List[Dict[str, Optional[str]]]]: +def answers(keywords: str) -> List[Dict[str, str]]: """DuckDuckGo instant answers. Query params: https://duckduckgo.com/params. Args: keywords: keywords for query, Returns: - List of dictionaries with instant answers results, or None if there was an error. + List of dictionaries with instant answers results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -335,7 +329,7 @@ def images( layout: Optional[str] = None, license_image: Optional[str] = None, max_results: Optional[int] = None, -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo images search. Query params: https://duckduckgo.com/params. Args: @@ -356,7 +350,7 @@ def images( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with images search results, or None if there was an error. + List of dictionaries with images search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -396,7 +390,7 @@ def videos( duration: Optional[str] = None, license_videos: Optional[str] = None, max_results: Optional[int] = None, -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo videos search. 
Query params: https://duckduckgo.com/params. Args: @@ -410,7 +404,7 @@ def videos( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with videos search results, or None if there was an error. + List of dictionaries with videos search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -445,7 +439,7 @@ def news( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo news search. Query params: https://duckduckgo.com/params. Args: @@ -456,7 +450,7 @@ def news( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with news search results, or None if there was an error. + List of dictionaries with news search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -490,7 +484,7 @@ def maps( longitude: Optional[str] = None, radius: int = 0, max_results: Optional[int] = None, -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo maps search. Query params: https://duckduckgo.com/params. Args: @@ -509,7 +503,7 @@ def maps( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with maps search results, or None if there was an error. + List of dictionaries with maps search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -535,7 +529,7 @@ def translate( keywords: str, from_: Optional[str] = None, to: str = "en", -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo translate. 
Args: @@ -544,7 +538,7 @@ def translate( to: what language to translate. Defaults to "en". Returns: - List od dictionaries with translated keywords, or None if there was an error. + List of dictionaries with translated keywords. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -571,7 +565,7 @@ results = await AsyncDDGS().translate('sun', to="de") def suggestions( keywords, region: str = "wt-wt", -) -> Optional[List[Dict[str, Optional[str]]]]: +) -> List[Dict[str, str]]: """DuckDuckGo suggestions. Query params: https://duckduckgo.com/params. Args: @@ -579,7 +573,7 @@ def suggestions( region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". Returns: - List of dictionaries with suggestions results, or None if there was an error. + List of dictionaries with suggestions results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. diff --git a/duckduckgo_search/duckduckgo_search.py b/duckduckgo_search/duckduckgo_search.py index e99c738..22eaa3a 100644 --- a/duckduckgo_search/duckduckgo_search.py +++ b/duckduckgo_search/duckduckgo_search.py @@ -1,51 +1,64 @@ import asyncio +from concurrent.futures import Future from threading import Thread -from typing import Coroutine, Dict, List, Optional +from types import TracebackType +from typing import Any, Awaitable, Dict, Optional, Type, Union from .duckduckgo_search_async import AsyncDDGS # Create an event loop and run it in a separate thread. 
-_SHARED_LOOP = asyncio.new_event_loop() -_SHARED_THREAD = Thread(target=_SHARED_LOOP.run_forever, daemon=True) +_SHARED_LOOP: asyncio.AbstractEventLoop = asyncio.new_event_loop() +_SHARED_THREAD: Thread = Thread(target=_SHARED_LOOP.run_forever, daemon=True) _SHARED_THREAD.start() class DDGS(AsyncDDGS): - def __init__(self, headers=None, proxies=None, timeout=10) -> None: - super().__init__(headers, proxies, timeout) + def __init__( + self, + headers: Optional[Dict[str, str]] = None, + proxies: Union[Dict[str, str], str, None] = None, + timeout: Optional[int] = 10, + ) -> None: + super().__init__(headers=headers, proxies=proxies, timeout=timeout) self._loop = _SHARED_LOOP - def __enter__(self): + def __enter__(self) -> "DDGS": return self - def __exit__(self, exc_type, exc_val, exc_tb): - pass + def __exit__( + self, + exc_type: Optional[Type[BaseException]], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> bool: + return True - def _run_async_in_thread(self, coro: Coroutine) -> Optional[List[Dict[str, Optional[str]]]]: + def _run_async_in_thread(self, coro: Awaitable[Any]) -> Any: """Runs an async coroutine in a separate thread.""" - future = asyncio.run_coroutine_threadsafe(coro, self._loop) - return future.result() + future: Future[Any] = asyncio.run_coroutine_threadsafe(coro, self._loop) + result = future.result() + return result - def text(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def text(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().text(*args, **kwargs)) - def images(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def images(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().images(*args, **kwargs)) - def videos(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def videos(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().videos(*args, **kwargs)) - def news(self, 
*args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def news(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().news(*args, **kwargs)) - def answers(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def answers(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().answers(*args, **kwargs)) - def suggestions(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def suggestions(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().suggestions(*args, **kwargs)) - def maps(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def maps(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().maps(*args, **kwargs)) - def translate(self, *args, **kwargs) -> Optional[List[Dict[str, Optional[str]]]]: + def translate(self, *args: Any, **kwargs: Any) -> Any: return self._run_async_in_thread(super().translate(*args, **kwargs)) diff --git a/duckduckgo_search/duckduckgo_search_async.py b/duckduckgo_search/duckduckgo_search_async.py index 1004221..1d58f7c 100644 --- a/duckduckgo_search/duckduckgo_search_async.py +++ b/duckduckgo_search/duckduckgo_search_async.py @@ -1,12 +1,12 @@ import asyncio import logging -import sys from concurrent.futures import ThreadPoolExecutor from contextlib import suppress from datetime import datetime, timezone from decimal import Decimal from itertools import cycle, islice -from typing import Dict, List, Optional, Tuple +from types import TracebackType +from typing import Dict, List, Optional, Tuple, Union from curl_cffi import requests from lxml import html @@ -23,17 +23,18 @@ ) logger = logging.getLogger("duckduckgo_search.AsyncDDGS") -# Not working on Windows, NotImplementedError (https://curl-cffi.readthedocs.io/en/latest/faq/) -if sys.platform.lower().startswith("win"): - asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) - _SHARED_EXECUTOR = 
ThreadPoolExecutor() class AsyncDDGS: """DuckDuckgo_search async class to get search results from duckduckgo.com.""" - def __init__(self, headers=None, proxies=None, timeout=10) -> None: + def __init__( + self, + headers: Optional[Dict[str, str]] = None, + proxies: Union[Dict[str, str], str, None] = None, + timeout: Optional[int] = 10, + ) -> None: """Initialize the AsyncDDGS object. Args: @@ -44,7 +45,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None: Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. """ - self.proxies = proxies if proxies and isinstance(proxies, dict) else {"all": proxies} + self.proxies = {"all": proxies} if isinstance(proxies, str) else proxies self._asession = requests.AsyncSession( headers=headers, proxies=self.proxies, timeout=timeout, impersonate="chrome" ) @@ -54,15 +55,23 @@ async def __aenter__(self) -> "AsyncDDGS": """A context manager method that is called when entering the 'with' statement.""" return self - async def __aexit__(self, exc_type, exc_val, exc_tb) -> None: + async def __aexit__( + self, exc_type: Optional[BaseException], exc_val: Optional[BaseException], exc_tb: Optional[TracebackType] + ) -> None: """Closes the session.""" await self._asession.close() - async def _aget_url(self, method: str, url: str, **kwargs) -> Optional[requests.Response]: + async def _aget_url( + self, + method: str, + url: str, + data: Optional[Union[Dict[str, str], bytes]] = None, + params: Optional[Dict[str, str]] = None, + ) -> bytes: try: - resp = await self._asession.request(method, url, stream=True, **kwargs) + resp = await self._asession.request(method, url, data=data, params=params, stream=True) resp.raise_for_status() - resp_content = await resp.acontent() + resp_content: bytes = await resp.acontent() logger.debug(f"_aget_url() {url} {resp.status_code} {resp.http_version} {resp.elapsed} {len(resp_content)}") if _is_500_in_url(str(resp.url)) or resp.status_code == 202: 
raise DuckDuckGoSearchException("Ratelimit") @@ -70,12 +79,12 @@ async def _aget_url(self, method: str, url: str, **kwargs) -> Optional[requests. return resp_content except Exception as ex: raise DuckDuckGoSearchException(f"_aget_url() {url} {type(ex).__name__}: {ex}") from ex + raise DuckDuckGoSearchException(f"_aget_url() {url} return None. {params=} {data=}") - async def _aget_vqd(self, keywords: str) -> Optional[str]: + async def _aget_vqd(self, keywords: str) -> str: """Get vqd value for a search query.""" resp_content = await self._aget_url("POST", "https://duckduckgo.com", data={"q": keywords}) - if resp_content: - return _extract_vqd(resp_content, keywords) + return _extract_vqd(resp_content, keywords) async def text( self, @@ -85,7 +94,7 @@ async def text( timelimit: Optional[str] = None, backend: str = "api", max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. Args: @@ -120,7 +129,7 @@ async def _text_api( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. Args: @@ -131,7 +140,7 @@ async def _text_api( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with search results, or None if there was an error. + List of dictionaries with search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. 
@@ -144,7 +153,6 @@ async def _text_api( "q": keywords, "kl": region, "l": region, - "df": timelimit, "vqd": vqd, "bing_market": region, } @@ -155,20 +163,17 @@ async def _text_api( payload["ex"] = "-2" elif safesearch == "on": # strict payload["p"] = "1" + if timelimit: + payload["df"] = timelimit cache = set() - results = [None] * 1100 + results: List[Optional[Dict[str, str]]] = [None] * 1100 async def _text_api_page(s: int, page: int) -> None: priority = page * 100 - payload["s"] = s + payload["s"] = f"{s}" resp_content = await self._aget_url("GET", "https://links.duckduckgo.com/d.js", params=payload) - if resp_content is None: - return - page_data = _text_extract_json(resp_content, keywords) - if page_data is None: - return for row in page_data: href = row.get("u", None) @@ -190,8 +195,7 @@ async def _text_api_page(s: int, page: int) -> None: tasks.extend(_text_api_page(s, i) for i, s in enumerate(range(23, max_results, 50), start=1)) await asyncio.gather(*tasks) - results = list(islice(filter(None, results), max_results)) - return results + return list(islice(filter(None, results), max_results)) async def _text_html( self, @@ -200,7 +204,7 @@ async def _text_html( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. Args: @@ -211,7 +215,7 @@ async def _text_html( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with search results, or None if there was an error. + List of dictionaries with search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. 
@@ -219,27 +223,28 @@ async def _text_html( assert keywords, "keywords is mandatory" self._asession.headers["Referer"] = "https://html.duckduckgo.com/" - safesearch_base = {"on": 1, "moderate": -1, "off": -2} + safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"} payload = { "q": keywords, "kl": region, "p": safesearch_base[safesearch.lower()], - "df": timelimit, "o": "json", "api": "d.js", } + if timelimit: + payload["df"] = timelimit if max_results and max_results > 20: vqd = await self._aget_vqd(keywords) payload["vqd"] = vqd cache = set() - results = [None] * 1100 + results: List[Optional[Dict[str, str]]] = [None] * 1100 async def _text_html_page(s: int, page: int) -> None: priority = page * 100 - payload["s"] = s + payload["s"] = f"{s}" resp_content = await self._aget_url("POST", "https://html.duckduckgo.com/html", data=payload) - if resp_content is None or b"No results." in resp_content: + if b"No results." in resp_content: return tree = await self._asession.loop.run_in_executor(_SHARED_EXECUTOR, html.document_fromstring, resp_content) @@ -260,9 +265,9 @@ async def _text_html_page(s: int, page: int) -> None: priority += 1 result = { - "title": _normalize(title[0]) if title else None, + "title": _normalize(title[0]), "href": _normalize_url(href), - "body": _normalize("".join(body)) if body else None, + "body": _normalize("".join(body)), } results[priority] = result @@ -272,8 +277,7 @@ async def _text_html_page(s: int, page: int) -> None: tasks.extend(_text_html_page(s, i) for i, s in enumerate(range(23, max_results, 50), start=1)) await asyncio.gather(*tasks) - results = list(islice(filter(None, results), max_results)) - return results + return list(islice(filter(None, results), max_results)) async def _text_lite( self, @@ -281,7 +285,7 @@ async def _text_lite( region: str = "wt-wt", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo text search 
generator. Query params: https://duckduckgo.com/params. Args: @@ -291,7 +295,7 @@ async def _text_lite( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with search results, or None if there was an error. + List of dictionaries with search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -304,17 +308,18 @@ async def _text_lite( "o": "json", "api": "d.js", "kl": region, - "df": timelimit, } + if timelimit: + payload["df"] = timelimit cache = set() - results = [None] * 1100 + results: List[Optional[Dict[str, str]]] = [None] * 1100 async def _text_lite_page(s: int, page: int) -> None: priority = page * 100 - payload["s"] = s + payload["s"] = f"{s}" resp_content = await self._aget_url("POST", "https://lite.duckduckgo.com/lite/", data=payload) - if resp_content is None or b"No more results." in resp_content: + if b"No more results." in resp_content: return tree = await self._asession.loop.run_in_executor(_SHARED_EXECUTOR, html.document_fromstring, resp_content) @@ -351,8 +356,7 @@ async def _text_lite_page(s: int, page: int) -> None: tasks.extend(_text_lite_page(s, i) for i, s in enumerate(range(23, max_results, 50), start=1)) await asyncio.gather(*tasks) - results = list(islice(filter(None, results), max_results)) - return results + return list(islice(filter(None, results), max_results)) async def images( self, @@ -366,7 +370,7 @@ async def images( layout: Optional[str] = None, license_image: Optional[str] = None, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo images search. Query params: https://duckduckgo.com/params. Args: @@ -387,7 +391,7 @@ async def images( max_results: max number of results. If None, returns results only from the first response. Defaults to None. 
Returns: - List of dictionaries with images search results, or None if there was an error. + List of dictionaries with images search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -396,7 +400,7 @@ async def images( vqd = await self._aget_vqd(keywords) - safesearch_base = {"on": 1, "moderate": 1, "off": -1} + safesearch_base = {"on": "1", "moderate": "1", "off": "-1"} timelimit = f"time:{timelimit}" if timelimit else "" size = f"size:{size}" if size else "" color = f"color:{color}" if color else "" @@ -413,24 +417,18 @@ async def images( } cache = set() - results = [None] * 600 + results: List[Optional[Dict[str, str]]] = [None] * 600 async def _images_page(s: int, page: int) -> None: priority = page * 100 - payload["s"] = s + payload["s"] = f"{s}" resp_content = await self._aget_url("GET", "https://duckduckgo.com/i.js", params=payload) - if resp_content is None: - return - try: - resp_json = json_loads(resp_content) - except Exception: - return - page_data = resp_json.get("results", None) - if page_data is None: - return + resp_json = json_loads(resp_content) + + page_data = resp_json.get("results", []) for row in page_data: - image_url = row.get("image", None) + image_url = row.get("image") if image_url and image_url not in cache: cache.add(image_url) priority += 1 @@ -451,8 +449,7 @@ async def _images_page(s: int, page: int) -> None: tasks.extend(_images_page(s, i) for i, s in enumerate(range(100, max_results, 100), start=1)) await asyncio.gather(*tasks) - results = list(islice(filter(None, results), max_results)) - return results + return list(islice(filter(None, results), max_results)) async def videos( self, @@ -464,7 +461,7 @@ async def videos( duration: Optional[str] = None, license_videos: Optional[str] = None, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo videos search. Query params: https://duckduckgo.com/params. 
Args: @@ -478,7 +475,7 @@ async def videos( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with videos search results, or None if there was an error. + List of dictionaries with videos search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -487,7 +484,7 @@ async def videos( vqd = await self._aget_vqd(keywords) - safesearch_base = {"on": 1, "moderate": -1, "off": -2} + safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"} timelimit = f"publishedAfter:{timelimit}" if timelimit else "" resolution = f"videoDefinition:{resolution}" if resolution else "" duration = f"videoDuration:{duration}" if duration else "" @@ -502,21 +499,15 @@ async def videos( } cache = set() - results = [None] * 700 + results: List[Optional[Dict[str, str]]] = [None] * 700 async def _videos_page(s: int, page: int) -> None: priority = page * 100 - payload["s"] = s + payload["s"] = f"{s}" resp_content = await self._aget_url("GET", "https://duckduckgo.com/v.js", params=payload) - if resp_content is None: - return - try: - resp_json = json_loads(resp_content) - except Exception: - return - page_data = resp_json.get("results", None) - if page_data is None: - return + resp_json = json_loads(resp_content) + + page_data = resp_json.get("results", []) for row in page_data: if row["content"] not in cache: @@ -530,8 +521,7 @@ async def _videos_page(s: int, page: int) -> None: tasks.extend(_videos_page(s, i) for i, s in enumerate(range(59, max_results, 59), start=1)) await asyncio.gather(*tasks) - results = list(islice(filter(None, results), max_results)) - return results + return list(islice(filter(None, results), max_results)) async def news( self, @@ -540,7 +530,7 @@ async def news( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> 
List[Dict[str, str]]: """DuckDuckGo news search. Query params: https://duckduckgo.com/params. Args: @@ -551,7 +541,7 @@ async def news( max_results: max number of results. If None, returns results only from the first response. Defaults to None. Returns: - List of dictionaries with news search results, or None if there was an error. + List of dictionaries with news search results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -560,7 +550,7 @@ async def news( vqd = await self._aget_vqd(keywords) - safesearch_base = {"on": 1, "moderate": -1, "off": -2} + safesearch_base = {"on": "1", "moderate": "-1", "off": "-2"} payload = { "l": region, "o": "json", @@ -568,25 +558,19 @@ async def news( "q": keywords, "vqd": vqd, "p": safesearch_base[safesearch.lower()], - "df": timelimit, } + if timelimit: + payload["df"] = timelimit cache = set() - results = [None] * 700 + results: List[Optional[Dict[str, str]]] = [None] * 700 async def _news_page(s: int, page: int) -> None: priority = page * 100 - payload["s"] = s + payload["s"] = f"{s}" resp_content = await self._aget_url("GET", "https://duckduckgo.com/news.js", params=payload) - if resp_content is None: - return - try: - resp_json = json_loads(resp_content) - except Exception: - return - page_data = resp_json.get("results", None) - if page_data is None: - return + resp_json = json_loads(resp_content) + page_data = resp_json.get("results", []) for row in page_data: if row["url"] not in cache: @@ -609,17 +593,16 @@ async def _news_page(s: int, page: int) -> None: tasks.extend(_news_page(s, i) for i, s in enumerate(range(29, max_results, 29), start=1)) await asyncio.gather(*tasks) - results = list(islice(filter(None, results), max_results)) - return results + return list(islice(filter(None, results), max_results)) - async def answers(self, keywords: str) -> Optional[List[Dict[str, Optional[str]]]]: + async def answers(self, keywords: str) -> List[Dict[str, str]]: 
"""DuckDuckGo instant answers. Query params: https://duckduckgo.com/params. Args: keywords: keywords for query, Returns: - List of dictionaries with instant answers results, or None if there was an error. + List of dictionaries with instant answers results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. @@ -630,30 +613,21 @@ async def answers(self, keywords: str) -> Optional[List[Dict[str, Optional[str]] "q": f"what is {keywords}", "format": "json", } - resp_content = await self._aget_url("GET", "https://api.duckduckgo.com/", params=payload) - if not resp_content: - return + page_data = json_loads(resp_content) results = [] - - try: - page_data = json_loads(resp_content) - except Exception: - page_data = None - - if page_data: - answer = page_data.get("AbstractText", None) - url = page_data.get("AbstractURL", None) - if answer: - results.append( - { - "icon": None, - "text": answer, - "topic": None, - "url": url, - } - ) + answer = page_data.get("AbstractText") + url = page_data.get("AbstractURL") + if answer: + results.append( + { + "icon": None, + "text": answer, + "topic": None, + "url": url, + } + ) # related payload = { @@ -661,41 +635,36 @@ async def answers(self, keywords: str) -> Optional[List[Dict[str, Optional[str]] "format": "json", } resp_content = await self._aget_url("GET", "https://api.duckduckgo.com/", params=payload) - if not resp_content: - return - try: - page_data = json_loads(resp_content).get("RelatedTopics", None) - except Exception: - page_data = None + resp_json = json_loads(resp_content) + page_data = resp_json.get("RelatedTopics", []) - if page_data: - for row in page_data: - topic = row.get("Name", None) - if not topic: - icon = row["Icon"].get("URL", None) + for row in page_data: + topic = row.get("Name") + if not topic: + icon = row["Icon"].get("URL") + results.append( + { + "icon": f"https://duckduckgo.com{icon}" if icon else "", + "text": row["Text"], + "topic": None, + "url": 
row["FirstURL"], + } + ) + else: + for subrow in row["Topics"]: + icon = subrow["Icon"].get("URL") results.append( { - "icon": f"https://duckduckgo.com{icon}" if icon else None, - "text": row["Text"], - "topic": None, - "url": row["FirstURL"], + "icon": f"https://duckduckgo.com{icon}" if icon else "", + "text": subrow["Text"], + "topic": topic, + "url": subrow["FirstURL"], } ) - else: - for subrow in row["Topics"]: - icon = subrow["Icon"].get("URL", None) - results.append( - { - "icon": f"https://duckduckgo.com{icon}" if icon else None, - "text": subrow["Text"], - "topic": topic, - "url": subrow["FirstURL"], - } - ) return results - async def suggestions(self, keywords: str, region: str = "wt-wt") -> Optional[List[Dict[str, Optional[str]]]]: + async def suggestions(self, keywords: str, region: str = "wt-wt") -> List[Dict[str, str]]: """DuckDuckGo suggestions. Query params: https://duckduckgo.com/params. Args: @@ -703,7 +672,7 @@ async def suggestions(self, keywords: str, region: str = "wt-wt") -> Optional[Li region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". Returns: - List of dictionaries with suggestions results, or None if there was an error. + List of dictionaries with suggestions results. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. 
@@ -714,20 +683,9 @@ async def suggestions(self, keywords: str, region: str = "wt-wt") -> Optional[Li "q": keywords, "kl": region, } - resp_content = await self._aget_url("GET", "https://duckduckgo.com/ac", params=payload) - if not resp_content: - return - - results = [] - try: - page_data = json_loads(resp_content) - for r in page_data: - results.append(r) - except Exception: - pass - - return results + page_data = json_loads(resp_content) + return [r for r in page_data] async def maps( self, @@ -743,7 +701,7 @@ async def maps( longitude: Optional[str] = None, radius: int = 0, max_results: Optional[int] = None, - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> List[Dict[str, str]]: """DuckDuckGo maps search. Query params: https://duckduckgo.com/params. Args: @@ -782,38 +740,38 @@ async def maps( # otherwise request about bbox to nominatim api else: if place: - params: Dict[str, Optional[str]] = { + params = { "q": place, "polygon_geojson": "0", "format": "jsonv2", } else: params = { - "street": street, - "city": city, - "county": county, - "state": state, - "country": country, - "postalcode": postalcode, "polygon_geojson": "0", "format": "jsonv2", } - params = {k: v for k, v in params.items() if v is not None} - try: - resp_content = await self._aget_url( - "GET", - "https://nominatim.openstreetmap.org/search.php", - params=params, - ) - if resp_content is None: - return - - coordinates = json_loads(resp_content)[0]["boundingbox"] - lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2]) - lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3]) - except Exception as ex: - logger.debug(f"ddg_maps() keywords={keywords} {type(ex).__name__} {ex}") - return + if street: + params["street"] = street + if city: + params["city"] = city + if county: + params["county"] = county + if state: + params["state"] = state + if country: + params["country"] = country + if postalcode: + params["postalcode"] = postalcode + # request nominatim api to get 
coordinates box + resp_content = await self._aget_url( + "GET", + "https://nominatim.openstreetmap.org/search.php", + params=params, + ) + resp_json = json_loads(resp_content) + coordinates = resp_json[0]["boundingbox"] + lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2]) + lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3]) # if a radius is specified, expand the search square lat_t += Decimal(radius) * Decimal(0.008983) @@ -823,13 +781,13 @@ async def maps( logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}") cache = set() - results = [] + results: List[Dict[str, str]] = [] async def _maps_page( bbox: Tuple[Decimal, Decimal, Decimal, Decimal], - ) -> Optional[List[Dict[str, Optional[str]]]]: + ) -> Optional[List[Dict[str, str]]]: if max_results and len(results) >= max_results: - return + return None lat_t, lon_l, lat_b, lon_r = bbox params = { "q": keywords, @@ -844,14 +802,8 @@ async def _maps_page( "strict_bbox": "1", } resp_content = await self._aget_url("GET", "https://duckduckgo.com/local.js", params=params) - if resp_content is None: - return - try: - page_data = json_loads(resp_content).get("results", []) - except Exception: - return - if page_data is None: - return + resp_json = json_loads(resp_content) + page_data = resp_json.get("results", []) page_results = [] for res in page_data: @@ -865,17 +817,17 @@ async def _maps_page( "address": res["address"], "country_code": res["country_code"], "url": _normalize_url(res["website"]), - "phone": res["phone"], + "phone": res["phone"] or "", "latitude": res["coordinates"]["latitude"], "longitude": res["coordinates"]["longitude"], "source": _normalize_url(res["url"]), - "image": x.get("image", "") if (x := res["embed"]) else None, - "desc": x.get("description", "") if (x := res["embed"]) else None, - "hours": res["hours"], - "category": res["ddg_category"], - "facebook": f"www.facebook.com/profile.php?id={x}" if (x := res["facebook_id"]) else None, - "instagram": 
f"https://www.instagram.com/{x}" if (x := res["instagram_id"]) else None, - "twitter": f"https://twitter.com/{x}" if (x := res["twitter_id"]) else None, + "image": x.get("image", "") if (x := res["embed"]) else "", + "desc": x.get("description", "") if (x := res["embed"]) else "", + "hours": res["hours"] or "", + "category": res["ddg_category"] or "", + "facebook": f"www.facebook.com/profile.php?id={x}" if (x := res["facebook_id"]) else "", + "instagram": f"https://www.instagram.com/{x}" if (x := res["instagram_id"]) else "", + "twitter": f"https://twitter.com/{x}" if (x := res["twitter_id"]) else "", } page_results.append(result) @@ -917,8 +869,8 @@ async def _maps_page( return list(islice(results, max_results)) async def translate( - self, keywords: str, from_: Optional[str] = None, to: str = "en" - ) -> Optional[List[Dict[str, Optional[str]]]]: + self, keywords: Union[List[str], str], from_: Optional[str] = None, to: str = "en" + ) -> List[Dict[str, str]]: """DuckDuckGo translate. Args: @@ -927,7 +879,7 @@ async def translate( to: what language to translate. Defaults to "en". Returns: - List od dictionaries with translated keywords, or None if there was an error. + List od dictionaries with translated keywords. Raises: DuckDuckGoSearchException: Raised when there is a generic exception during the API request. 
@@ -953,19 +905,12 @@ async def _translate_keyword(keyword: str) -> None: params=payload, data=keyword.encode(), ) - if resp_content is None: - return - - try: - page_data = json_loads(resp_content) - page_data["original"] = keyword - except Exception: - page_data = None - - if page_data: - results.append(page_data) + page_data = json_loads(resp_content) + page_data["original"] = keyword + results.append(page_data) - keywords = [keywords] if isinstance(keywords, str) else keywords + if isinstance(keywords, str): + keywords = [keywords] tasks = [_translate_keyword(keyword) for keyword in keywords] await asyncio.gather(*tasks) diff --git a/duckduckgo_search/py.typed b/duckduckgo_search/py.typed new file mode 100644 index 0000000..e5aff4f --- /dev/null +++ b/duckduckgo_search/py.typed @@ -0,0 +1 @@ +# Marker file for PEP 561. \ No newline at end of file diff --git a/duckduckgo_search/utils.py b/duckduckgo_search/utils.py index 7c964fa..ab5e8c6 100644 --- a/duckduckgo_search/utils.py +++ b/duckduckgo_search/utils.py @@ -3,7 +3,7 @@ from decimal import Decimal from html import unescape from math import atan2, cos, radians, sin, sqrt -from typing import Optional +from typing import Any, Dict, List, Union from urllib.parse import unquote from .exceptions import DuckDuckGoSearchException @@ -19,15 +19,21 @@ REGEX_STRIP_TAGS = re.compile("<.*?>") -def json_dumps(obj): - return orjson.dumps(obj).decode("utf-8") if HAS_ORJSON else json.dumps(obj) +def json_dumps(obj: Any) -> str: + try: + return orjson.dumps(obj).decode("utf-8") if HAS_ORJSON else json.dumps(obj) + except Exception as ex: + raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex -def json_loads(obj): - return orjson.loads(obj) if HAS_ORJSON else json.loads(obj) +def json_loads(obj: Union[str, bytes]) -> Any: + try: + return orjson.loads(obj) if HAS_ORJSON else json.loads(obj) + except Exception as ex: + raise DuckDuckGoSearchException(f"{type(ex).__name__}: {ex}") from ex -def 
_extract_vqd(html_bytes: bytes, keywords: str) -> Optional[str]: +def _extract_vqd(html_bytes: bytes, keywords: str) -> str: """Extract vqd from html bytes.""" for c1, c1_len, c2 in ( (b'vqd="', 5, b'"'), @@ -43,15 +49,17 @@ def _extract_vqd(html_bytes: bytes, keywords: str) -> Optional[str]: raise DuckDuckGoSearchException(f"_extract_vqd() {keywords=} Could not extract vqd.") -def _text_extract_json(html_bytes: bytes, keywords: str) -> Optional[str]: +def _text_extract_json(html_bytes: bytes, keywords: str) -> List[Dict[str, str]]: """text(backend="api") -> extract json from html.""" try: start = html_bytes.index(b"DDG.pageLayout.load('d',") + 24 end = html_bytes.index(b");DDG.duckbar.load(", start) data = html_bytes[start:end] - return json_loads(data) + result: List[Dict[str, str]] = json_loads(data) + return result except Exception as ex: raise DuckDuckGoSearchException(f"_text_extract_json() {keywords=} {type(ex).__name__}: {ex}") from ex + raise DuckDuckGoSearchException(f"_text_extract_json() {keywords=} return None") def _is_500_in_url(url: str) -> bool: @@ -72,8 +80,8 @@ def _normalize_url(url: str) -> str: def _calculate_distance(lat1: Decimal, lon1: Decimal, lat2: Decimal, lon2: Decimal) -> float: """Calculate distance between two points in km. 
Haversine formula.""" R = 6371.0087714 # Earth's radius in km - lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2]) - dlon, dlat = lon2 - lon1, lat2 - lat1 - a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2 + rlat1, rlon1, rlat2, rlon2 = map(radians, [float(lat1), float(lon1), float(lat2), float(lon2)]) + dlon, dlat = rlon2 - rlon1, rlat2 - rlat1 + a = sin(dlat / 2) ** 2 + cos(rlat1) * cos(rlat2) * sin(dlon / 2) ** 2 c = 2 * atan2(sqrt(a), sqrt(1 - a)) return R * c diff --git a/pyproject.toml b/pyproject.toml index 9c57ebd..7386487 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,8 +45,9 @@ version = {attr = "duckduckgo_search.version.__version__"} [project.optional-dependencies] dev = [ - "ruff>=0.3.0", + "mypy>=1.8.0", "pytest>=8.0.1", + "ruff>=0.3.0", ] [tool.ruff] @@ -64,3 +65,9 @@ select = [ "I", # isort ] ignore = ["D100"] + +[tool.mypy] +python_version = "3.8" +ignore_missing_imports = true +strict = true +exclude = ['cli\.py$', '__main__\.py$', "tests/", "build/"] diff --git a/requirements-dev.txt b/requirements-dev.txt index 2674537..d4df4b7 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,2 +1,3 @@ -ruff>=0.3.0 +mypy>=1.8.0 pytest>=8.0.1 +ruff>=0.3.0