From 99a045bad3a48cefd80aaee48a27c201e5dce3c5 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:18:45 +0300 Subject: [PATCH 1/5] ruff - remove pydocstyle rule --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 32c141f..e380467 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -63,9 +63,5 @@ select = [ "B", # flake8-bugbear "SIM", # flake8-simplify "I", # isort - "D", # pydocstyle ] ignore = ["D100"] - -[tool.ruff.lint.pydocstyle] -convention = "google" From 23cbe3f84c91f3f3f5343e35a188636ddf4d3240 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:19:36 +0300 Subject: [PATCH 2/5] DDGS - remove code, wrap AsyncDDGS --- duckduckgo_search/duckduckgo_search.py | 884 ++----------------------- 1 file changed, 49 insertions(+), 835 deletions(-) diff --git a/duckduckgo_search/duckduckgo_search.py b/duckduckgo_search/duckduckgo_search.py index 5ba35bf..ae19118 100644 --- a/duckduckgo_search/duckduckgo_search.py +++ b/duckduckgo_search/duckduckgo_search.py @@ -1,848 +1,62 @@ +import asyncio import logging -from collections import deque -from datetime import datetime, timezone -from decimal import Decimal -from itertools import cycle -from typing import Deque, Dict, Iterator, Optional, Set, Tuple +from typing import Dict, Generator, Optional -from curl_cffi import requests -from lxml import html - -from .exceptions import DuckDuckGoSearchException -from .models import MapsResult -from .utils import _extract_vqd, _is_500_in_url, _normalize, _normalize_url, _random_browser, _text_extract_json +from .duckduckgo_search_async import AsyncDDGS logger = logging.getLogger("duckduckgo_search.DDGS") -class DDGS: - """DuckDuckgo_search class to get search results from duckduckgo.com.""" - - def __init__(self, headers=None, proxies=None, timeout=10) -> None: - """Initialize the DDGS object. 
- - Args: - headers (dict, optional): Dictionary of headers for the HTTP client. Defaults to None. - proxies (Union[dict, str], optional): Proxies for the HTTP client (can be dict or str). Defaults to None. - timeout (int, optional): Timeout value for the HTTP client. Defaults to 10. - """ - self.proxies = proxies if proxies and isinstance(proxies, dict) else {"http": proxies, "https": proxies} - self._session = requests.Session( - headers=headers, proxies=self.proxies, timeout=timeout, impersonate=_random_browser() - ) - self._session.headers["Referer"] = "https://duckduckgo.com/" +class DDGS(AsyncDDGS): + def __init__(self, headers=None, proxies=None, timeout=10): + super().__init__(headers, proxies, timeout) + self._loop = asyncio.new_event_loop() def __enter__(self) -> "DDGS": - """A context manager method that is called when entering the 'with' statement.""" return self def __exit__(self, exc_type, exc_val, exc_tb) -> None: - """Closes the session.""" - self._session.close() - - def _get_url(self, method: str, url: str, **kwargs) -> Optional[requests.Response]: - try: - resp = self._session.request(method, url, **kwargs) - logger.debug(f"_get_url() {url} {resp.status_code} {resp.http_version} {resp.elapsed} {len(resp.content)}") - resp.raise_for_status() - if _is_500_in_url(str(resp.url)) or resp.status_code == 202: - raise DuckDuckGoSearchException("Ratelimit") - if resp.status_code == 200: - return resp - except Exception as ex: - raise DuckDuckGoSearchException(f"_get_url() {url} {type(ex).__name__}: {ex}") from ex - - def _get_vqd(self, keywords: str) -> Optional[str]: - """Get vqd value for a search query.""" - resp = self._get_url("POST", "https://duckduckgo.com", data={"q": keywords}) - if resp: - return _extract_vqd(resp.content, keywords) - - def text( - self, - keywords: str, - region: str = "wt-wt", - safesearch: str = "moderate", - timelimit: Optional[str] = None, - backend: str = "api", - max_results: Optional[int] = None, - ) -> 
Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - safesearch: on, moderate, off. Defaults to "moderate". - timelimit: d, w, m, y. Defaults to None. - backend: api, html, lite. Defaults to api. - api - collect data from https://duckduckgo.com, - html - collect data from https://html.duckduckgo.com, - lite - collect data from https://lite.duckduckgo.com. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with search results. - - """ - if backend == "api": - results = self._text_api(keywords, region, safesearch, timelimit, max_results) - elif backend == "html": - results = self._text_html(keywords, region, safesearch, timelimit, max_results) - elif backend == "lite": - results = self._text_lite(keywords, region, timelimit, max_results) - - if results: - for i, result in enumerate(results, start=1): - yield result - if max_results and i >= max_results: - break - - def _text_api( - self, - keywords: str, - region: str = "wt-wt", - safesearch: str = "moderate", - timelimit: Optional[str] = None, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - safesearch: on, moderate, off. Defaults to "moderate". - timelimit: d, w, m, y. Defaults to None. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with search results. 
- - """ - assert keywords, "keywords is mandatory" - - vqd = self._get_vqd(keywords) - - payload = { - "q": keywords, - "kl": region, - "l": region, - "bing_market": f"{region.split('-')[0]}-{region.split('-')[-1].upper()}", - "s": "0", - "df": timelimit, - "vqd": vqd, - # "o": "json", - "sp": "0", - } - safesearch = safesearch.lower() - if safesearch == "moderate": - payload["ex"] = "-1" - elif safesearch == "off": - payload["ex"] = "-2" - elif safesearch == "on": # strict - payload["p"] = "1" - - cache = set() - for _ in range(11): - resp = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload) - if resp is None: - return - - page_data = _text_extract_json(resp.content, keywords) - if page_data is None: - return - - result_exists, next_page_url = False, None - for row in page_data: - href = row.get("u", None) - if href and href not in cache and href != f"http://www.google.com/search?q={keywords}": - cache.add(href) - body = _normalize(row["a"]) - if body: - result_exists = True - yield { - "title": _normalize(row["t"]), - "href": _normalize_url(href), - "body": body, - } - else: - next_page_url = row.get("n", None) - if max_results is None or result_exists is False or next_page_url is None: - return - payload["s"] = next_page_url.split("s=")[1].split("&")[0] - - def _text_html( - self, - keywords: str, - region: str = "wt-wt", - safesearch: str = "moderate", - timelimit: Optional[str] = None, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - safesearch: on, moderate, off. Defaults to "moderate". - timelimit: d, w, m, y. Defaults to None. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with search results. 
- - """ - assert keywords, "keywords is mandatory" - - self._session.headers["Referer"] = "https://html.duckduckgo.com/" - safesearch_base = {"on": 1, "moderate": -1, "off": -2} - payload = { - "q": keywords, - "s": "0", - "kl": region, - "p": safesearch_base[safesearch.lower()], - "df": timelimit, - "b": "", - } - cache: Set[str] = set() - for _ in range(11): - resp = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload) - if resp is None: - return - - tree = html.fromstring(resp.content) - if tree.xpath('//div[@class="no-results"]/text()'): - return - - result_exists = False - for e in tree.xpath('//div[contains(@class, "results_links")]'): - href = e.xpath('.//a[contains(@class, "result__a")]/@href') - href = href[0] if href else None - if ( - href - and href not in cache - and href != f"http://www.google.com/search?q={keywords}" - and not href.startswith("https://duckduckgo.com/y.js?ad_domain") - ): - cache.add(href) - title = e.xpath('.//a[contains(@class, "result__a")]/text()') - body = e.xpath('.//a[contains(@class, "result__snippet")]//text()') - result_exists = True - yield { - "title": _normalize(title[0]) if title else None, - "href": _normalize_url(href), - "body": _normalize("".join(body)) if body else None, - } - - if max_results is None or result_exists is False: - return - next_page = tree.xpath('.//div[@class="nav-link"]') - next_page = next_page[-1] if next_page else None - if next_page is None: - return - - names = next_page.xpath('.//input[@type="hidden"]/@name') - values = next_page.xpath('.//input[@type="hidden"]/@value') - payload = {n: v for n, v in zip(names, values)} - - def _text_lite( - self, - keywords: str, - region: str = "wt-wt", - timelimit: Optional[str] = None, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. 
Defaults to "wt-wt". - timelimit: d, w, m, y. Defaults to None. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with search results. - - """ - assert keywords, "keywords is mandatory" - - self._session.headers["Referer"] = "https://lite.duckduckgo.com/" - payload = { - "q": keywords, - "s": "0", - "o": "json", - "api": "d.js", - "kl": region, - "df": timelimit, - } - - cache: Set[str] = set() - for _ in range(11): - resp = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload) - if resp is None: - return - - if b"No more results." in resp.content: - return - - tree = html.fromstring(resp.content) - result_exists = False - data = zip(cycle(range(1, 5)), tree.xpath("//table[last()]//tr")) - for i, e in data: - if i == 1: - href = e.xpath(".//a//@href") - href = href[0] if href else None - if ( - href is None - or href in cache - or href == f"http://www.google.com/search?q={keywords}" - or href.startswith("https://duckduckgo.com/y.js?ad_domain") - ): - [next(data, None) for _ in range(3)] # skip block(i=1,2,3,4) - else: - cache.add(href) - title = e.xpath(".//a//text()")[0] - elif i == 2: - body = e.xpath(".//td[@class='result-snippet']//text()") - body = "".join(body).strip() - elif i == 3: - result_exists = True - yield { - "title": _normalize(title), - "href": _normalize_url(href), - "body": _normalize(body), - } - if max_results is None or result_exists is False: - return - next_page_s = tree.xpath("//form[./input[contains(@value, 'ext')]]/input[@name='s']/@value") - if not next_page_s: - return - payload["s"] = next_page_s[0] - payload["vqd"] = _extract_vqd(resp.content, keywords) - - def images( - self, - keywords: str, - region: str = "wt-wt", - safesearch: str = "moderate", - timelimit: Optional[str] = None, - size: Optional[str] = None, - color: Optional[str] = None, - type_image: Optional[str] = None, - layout: Optional[str] = None, - license_image: 
Optional[str] = None, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo images search. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - safesearch: on, moderate, off. Defaults to "moderate". - timelimit: Day, Week, Month, Year. Defaults to None. - size: Small, Medium, Large, Wallpaper. Defaults to None. - color: color, Monochrome, Red, Orange, Yellow, Green, Blue, - Purple, Pink, Brown, Black, Gray, Teal, White. Defaults to None. - type_image: photo, clipart, gif, transparent, line. - Defaults to None. - layout: Square, Tall, Wide. Defaults to None. - license_image: any (All Creative Commons), Public (PublicDomain), - Share (Free to Share and Use), ShareCommercially (Free to Share and Use Commercially), - Modify (Free to Modify, Share, and Use), ModifyCommercially (Free to Modify, Share, and - Use Commercially). Defaults to None. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with image search results. 
- - """ - assert keywords, "keywords is mandatory" - - vqd = self._get_vqd(keywords) - - safesearch_base = {"on": 1, "moderate": 1, "off": -1} - timelimit = f"time:{timelimit}" if timelimit else "" - size = f"size:{size}" if size else "" - color = f"color:{color}" if color else "" - type_image = f"type:{type_image}" if type_image else "" - layout = f"layout:{layout}" if layout else "" - license_image = f"license:{license_image}" if license_image else "" - payload = { - "l": region, - "o": "json", - "q": keywords, - "vqd": vqd, - "f": f"{timelimit},{size},{color},{type_image},{layout},{license_image}", - "p": safesearch_base[safesearch.lower()], - } - - cache = set() - for _ in range(10): - resp = self._get_url("GET", "https://duckduckgo.com/i.js", params=payload) - if resp is None: - return - try: - resp_json = resp.json() - except Exception: - return - page_data = resp_json.get("results", None) - if page_data is None: - return - - result_exists = False - for row in page_data: - image_url = row.get("image", None) - if image_url and image_url not in cache: - cache.add(image_url) - result_exists = True - yield { - "title": row["title"], - "image": _normalize_url(image_url), - "thumbnail": _normalize_url(row["thumbnail"]), - "url": _normalize_url(row["url"]), - "height": row["height"], - "width": row["width"], - "source": row["source"], - } - if max_results and len(cache) >= max_results: - return - if max_results is None or result_exists is False: - return - next = resp_json.get("next", None) - if next is None: - return - payload["s"] = next.split("s=")[-1].split("&")[0] - - def videos( - self, - keywords: str, - region: str = "wt-wt", - safesearch: str = "moderate", - timelimit: Optional[str] = None, - resolution: Optional[str] = None, - duration: Optional[str] = None, - license_videos: Optional[str] = None, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo videos search. Query params: https://duckduckgo.com/params. 
+ self._loop.run_until_complete(self.__aexit__(exc_type, exc_val, exc_tb)) - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - safesearch: on, moderate, off. Defaults to "moderate". - timelimit: d, w, m. Defaults to None. - resolution: high, standart. Defaults to None. - duration: short, medium, long. Defaults to None. - license_videos: creativeCommon, youtube. Defaults to None. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with videos search results - - """ - assert keywords, "keywords is mandatory" - - vqd = self._get_vqd(keywords) - - safesearch_base = {"on": 1, "moderate": -1, "off": -2} - timelimit = f"publishedAfter:{timelimit}" if timelimit else "" - resolution = f"videoDefinition:{resolution}" if resolution else "" - duration = f"videoDuration:{duration}" if duration else "" - license_videos = f"videoLicense:{license_videos}" if license_videos else "" - payload = { - "l": region, - "o": "json", - "s": 0, - "q": keywords, - "vqd": vqd, - "f": f"{timelimit},{resolution},{duration},{license_videos}", - "p": safesearch_base[safesearch.lower()], - } - - cache = set() - for _ in range(10): - resp = self._get_url("GET", "https://duckduckgo.com/v.js", params=payload) - if resp is None: - return - try: - resp_json = resp.json() - except Exception: - return - page_data = resp_json.get("results", None) - if page_data is None: - return - - result_exists = False - for row in page_data: - if row["content"] not in cache: - cache.add(row["content"]) - result_exists = True - yield row - if max_results and len(cache) >= max_results: - return - if max_results is None or result_exists is False: - return - next = resp_json.get("next", None) - if next is None: - return - payload["s"] = next.split("s=")[-1].split("&")[0] - - def news( - self, - keywords: str, - region: str = "wt-wt", - safesearch: str = "moderate", - timelimit: Optional[str] = 
None, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo news search. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - safesearch: on, moderate, off. Defaults to "moderate". - timelimit: d, w, m. Defaults to None. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. - - Yields: - dict with news search results. - - """ - assert keywords, "keywords is mandatory" - - vqd = self._get_vqd(keywords) - - safesearch_base = {"on": 1, "moderate": -1, "off": -2} - payload = { - "l": region, - "o": "json", - "noamp": "1", - "q": keywords, - "vqd": vqd, - "p": safesearch_base[safesearch.lower()], - "df": timelimit, - "s": 0, - } - - cache = set() - for _ in range(10): - resp = self._get_url("GET", "https://duckduckgo.com/news.js", params=payload) - if resp is None: - return - try: - resp_json = resp.json() - except Exception: - return - page_data = resp_json.get("results", None) - if page_data is None: - return - - result_exists = False - for row in page_data: - if row["url"] not in cache: - cache.add(row["url"]) - image_url = row.get("image", None) - result_exists = True - yield { - "date": datetime.fromtimestamp(row["date"], timezone.utc).isoformat(), - "title": row["title"], - "body": _normalize(row["excerpt"]), - "url": _normalize_url(row["url"]), - "image": _normalize_url(image_url) if image_url else None, - "source": row["source"], - } - if max_results and len(cache) >= max_results: - return - if max_results is None or result_exists is False: - return - next = resp_json.get("next", None) - if next is None: - return - payload["s"] = next.split("s=")[-1].split("&")[0] - - def answers(self, keywords: str) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo instant answers. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. 
- - Yields: - dict with instant answers results. - - """ - assert keywords, "keywords is mandatory" - - payload = { - "q": f"what is {keywords}", - "format": "json", - } - - resp = self._get_url("GET", "https://api.duckduckgo.com/", params=payload) - if resp is None: - return None - try: - page_data = resp.json() - except Exception: - page_data = None - - if page_data: - answer = page_data.get("AbstractText", None) - url = page_data.get("AbstractURL", None) - if answer: - yield { - "icon": None, - "text": answer, - "topic": None, - "url": url, - } - - # related: - payload = { - "q": f"{keywords}", - "format": "json", - } - resp = self._get_url("GET", "https://api.duckduckgo.com/", params=payload) - if resp is None: - return None - try: - page_data = resp.json().get("RelatedTopics", None) - except Exception: - page_data = None - - if page_data: - for row in page_data: - topic = row.get("Name", None) - if not topic: - icon = row["Icon"].get("URL", None) - yield { - "icon": f"https://duckduckgo.com{icon}" if icon else None, - "text": row["Text"], - "topic": None, - "url": row["FirstURL"], - } - else: - for subrow in row["Topics"]: - icon = subrow["Icon"].get("URL", None) - yield { - "icon": f"https://duckduckgo.com{icon}" if icon else None, - "text": subrow["Text"], - "topic": topic, - "url": subrow["FirstURL"], - } - - def suggestions(self, keywords: str, region: str = "wt-wt") -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo suggestions. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query. - region: wt-wt, us-en, uk-en, ru-ru, etc. Defaults to "wt-wt". - - Yields: - dict with suggestions results. 
- """ - assert keywords, "keywords is mandatory" - - payload = { - "q": keywords, - "kl": region, - } - resp = self._get_url("GET", "https://duckduckgo.com/ac", params=payload) - if resp is None: - return None - try: - page_data = resp.json() - yield from page_data - except Exception: - pass - - def maps( - self, - keywords: str, - place: Optional[str] = None, - street: Optional[str] = None, - city: Optional[str] = None, - county: Optional[str] = None, - state: Optional[str] = None, - country: Optional[str] = None, - postalcode: Optional[str] = None, - latitude: Optional[str] = None, - longitude: Optional[str] = None, - radius: int = 0, - max_results: Optional[int] = None, - ) -> Iterator[Dict[str, Optional[str]]]: - """DuckDuckGo maps search. Query params: https://duckduckgo.com/params. - - Args: - keywords: keywords for query - place: if set, the other parameters are not used. Defaults to None. - street: house number/street. Defaults to None. - city: city of search. Defaults to None. - county: county of search. Defaults to None. - state: state of search. Defaults to None. - country: country of search. Defaults to None. - postalcode: postalcode of search. Defaults to None. - latitude: geographic coordinate (north–south position). Defaults to None. - longitude: geographic coordinate (east–west position); if latitude and - longitude are set, the other parameters are not used. Defaults to None. - radius: expand the search square by the distance in kilometers. Defaults to 0. - max_results: max number of results. If None, returns results only from the first response. Defaults to None. 
- - Yields: - dict with maps search results - """ - assert keywords, "keywords is mandatory" - - vqd = self._get_vqd(keywords) - - # if longitude and latitude are specified, skip the request about bbox to the nominatim api - if latitude and longitude: - lat_t = Decimal(latitude.replace(",", ".")) - lat_b = Decimal(latitude.replace(",", ".")) - lon_l = Decimal(longitude.replace(",", ".")) - lon_r = Decimal(longitude.replace(",", ".")) - if radius == 0: - radius = 1 - # otherwise request about bbox to nominatim api - else: - if place: - params: Dict[str, Optional[str]] = { - "q": place, - "polygon_geojson": "0", - "format": "jsonv2", - } - else: - params = { - "street": street, - "city": city, - "county": county, - "state": state, - "country": country, - "postalcode": postalcode, - "polygon_geojson": "0", - "format": "jsonv2", - } + def _iter_over_async(self, ait): + """Iterate over an async generator.""" + ait = ait.__aiter__() + get_next = ait.__anext__ + while True: try: - resp = self._get_url( - "GET", - "https://nominatim.openstreetmap.org/search.php", - params=params, - ) - if resp is None: - return None - - coordinates = resp.json()[0]["boundingbox"] - lat_t, lon_l = Decimal(coordinates[1]), Decimal(coordinates[2]) - lat_b, lon_r = Decimal(coordinates[0]), Decimal(coordinates[3]) - except Exception as ex: - logger.debug(f"ddg_maps() keywords={keywords} {type(ex).__name__} {ex}") - return - - # if a radius is specified, expand the search square - lat_t += Decimal(radius) * Decimal(0.008983) - lat_b -= Decimal(radius) * Decimal(0.008983) - lon_l -= Decimal(radius) * Decimal(0.008983) - lon_r += Decimal(radius) * Decimal(0.008983) - logger.debug(f"bbox coordinates\n{lat_t} {lon_l}\n{lat_b} {lon_r}") - - # сreate a queue of search squares (bboxes) - work_bboxes: Deque[Tuple[Decimal, Decimal, Decimal, Decimal]] = deque() - work_bboxes.append((lat_t, lon_l, lat_b, lon_r)) - - # bbox iterate - cache = set() - while work_bboxes: - lat_t, lon_l, lat_b, lon_r = 
work_bboxes.pop() - params = { - "q": keywords, - "vqd": vqd, - "tg": "maps_places", - "rt": "D", - "mkexp": "b", - "wiki_info": "1", - "is_requery": "1", - "bbox_tl": f"{lat_t},{lon_l}", - "bbox_br": f"{lat_b},{lon_r}", - "strict_bbox": "1", - } - resp = self._get_url("GET", "https://duckduckgo.com/local.js", params=params) - if resp is None: - return - try: - page_data = resp.json().get("results", []) - except Exception: - return - if page_data is None: - return - - for res in page_data: - result = MapsResult() - result.title = res["name"] - result.address = res["address"] - if f"{result.title} {result.address}" in cache: - continue - else: - cache.add(f"{result.title} {result.address}") - result.country_code = res["country_code"] - result.url = _normalize_url(res["website"]) - result.phone = res["phone"] - result.latitude = res["coordinates"]["latitude"] - result.longitude = res["coordinates"]["longitude"] - result.source = _normalize_url(res["url"]) - if res["embed"]: - result.image = res["embed"].get("image", "") - result.links = res["embed"].get("third_party_links", "") - result.desc = res["embed"].get("description", "") - result.hours = res["hours"] - yield result.__dict__ - if max_results and len(cache) >= max_results: - return - if max_results is None: - return - # divide the square into 4 parts and add to the queue - if len(page_data) >= 15: - lat_middle = (lat_t + lat_b) / 2 - lon_middle = (lon_l + lon_r) / 2 - bbox1 = (lat_t, lon_l, lat_middle, lon_middle) - bbox2 = (lat_t, lon_middle, lat_middle, lon_r) - bbox3 = (lat_middle, lon_l, lat_b, lon_middle) - bbox4 = (lat_middle, lon_middle, lat_b, lon_r) - work_bboxes.extendleft([bbox1, bbox2, bbox3, bbox4]) - - def translate( - self, keywords: str, from_: Optional[str] = None, to: str = "en" - ) -> Optional[Dict[str, Optional[str]]]: - """DuckDuckGo translate. - - Args: - keywords: string or a list of strings to translate - from_: translate from (defaults automatically). Defaults to None. 
- to: what language to translate. Defaults to "en". - - Returns: - dict with translated keywords. - """ - assert keywords, "keywords is mandatory" - - vqd = self._get_vqd("translate") - - payload = { - "vqd": vqd, - "query": "translate", - "to": to, - } - if from_: - payload["from"] = from_ - - resp = self._get_url( - "POST", - "https://duckduckgo.com/translation.js", - params=payload, - data=keywords.encode(), - ) - if resp is None: - return None - try: - page_data = resp.json() - page_data["original"] = keywords - except Exception: - page_data = None - return page_data + obj = self._loop.run_until_complete(get_next()) + yield obj + except StopAsyncIteration: + break + + def text(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().text(*args, **kwargs) + return self._iter_over_async(async_gen) + + def images(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().images(*args, **kwargs) + return self._iter_over_async(async_gen) + + def videos(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().videos(*args, **kwargs) + return self._iter_over_async(async_gen) + + def news(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().news(*args, **kwargs) + return self._iter_over_async(async_gen) + + def answers(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().answers(*args, **kwargs) + return self._iter_over_async(async_gen) + + def suggestions(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().suggestions(*args, **kwargs) + return self._iter_over_async(async_gen) + + def maps(self, *args, **kwargs) -> Generator[Dict[str, Optional[str]], None, None]: + async_gen = super().maps(*args, **kwargs) + return self._iter_over_async(async_gen) + + def translate(self, *args, **kwargs) -> Optional[Dict[str, 
Optional[str]]]: + async_coro = super().translate(*args, **kwargs) + return self._loop.run_until_complete(async_coro) From 1bc1a9e22003c58d67ef68cdff4308ad124e9518 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:21:21 +0300 Subject: [PATCH 3/5] AsyncDDGS: inherit docstrings, replace type AsyncIterator with AsyncGenerator --- duckduckgo_search/duckduckgo_search_async.py | 25 ++++++++++---------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/duckduckgo_search/duckduckgo_search_async.py b/duckduckgo_search/duckduckgo_search_async.py index 26cf918..7412587 100644 --- a/duckduckgo_search/duckduckgo_search_async.py +++ b/duckduckgo_search/duckduckgo_search_async.py @@ -6,9 +6,10 @@ from datetime import datetime, timezone from decimal import Decimal from itertools import cycle -from typing import AsyncIterator, Deque, Dict, Optional, Set, Tuple +from typing import AsyncGenerator, Deque, Dict, Optional, Set, Tuple from curl_cffi import requests +from docstring_inheritance import GoogleDocstringInheritanceMeta from lxml import html from .exceptions import DuckDuckGoSearchException @@ -21,7 +22,7 @@ asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) -class AsyncDDGS: +class AsyncDDGS(metaclass=GoogleDocstringInheritanceMeta): """DuckDuckgo_search async class to get search results from duckduckgo.com.""" def __init__(self, headers=None, proxies=None, timeout=10) -> None: @@ -73,7 +74,7 @@ async def text( timelimit: Optional[str] = None, backend: str = "api", max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params.
Args: @@ -108,7 +109,7 @@ async def _text_api( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. Args: @@ -183,7 +184,7 @@ async def _text_html( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. Args: @@ -256,7 +257,7 @@ async def _text_lite( region: str = "wt-wt", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo text search generator. Query params: https://duckduckgo.com/params. Args: @@ -339,7 +340,7 @@ async def images( layout: Optional[str] = None, license_image: Optional[str] = None, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo images search. Query params: https://duckduckgo.com/params. Args: @@ -430,7 +431,7 @@ async def videos( duration: Optional[str] = None, license_videos: Optional[str] = None, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo videos search. Query params: https://duckduckgo.com/params. Args: @@ -501,7 +502,7 @@ async def news( safesearch: str = "moderate", timelimit: Optional[str] = None, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo news search. Query params: https://duckduckgo.com/params. 
Args: @@ -567,7 +568,7 @@ async def news( return payload["s"] = next.split("s=")[-1].split("&")[0] - async def answers(self, keywords: str) -> AsyncIterator[Dict[str, Optional[str]]]: + async def answers(self, keywords: str) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo instant answers. Query params: https://duckduckgo.com/params. Args: @@ -637,7 +638,7 @@ async def answers(self, keywords: str) -> AsyncIterator[Dict[str, Optional[str]] "url": subrow["FirstURL"], } - async def suggestions(self, keywords: str, region: str = "wt-wt") -> AsyncIterator[Dict[str, Optional[str]]]: + async def suggestions(self, keywords: str, region: str = "wt-wt") -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo suggestions. Query params: https://duckduckgo.com/params. Args: @@ -677,7 +678,7 @@ async def maps( longitude: Optional[str] = None, radius: int = 0, max_results: Optional[int] = None, - ) -> AsyncIterator[Dict[str, Optional[str]]]: + ) -> AsyncGenerator[Dict[str, Optional[str]], None]: """DuckDuckGo maps search. Query params: https://duckduckgo.com/params. 
Args: From 4f243024523ecb955a4eb9ab1b5bc172c1efc887 Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:29:13 +0300 Subject: [PATCH 4/5] add docstring_inheritance to requirements.txt --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index f681066..bb11caa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ click>=8.1.7 +curl_cffi>=0.5.10 +docstring_inheritance>=2.1.2 lxml>=4.9.3 -curl_cffi>=0.5.10 \ No newline at end of file From ba1f22dc9580bd01dad0c515487614891d69957f Mon Sep 17 00:00:00 2001 From: deedy5 <65482418+deedy5@users.noreply.github.com> Date: Mon, 15 Jan 2024 12:31:37 +0300 Subject: [PATCH 5/5] add docstring_inheritance to requirements --- pyproject.toml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e380467..396122b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,8 +29,9 @@ classifiers = [ ] dependencies = [ "click>=8.1.7", - "lxml>=4.9.3", - "curl_cffi>=0.5.10" + "docstring_inheritance>=2.1.2", + "curl_cffi>=0.5.10", + "lxml>=4.9.3" ] dynamic = ["version"]