diff --git a/README.md b/README.md index 1e97a44..b0ecf74 100755 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ Search for words, documents, images, videos, news, maps and text translation usi * [Regions](#regions) * [DDGS and AsyncDDGS classes](#ddgs-and-asyncddgs-classes) * [Using proxy](#using-proxy) +* [Exceptions](#exceptions) * [1. text() - text search](#1-text---text-search-by-duckduckgocom) * [2. answers() - instant answers](#2-answers---instant-answers-by-duckduckgocom) * [3. images() - image search](#3-images---image-search-by-duckduckgocom) @@ -196,7 +197,7 @@ async def main(): if __name__ == "__main__": asyncio.run(main()) ``` -It is important to note that the DDGS and AsyncDDGS classes should always be used as a context manager (with statement). +It is important to note that the DDGS and AsyncDDGS classes should always be used as a context manager (with statement). This ensures proper resource management and cleanup, as the context manager will automatically handle opening and closing the HTTP client connection. [Go To TOP](#TOP) @@ -218,18 +219,18 @@ with DDGS(proxies="socks5://user:password@geo.iproyal.com:32325", timeout=20) as for r in ddgs.text("something you need", max_results=50): print(r) ``` -*3. Async* -```python3 -import asyncio -from duckduckgo_search import AsyncDDGS -async def get_results(): - async with AsyncDDGS(proxies="socks5://user:password@geo.iproyal.com:32325", timeout=20) as ddgs: - async for r in ddgs.text("cat", max_results=50): - print(r) +[Go To TOP](#TOP) -asyncio.run(get_results()) -``` +## Exceptions + +Exceptions: +- `APIException`: Raised when there is an issue with the API request. +- `DuckDuckGoSearchException`: Raised when there is a generic exception during the API request. +- `HTTPException`: Raised when there is an HTTP error during the API request. +- `RateLimitException`: Raised when the API rate limit is exceeded. +- `TimeoutException`: Raised when there is a timeout during the API request. +- `VQDExtractionException`: Raised when there is an error extracting the VQD value for a search query. [Go To TOP](#TOP) @@ -274,19 +275,6 @@ with DDGS() as ddgs: for r in ddgs.text('russia filetype:pdf', region='wt-wt', safesearch='off', timelimit='y', max_results=10): print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for result in ddgs.text("cat", max_results=50): - print(result) - -asyncio.run(get_results()) -``` - [Go To TOP](#TOP) @@ -312,18 +300,6 @@ with DDGS() as ddgs: for r in ddgs.answers("sun"): print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for r in ddgs.answers("sun"): - print(r) - -asyncio.run(get_results()) -``` [Go To TOP](#TOP) @@ -386,18 +362,6 @@ with DDGS() as ddgs: for r in ddgs_images_gen: print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for r in ddgs.images("butterfly", max_results=50): - print(r) - -asyncio.run(get_results()) -``` [Go To TOP](#TOP) @@ -449,19 +413,6 @@ with DDGS() as ddgs: for r in ddgs_videos_gen: print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for r in ddgs.videos("tesla", max_results=50): - print(r) - -asyncio.run(get_results()) -``` - [Go To TOP](#TOP) @@ -505,18 +456,6 @@ with DDGS() as ddgs: for r in ddgs_news_gen: print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for r in ddgs.news("holiday", max_results=15): - print(r) - -asyncio.run(get_results()) -``` [Go To TOP](#TOP) @@ -567,18 +506,6 @@ with DDGS() as ddgs: for r in ddgs.maps("school", place="Uganda", max_results=50): print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for r in ddgs.maps("school", place="Berlin", max_results=50): - print(r) - -asyncio.run(get_results()) -``` [Go To TOP](#TOP) @@ -611,22 +538,9 @@ with DDGS() as ddgs: r = ddgs.translate(keywords, to="de") print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - r = await ddgs.translate("school", to="de"): - print(r) - -asyncio.run(get_results()) -``` [Go To TOP](#TOP) - ## 8. suggestions() - suggestions by duckduckgo.com ```python @@ -652,17 +566,5 @@ with DDGS() as ddgs: for r in ddgs.suggestions("fly"): print(r) ``` -***Async*** -```python -import asyncio -from duckduckgo_search import AsyncDDGS - -async def get_results(): - async with AsyncDDGS() as ddgs: - async for r in ddgs.suggestions("fly"): - print(r) - -asyncio.run(get_results()) -``` [Go To TOP](#TOP) diff --git a/duckduckgo_search/duckduckgo_search.py b/duckduckgo_search/duckduckgo_search.py index 8fa2460..8954d66 100644 --- a/duckduckgo_search/duckduckgo_search.py +++ b/duckduckgo_search/duckduckgo_search.py @@ -43,14 +43,16 @@ def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._models.Re try: resp = self._client.request(method, url, follow_redirects=True, **kwargs) if _is_500_in_url(str(resp.url)) or resp.status_code == 403: - raise APIException(f"_get_url() {url} 500 in url") + raise APIException(f"_get_url() {url}") if resp.status_code == 202: - raise RateLimitException(f"_get_url() {url} RateLimitError: resp.status_code==202") + raise RateLimitException(f"_get_url() {url}") if resp.status_code == 200: return resp resp.raise_for_status() except httpx.TimeoutException as ex: raise TimeoutException(f"_get_url() {url} TimeoutException: {ex}") + except (APIException, RateLimitException): + raise except httpx.HTTPError as ex: raise HTTPException(f"_get_url() {url} HttpError: {ex}") except Exception as ex: @@ -227,7 +229,12 @@ def _text_html( for e in tree.xpath('//div[contains(@class, "results_links")]'): href = e.xpath('.//a[contains(@class, "result__a")]/@href') href = href[0] if href else None - if href and href not in cache and href != f"http://www.google.com/search?q={keywords}": + if ( + href + and href not in cache + and href != f"http://www.google.com/search?q={keywords}" + and not href.startswith("https://duckduckgo.com/y.js?ad_domain") + ): cache.add(href) title = e.xpath('.//a[contains(@class, "result__a")]/text()') body = e.xpath('.//a[contains(@class, "result__snippet")]//text()') @@ -296,7 +303,12 @@ def _text_lite( if i == 1: href = e.xpath(".//a//@href") href = href[0] if href else None - if href is None or href in cache or href == f"http://www.google.com/search?q={keywords}": + if ( + href is None + or href in cache + or href == f"http://www.google.com/search?q={keywords}" + or href.startswith("https://duckduckgo.com/y.js?ad_domain") + ): [next(data, None) for _ in range(3)] # skip block(i=1,2,3,4) else: cache.add(href) diff --git a/duckduckgo_search/duckduckgo_search_async.py b/duckduckgo_search/duckduckgo_search_async.py index c4f17b8..26d65df 100644 --- a/duckduckgo_search/duckduckgo_search_async.py +++ b/duckduckgo_search/duckduckgo_search_async.py @@ -43,14 +43,16 @@ async def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._mod try: resp = await self._client.request(method, url, follow_redirects=True, **kwargs) if _is_500_in_url(str(resp.url)) or resp.status_code == 403: - raise APIException(f"_get_url() {url} 500 in url") + raise APIException(f"_get_url() {url}") if resp.status_code == 202: - raise RateLimitException(f"_get_url() {url} RateLimitError: resp.status_code==202") + raise RateLimitException(f"_get_url() {url}") if resp.status_code == 200: return resp resp.raise_for_status() except httpx.TimeoutException as ex: raise TimeoutException(f"_get_url() {url} TimeoutException: {ex}") + except (APIException, RateLimitException): + raise except httpx.HTTPError as ex: raise HTTPException(f"_get_url() {url} HttpError: {ex}") except Exception as ex: @@ -228,7 +230,12 @@ async def _text_html( for e in tree.xpath('//div[contains(@class, "results_links")]'): href = e.xpath('.//a[contains(@class, "result__a")]/@href') href = href[0] if href else None - if href and href not in cache and href != f"http://www.google.com/search?q={keywords}": + if ( + href + and href not in cache + and href != f"http://www.google.com/search?q={keywords}" + and not href.startswith("https://duckduckgo.com/y.js?ad_domain") + ): cache.add(href) title = e.xpath('.//a[contains(@class, "result__a")]/text()') body = e.xpath('.//a[contains(@class, "result__snippet")]//text()') @@ -297,7 +304,12 @@ async def _text_lite( if i == 1: href = e.xpath(".//a//@href") href = href[0] if href else None - if href is None or href in cache or href == f"http://www.google.com/search?q={keywords}": + if ( + href is None + or href in cache + or href == f"http://www.google.com/search?q={keywords}" + or href.startswith("https://duckduckgo.com/y.js?ad_domain") + ): [next(data, None) for _ in range(3)] # skip block(i=1,2,3,4) else: cache.add(href) diff --git a/duckduckgo_search/version.py b/duckduckgo_search/version.py index 4ad52ad..ae7c2ba 100755 --- a/duckduckgo_search/version.py +++ b/duckduckgo_search/version.py @@ -1 +1 @@ -__version__ = "3.9.8" +__version__ = "3.9.9"