Skip to content

Commit

Permalink
v3.9.9 (#140)
Browse files Browse the repository at this point in the history
1) text(backend="html"/"lite"): removed ad results,
2) README: added exceptions, removed unnecessary async examples.
  • Loading branch information
deedy5 authored Nov 28, 2023
1 parent 8349f9d commit 2deb5bb
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 119 deletions.
122 changes: 12 additions & 110 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ Search for words, documents, images, videos, news, maps and text translation usi
* [Regions](#regions)
* [DDGS and AsyncDDGS classes](#ddgs-and-asyncddgs-classes)
* [Using proxy](#using-proxy)
* [Exceptions](#exceptions)
* [1. text() - text search](#1-text---text-search-by-duckduckgocom)
* [2. answers() - instant answers](#2-answers---instant-answers-by-duckduckgocom)
* [3. images() - image search](#3-images---image-search-by-duckduckgocom)
Expand Down Expand Up @@ -196,7 +197,7 @@ async def main():
if __name__ == "__main__":
asyncio.run(main())
```
It is important to note that the DDGS and AsyncDDGS classes should always be used as a context manager (with statement).
It is important to note that the DDGS and AsyncDDGS classes should always be used as context managers (in a `with` statement).
This ensures proper resource management and cleanup, as the context manager will automatically handle opening and closing the HTTP client connection.

[Go To TOP](#TOP)
Expand All @@ -218,18 +219,18 @@ with DDGS(proxies="socks5://user:password@geo.iproyal.com:32325", timeout=20) as
for r in ddgs.text("something you need", max_results=50):
print(r)
```
*3. Async*
```python3
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS(proxies="socks5://user:password@geo.iproyal.com:32325", timeout=20) as ddgs:
async for r in ddgs.text("cat", max_results=50):
print(r)
[Go To TOP](#TOP)

asyncio.run(get_results())
```
## Exceptions

Exceptions:
- `APIException`: Raised when there is an issue with the API request.
- `DuckDuckGoSearchException`: Raised when there is a generic exception during the API request.
- `HTTPException`: Raised when there is an HTTP error during the API request.
- `RateLimitException`: Raised when the API rate limit is exceeded.
- `TimeoutException`: Raised when there is a timeout during the API request.
- `VQDExtractionException`: Raised when there is an error extracting the VQD value for a search query.

[Go To TOP](#TOP)

Expand Down Expand Up @@ -274,19 +275,6 @@ with DDGS() as ddgs:
for r in ddgs.text('russia filetype:pdf', region='wt-wt', safesearch='off', timelimit='y', max_results=10):
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for result in ddgs.text("cat", max_results=50):
print(result)

asyncio.run(get_results())
```


[Go To TOP](#TOP)

Expand All @@ -312,18 +300,6 @@ with DDGS() as ddgs:
for r in ddgs.answers("sun"):
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for r in ddgs.answers("sun"):
print(r)

asyncio.run(get_results())
```

[Go To TOP](#TOP)

Expand Down Expand Up @@ -386,18 +362,6 @@ with DDGS() as ddgs:
for r in ddgs_images_gen:
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for r in ddgs.images("butterfly", max_results=50):
print(r)

asyncio.run(get_results())
```

[Go To TOP](#TOP)

Expand Down Expand Up @@ -449,19 +413,6 @@ with DDGS() as ddgs:
for r in ddgs_videos_gen:
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for r in ddgs.videos("tesla", max_results=50):
print(r)

asyncio.run(get_results())
```


[Go To TOP](#TOP)

Expand Down Expand Up @@ -505,18 +456,6 @@ with DDGS() as ddgs:
for r in ddgs_news_gen:
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for r in ddgs.news("holiday", max_results=15):
print(r)

asyncio.run(get_results())
```

[Go To TOP](#TOP)

Expand Down Expand Up @@ -567,18 +506,6 @@ with DDGS() as ddgs:
for r in ddgs.maps("school", place="Uganda", max_results=50):
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for r in ddgs.maps("school", place="Berlin", max_results=50):
print(r)

asyncio.run(get_results())
```

[Go To TOP](#TOP)

Expand Down Expand Up @@ -611,22 +538,9 @@ with DDGS() as ddgs:
r = ddgs.translate(keywords, to="de")
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
r = await ddgs.translate("school", to="de")
print(r)

asyncio.run(get_results())
```

[Go To TOP](#TOP)


## 8. suggestions() - suggestions by duckduckgo.com

```python
Expand All @@ -652,17 +566,5 @@ with DDGS() as ddgs:
for r in ddgs.suggestions("fly"):
print(r)
```
***Async***
```python
import asyncio
from duckduckgo_search import AsyncDDGS

async def get_results():
async with AsyncDDGS() as ddgs:
async for r in ddgs.suggestions("fly"):
print(r)

asyncio.run(get_results())
```

[Go To TOP](#TOP)
20 changes: 16 additions & 4 deletions duckduckgo_search/duckduckgo_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,16 @@ def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._models.Re
try:
resp = self._client.request(method, url, follow_redirects=True, **kwargs)
if _is_500_in_url(str(resp.url)) or resp.status_code == 403:
raise APIException(f"_get_url() {url} 500 in url")
raise APIException(f"_get_url() {url}")
if resp.status_code == 202:
raise RateLimitException(f"_get_url() {url} RateLimitError: resp.status_code==202")
raise RateLimitException(f"_get_url() {url}")
if resp.status_code == 200:
return resp
resp.raise_for_status()
except httpx.TimeoutException as ex:
raise TimeoutException(f"_get_url() {url} TimeoutException: {ex}")
except (APIException, RateLimitException):
raise
except httpx.HTTPError as ex:
raise HTTPException(f"_get_url() {url} HttpError: {ex}")
except Exception as ex:
Expand Down Expand Up @@ -227,7 +229,12 @@ def _text_html(
for e in tree.xpath('//div[contains(@class, "results_links")]'):
href = e.xpath('.//a[contains(@class, "result__a")]/@href')
href = href[0] if href else None
if href and href not in cache and href != f"http://www.google.com/search?q={keywords}":
if (
href
and href not in cache
and href != f"http://www.google.com/search?q={keywords}"
and not href.startswith("https://duckduckgo.com/y.js?ad_domain")
):
cache.add(href)
title = e.xpath('.//a[contains(@class, "result__a")]/text()')
body = e.xpath('.//a[contains(@class, "result__snippet")]//text()')
Expand Down Expand Up @@ -296,7 +303,12 @@ def _text_lite(
if i == 1:
href = e.xpath(".//a//@href")
href = href[0] if href else None
if href is None or href in cache or href == f"http://www.google.com/search?q={keywords}":
if (
href is None
or href in cache
or href == f"http://www.google.com/search?q={keywords}"
or href.startswith("https://duckduckgo.com/y.js?ad_domain")
):
[next(data, None) for _ in range(3)] # skip block(i=1,2,3,4)
else:
cache.add(href)
Expand Down
20 changes: 16 additions & 4 deletions duckduckgo_search/duckduckgo_search_async.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,16 @@ async def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._mod
try:
resp = await self._client.request(method, url, follow_redirects=True, **kwargs)
if _is_500_in_url(str(resp.url)) or resp.status_code == 403:
raise APIException(f"_get_url() {url} 500 in url")
raise APIException(f"_get_url() {url}")
if resp.status_code == 202:
raise RateLimitException(f"_get_url() {url} RateLimitError: resp.status_code==202")
raise RateLimitException(f"_get_url() {url}")
if resp.status_code == 200:
return resp
resp.raise_for_status()
except httpx.TimeoutException as ex:
raise TimeoutException(f"_get_url() {url} TimeoutException: {ex}")
except (APIException, RateLimitException):
raise
except httpx.HTTPError as ex:
raise HTTPException(f"_get_url() {url} HttpError: {ex}")
except Exception as ex:
Expand Down Expand Up @@ -228,7 +230,12 @@ async def _text_html(
for e in tree.xpath('//div[contains(@class, "results_links")]'):
href = e.xpath('.//a[contains(@class, "result__a")]/@href')
href = href[0] if href else None
if href and href not in cache and href != f"http://www.google.com/search?q={keywords}":
if (
href
and href not in cache
and href != f"http://www.google.com/search?q={keywords}"
and not href.startswith("https://duckduckgo.com/y.js?ad_domain")
):
cache.add(href)
title = e.xpath('.//a[contains(@class, "result__a")]/text()')
body = e.xpath('.//a[contains(@class, "result__snippet")]//text()')
Expand Down Expand Up @@ -297,7 +304,12 @@ async def _text_lite(
if i == 1:
href = e.xpath(".//a//@href")
href = href[0] if href else None
if href is None or href in cache or href == f"http://www.google.com/search?q={keywords}":
if (
href is None
or href in cache
or href == f"http://www.google.com/search?q={keywords}"
or href.startswith("https://duckduckgo.com/y.js?ad_domain")
):
[next(data, None) for _ in range(3)] # skip block(i=1,2,3,4)
else:
cache.add(href)
Expand Down
2 changes: 1 addition & 1 deletion duckduckgo_search/version.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "3.9.8"
__version__ = "3.9.9"

0 comments on commit 2deb5bb

Please sign in to comment.