V3.9.10 (#146)
1) text(backend="api"): extract the results JSON from the HTML response,
2) bugfix for text(backend="html"): update the Referer header,
3) removed tests for text(backend="lite"), which currently works only with proxies.
deedy5 authored Dec 7, 2023
1 parent 2deb5bb commit f55675a
Showing 7 changed files with 36 additions and 26 deletions.
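
A note on change (1): with "o": "json" commented out of the request payload, the d.js endpoint returns an HTML/JS document rather than bare JSON, so the old resp.json() path breaks; the results array now arrives embedded between the byte markers DDG.pageLayout.load('d', and );DDG.duckbar.load(. The new _text_extract_json helper in utils.py (diffed below) slices it back out. A self-contained sketch of that idea against a synthetic payload; the function name and sample body here are illustrative, only the two byte markers come from the actual helper:

    import json
    from typing import Optional


    def extract_embedded_json(html_bytes: bytes) -> Optional[list]:
        """Illustrative twin of _text_extract_json: slice the results
        JSON out of the d.js HTML/JS payload."""
        try:
            # 24 == len(b"DDG.pageLayout.load('d',"), i.e. skip past the marker
            start = html_bytes.index(b"DDG.pageLayout.load('d',") + 24
            # stop just before the next DDG.* call that follows the JSON array
            end = html_bytes.index(b");DDG.duckbar.load(", start)
            return json.loads(html_bytes[start:end])
        except ValueError:
            # marker missing (bytes.index) or JSON malformed -> None
            return None


    # Hypothetical miniature of a d.js response body:
    sample = b'DDG.pageLayout.load(\'d\',[{"u": "https://example.com", "t": "Example"}]);DDG.duckbar.load(\'images\');'
    print(extract_embedded_json(sample))  # [{'u': 'https://example.com', 't': 'Example'}]

Catching ValueError covers both failure modes at once, since json.JSONDecodeError subclasses ValueError.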
12 changes: 6 additions & 6 deletions duckduckgo_search/duckduckgo_search.py
@@ -1,3 +1,4 @@
+import json
 import logging
 from collections import deque
 from datetime import datetime, timezone
@@ -12,7 +13,7 @@
 
 from .exceptions import APIException, DuckDuckGoSearchException, HTTPException, RateLimitException, TimeoutException
 from .models import MapsResult
-from .utils import HEADERS, USERAGENTS, _extract_vqd, _is_500_in_url, _normalize, _normalize_url
+from .utils import HEADERS, USERAGENTS, _extract_vqd, _is_500_in_url, _normalize, _normalize_url, _text_extract_json
 
 logger = logging.getLogger(__name__)
 
@@ -140,7 +141,7 @@ def _text_api(
             "s": "0",
             "df": timelimit,
             "vqd": vqd,
-            "o": "json",
+            # "o": "json",
             "sp": "0",
         }
         safesearch = safesearch.lower()
@@ -157,10 +158,7 @@
         if resp is None:
             return
 
-        try:
-            page_data = resp.json().get("results", None)
-        except Exception:
-            return
+        page_data = _text_extract_json(resp.content)
         if page_data is None:
             return
 
@@ -207,13 +205,15 @@ def _text_html(
         """
         assert keywords, "keywords is mandatory"
 
+        self._client.headers["Referer"] = "https://html.duckduckgo.com/"
         safesearch_base = {"on": 1, "moderate": -1, "off": -2}
         payload = {
             "q": keywords,
             "s": "0",
             "kl": region,
             "p": safesearch_base[safesearch.lower()],
             "df": timelimit,
+            "b": "",
         }
         cache: Set[str] = set()
         for _ in range(11):
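
Change (2) is the added Referer line above; the async client gets the identical line in its diff below, and the new "b": "" payload field lands in the same hunk. A minimal standalone sketch of the resulting request shape, assuming an httpx-style client; the endpoint URL and header values here are assumptions drawn from that context, while the real code mutates self._client.headers inside _text_html and sends the request through its own retry wrapper:

    import httpx

    client = httpx.Client(headers={"User-Agent": "Mozilla/5.0"})
    # the fix: make the Referer match the form's own origin
    client.headers["Referer"] = "https://html.duckduckgo.com/"
    resp = client.post(
        "https://html.duckduckgo.com/html",
        data={"q": "python", "s": "0", "kl": "wt-wt", "p": -1, "df": "", "b": ""},
    )
    print(resp.status_code)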
12 changes: 5 additions & 7 deletions duckduckgo_search/duckduckgo_search_async.py
@@ -12,7 +12,7 @@
 
 from .exceptions import APIException, DuckDuckGoSearchException, HTTPException, RateLimitException, TimeoutException
 from .models import MapsResult
-from .utils import HEADERS, USERAGENTS, _extract_vqd, _is_500_in_url, _normalize, _normalize_url
+from .utils import HEADERS, USERAGENTS, _extract_vqd, _is_500_in_url, _normalize, _normalize_url, _text_extract_json
 
 logger = logging.getLogger(__name__)
 
@@ -141,7 +141,7 @@ async def _text_api(
             "s": "0",
             "df": timelimit,
             "vqd": vqd,
-            "o": "json",
+            # "o": "json",
             "sp": "0",
         }
         safesearch = safesearch.lower()
@@ -158,14 +158,11 @@
         if resp is None:
             return
 
-        try:
-            page_data = resp.json().get("results", None)
-        except Exception:
-            return
+        page_data = _text_extract_json(resp.content)
         if page_data is None:
             return
 
-        result_exists = False
+        result_exists, next_page_url = False, None
         for row in page_data:
             href = row.get("u", None)
             if href and href not in cache and href != f"http://www.google.com/search?q={keywords}":
@@ -208,6 +205,7 @@ async def _text_html(
         """
         assert keywords, "keywords is mandatory"
 
+        self._client.headers["Referer"] = "https://html.duckduckgo.com/"
         safesearch_base = {"on": 1, "moderate": -1, "off": -2}
         payload = {
             "q": keywords,
12 changes: 12 additions & 0 deletions duckduckgo_search/utils.py
@@ -1,3 +1,4 @@
+import json
 import re
 from html import unescape
 from typing import Optional
@@ -38,6 +39,17 @@ def _extract_vqd(html_bytes: bytes, keywords: str) -> Optional[str]:
     raise VQDExtractionException(f"Could not extract vqd. {keywords=}")
 
 
+def _text_extract_json(html_bytes: bytes) -> Optional[str]:
+    """text(backend="api") -> extract json from html"""
+    try:
+        start = html_bytes.index(b"DDG.pageLayout.load('d',") + 24
+        end = html_bytes.index(b");DDG.duckbar.load(", start)
+        data = html_bytes[start:end]
+        return json.loads(data)
+    except ValueError:
+        pass
+
+
 def _is_500_in_url(url: str) -> bool:
     """something like '506-00.js' inside the url"""
     return bool(REGEX_500_IN_URL.search(url))
2 changes: 1 addition & 1 deletion duckduckgo_search/version.py
@@ -1 +1 @@
-__version__ = "3.9.9"
+__version__ = "3.9.10"
2 changes: 1 addition & 1 deletion tests/test_cli.py
@@ -14,7 +14,7 @@
 @pytest.fixture(autouse=True)
 def slow_down_tests():
     yield
-    sleep(1)
+    sleep(2)
 
 
 def test_version_command():
10 changes: 5 additions & 5 deletions tests/test_duckduckgo_search.py
@@ -6,7 +6,7 @@
 @pytest.fixture(autouse=True)
 def slow_down_tests():
     yield
-    sleep(1)
+    sleep(2)
 
 
 def test_text():
@@ -27,10 +27,10 @@ def test_text_html():
     assert len(results) == 30
 
 
-def test_text_lite():
-    with DDGS() as ddgs:
-        results = [x for x in ddgs.text("dog", backend="lite", max_results=30)]
-        assert len(results) == 30
+# def test_text_lite():
+#     with DDGS() as ddgs:
+#         results = [x for x in ddgs.text("dog", backend="lite", max_results=30)]
+#         assert len(results) == 30
 
 
 def test_images():
12 changes: 6 additions & 6 deletions tests/test_duckduckgo_search_async.py
@@ -7,7 +7,7 @@
 @pytest.fixture(autouse=True)
 def slow_down_tests():
     yield
-    sleep(1)
+    sleep(2)
 
 
 @pytest.mark.asyncio
@@ -31,11 +31,11 @@ async def test_text_html():
     assert len(results) == 30
 
 
-@pytest.mark.asyncio
-async def test_text_lite():
-    async with AsyncDDGS() as ddgs:
-        results = [x async for x in ddgs.text("dog", backend="lite", max_results=30)]
-        assert len(results) == 30
+# @pytest.mark.asyncio
+# async def test_text_lite():
+#     async with AsyncDDGS() as ddgs:
+#         results = [x async for x in ddgs.text("dog", backend="lite", max_results=30)]
+#         assert len(results) == 30
 
 
 @pytest.mark.asyncio
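
As for change (3), the lite tests above are commented out rather than deleted: per the commit message, that backend currently answers only when routed through a proxy. A hedged sketch of how it might still be exercised, assuming the DDGS constructor accepts a proxies argument in this release; the proxy URL is a placeholder:

    from duckduckgo_search import DDGS

    # "socks5://localhost:9150" is a placeholder; any working HTTP/SOCKS proxy URL applies.
    with DDGS(proxies="socks5://localhost:9150", timeout=20) as ddgs:
        for r in ddgs.text("dog", backend="lite", max_results=10):
            print(r["href"], "-", r["title"])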