diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index e628b71c..81e4dfde 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -13,7 +13,7 @@ jobs: strategy: matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12" ] + python-version: ["3.9", "3.10", "3.11", "3.12", "3.13" ] env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 947122cf..9a3bf37c 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -16,7 +16,7 @@ jobs: - uses: "actions/checkout@v4" - uses: "actions/setup-python@v5" with: - python-version: 3.8 + python-version: 3.13 - name: "Install dependencies" run: "pip install -r requirements.txt" - name: "Build" diff --git a/CHANGELOG.md b/CHANGELOG.md index f289437e..79c75068 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,27 @@ # Changelog -## development +## 0.1.1 (2nd Nov, 2024) + +- Fix typing extensions not found. (#290) + +## 0.1.0 (2nd Nov, 2024) + +- Add support for Python 3.12 / drop Python 3.8. (#286) +- Specify usedforsecurity=False in blake2b. (#285) + +## 0.0.33 (4th Oct, 2024) + +- Added a [Logging](https://hishel.com/advanced/logging/) section to the documentation. + +## 0.0.32 (27th Sep, 2024) + +- Don't raise an exception if the `Date` header is not present. (#273) + +## 0.0.31 (22nd Sep, 2024) - Ignore file not found error when cleaning up a file storage. (#264) +- Fix `AssertionError` on `client.close()` when using SQLiteStorage. (#269) +- Fix ignored flags when using `force_cache`. (#271) ## 0.0.30 (12th July, 2024) diff --git a/README.md b/README.md index bfdbd96c..dcae9bc5 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,15 @@ -

- HTTPX +

+ + + + Logo + +

+

Hishel - An elegant HTTP Cache implementation for httpx and httpcore.

diff --git a/docs/advanced/logging.md b/docs/advanced/logging.md new file mode 100644 index 00000000..b6aa646b --- /dev/null +++ b/docs/advanced/logging.md @@ -0,0 +1,73 @@ +--- +icon: material/file-document-edit +--- + +[Logging](https://en.wikipedia.org/wiki/Logging_(computing)) is an important part of every application that helps developers better understand how the program operates. Hishel supports a variety of logs that can show you how the library impacts your program. + +Hishel will support several loggers for different parts of the program. Currently, we support only one logger called `hishel.controller`, which logs any event related to the cache. For example, it logs when a response is considered stale, when revalidation occurs, when a response is used from the cache, and more. + +## Controller logs + +The [controller](./controllers.md) is a part of the Hishel library that interprets the caching specification. It determines whether a response can be cached or retrieved from the cache. + +You can configure the controller logger for debugging purposes or to better understand how caching works. It can also be crucial when you're just starting out and want to understand why a particular response isn't being cached. + +For example, let's enable logging and see what gets logged when making an HTTP request to the Hishel documentation. + +```python +import logging +import hishel + +logging.basicConfig( + level=logging.WARNING, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", +) +logging.getLogger("hishel.controller").setLevel(logging.DEBUG) + +client = hishel.CacheClient() + +response = client.get( + "https://hishel.com", +) +``` + +Here is what Hishel will log for this program: + +``` +2024-09-30 16:32:34,799 - hishel.controller - DEBUG - Considering the resource located at https://hishel.com/ as cachable since it meets the criteria for being stored in the cache. 
+``` + +If you run this program a second time, you will receive the response from the cache because hishel.com sends all the necessary caching headers. So, for the second run, you will see a log entry about the successfully reused response. + +``` +2024-09-30 16:35:14,102 - hishel.controller - DEBUG - Considering the resource located at https://hishel.com/ as valid for cache use since it is fresh. +``` + +If we wait some time, the cached response will, of course, become stale. After some time, you can run this program again and see that the response needs to be revalidated from the server to obtain the most recent data. The logs could look like this: + +``` +2024-09-30 16:39:42,502 - hishel.controller - DEBUG - Considering the resource located at https://hishel.com/ as needing revalidation since it is not fresh. +2024-09-30 16:39:42,502 - hishel.controller - DEBUG - Adding the 'If-Modified-Since' header with the value of 'Fri, 27 Sep 2024 07:42:28 GMT' to the request for the resource located at https://hishel.com/. +``` + +The controller will indicate not only that the response was cached but also why it was considered cacheable. + +Examples: + +- For permanent redirects +``` +2024-09-30 16:43:04,672 - hishel.controller - DEBUG - Considering the resource located at https://www.github.com/ as cachable since its status code is a permanent redirect. +``` + +- When [force_cache](./extensions.md#force_cache) is enabled +``` +2024-09-30 16:45:10,468 - hishel.controller - DEBUG - Considering the resource located at https://www.google.com/ as valid for cache use since the request is forced to use the cache. +``` + +Or when the response is considered not cacheable: + +``` +2024-09-30 17:02:24,961 - hishel.controller - DEBUG - Considering the resource located at https://www.python.org/ as not cachable since it does not contain any of the required cache directives. +``` + +[Here](https://github.com/karpetrosyan/hishel/pull/275) you can find a full list of the controller logs. 
Note that this is the initial list of logs; logs added later are not reflected in it. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 099fd69c..52207b16 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,5 +1,8 @@ -

- HTTPX +

+ HTTPX + HTTPX +

diff --git a/docs/static/Shelkopryad_350x250_black.png b/docs/static/Shelkopryad_350x250_black.png new file mode 100644 index 00000000..9c5cc436 Binary files /dev/null and b/docs/static/Shelkopryad_350x250_black.png differ diff --git a/docs/static/Shelkopryad_350x250_yellow.png b/docs/static/Shelkopryad_350x250_yellow.png new file mode 100644 index 00000000..43feb6af Binary files /dev/null and b/docs/static/Shelkopryad_350x250_yellow.png differ diff --git a/hishel/__init__.py b/hishel/__init__.py index bd335812..137ced8f 100644 --- a/hishel/__init__.py +++ b/hishel/__init__.py @@ -14,4 +14,4 @@ def install_cache() -> None: # pragma: no cover httpx.Client = CacheClient # type: ignore -__version__ = "0.0.30" +__version__ = "0.1.1" diff --git a/hishel/_async/_storages.py b/hishel/_async/_storages.py index 5a18316c..03dc2574 100644 --- a/hishel/_async/_storages.py +++ b/hishel/_async/_storages.py @@ -23,7 +23,9 @@ anysqlite = None # type: ignore from httpcore import Request, Response -from typing_extensions import TypeAlias + +if t.TYPE_CHECKING: # pragma: no cover + from typing_extensions import TypeAlias from hishel._serializers import BaseSerializer, clone_model @@ -377,8 +379,8 @@ async def retrieve(self, key: str) -> tp.Optional[StoredResponse]: return self._serializer.loads(cached_response) async def aclose(self) -> None: # pragma: no cover - assert self._connection - await self._connection.close() + if self._connection is not None: + await self._connection.close() async def _remove_expired_caches(self) -> None: assert self._connection diff --git a/hishel/_controller.py b/hishel/_controller.py index c995e58d..b227b5b8 100644 --- a/hishel/_controller.py +++ b/hishel/_controller.py @@ -1,3 +1,4 @@ +import logging import typing as tp from httpcore import Request, Response @@ -10,10 +11,13 @@ extract_header_values, extract_header_values_decoded, generate_key, + get_safe_url, header_presents, parse_date, ) +logger = logging.getLogger("hishel.controller") + 
HEURISTICALLY_CACHEABLE_STATUS_CODES = (200, 203, 204, 206, 300, 301, 308, 404, 405, 410, 414, 501) HTTP_METHODS = ["GET", "HEAD", "POST", "PUT", "DELETE", "CONNECT", "OPTIONS", "TRACE", "PATCH"] @@ -79,8 +83,10 @@ def get_heuristic_freshness(response: Response, clock: "BaseClock") -> int: def get_age(response: Response, clock: "BaseClock") -> int: - if not header_presents(response.headers, b"date"): # pragma: no cover - raise RuntimeError("The `Date` header is missing in the response.") + if not header_presents(response.headers, b"date"): + # If the response does not have a date header, then it is impossible to calculate the age. + # Instead of raising an exception, we return infinity to be sure that the response is not considered fresh. + return float("inf") # type: ignore date = parse_date(extract_header_values_decoded(response.headers, b"date")[0]) @@ -149,39 +155,96 @@ def is_cachable(self, request: Request, response: Response) -> bool: method = request.method.decode("ascii") force_cache = request.extensions.get("force_cache", None) - if force_cache if force_cache is not None else self._force_cache: - return True - if response.status not in self._cacheable_status_codes: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + f"as not cachable since its status code ({response.status})" + " is not in the list of cacheable status codes." + ) + ) return False if response.status in (301, 308): + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as cachable since its status code is a permanent redirect." + ) + ) return True # the request method is understood by the cache if method not in self._cacheable_methods: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + f"as not cachable since the request method ({method}) is not in the list of cacheable methods." 
+ ) + ) return False + if force_cache if force_cache is not None else self._force_cache: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as cachable since the request is forced to use the cache." + ) + ) + return True + response_cache_control = parse_cache_control(extract_header_values_decoded(response.headers, b"cache-control")) request_cache_control = parse_cache_control(extract_header_values_decoded(request.headers, b"cache-control")) # the response status code is final if response.status // 100 == 1: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as not cachable since its status code is informational." + ) + ) return False # the no-store cache directive is not present (see Section 5.2.2.5) if request_cache_control.no_store: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as not cachable since the request contains the no-store directive." + ) + ) return False # note that the must-understand cache directive overrides # no-store in certain circumstances; see Section 5.2.2.3. - if response_cache_control.no_store and not response_cache_control.must_understand: - return False + if response_cache_control.no_store: + if response_cache_control.must_understand: + logger.debug( + ( + f"Skipping the no-store directive for the resource located at {get_safe_url(request.url)} " + "since the response contains the must-understand directive." + ) + ) + else: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as not cachable since the response contains the no-store directive." + ) + ) + return False # a shared cache must not store a response with private directive # Note that we do not implement special handling for the qualified form, # which would only forbid storing specified headers. 
if not self._cache_private and response_cache_control.private: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as not cachable since the response contains the private directive." + ) + ) return False expires_presents = header_presents(response.headers, b"expires") @@ -194,6 +257,12 @@ def is_cachable(self, request: Request, response: Response) -> bool: # - a cache extension that allows it to be cached (see Section 5.2.3); or # - a status code that is defined as heuristically cacheable (see Section 4.2.2). if self._allow_heuristics and response.status in HEURISTICALLY_CACHEABLE_STATUS_CODES: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as cachable since its status code is heuristically cacheable." + ) + ) return True if not any( @@ -204,8 +273,20 @@ def is_cachable(self, request: Request, response: Response) -> bool: response_cache_control.max_age is not None, ] ): + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as not cachable since it does not contain any of the required cache directives." + ) + ) return False + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as cachable since it meets the criteria for being stored in the cache." + ) + ) # response is a cachable! return True @@ -221,11 +302,23 @@ def _make_request_conditional(self, request: Request, response: Response) -> Non if header_presents(response.headers, b"last-modified"): last_modified = extract_header_values(response.headers, b"last-modified", single=True)[0] + logger.debug( + ( + f"Adding the 'If-Modified-Since' header with the value of '{last_modified.decode('ascii')}' " + f"to the request for the resource located at {get_safe_url(request.url)}." 
+ ) + ) else: last_modified = None if header_presents(response.headers, b"etag"): etag = extract_header_values(response.headers, b"etag", single=True)[0] + logger.debug( + ( + f"Adding the 'If-None-Match' header with the value of '{etag.decode('ascii')}' " + f"to the request for the resource located at {get_safe_url(request.url)}." + ) + ) else: etag = None @@ -278,6 +371,12 @@ def construct_response_from_cache( # Use of responses with status codes 301 and 308 is always # legal as long as they don't adhere to any caching rules. if response.status in (301, 308): + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as valid for cache use since its status code is a permanent redirect." + ) + ) return response response_cache_control = parse_cache_control(extract_header_values_decoded(response.headers, b"Cache-Control")) @@ -287,11 +386,23 @@ def construct_response_from_cache( # response (if any) match those presented (see Section 4.1) if not self._validate_vary(request=request, response=response, original_request=original_request): # If the vary headers does not match, then do not use the response + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as invalid for cache use since the vary headers do not match." + ) + ) return None # pragma: no cover # !!! this should be after the "vary" header validation. force_cache = request.extensions.get("force_cache", None) if force_cache if force_cache is not None else self._force_cache: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as valid for cache use since the request is forced to use the cache." 
+ ) + ) return response # the stored response does not contain the @@ -303,15 +414,58 @@ def construct_response_from_cache( or response_cache_control.must_revalidate or request_cache_control.no_cache ): + if self._always_revalidate: + log_text = ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as needing revalidation since the cache is set to always revalidate." + ) + elif response_cache_control.no_cache: + log_text = ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as needing revalidation since the response contains the no-cache directive." + ) + elif response_cache_control.must_revalidate: + log_text = ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as needing revalidation since the response contains the must-revalidate directive." + ) + elif request_cache_control.no_cache: + log_text = ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as needing revalidation since the request contains the no-cache directive." + ) + else: + assert False, "Unreachable code " # pragma: no cover + logger.debug(log_text) self._make_request_conditional(request=request, response=response) return request freshness_lifetime = get_freshness_lifetime(response) if freshness_lifetime is None: + logger.debug( + ( + "Could not determine the freshness lifetime of " + f"the resource located at {get_safe_url(request.url)}, " + "trying to use heuristics to calculate it." + ) + ) if self._allow_heuristics and response.status in HEURISTICALLY_CACHEABLE_STATUS_CODES: freshness_lifetime = get_heuristic_freshness(response=response, clock=self._clock) + logger.debug( + ( + f"Successfully calculated the freshness lifetime of the resource located at " + f"{get_safe_url(request.url)} using heuristics." + ) + ) else: + logger.debug( + ( + "Could not calculate the freshness lifetime of " + f"the resource located at {get_safe_url(request.url)}. " + "Making a conditional request to revalidate the response." 
+ ) + ) # If Freshness cannot be calculated, then send the request self._make_request_conditional(request=request, response=response) return request @@ -326,6 +480,13 @@ def construct_response_from_cache( # be fresh for at least the specified number of seconds. if request_cache_control.min_fresh is not None: if freshness_lifetime < (age + request_cache_control.min_fresh): + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as invalid for cache use since the time left for " + "freshness is less than the min-fresh directive." + ) + ) return None # The max-stale request directive indicates that the @@ -338,7 +499,21 @@ def construct_response_from_cache( exceeded_freshness_lifetime = age - freshness_lifetime if request_cache_control.max_stale < exceeded_freshness_lifetime: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as invalid for cache use since the freshness lifetime has been exceeded more than max-stale." + ) + ) return None + else: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as valid for cache use since the freshness lifetime has been exceeded less than max-stale." + ) + ) + return response # The max-age request directive indicates that # the client prefers a response whose age is @@ -347,9 +522,12 @@ def construct_response_from_cache( # the client does not wish to receive a stale response. if request_cache_control.max_age is not None: if request_cache_control.max_age < age: - return None - - if request_cache_control.max_stale is None and not is_fresh: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as invalid for cache use since the age of the response exceeds the max-age directive." 
+ ) + ) return None # the stored response is one of the following: @@ -357,8 +535,20 @@ def construct_response_from_cache( # allowed to be served stale (see Section 4.2.4), or # successfully validated (see Section 4.3). if is_fresh: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as valid for cache use since it is fresh." + ) + ) return response else: + logger.debug( + ( + f"Considering the resource located at {get_safe_url(request.url)} " + "as needing revalidation since it is not fresh." + ) + ) # Otherwise, make a conditional request self._make_request_conditional(request=request, response=response) return request diff --git a/hishel/_sync/_storages.py b/hishel/_sync/_storages.py index 6f5ee4db..2a67a4b1 100644 --- a/hishel/_sync/_storages.py +++ b/hishel/_sync/_storages.py @@ -23,7 +23,9 @@ sqlite3 = None # type: ignore from httpcore import Request, Response -from typing_extensions import TypeAlias + +if t.TYPE_CHECKING: # pragma: no cover + from typing_extensions import TypeAlias from hishel._serializers import BaseSerializer, clone_model @@ -377,8 +379,8 @@ def retrieve(self, key: str) -> tp.Optional[StoredResponse]: return self._serializer.loads(cached_response) def close(self) -> None: # pragma: no cover - assert self._connection - self._connection.close() + if self._connection is not None: + self._connection.close() def _remove_expired_caches(self) -> None: assert self._connection diff --git a/hishel/_utils.py b/hishel/_utils.py index 2c3d4ea3..91dd1e59 100644 --- a/hishel/_utils.py +++ b/hishel/_utils.py @@ -6,6 +6,7 @@ import anyio import httpcore +import httpx HEADERS_ENCODING = "iso-8859-1" @@ -33,12 +34,22 @@ def normalized_url(url: tp.Union[httpcore.URL, str, bytes]) -> str: assert False, "Invalid type for `normalized_url`" # pragma: no cover +def get_safe_url(url: httpcore.URL) -> str: + httpx_url = httpx.URL(bytes(url).decode("ascii")) + + schema = httpx_url.scheme + host = httpx_url.host + path = 
httpx_url.path + + return f"{schema}://{host}{path}" + + def generate_key(request: httpcore.Request, body: bytes = b"") -> str: encoded_url = normalized_url(request.url).encode("ascii") key_parts = [request.method, encoded_url, body] - key = blake2b(digest_size=16) + key = blake2b(digest_size=16, usedforsecurity=False) for part in key_parts: key.update(part) return key.hexdigest() diff --git a/mkdocs.yml b/mkdocs.yml index f8b4fbaa..28e8dcfe 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -3,6 +3,7 @@ repo_url: https://github.com/karpetrosyan/hishel theme: name: material + custom_dir: overrides features: - content.code.copy - toc.integrate @@ -50,6 +51,7 @@ nav: - Controllers: advanced/controllers.md - HTTP Headers: advanced/http_headers.md - Extensions: advanced/extensions.md + - Logging: advanced/logging.md - Examples: - GitHub: examples/github.md - FastAPI: examples/fastapi.md diff --git a/overrides/partials/footer.html b/overrides/partials/footer.html new file mode 100644 index 00000000..ef9c52a1 --- /dev/null +++ b/overrides/partials/footer.html @@ -0,0 +1,6 @@ + + \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a109cef5..4c95130b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,7 +7,7 @@ name = "hishel" dynamic = ["readme", "version"] description = "Persistent cache implementation for httpx and httpcore" license = "BSD-3-Clause" -requires-python = ">=3.8" +requires-python = ">=3.9" authors = [ { name = "Kar Petrosyan", email = "kar.petrosyanpy@gmail.com" }, ] @@ -21,16 +21,15 @@ classifiers = [ "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Internet :: WWW/HTTP", ] dependencies = [ 
"httpx>=0.22.0", - "typing_extensions>=4.8.0" ] [project.optional-dependencies] diff --git a/requirements.txt b/requirements.txt index b3cf45b1..b49b0917 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,7 +5,7 @@ ruff==0.6.3 mypy==1.10.0 # docs -mkdocs==1.5.3 +mkdocs==1.6.1 mkdocs-material==9.5.1 # tests diff --git a/tests/_async/test_client.py b/tests/_async/test_client.py index 71435092..f77f91c3 100644 --- a/tests/_async/test_client.py +++ b/tests/_async/test_client.py @@ -195,7 +195,11 @@ async def test_force_cache(): ] ) - async with hishel.AsyncCacheClient(storage=hishel.AsyncInMemoryStorage(), transport=transport) as client: + async with hishel.AsyncCacheClient( + storage=hishel.AsyncInMemoryStorage(), + controller=hishel.Controller(cacheable_methods=["HEAD"]), + transport=transport, + ) as client: response = await client.head("https://example.com") assert response.status_code == 200 assert not response.extensions["from_cache"] diff --git a/tests/_sync/test_client.py b/tests/_sync/test_client.py index 40132e74..1223a46b 100644 --- a/tests/_sync/test_client.py +++ b/tests/_sync/test_client.py @@ -195,7 +195,11 @@ def test_force_cache(): ] ) - with hishel.CacheClient(storage=hishel.InMemoryStorage(), transport=transport) as client: + with hishel.CacheClient( + storage=hishel.InMemoryStorage(), + controller=hishel.Controller(cacheable_methods=["HEAD"]), + transport=transport, + ) as client: response = client.head("https://example.com") assert response.status_code == 200 assert not response.extensions["from_cache"] diff --git a/tests/test_controller.py b/tests/test_controller.py index cbd5f94c..09fbd1ab 100644 --- a/tests/test_controller.py +++ b/tests/test_controller.py @@ -1,3 +1,4 @@ +import logging import re import pytest @@ -28,7 +29,7 @@ def test_is_cachable_for_cachables(): def test_force_cache_property_for_is_cachable(): - controller = Controller(force_cache=True) + controller = Controller(force_cache=True, cacheable_status_codes=[400]) request 
= Request("GET", "https://example.com", extensions={"force_cache": False}) uncachable_response = Response(status=400) @@ -76,34 +77,54 @@ def now(self) -> int: ) -def test_is_cachable_for_non_cachables(): +def test_is_cachable_for_non_cachables(caplog): controller = Controller() request = Request(b"GET", b"https://example.com", headers=[]) response = Response(200, headers=[]) - assert not controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert not controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as not cachable " + "since it does not contain any of the required cache directives." + ] -def test_is_cachable_for_heuristically_cachable(): +def test_is_cachable_for_heuristically_cachable(caplog): controller = Controller(allow_heuristics=True) request = Request(b"GET", b"https://example.com", headers=[]) response = Response(200, headers=[]) - assert controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as " + "cachable since its status code is heuristically cacheable." + ] -def test_is_cachable_for_invalid_method(): +def test_is_cachable_for_invalid_method(caplog): controller = Controller(cacheable_methods=["GET"]) request = Request(b"POST", b"https://example.com", headers=[]) response = Response(200, headers=[]) - assert not controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert not controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + ( + "Considering the resource located at https://example.com/ " + "as not cachable since the request method (POST) is not in the list of cacheable methods." 
+ ) + ] def test_is_cachable_for_post(): @@ -130,34 +151,54 @@ def test_controller_with_unsupported_method(): Controller(cacheable_methods=["INVALID_METHOD"]) -def test_is_cachable_for_unsupported_status(): +def test_is_cachable_for_unsupported_status(caplog): controller = Controller(cacheable_status_codes=[301]) request = Request(b"GET", b"https://example.com", headers=[]) response = Response(200, headers=[(b"Expires", b"some-date")]) - assert not controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert not controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + ( + "Considering the resource located at https://example.com/ " + "as not cachable since its status code (200) is not in the list of cacheable status codes." + ) + ] -def test_is_cachable_for_not_final(): +def test_is_cachable_for_not_final(caplog): controller = Controller(cacheable_status_codes=[100]) request = Request(b"GET", b"https://example.com", headers=[]) response = Response(100, headers=[(b"Expires", b"some-date")]) - assert not controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert not controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as " + "not cachable since its status code is informational." 
+ ] -def test_is_cachable_for_no_store(): +def test_is_cachable_for_no_store(caplog): controller = Controller(allow_heuristics=True) request = Request(b"GET", b"https://example.com", headers=[]) response = Response(200, headers=[(b"Cache-Control", b"no-store")]) - assert not controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert not controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as not cachable" + " since the response contains the no-store directive." + ] def test_is_cachable_for_shared_cache(): @@ -178,14 +219,20 @@ def test_is_cachable_for_shared_cache(): assert not controller.is_cachable(request=request, response=response) -def test_is_cachable_for_private_cache(): +def test_is_cachable_for_private_cache(caplog): controller = Controller() request = Request(b"GET", b"https://example.com", headers=[]) response = Response(200, headers=[(b"Cache-Control", b"private")]) - assert controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as cachable since it" + " meets the criteria for being stored in the cache." 
+ ] def test_get_freshness_lifetime(): @@ -243,6 +290,12 @@ def now(self) -> int: assert age == 86400 # One day +def test_get_age_return_inf_for_invalid_date(): + age = get_age(response=Response(status=200), clock=Clock()) + + assert age == float("inf") + + def test_allowed_stale_no_cache(): response = Response(status=200, headers=[(b"Cache-Control", b"no-cache")]) @@ -266,21 +319,29 @@ def test_clock(): assert Clock().now() > date_07_19_2023 -def test_permanent_redirect_cache(): +def test_permanent_redirect_cache(caplog): controller = Controller() request = Request(b"GET", b"https://example.com") response = Response(status=301) - assert controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + ( + "Considering the resource located at https://example.com/ " + "as cachable since its status code is a permanent redirect." + ) + ] response = Response(status=302) assert not controller.is_cachable(request=request, response=response) -def test_make_conditional_request_with_etag(): +def test_make_conditional_request_with_etag(caplog): controller = Controller() request = Request( @@ -293,15 +354,22 @@ def test_make_conditional_request_with_etag(): response = Response(status=200, headers=[(b"Etag", b"some-etag")]) - controller._make_request_conditional(request=request, response=response) + with caplog.at_level(logging.DEBUG): + controller._make_request_conditional(request=request, response=response) assert request.headers == [ (b"Content-Type", b"application/json"), (b"If-None-Match", b"some-etag"), ] + assert caplog.messages == [ + ( + "Adding the 'If-None-Match' header with the value of 'some-etag' " + "to the request for the resource located at https://example.com/." 
+ ) + ] -def test_make_conditional_request_with_last_modified(): +def test_make_conditional_request_with_last_modified(caplog): controller = Controller() request = Request( @@ -314,22 +382,34 @@ def test_make_conditional_request_with_last_modified(): response = Response(status=200, headers=[(b"Last-Modified", b"Wed, 21 Oct 2015 07:28:00 GMT")]) - controller._make_request_conditional(request=request, response=response) + with caplog.at_level(logging.DEBUG): + controller._make_request_conditional(request=request, response=response) assert request.headers == [ (b"Content-Type", b"application/json"), (b"If-Modified-Since", b"Wed, 21 Oct 2015 07:28:00 GMT"), ] + assert caplog.messages == [ + "Adding the 'If-Modified-Since' header with the value of 'Wed, 21 Oct 2015 07:28:00 GMT' " + "to the request for the resource located at https://example.com/." + ] -def test_construct_response_from_cache_redirect(): +def test_construct_response_from_cache_redirect(caplog): controller = Controller() response = Response(status=301) original_request = Request("GET", "https://example.com") request = Request("GET", "https://example.com") - assert response is controller.construct_response_from_cache( - request=request, response=response, original_request=original_request - ) + + with caplog.at_level(logging.DEBUG): + assert response is controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as valid for cache use since its status code is a permanent redirect." 
+ ] def test_construct_response_from_cache_fresh(): @@ -373,7 +453,79 @@ def now(self) -> int: assert isinstance(conditional_request, Request) -def test_construct_response_from_cache_with_no_cache(): +def test_construct_response_from_cache_with_always_revalidate(caplog): + controller = Controller(always_revalidate=True) + response = Response( + status=200, + headers=[ + (b"Cache-Control", b"max-age=1"), + (b"Date", b"Mon, 25 Aug 2015 12:00:00 GMT"), + ], + ) + original_request = Request("GET", "https://example.com") + request = Request("GET", "https://example.com") + + with caplog.at_level(logging.DEBUG): + conditional_request = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + assert isinstance(conditional_request, Request) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as needing revalidation since the cache is set to always revalidate." + ] + + +def test_construct_response_from_cache_with_must_revalidate(caplog): + controller = Controller() + response = Response( + status=200, + headers=[ + (b"Cache-Control", b"max-age=1, must-revalidate"), + (b"Date", b"Mon, 25 Aug 2015 12:00:00 GMT"), + ], + ) + original_request = Request("GET", "https://example.com") + request = Request("GET", "https://example.com") + + with caplog.at_level(logging.DEBUG): + conditional_request = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + assert isinstance(conditional_request, Request) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as needing revalidation since the response contains the must-revalidate directive." 
+ ] + + +def test_construct_response_from_cache_with_request_no_cache(caplog): + controller = Controller(allow_stale=True) + response = Response( + status=200, + headers=[ + (b"Cache-Control", b"max-age=1"), + (b"Date", b"Mon, 25 Aug 2015 12:00:00 GMT"), + ], + ) + original_request = Request("GET", "https://example.com") + request = Request("GET", "https://example.com", headers=[(b"Cache-Control", b"no-cache")]) + + with caplog.at_level(logging.DEBUG): + conditional_request = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + assert isinstance(conditional_request, Request) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as needing revalidation since the request contains the no-cache directive." + ] + + +def test_construct_response_from_cache_with_no_cache(caplog): controller = Controller(allow_stale=True) response = Response( status=200, @@ -384,13 +536,20 @@ def test_construct_response_from_cache_with_no_cache(): ) original_request = Request("GET", "https://example.com") request = Request("GET", "https://example.com") - conditional_request = controller.construct_response_from_cache( - request=request, response=response, original_request=original_request - ) - assert isinstance(conditional_request, Request) + + with caplog.at_level(logging.DEBUG): + conditional_request = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + assert isinstance(conditional_request, Request) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as needing revalidation since the response contains the no-cache directive." 
+ ] -def test_construct_response_heuristically(): +def test_construct_response_heuristically(caplog): class MockedClock(BaseClock): def now(self) -> int: return 1440590400 # Mon, 26 Aug 2015 12:00:00 GMT @@ -408,10 +567,17 @@ def now(self) -> int: original_request = Request("GET", "https://example.com") request = Request("GET", "https://example.com") - res = controller.construct_response_from_cache( - request=request, response=response, original_request=original_request - ) - + with caplog.at_level(logging.DEBUG): + res = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + assert caplog.messages == [ + "Could not determine the freshness lifetime of the resource located at " + "https://example.com/, trying to use heuristics to calculate it.", + "Successfully calculated the freshness lifetime of the resource " + "located at https://example.com/ using heuristics.", + "Considering the resource located at https://example.com/ as valid for cache use since it is fresh.", + ] assert isinstance(res, Response) # Age more than 7 days @@ -423,9 +589,20 @@ def now(self) -> int: ], ) - res = controller.construct_response_from_cache( - request=request, response=response, original_request=original_request - ) + caplog.clear() + with caplog.at_level(logging.DEBUG): + res = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) + assert caplog.messages == [ + "Could not determine the freshness lifetime of the resource located at " + "https://example.com/, trying to use heuristics to calculate it.", + "Successfully calculated the freshness lifetime of the resource" + " located at https://example.com/ using heuristics.", + "Considering the resource located at https://example.com/ as needing revalidation since it is not fresh.", + "Adding the 'If-Modified-Since' header with the value of 'Mon, 25 Aug 2003 12:00:00 GMT'" + " to the request for the resource 
located at https://example.com/.", + ] assert not isinstance(res, Response) @@ -498,6 +675,48 @@ def test_vary_validation(): assert not controller._validate_vary(request=request, response=response, original_request=original_request) +def test_construct_response_from_cache_with_vary_mismatch(caplog): + original_request = Request( + method="GET", + url="https://example.com", + headers=[ + (b"Content-Type", b"application/json"), + (b"Content-Language", b"en-US"), + ], + ) + + request = Request( + method="GET", + url="https://example.com", + headers=[ + (b"Content-Type", b"application/xml"), + (b"Content-Language", b"en-US"), + ], + ) + + response = Response( + status=200, + headers=[ + (b"Content-Type", b"application/json"), + (b"Content-Language", b"en-US"), + (b"Vary", b"Content-Type, Content-Language"), + ], + ) + + controller = Controller() + + with caplog.at_level(logging.DEBUG): + cached_response = controller.construct_response_from_cache( + original_request=original_request, request=request, response=response + ) + + assert cached_response is None + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as invalid for cache use since the vary headers do not match." 
+ ] + + def test_vary_validation_value_mismatch(): original_request = Request( method="GET", @@ -564,7 +783,7 @@ def test_vary_validation_value_wildcard(): assert not controller._validate_vary(request=request, response=response, original_request=original_request) -def test_max_age_request_directive(): +def test_max_age_request_directive(caplog): class MockedClock(BaseClock): def now(self) -> int: return 1440507600 # Mon, 25 Aug 2015 13:00:00 GMT @@ -600,13 +819,20 @@ def now(self) -> int: controller = Controller(clock=MockedClock()) - cached_response = controller.construct_response_from_cache( - original_request=original_request, request=request, response=response - ) + with caplog.at_level(logging.DEBUG): + cached_response = controller.construct_response_from_cache( + original_request=original_request, request=request, response=response + ) assert cached_response is None + assert caplog.messages == [ + ( + "Considering the resource located at https://example.com/ " + "as invalid for cache use since the age of the response exceeds the max-age directive." 
+ ) + ] -def test_max_age_request_directive_without_max_stale(): +def test_max_age_request_directive_with_max_stale(caplog): class MockedClock(BaseClock): def now(self) -> int: return 1440507600 # Mon, 25 Aug 2015 13:00:00 GMT @@ -626,7 +852,7 @@ def now(self) -> int: headers=[ (b"Content-Type", b"application/json"), (b"Content-Language", b"en-US"), - (b"Cache-Control", "max-age=3600"), + (b"Cache-Control", "max-age=3600, max-stale=10000"), ], ) @@ -642,13 +868,19 @@ def now(self) -> int: controller = Controller(clock=MockedClock()) - cached_response = controller.construct_response_from_cache( - original_request=original_request, request=request, response=response - ) - assert cached_response is None + with caplog.at_level(logging.DEBUG): + cached_response = controller.construct_response_from_cache( + original_request=original_request, request=request, response=response + ) + + assert isinstance(cached_response, Response) + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as valid for " + "cache use since the freshness lifetime has been exceeded less than max-stale." + ] -def test_max_stale_request_directive(): +def test_max_stale_request_directive(caplog): class MockedClock(BaseClock): def now(self) -> int: return 1440507600 # Mon, 25 Aug 2015 13:00:00 GMT @@ -684,13 +916,18 @@ def now(self) -> int: controller = Controller(clock=MockedClock()) - cached_response = controller.construct_response_from_cache( - original_request=original_request, request=request, response=response - ) + with caplog.at_level(logging.DEBUG): + cached_response = controller.construct_response_from_cache( + original_request=original_request, request=request, response=response + ) assert cached_response is None + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as invalid for" + " cache use since the freshness lifetime has been exceeded more than max-stale." 
+ ] -def test_min_fresh_request_directive(): +def test_min_fresh_request_directive(caplog): class MockedClock(BaseClock): def now(self) -> int: return 1440507600 # Mon, 25 Aug 2015 13:00:00 GMT @@ -726,10 +963,15 @@ def now(self) -> int: controller = Controller(clock=MockedClock()) - cached_response = controller.construct_response_from_cache( - original_request=original_request, request=request, response=response - ) + with caplog.at_level(logging.DEBUG): + cached_response = controller.construct_response_from_cache( + original_request=original_request, request=request, response=response + ) assert cached_response is None + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as invalid for cache" + " use since the time left for freshness is less than the min-fresh directive." + ] def test_no_cache_request_directive(): @@ -821,7 +1063,7 @@ def test_no_store_response_directive(): assert not controller.is_cachable(request=request, response=response) -def test_must_understand_response_directive(): +def test_must_understand_response_directive(caplog): request = Request( method="GET", url="https://example.com", @@ -843,24 +1085,42 @@ def test_must_understand_response_directive(): controller = Controller() - assert controller.is_cachable(request=request, response=response) + with caplog.at_level(logging.DEBUG): + assert controller.is_cachable(request=request, response=response) + + assert caplog.messages == [ + "Skipping the no-store directive for the resource located at https://example.com/" + " since the response contains the must-understand directive.", + "Considering the resource located at https://example.com/ as cachable " + "since it meets the criteria for being stored in the cache.", + ] -def test_freshness_lifetime_invalid_information(): +def test_freshness_lifetime_invalid_information(caplog): controller = Controller() response = Response( status=400, ) original_request = Request("GET", "https://example.com") request = 
Request("GET", "https://example.com") - conditional_request = controller.construct_response_from_cache( - request=request, response=response, original_request=original_request - ) + + with caplog.at_level(logging.DEBUG): + conditional_request = controller.construct_response_from_cache( + request=request, response=response, original_request=original_request + ) assert isinstance(conditional_request, Request) + assert caplog.messages == [ + "Could not determine the freshness lifetime of the resource located at https://example.com/, " + "trying to use heuristics to calculate it.", + ( + "Could not calculate the freshness lifetime of the resource located at https://example.com/. " + "Making a conditional request to revalidate the response." + ), + ] -def test_force_cache_extension_for_is_cachable(): - controller = Controller() +def test_force_cache_extension_for_is_cachable(caplog): + controller = Controller(cacheable_status_codes=[400]) request = Request("GET", "https://example.com") uncachable_response = Response(status=400) @@ -868,10 +1128,16 @@ def test_force_cache_extension_for_is_cachable(): request = Request("GET", "https://example.com", extensions={"force_cache": True}) - assert controller.is_cachable(request=request, response=uncachable_response) is True + with caplog.at_level(logging.DEBUG): + assert controller.is_cachable(request=request, response=uncachable_response) is True + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as" + " cachable since the request is forced to use the cache." 
+ ] -def test_force_cache_extension_for_construct_response_from_cache(): +def test_force_cache_extension_for_construct_response_from_cache(caplog): class MockedClock(BaseClock): def now(self) -> int: return 1440504001 # Mon, 25 Aug 2015 12:00:01 GMT @@ -887,22 +1153,33 @@ def now(self) -> int: ], ) - assert isinstance( - controller.construct_response_from_cache( - request=request, - response=cachable_response, - original_request=original_request, - ), - Request, - ) + with caplog.at_level(logging.DEBUG): + assert isinstance( + controller.construct_response_from_cache( + request=request, + response=cachable_response, + original_request=original_request, + ), + Request, + ) + assert caplog.messages == [ + "Considering the resource located at https://example.com/ as needing revalidation since it is not fresh." + ] request = Request("Get", "https://example.com", extensions={"force_cache": True}) - assert isinstance( - controller.construct_response_from_cache( - request=request, - response=cachable_response, - original_request=original_request, - ), - Response, - ) + caplog.clear() + with caplog.at_level(logging.DEBUG): + assert isinstance( + controller.construct_response_from_cache( + request=request, + response=cachable_response, + original_request=original_request, + ), + Response, + ) + + assert caplog.messages == [ + "Considering the resource located at https://example.com/ " + "as valid for cache use since the request is forced to use the cache." 
+ ] diff --git a/tests/test_utils.py b/tests/test_utils.py index 166dbec1..5839be18 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,3 +1,5 @@ +import httpcore +import pytest from httpcore import Request from hishel._controller import get_updated_headers @@ -6,6 +8,7 @@ extract_header_values_decoded, float_seconds_to_int_milliseconds, generate_key, + get_safe_url, header_presents, parse_date, ) @@ -90,3 +93,34 @@ def test_float_seconds_to_milliseconds(): seconds = 1.234 milliseconds = float_seconds_to_int_milliseconds(seconds) assert milliseconds == 1234 + + +@pytest.mark.parametrize( + "url, expected", + [ + pytest.param( + "https://example.com/path?query=1", + "https://example.com/path", + id="url_with_query_is_ignored", + ), + pytest.param( + "https://example.com/path", + "https://example.com/path", + id="url_without_query", + ), + pytest.param("https://example.com", "https://example.com/", id="url_without_path"), + pytest.param( + "https://xn--e1afmkfd.xn--p1ag", + "https://пример.ру/", + id="url_with_idna", + ), + ], +) +def test_safe_url( + url: str, + expected: str, +) -> None: + httpcore_url = httpcore.URL(url) + safe_url = get_safe_url(httpcore_url) + + assert safe_url == expected