Skip to content

Commit

Permalink
Merge branch 'master' into unsafe_methods
Browse files Browse the repository at this point in the history
  • Loading branch information
karpetrosyan authored Jan 27, 2024
2 parents dc02a05 + f8bbf7a commit 7090965
Show file tree
Hide file tree
Showing 6 changed files with 102 additions and 9 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## Unreleased

- Added option to Controller to permit caching unsafe HTTP methods.
- Added option to Controller to permit caching unsafe HTTP methods. (#159)
- Make `FileStorage` check the staleness of all cache files at a set interval. (#169)
- Support AWS S3 storages. (#164)
- Move `typing_extensions` from requirements.txt to pyproject.toml. (#161)

Expand Down
12 changes: 12 additions & 0 deletions docs/advanced/storages.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,18 @@ storage = hishel.FileStorage(ttl=3600)
If you do this, `Hishel` will delete any stored responses whose ttl has expired.
In this example, the stored responses were limited to 1 hour.

#### Check ttl every

In order to avoid excessive memory utilization, `Hishel` must periodically remove old responses — that is, responses that are no longer being used and should be deleted from the cache.
By default it cleans the cache every minute, but you may change the interval with the `check_ttl_every` argument.

Example:

```python
import hishel

storage = hishel.FileStorage(check_ttl_every=600) # check every 600s (10m)
```

### :material-memory: In-memory storage

Expand Down
20 changes: 17 additions & 3 deletions hishel/_async/_storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,17 @@ class AsyncFileStorage(AsyncBaseStorage):
:type base_path: tp.Optional[Path], optional
:param ttl: Specifies the maximum number of seconds that the response can be cached, defaults to None
:type ttl: tp.Optional[tp.Union[int, float]], optional
:param check_ttl_every: How often in seconds to check staleness of **all** cache files.
Makes sense only with set `ttl`, defaults to 60
:type check_ttl_every: tp.Union[int, float]
"""

def __init__(
self,
serializer: tp.Optional[BaseSerializer] = None,
base_path: tp.Optional[Path] = None,
ttl: tp.Optional[tp.Union[int, float]] = None,
check_ttl_every: tp.Union[int, float] = 60,
) -> None:
super().__init__(serializer, ttl)

Expand All @@ -84,6 +88,8 @@ def __init__(

self._file_manager = AsyncFileManager(is_binary=self._serializer.is_binary)
self._lock = AsyncLock()
self._check_ttl_every = check_ttl_every
self._last_cleaned = time.monotonic()

async def store(self, key: str, response: Response, request: Request, metadata: Metadata) -> None:
"""
Expand All @@ -105,7 +111,7 @@ async def store(self, key: str, response: Response, request: Request, metadata:
str(response_path),
self._serializer.dumps(response=response, request=request, metadata=metadata),
)
await self._remove_expired_caches()
await self._remove_expired_caches(response_path)

async def retrieve(self, key: str) -> tp.Optional[StoredResponse]:
"""
Expand All @@ -119,7 +125,7 @@ async def retrieve(self, key: str) -> tp.Optional[StoredResponse]:

response_path = self._base_path / key

await self._remove_expired_caches()
await self._remove_expired_caches(response_path)
async with self._lock:
if response_path.exists():
return self._serializer.loads(await self._file_manager.read_from(str(response_path)))
Expand All @@ -128,10 +134,18 @@ async def retrieve(self, key: str) -> tp.Optional[StoredResponse]:
async def aclose(self) -> None: # pragma: no cover
return

async def _remove_expired_caches(self) -> None:
async def _remove_expired_caches(self, response_path: Path) -> None:
if self._ttl is None:
return

if time.monotonic() - self._last_cleaned < self._check_ttl_every:
if response_path.is_file():
age = time.time() - response_path.stat().st_mtime
if age > self._ttl:
response_path.unlink()
return

self._last_cleaned = time.monotonic()
async with self._lock:
for file in self._base_path.iterdir():
if file.is_file():
Expand Down
20 changes: 17 additions & 3 deletions hishel/_sync/_storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,17 @@ class FileStorage(BaseStorage):
:type base_path: tp.Optional[Path], optional
:param ttl: Specifies the maximum number of seconds that the response can be cached, defaults to None
:type ttl: tp.Optional[tp.Union[int, float]], optional
:param check_ttl_every: How often in seconds to check staleness of **all** cache files.
Makes sense only with set `ttl`, defaults to 60
:type check_ttl_every: tp.Union[int, float]
"""

def __init__(
self,
serializer: tp.Optional[BaseSerializer] = None,
base_path: tp.Optional[Path] = None,
ttl: tp.Optional[tp.Union[int, float]] = None,
check_ttl_every: tp.Union[int, float] = 60,
) -> None:
super().__init__(serializer, ttl)

Expand All @@ -84,6 +88,8 @@ def __init__(

self._file_manager = FileManager(is_binary=self._serializer.is_binary)
self._lock = Lock()
self._check_ttl_every = check_ttl_every
self._last_cleaned = time.monotonic()

def store(self, key: str, response: Response, request: Request, metadata: Metadata) -> None:
"""
Expand All @@ -105,7 +111,7 @@ def store(self, key: str, response: Response, request: Request, metadata: Metada
str(response_path),
self._serializer.dumps(response=response, request=request, metadata=metadata),
)
self._remove_expired_caches()
self._remove_expired_caches(response_path)

def retrieve(self, key: str) -> tp.Optional[StoredResponse]:
"""
Expand All @@ -119,7 +125,7 @@ def retrieve(self, key: str) -> tp.Optional[StoredResponse]:

response_path = self._base_path / key

self._remove_expired_caches()
self._remove_expired_caches(response_path)
with self._lock:
if response_path.exists():
return self._serializer.loads(self._file_manager.read_from(str(response_path)))
Expand All @@ -128,10 +134,18 @@ def retrieve(self, key: str) -> tp.Optional[StoredResponse]:
def close(self) -> None: # pragma: no cover
return

def _remove_expired_caches(self) -> None:
def _remove_expired_caches(self, response_path: Path) -> None:
if self._ttl is None:
return

if time.monotonic() - self._last_cleaned < self._check_ttl_every:
if response_path.is_file():
age = time.time() - response_path.stat().st_mtime
if age > self._ttl:
response_path.unlink()
return

self._last_cleaned = time.monotonic()
with self._lock:
for file in self._base_path.iterdir():
if file.is_file():
Expand Down
28 changes: 27 additions & 1 deletion tests/_async/test_storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ async def test_inmemorystorage():

@pytest.mark.asyncio
async def test_filestorage_expired(use_temp_dir):
storage = AsyncFileStorage(ttl=0.1)
storage = AsyncFileStorage(ttl=0.2, check_ttl_every=0.1)
first_request = Request(b"GET", "https://example.com")
second_request = Request(b"GET", "https://anotherexample.com")

Expand All @@ -136,6 +136,32 @@ async def test_filestorage_expired(use_temp_dir):
assert await storage.retrieve(first_key) is None


@pytest.mark.asyncio
async def test_filestorage_timer(use_temp_dir):
    """Check that entries expire on the ``check_ttl_every`` schedule.

    With ttl=0.2s and a 0.2s sweep interval, an entry stored at t=0 must
    survive a retrieval at t=0.1 and be gone by t=0.2, while an entry
    stored at t=0.1 survives until t=0.3.
    """
    storage = AsyncFileStorage(ttl=0.2, check_ttl_every=0.2)

    request_one = Request(b"GET", "https://example.com")
    request_two = Request(b"GET", "https://anotherexample.com")

    key_one = generate_key(request_one)
    key_two = generate_key(request_two)

    response = Response(200, headers=[], content=b"test")
    await response.aread()

    await storage.store(key_one, response=response, request=request_one, metadata=dummy_metadata)
    assert await storage.retrieve(key_one) is not None

    await asleep(0.1)
    # Half the ttl has elapsed: the first entry is still fresh.
    assert await storage.retrieve(key_one) is not None

    await storage.store(key_two, response=response, request=request_two, metadata=dummy_metadata)
    assert await storage.retrieve(key_two) is not None

    await asleep(0.1)
    # t=0.2: the first entry has outlived its ttl, the second has not.
    assert await storage.retrieve(key_one) is None
    assert await storage.retrieve(key_two) is not None

    await asleep(0.1)
    # t=0.3: the second entry has now expired as well.
    assert await storage.retrieve(key_two) is None


@pytest.mark.asyncio
async def test_redisstorage_expired():
if await is_redis_down(): # pragma: no cover
Expand Down
28 changes: 27 additions & 1 deletion tests/_sync/test_storages.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def test_inmemorystorage():


def test_filestorage_expired(use_temp_dir):
storage = FileStorage(ttl=0.1)
storage = FileStorage(ttl=0.2, check_ttl_every=0.1)
first_request = Request(b"GET", "https://example.com")
second_request = Request(b"GET", "https://anotherexample.com")

Expand All @@ -137,6 +137,32 @@ def test_filestorage_expired(use_temp_dir):



def test_filestorage_timer(use_temp_dir):
    """Check that entries expire on the ``check_ttl_every`` schedule.

    With ttl=0.2s and a 0.2s sweep interval, an entry stored at t=0 must
    survive a retrieval at t=0.1 and be gone by t=0.2, while an entry
    stored at t=0.1 survives until t=0.3.
    """
    storage = FileStorage(ttl=0.2, check_ttl_every=0.2)

    request_one = Request(b"GET", "https://example.com")
    request_two = Request(b"GET", "https://anotherexample.com")

    key_one = generate_key(request_one)
    key_two = generate_key(request_two)

    response = Response(200, headers=[], content=b"test")
    response.read()

    storage.store(key_one, response=response, request=request_one, metadata=dummy_metadata)
    assert storage.retrieve(key_one) is not None

    sleep(0.1)
    # Half the ttl has elapsed: the first entry is still fresh.
    assert storage.retrieve(key_one) is not None

    storage.store(key_two, response=response, request=request_two, metadata=dummy_metadata)
    assert storage.retrieve(key_two) is not None

    sleep(0.1)
    # t=0.2: the first entry has outlived its ttl, the second has not.
    assert storage.retrieve(key_one) is None
    assert storage.retrieve(key_two) is not None

    sleep(0.1)
    # t=0.3: the second entry has now expired as well.
    assert storage.retrieve(key_two) is None



def test_redisstorage_expired():
if is_redis_down(): # pragma: no cover
pytest.fail("Redis server was not found")
Expand Down

0 comments on commit 7090965

Please sign in to comment.