From 736ef8aec17b7cff125001853357221c67b5bc15 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 27 Sep 2024 16:13:47 +0200 Subject: [PATCH 01/24] add path attribute to stores; migrate localstore to the new protocol --- src/zarr/abc/store.py | 24 +++++++++++-- src/zarr/store/local.py | 56 ++++++++++++++----------------- src/zarr/store/memory.py | 7 ++-- src/zarr/testing/store.py | 5 ++- tests/v3/test_store/test_local.py | 15 +++++---- 5 files changed, 65 insertions(+), 42 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 5f50360554..0dab6fc26d 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -42,10 +42,21 @@ def from_literal(cls, mode: AccessModeLiteral) -> Self: class Store(ABC): _mode: AccessMode _is_open: bool + path: str - def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> None: - self._is_open = False - self._mode = AccessMode.from_literal(mode) + # TODO: Make store immutable + # def __setattr__(self, *args: Any, **kwargs: Any) -> None: + # msg = ( + # 'Stores are immutable. To modify a Store object, create a new one with the desired' + # 'attributes') + # raise NotImplementedError(msg) + + def __init__( + self, path: str = "", mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any + ) -> None: + object.__setattr__(self, "_is_open", False) + object.__setattr__(self, "_mode", AccessMode.from_literal(mode)) + object.__setattr__(self, "path", path) @classmethod async def open(cls, *args: Any, **kwargs: Any) -> Self: @@ -279,6 +290,13 @@ async def _get_many( for req in requests: yield (req[0], await self.get(*req)) + def with_path(self, path: str) -> Self: + """ + Return a copy of this store with a new path attribute + """ + # TODO: implement this + return self + @runtime_checkable class ByteGetter(Protocol): diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index f1bce769d2..131bcfd044 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -18,7 +18,7 @@ def _get( - path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None + path: str, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None ) -> Buffer: """ Fetch a contiguous region of bytes from a file. @@ -33,6 +33,7 @@ def _get( and the second value specifies the total number of bytes to read. If the total value is `None`, then the entire file after the first byte will be read. """ + target = Path(path) if byte_range is not None: if byte_range[0] is None: start = 0 @@ -41,8 +42,8 @@ def _get( end = (start + byte_range[1]) if byte_range[1] is not None else None else: - return prototype.buffer.from_bytes(path.read_bytes()) - with path.open("rb") as f: + return prototype.buffer.from_bytes(target.read_bytes()) + with target.open("rb") as f: size = f.seek(0, io.SEEK_END) if start is not None: if start >= 0: @@ -77,23 +78,17 @@ class LocalStore(Store): supports_partial_writes: bool = True supports_listing: bool = True - root: Path - - def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None: - super().__init__(mode=mode) - if isinstance(root, str): - root = Path(root) - assert isinstance(root, Path) - self.root = root + def __init__(self, path: Path | str, *, mode: AccessModeLiteral = "r") -> None: + super().__init__(mode=mode, path=str(path)) async def clear(self) -> None: self._check_writable() - shutil.rmtree(self.root) - self.root.mkdir() + shutil.rmtree(self.path) + os.mkdir(self.path) async def empty(self) -> bool: try: - with os.scandir(self.root) as it: + with os.scandir(self.path) as it: for entry in it: if entry.is_file(): # stop once a file is found @@ -104,13 +99,13 @@ async def empty(self) -> bool: return True def __str__(self) -> str: - return f"file://{self.root}" + return f"file://{self.path}" def __repr__(self) -> str: return f"LocalStore({str(self)!r})" def __eq__(self, other: object) -> bool: - return isinstance(other, type(self)) and self.root == other.root + return isinstance(other, type(self)) and self.path == other.path async def get( self, @@ -121,7 +116,7 @@ async def get( if not self._is_open: await self._open() assert isinstance(key, str) - path = self.root / key + path = os.path.join(self.path, key) try: return await to_thread(_get, path, prototype, byte_range) @@ -147,7 +142,7 @@ async def get_partial_values( args = [] for key, byte_range in key_ranges: assert isinstance(key, str) - path = self.root / key + path = os.path.join(self.path, key) args.append((_get, path, prototype, byte_range)) return await concurrent_map(args, to_thread, limit=None) # TODO: fix limit @@ -158,7 +153,7 @@ async def set(self, key: str, value: Buffer) -> None: assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError("LocalStore.set(): `value` must a Buffer instance") - path = self.root / key + path = Path(self.path) / key await to_thread(_put, path, value) async def set_partial_values( @@ -168,20 +163,20 @@ async def set_partial_values( args = [] for key, start, value in key_start_values: assert isinstance(key, str) - path = self.root / key + path = os.path.join(self.path, key) args.append((_put, path, value, start)) await concurrent_map(args, to_thread, limit=None) # TODO: fix limit async def delete(self, key: str) -> None: self._check_writable() - path = self.root / key + path = Path(self.path) / key if path.is_dir(): # TODO: support deleting directories? shutil.rmtree? shutil.rmtree(path) else: await to_thread(path.unlink, True) # Q: we may want to raise if path is missing async def exists(self, key: str) -> bool: - path = self.root / key + path = Path(self.path) / key return await to_thread(path.is_file) async def list(self) -> AsyncGenerator[str, None]: @@ -191,10 +186,11 @@ async def list(self) -> AsyncGenerator[str, None]: ------- AsyncGenerator[str, None] """ - to_strip = str(self.root) + "/" - for p in list(self.root.rglob("*")): + # TODO: just invoke list_prefix with the prefix "/" + to_strip = self.path + "/" + for p in Path(self.path).rglob("*"): if p.is_file(): - yield str(p).replace(to_strip, "") + yield str(p.relative_to(to_strip)) async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """ @@ -209,8 +205,8 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: ------- AsyncGenerator[str, None] """ - to_strip = os.path.join(str(self.root / prefix)) - for p in (self.root / prefix).rglob("*"): + to_strip = os.path.join(self.path, prefix) + for p in (Path(self.path) / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) @@ -228,12 +224,12 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: AsyncGenerator[str, None] """ - base = self.root / prefix + base = os.path.join(self.path, prefix) to_strip = str(base) + "/" try: - key_iter = base.iterdir() + key_iter = Path(base).iterdir() for key in key_iter: - yield str(key).replace(to_strip, "") + yield str(key.relative_to(to_strip)) except (FileNotFoundError, NotADirectoryError): pass diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index ee4107b0ab..6b4c06d9e1 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -26,14 +26,17 @@ class MemoryStore(Store): def __init__( self, + path: str = "", store_dict: MutableMapping[str, Buffer] | None = None, *, mode: AccessModeLiteral = "r", ) -> None: - super().__init__(mode=mode) + super().__init__(mode=mode, path=path) + if store_dict is None: store_dict = {} - self._store_dict = store_dict + + object.__setattr__(self, "_store_dict", store_dict) async def empty(self) -> bool: return not self._store_dict diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 5c75007347..0f856c8a9f 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -39,7 +39,7 @@ def get(self, store: S, key: str) -> Buffer: @pytest.fixture def store_kwargs(self) -> dict[str, Any]: - return {"mode": "r+"} + return {"mode": "r+", "path": ""} @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> Store: @@ -62,6 +62,9 @@ def test_serializable_store(self, store: S) -> None: foo = pickle.dumps(store) assert pickle.loads(foo) == store + def test_store_path(self, store: S, store_kwargs: dict[str, Any]) -> None: + assert store.path == store_kwargs["path"] + def test_store_mode(self, store: S, store_kwargs: dict[str, Any]) -> None: assert store.mode == AccessMode.from_literal("r+") assert not store.mode.readonly diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index bdd909c285..8148241dbc 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -1,5 +1,7 @@ from __future__ import annotations +from pathlib import Path + import pytest from zarr.core.buffer import Buffer, cpu @@ -12,20 +14,21 @@ class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]): buffer_cls = cpu.Buffer def get(self, store: LocalStore, key: str) -> Buffer: - return self.buffer_cls.from_bytes((store.root / key).read_bytes()) + return self.buffer_cls.from_bytes((Path(store.path) / key).read_bytes()) def set(self, store: LocalStore, key: str, value: Buffer) -> None: - parent = (store.root / key).parent + target = Path(store.path) / key + parent = target.parent if not parent.exists(): parent.mkdir(parents=True) - (store.root / key).write_bytes(value.to_bytes()) + target.write_bytes(value.to_bytes()) @pytest.fixture def store_kwargs(self, tmpdir) -> dict[str, str]: - return {"root": str(tmpdir), "mode": "r+"} + return {"path": str(tmpdir), "mode": "r+"} def test_store_repr(self, store: LocalStore) -> None: - assert str(store) == f"file://{store.root!s}" + assert str(store) == f"file://{store.path!s}" def test_store_supports_writes(self, store: LocalStore) -> None: assert store.supports_writes @@ -38,5 +41,5 @@ def test_store_supports_listing(self, store: LocalStore) -> None: async def test_empty_with_empty_subdir(self, store: LocalStore) -> None: assert await store.empty() - (store.root / "foo/bar").mkdir(parents=True) + (Path(store.path) / "foo/bar").mkdir(parents=True) assert await store.empty() From 8189cc77521c0d4bf1f8184111ae366301bc525f Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sat, 28 Sep 2024 17:02:37 -0400 Subject: [PATCH 02/24] add path kwarg to memory test fixture --- tests/v3/test_store/test_memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 4413047178..cde1aa9518 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -22,7 +22,7 @@ def get(self, store: MemoryStore, key: str) -> Buffer: def store_kwargs( self, request: pytest.FixtureRequest ) -> dict[str, str | None | dict[str, Buffer]]: - kwargs = {"store_dict": None, "mode": "r+"} + kwargs = {"store_dict": None, "mode": "r+", "path": ""} if request.param is True: kwargs["store_dict"] = {} return kwargs From fa13860b70f9030b951af90a2256d0a8dd28a102 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 29 Sep 2024 22:18:36 -0400 Subject: [PATCH 03/24] add path to stores --- src/zarr/abc/store.py | 34 ++++++++++++--- src/zarr/store/common.py | 4 +- src/zarr/store/local.py | 5 ++- src/zarr/store/logging.py | 3 +- src/zarr/store/memory.py | 35 +++++++-------- src/zarr/store/remote.py | 3 +- src/zarr/store/zip.py | 70 +++++++++++++++++------------- src/zarr/testing/store.py | 2 +- tests/v3/conftest.py | 2 +- tests/v3/test_array.py | 5 ++- tests/v3/test_store/test_core.py | 8 ++-- tests/v3/test_store/test_memory.py | 4 +- tests/v3/test_store/test_zip.py | 6 +-- 13 files changed, 109 insertions(+), 72 deletions(-) diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 8c929df488..a2759143de 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -51,12 +51,17 @@ class Store(ABC): # 'attributes') # raise NotImplementedError(msg) - def __init__( - self, path: str = "", mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any - ) -> None: + def __init__(self, path: str = "", mode: AccessModeLiteral = "r") -> None: object.__setattr__(self, "_is_open", False) object.__setattr__(self, "_mode", AccessMode.from_literal(mode)) - object.__setattr__(self, "path", path) + object.__setattr__(self, "path", validate_path(path)) + + def resolve_key(self, key: str) -> str: + key = parse_path(key) + if self.path == "": + return key + else: + return f"{self.path}/{key}" @classmethod async def open(cls, *args: Any, **kwargs: Any) -> Self: @@ -335,8 +340,8 @@ def with_path(self, path: str) -> Self: """ Return a copy of this store with a new path attribute """ - # TODO: implement this - return self + # TODO: implement me + raise NotImplementedError @runtime_checkable @@ -364,3 +369,20 @@ async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None: await byte_setter.delete() else: await byte_setter.set(value) + + +def validate_path(path: str) -> str: + """ + Ensure that the input string is a valid relative path in the abstract zarr object storage scheme. + """ + if path.endswith("/"): + raise ValueError(f"Invalid path: {path} ends with '/'.") + if "//" in path: + raise ValueError(f"Invalid path: {path} contains '//'.") + if "\\" in path: + raise ValueError(f"Invalid path: {path} contains '\"'.") + return path + + +def parse_path(path: str) -> str: + return path.rstrip("/").lstrip("/") diff --git a/src/zarr/store/common.py b/src/zarr/store/common.py index 2d9b1e82c2..7f4e05cdf8 100644 --- a/src/zarr/store/common.py +++ b/src/zarr/store/common.py @@ -101,7 +101,7 @@ async def make_store_path( mode = "w" # exception to the default mode = 'r' result = StorePath(await MemoryStore.open(mode=mode)) elif isinstance(store_like, Path): - result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r")) + result = StorePath(await LocalStore.open(path=store_like, mode=mode or "r")) elif isinstance(store_like, str): storage_options = storage_options or {} @@ -111,7 +111,7 @@ async def make_store_path( RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r") ) else: - result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r")) + result = StorePath(await LocalStore.open(path=store_like, mode=mode or "r")) elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. # By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate. diff --git a/src/zarr/store/local.py b/src/zarr/store/local.py index f1614958e0..dda5613ee7 100644 --- a/src/zarr/store/local.py +++ b/src/zarr/store/local.py @@ -4,7 +4,7 @@ import os import shutil from pathlib import Path -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer @@ -12,6 +12,7 @@ if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable + from typing import Self from zarr.core.buffer import BufferPrototype from zarr.core.common import AccessModeLiteral @@ -125,7 +126,7 @@ async def get( ) -> Buffer | None: if not self._is_open: await self._open() - assert isinstance(key, str) + path = os.path.join(self.path, key) try: diff --git a/src/zarr/store/logging.py b/src/zarr/store/logging.py index a9113aabe4..670f0a01c4 100644 --- a/src/zarr/store/logging.py +++ b/src/zarr/store/logging.py @@ -5,13 +5,14 @@ import time from collections import defaultdict from contextlib import contextmanager -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from zarr.abc.store import AccessMode, ByteRangeRequest, Store from zarr.core.buffer import Buffer if TYPE_CHECKING: from collections.abc import AsyncGenerator, Generator, Iterable + from typing import Self from zarr.core.buffer import Buffer, BufferPrototype from zarr.core.common import AccessModeLiteral diff --git a/src/zarr/store/memory.py b/src/zarr/store/memory.py index dc8acccf6d..00d5c507d0 100644 --- a/src/zarr/store/memory.py +++ b/src/zarr/store/memory.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, gpu @@ -9,6 +9,7 @@ if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable, MutableMapping + from typing import Self from zarr.core.buffer import BufferPrototype from zarr.core.common import AccessModeLiteral @@ -26,9 +27,9 @@ class MemoryStore(Store): def __init__( self, - path: str = "", store_dict: MutableMapping[str, Buffer] | None = None, *, + path: str = "", mode: AccessModeLiteral = "r", ) -> None: super().__init__(mode=mode, path=path) @@ -68,9 +69,9 @@ async def get( ) -> Buffer | None: if not self._is_open: await self._open() - assert isinstance(key, str) + try: - value = self._store_dict[key] + value = self._store_dict[self.resolve_key(key)] start, length = _normalize_interval_index(value, byte_range) return prototype.buffer.from_buffer(value[start : start + length]) except KeyError: @@ -88,7 +89,7 @@ async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: return await concurrent_map(key_ranges, _get, limit=None) async def exists(self, key: str) -> bool: - return key in self._store_dict + return self.resolve_key(key) in self._store_dict async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: self._check_writable() @@ -96,23 +97,23 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError(f"Expected Buffer. Got {type(value)}.") - + key_abs = self.resolve_key(key) if byte_range is not None: - buf = self._store_dict[key] + buf = self._store_dict[key_abs] buf[byte_range[0] : byte_range[1]] = value - self._store_dict[key] = buf + self._store_dict[key_abs] = buf else: - self._store_dict[key] = value + self._store_dict[key_abs] = value async def set_if_not_exists(self, key: str, default: Buffer) -> None: self._check_writable() await self._ensure_open() - self._store_dict.setdefault(key, default) + self._store_dict.setdefault(self.resolve_key(key), default) async def delete(self, key: str) -> None: self._check_writable() try: - del self._store_dict[key] + del self._store_dict[self.resolve_key(key)] except KeyError: pass # Q(JH): why not raise? @@ -120,13 +121,14 @@ async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, by raise NotImplementedError async def list(self) -> AsyncGenerator[str, None]: - for key in self._store_dict: - yield key + async for result in self.list_prefix(""): + yield result async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: + prefix_abs = self.resolve_key(prefix) for key in self._store_dict: - if key.startswith(prefix): - yield key.removeprefix(prefix) + if key.startswith(prefix_abs): + yield key.removeprefix(prefix_abs).lstrip("/") async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: """ @@ -141,8 +143,7 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: ------- AsyncGenerator[str, None] """ - if prefix.endswith("/"): - prefix = prefix[:-1] + prefix = self.resolve_key(prefix) if prefix == "": keys_unique = {k.split("/")[0] for k in self._store_dict} diff --git a/src/zarr/store/remote.py b/src/zarr/store/remote.py index 9ef40ae226..a1c9ca37b9 100644 --- a/src/zarr/store/remote.py +++ b/src/zarr/store/remote.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Self +from typing import TYPE_CHECKING, Any import fsspec @@ -10,6 +10,7 @@ if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable + from typing import Self from fsspec.asyn import AsyncFileSystem diff --git a/src/zarr/store/zip.py b/src/zarr/store/zip.py index 116d6de83a..89e360aea9 100644 --- a/src/zarr/store/zip.py +++ b/src/zarr/store/zip.py @@ -5,13 +5,14 @@ import time import zipfile from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal, Self +from typing import TYPE_CHECKING, Literal from zarr.abc.store import ByteRangeRequest, Store from zarr.core.buffer import Buffer, BufferPrototype if TYPE_CHECKING: from collections.abc import AsyncGenerator, Iterable + from typing import Self ZipStoreAccessModeLiteral = Literal["r", "w", "a"] @@ -42,7 +43,7 @@ class ZipStore(Store): supports_partial_writes: bool = False supports_listing: bool = True - path: Path + file_path: Path compression: int allowZip64: bool @@ -51,18 +52,18 @@ class ZipStore(Store): def __init__( self, - path: Path | str, + file_path: Path | str, *, + path: str = "", mode: ZipStoreAccessModeLiteral = "r", compression: int = zipfile.ZIP_STORED, allowZip64: bool = True, ) -> None: - super().__init__(mode=mode) + super().__init__(mode=mode, path=path) - if isinstance(path, str): - path = Path(path) - assert isinstance(path, Path) - self.path = path # root? + if isinstance(file_path, str): + file_path = Path(file_path) + self.file_path = file_path self._zmode = mode self.compression = compression @@ -75,7 +76,7 @@ def _sync_open(self) -> None: self._lock = threading.RLock() self._zf = zipfile.ZipFile( - self.path, + self.file_path, mode=self._zmode, compression=self.compression, allowZip64=self.allowZip64, @@ -86,11 +87,11 @@ def _sync_open(self) -> None: async def _open(self) -> None: self._sync_open() - def __getstate__(self) -> tuple[Path, ZipStoreAccessModeLiteral, int, bool]: - return self.path, self._zmode, self.compression, self.allowZip64 + def __getstate__(self) -> tuple[str, Path, ZipStoreAccessModeLiteral, int, bool]: + return self.path, self.file_path, self._zmode, self.compression, self.allowZip64 - def __setstate__(self, state: Any) -> None: - self.path, self._zmode, self.compression, self.allowZip64 = state + def __setstate__(self, state: tuple[str, Path, ZipStoreAccessModeLiteral, int, bool]) -> None: + self.path, self.file_path, self._zmode, self.compression, self.allowZip64 = state self._is_open = False self._sync_open() @@ -103,9 +104,9 @@ async def clear(self) -> None: with self._lock: self._check_writable() self._zf.close() - os.remove(self.path) + os.remove(self.file_path) self._zf = zipfile.ZipFile( - self.path, mode="w", compression=self.compression, allowZip64=self.allowZip64 + self.file_path, mode="w", compression=self.compression, allowZip64=self.allowZip64 ) async def empty(self) -> bool: @@ -116,13 +117,19 @@ def with_mode(self, mode: ZipStoreAccessModeLiteral) -> Self: # type: ignore[ov raise NotImplementedError("ZipStore cannot be reopened with a new mode.") def __str__(self) -> str: - return f"zip://{self.path}" + # lets try https://github.com/zarr-developers/zeps/pull/48/files + return f"file://{self.file_path}|zip://{self.path}" def __repr__(self) -> str: return f"ZipStore({str(self)!r})" def __eq__(self, other: object) -> bool: - return isinstance(other, type(self)) and self.path == other.path + return ( + isinstance(other, type(self)) + and self.file_path == other.file_path + and self.path == other.path + and self._zmode == other._zmode + ) def _get( self, @@ -131,7 +138,7 @@ def _get( byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: try: - with self._zf.open(key) as f: # will raise KeyError + with self._zf.open(self.resolve_key(key)) as f: # will raise KeyError if byte_range is None: return prototype.buffer.from_bytes(f.read()) start, length = byte_range @@ -156,7 +163,7 @@ async def get( assert isinstance(key, str) with self._lock: - return self._get(key, prototype=prototype, byte_range=byte_range) + return self._get(self.resolve_key(key), prototype=prototype, byte_range=byte_range) async def get_partial_values( self, @@ -166,11 +173,14 @@ async def get_partial_values( out = [] with self._lock: for key, byte_range in key_ranges: - out.append(self._get(key, prototype=prototype, byte_range=byte_range)) + out.append( + self._get(self.resolve_key(key), prototype=prototype, byte_range=byte_range) + ) return out def _set(self, key: str, value: Buffer) -> None: # generally, this should be called inside a lock + # assume that the key has already been made absolute keyinfo = zipfile.ZipInfo(filename=key, date_time=time.localtime(time.time())[:6]) keyinfo.compress_type = self.compression if keyinfo.filename[-1] == os.sep: @@ -186,17 +196,18 @@ async def set(self, key: str, value: Buffer) -> None: if not isinstance(value, Buffer): raise TypeError("ZipStore.set(): `value` must a Buffer instance") with self._lock: - self._set(key, value) + self._set(self.resolve_key(key), value) async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError async def set_if_not_exists(self, key: str, default: Buffer) -> None: + key_abs = self.resolve_key(key) self._check_writable() with self._lock: members = self._zf.namelist() - if key not in members: - self._set(key, default) + if key_abs not in members: + self._set(key_abs, default) async def delete(self, key: str) -> None: raise NotImplementedError @@ -204,7 +215,7 @@ async def delete(self, key: str) -> None: async def exists(self, key: str) -> bool: with self._lock: try: - self._zf.getinfo(key) + self._zf.getinfo(self.resolve_key(key)) except KeyError: return False else: @@ -213,7 +224,7 @@ async def exists(self, key: str) -> bool: async def list(self) -> AsyncGenerator[str, None]: with self._lock: for key in self._zf.namelist(): - yield key + yield key.lstrip("/") async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: """ @@ -233,12 +244,11 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: yield key.removeprefix(prefix) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: - if prefix.endswith("/"): - prefix = prefix[:-1] + prefix_abs = self.resolve_key(prefix) keys = self._zf.namelist() seen = set() - if prefix == "": + if prefix_abs == "": keys_unique = {k.split("/")[0] for k in keys} for key in keys_unique: if key not in seen: @@ -246,8 +256,8 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: yield key else: for key in keys: - if key.startswith(prefix + "/") and key != prefix: - k = key.removeprefix(prefix + "/").split("/")[0] + if key.startswith(prefix_abs + "/") and key != prefix_abs: + k = key.removeprefix(prefix_abs + "/").split("/")[0] if k not in seen: seen.add(k) yield k diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index 8ecbf846f3..d99cdd4420 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -322,7 +322,7 @@ async def test_set_if_not_exists(self, store: S) -> None: await store.set_if_not_exists("k", new) # no error result = await store.get(key, default_buffer_prototype()) - assert result == data_buf + assert result.to_bytes() == data_buf.to_bytes() # type: ignore[union-attr] await store.set_if_not_exists("k2", new) # no error diff --git a/tests/v3/conftest.py b/tests/v3/conftest.py index 87830f11f9..e7c9252838 100644 --- a/tests/v3/conftest.py +++ b/tests/v3/conftest.py @@ -34,7 +34,7 @@ async def parse_store( if store == "remote": return await RemoteStore.open(url=path, mode="w") if store == "zip": - return await ZipStore.open(path + "/zarr.zip", mode="w") + return await ZipStore.open(file_path=path + "/zarr.zip", mode="w") raise AssertionError diff --git a/tests/v3/test_array.py b/tests/v3/test_array.py index 5de4a4d126..6bdf346a29 100644 --- a/tests/v3/test_array.py +++ b/tests/v3/test_array.py @@ -13,7 +13,7 @@ from zarr.core.common import JSON, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import ceildiv -from zarr.core.sync import sync +from zarr.core.sync import _collect_aiterator, sync from zarr.errors import ContainsArrayError, ContainsGroupError from zarr.store import LocalStore, MemoryStore from zarr.store.common import StorePath @@ -246,7 +246,8 @@ async def test_array_v3_nan_fill_value(store: MemoryStore) -> None: assert np.isnan(arr.fill_value) assert arr.fill_value.dtype == arr.dtype # all fill value chunk is an empty chunk, and should not be written - assert len([a async for a in store.list_prefix("/")]) == 0 + contents = await _collect_aiterator(store.list_prefix("/")) + assert contents == ("zarr.json",) @pytest.mark.parametrize("store", ["local"], indirect=["store"]) diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index f401491127..da42954a7a 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -17,22 +17,22 @@ async def test_make_store_path(tmpdir: str) -> None: # str store_path = await make_store_path(str(tmpdir)) assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert Path(store_path.store.path) == Path(tmpdir) # Path store_path = await make_store_path(Path(tmpdir)) assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert Path(store_path.store.path) == Path(tmpdir) # Store store_path = await make_store_path(store_path.store) assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert Path(store_path.store.path) == Path(tmpdir) # StorePath store_path = await make_store_path(store_path) assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert Path(store_path.store.path) == Path(tmpdir) with pytest.raises(TypeError): await make_store_path(1) # type: ignore[arg-type] diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 66bc26decf..c5c9043e27 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -13,10 +13,10 @@ class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): buffer_cls = cpu.Buffer def set(self, store: MemoryStore, key: str, value: Buffer) -> None: - store._store_dict[key] = value + store._store_dict[store.resolve_key(key)] = value def get(self, store: MemoryStore, key: str) -> Buffer: - return store._store_dict[key] + return store._store_dict[store.resolve_key(key)] @pytest.fixture(params=[None, True]) def store_kwargs( diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index e99b921be5..f9b573c92b 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -27,13 +27,13 @@ def store_kwargs(self, request) -> dict[str, str | bool]: fd, temp_path = tempfile.mkstemp() os.close(fd) - return {"path": temp_path, "mode": "w"} + return {"file_path": temp_path, "mode": "w", "path": ""} def get(self, store: ZipStore, key: str) -> Buffer: return store._get(key, prototype=default_buffer_prototype()) def set(self, store: ZipStore, key: str, value: Buffer) -> None: - return store._set(key, value) + return store._set(store.resolve_key(key), value) def test_store_mode(self, store: ZipStore, store_kwargs: dict[str, Any]) -> None: assert store.mode == AccessMode.from_literal(store_kwargs["mode"]) @@ -54,7 +54,7 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> await store.set("foo", cpu.Buffer.from_bytes(b"bar")) def test_store_repr(self, store: ZipStore) -> None: - assert str(store) == f"zip://{store.path!s}" + assert str(store) == f"file://{store.file_path!s}|zip://{store.path}" def test_store_supports_writes(self, store: ZipStore) -> None: assert store.supports_writes From c6c88435e8967c34f2bba2c529053897cae51798 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Oct 2024 14:28:02 +0200 Subject: [PATCH 04/24] add _path attribute to localstore that caches a pathlib.Path version of the path attribute --- src/zarr/storage/local.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index 5a98e300ba..ae03a61225 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -20,7 +20,7 @@ def _get( - path: str, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None + path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None ) -> Buffer: """ Fetch a contiguous region of bytes from a file. @@ -35,7 +35,6 @@ def _get( and the second value specifies the total number of bytes to read. If the total value is `None`, then the entire file after the first byte will be read. """ - target = Path(path) if byte_range is not None: if byte_range[0] is None: start = 0 @@ -44,8 +43,8 @@ def _get( end = (start + byte_range[1]) if byte_range[1] is not None else None else: - return prototype.buffer.from_bytes(target.read_bytes()) - with target.open("rb") as f: + return prototype.buffer.from_bytes(path.read_bytes()) + with path.open("rb") as f: size = f.seek(0, io.SEEK_END) if start is not None: if start >= 0: @@ -105,13 +104,15 @@ class LocalStore(Store): supports_deletes: bool = True supports_partial_writes: bool = True supports_listing: bool = True + _path: Path def __init__(self, path: Path | str, *, mode: AccessModeLiteral = "r") -> None: super().__init__(mode=mode, path=str(path)) + self._path = Path(self.path) async def _open(self) -> None: if not self.mode.readonly: - Path(self.path).mkdir(parents=True, exist_ok=True) + self._path.mkdir(parents=True, exist_ok=True) return await super()._open() async def clear(self) -> None: @@ -156,7 +157,7 @@ async def get( if not self._is_open: await self._open() - path = os.path.join(self.path, key) + path = self._path / key try: return await asyncio.to_thread(_get, path, prototype, byte_range) @@ -172,7 +173,7 @@ async def get_partial_values( args = [] for key, byte_range in key_ranges: assert isinstance(key, str) - path = os.path.join(self.path, key) + path = self._path / key args.append((_get, path, prototype, byte_range)) return await concurrent_map(args, asyncio.to_thread, limit=None) # TODO: fix limit @@ -194,7 +195,7 @@ async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None: assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError("LocalStore.set(): `value` must a Buffer instance") - path = Path(self.path) / key + path = self._path / key await asyncio.to_thread(_put, path, value, start=None, exclusive=exclusive) async def set_partial_values( @@ -212,7 +213,7 @@ async def set_partial_values( async def delete(self, key: str) -> None: # docstring inherited self._check_writable() - path = Path(self.path) / key + path = self._path / key if path.is_dir(): # TODO: support deleting directories? shutil.rmtree? shutil.rmtree(path) else: @@ -220,7 +221,7 @@ async def delete(self, key: str) -> None: async def exists(self, key: str) -> bool: # docstring inherited - path = Path(self.path) / key + path = self._path / key return await asyncio.to_thread(path.is_file) async def list(self) -> AsyncGenerator[str, None]: @@ -232,21 +233,21 @@ async def list(self) -> AsyncGenerator[str, None]: """ # TODO: just invoke list_prefix with the prefix "/" to_strip = self.path + "/" - for p in Path(self.path).rglob("*"): + for p in self._path.rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited to_strip = os.path.join(self.path, prefix) - for p in (Path(self.path) / prefix).rglob("*"): + for p in (self._path / prefix).rglob("*"): if p.is_file(): yield str(p.relative_to(to_strip)) async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited base = os.path.join(self.path, prefix) - to_strip = str(base) + "/" + to_strip = base + "/" try: key_iter = Path(base).iterdir() From b5dd50c47340e2d7b329d6eee200ae5f477d1053 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Oct 2024 15:04:36 +0200 Subject: [PATCH 05/24] remove some old roots --- src/zarr/storage/common.py | 4 ++-- tests/v3/test_store/test_core.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index b640a7729b..711ee4cb36 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -231,7 +231,7 @@ async def make_store_path( result = StorePath(await MemoryStore.open(mode=mode or "w"), path=path_normalized) elif isinstance(store_like, Path): result = StorePath( - await LocalStore.open(root=store_like, mode=mode or "r"), path=path_normalized + await LocalStore.open(path=store_like, mode=mode or "r"), path=path_normalized ) elif isinstance(store_like, str): storage_options = storage_options or {} @@ -244,7 +244,7 @@ async def make_store_path( ) else: result = StorePath( - await LocalStore.open(root=Path(store_like), mode=mode or "r"), path=path_normalized + await LocalStore.open(path=Path(store_like), mode=mode or "r"), path=path_normalized ) elif isinstance(store_like, dict): # We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings. diff --git a/tests/v3/test_store/test_core.py b/tests/v3/test_store/test_core.py index 771dc3c43e..605791dac6 100644 --- a/tests/v3/test_store/test_core.py +++ b/tests/v3/test_store/test_core.py @@ -38,7 +38,7 @@ async def test_make_store_path_local( store_like = store_type(str(tmpdir)) store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert Path(store_path.store.path) == Path(tmpdir) assert store_path.path == normalize_path(path) assert store_path.store.mode.str == mode @@ -55,7 +55,7 @@ async def test_make_store_path_store_path( store_like = StorePath(LocalStore(str(tmpdir)), path="root") store_path = await make_store_path(store_like, path=path, mode=mode) assert isinstance(store_path.store, LocalStore) - assert Path(store_path.store.root) == Path(tmpdir) + assert Path(store_path.store.path) == Path(tmpdir) path_normalized = normalize_path(path) assert store_path.path == (store_like / path_normalized).path From d7fbbdc1202bfc61c0849707e5709a9b5b26690a Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Oct 2024 15:06:53 +0200 Subject: [PATCH 06/24] make store test class get and set async --- tests/v3/test_store/test_local.py | 4 ++-- tests/v3/test_store/test_memory.py | 4 ++-- tests/v3/test_store/test_zip.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index c7ae6d5111..06bd6d016d 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -18,10 +18,10 @@ class TestLocalStore(StoreTests[LocalStore, cpu.Buffer]): store_cls = LocalStore buffer_cls = cpu.Buffer - def get(self, store: LocalStore, key: str) -> Buffer: + async def get(self, store: LocalStore, key: str) -> Buffer: return self.buffer_cls.from_bytes((Path(store.path) / key).read_bytes()) - def set(self, store: LocalStore, key: str, value: Buffer) -> None: + async def set(self, store: LocalStore, key: str, value: Buffer) -> None: target = Path(store.path) / key parent = target.parent if not parent.exists(): diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 3525f941b3..6086817988 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -12,10 +12,10 @@ class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): store_cls = MemoryStore buffer_cls = cpu.Buffer - def set(self, store: MemoryStore, key: str, value: Buffer) -> None: + async def set(self, store: MemoryStore, key: str, value: Buffer) -> None: store._store_dict[store.resolve_key(key)] = value - def get(self, store: MemoryStore, key: str) -> Buffer: + async def get(self, store: MemoryStore, key: str) -> Buffer: return store._store_dict[store.resolve_key(key)] @pytest.fixture(params=[None, True]) diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index 8aadb029eb..3d1751e580 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -32,7 +32,7 @@ def store_kwargs(self, request) -> dict[str, str | bool]: async def get(self, store: ZipStore, key: str) -> Buffer: return store._get(key, prototype=default_buffer_prototype()) - def set(self, store: ZipStore, key: str, value: Buffer) -> None: + async def set(self, store: ZipStore, key: str, value: Buffer) -> None: return store._set(store.resolve_key(key), value) def test_store_mode(self, store: ZipStore, store_kwargs: dict[str, Any]) -> None: From 0b1c454f6a267aa56b40232f1c375fc335c9b342 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Oct 2024 15:07:11 +0200 Subject: [PATCH 07/24] remove an old root --- tests/v3/test_store/test_local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_store/test_local.py b/tests/v3/test_store/test_local.py index 06bd6d016d..5df60844c9 100644 --- a/tests/v3/test_store/test_local.py +++ b/tests/v3/test_store/test_local.py @@ -53,5 +53,5 @@ def test_creates_new_directory(self, tmp_path: pathlib.Path): target = tmp_path.joinpath("a", "b", "c") assert not target.exists() - store = self.store_cls(root=target, mode="w") + store = self.store_cls(path=target, mode="w") zarr.group(store=store) From d378242d88c4636ff5e314c6233747312f19fd18 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Oct 2024 15:07:57 +0200 Subject: [PATCH 08/24] resolve get before _getting in zipstore tests --- src/zarr/storage/zip.py | 3 ++- tests/v3/test_store/test_zip.py | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 2887ae4473..39fe549700 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -153,8 +153,9 @@ def _get( byte_range: ByteRangeRequest | None = None, ) -> Buffer | None: # docstring inherited + # assume the key has already been made absolute try: - with self._zf.open(self.resolve_key(key)) as f: # will raise KeyError + with self._zf.open(key) as f: # will raise KeyError if byte_range is None: return prototype.buffer.from_bytes(f.read()) start, length = byte_range diff --git a/tests/v3/test_store/test_zip.py b/tests/v3/test_store/test_zip.py index 3d1751e580..4c6ac3f7e0 100644 --- a/tests/v3/test_store/test_zip.py +++ b/tests/v3/test_store/test_zip.py @@ -30,7 +30,7 @@ def store_kwargs(self, request) -> dict[str, str | bool]: return {"file_path": temp_path, "mode": "w", "path": ""} async def get(self, store: ZipStore, key: str) -> Buffer: - return store._get(key, prototype=default_buffer_prototype()) + return store._get(store.resolve_key(key), prototype=default_buffer_prototype()) async def set(self, store: ZipStore, key: str, value: Buffer) -> None: return store._set(store.resolve_key(key), value) From 9d0d04bd171bc19599f53b77eb78993f68526948 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Thu, 17 Oct 2024 15:31:56 +0200 Subject: [PATCH 09/24] add path kwarg to gpu memorystore test fixture --- tests/v3/test_store/test_memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v3/test_store/test_memory.py b/tests/v3/test_store/test_memory.py index 6086817988..c038944a6a 100644 --- a/tests/v3/test_store/test_memory.py +++ b/tests/v3/test_store/test_memory.py @@ -62,7 +62,7 @@ async def get(self, store: MemoryStore, key: str) -> Buffer: def store_kwargs( self, request: pytest.FixtureRequest ) -> dict[str, str | None | dict[str, Buffer]]: - kwargs = {"store_dict": None, "mode": "r+"} + kwargs = {"store_dict": None, "mode": "r+", "path": ""} if request.param is True: kwargs["store_dict"] = {} return kwargs From 9a83b86ba2635871a296a245e07b7af266efcfdc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:41:18 +0200 Subject: [PATCH 10/24] zipstore store_kwargs propagates path --- tests/test_store/test_zip.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index 4c6ac3f7e0..a2c8a1987b 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -22,12 +22,12 @@ class TestZipStore(StoreTests[ZipStore, cpu.Buffer]): store_cls = ZipStore buffer_cls = cpu.Buffer - @pytest.fixture - def store_kwargs(self, request) -> dict[str, str | bool]: + @pytest.fixture(params=["", "test_path"]) + def store_kwargs(self, request: pytest.FixtureRequest) -> dict[str, str | bool]: fd, temp_path = tempfile.mkstemp() os.close(fd) - return {"file_path": temp_path, "mode": "w", "path": ""} + return {"file_path": temp_path, "mode": "w", "path": request.param} async def get(self, store: ZipStore, key: str) -> Buffer: return store._get(store.resolve_key(key), prototype=default_buffer_prototype()) From 84c545f7ac5704331c42f21d7d09cc25df076edc Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:42:02 +0200 Subject: [PATCH 11/24] storetests store_kwargs defaults to not implemented --- src/zarr/testing/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index bb041b28e9..c954562a04 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -40,7 +40,7 @@ async def get(self, store: S, key: str) -> Buffer: @pytest.fixture def store_kwargs(self) -> dict[str, Any]: - return {"mode": "r+", "path": ""} + raise NotImplementedError @pytest.fixture async def store(self, store_kwargs: dict[str, Any]) -> Store: From bd912ef458117b3aa5ed088b440bfdfcd0f977d2 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:42:59 +0200 Subject: [PATCH 12/24] memory store clear is scoped to the path --- src/zarr/storage/memory.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 2f8cdabfb3..a00a30a6f5 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -61,7 +61,9 @@ async def empty(self) -> bool: async def clear(self) -> None: # docstring inherited - self._store_dict.clear() + for k in tuple(self._store_dict.keys()): + if k.startswith(self.path): + del self._store_dict[k] def with_mode(self, mode: AccessModeLiteral) -> Self: # docstring inherited From 0c7ab00b2e0ee757217824b6f195a83a32dccad9 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:43:36 +0200 Subject: [PATCH 13/24] use relative path for zipfile listing --- src/zarr/storage/zip.py | 42 +++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 39fe549700..0d2268b4d1 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -179,9 +179,9 @@ async def get( ) -> Buffer | None: # docstring inherited assert isinstance(key, str) - + key_absolute = self.resolve_key(key) with self._lock: - return self._get(self.resolve_key(key), prototype=prototype, byte_range=byte_range) + return self._get(key_absolute, prototype=prototype, byte_range=byte_range) async def get_partial_values( self, @@ -192,9 +192,8 @@ async def get_partial_values( out = [] with self._lock: for key, byte_range in key_ranges: - out.append( - self._get(self.resolve_key(key), prototype=prototype, byte_range=byte_range) - ) + key_absolute = self.resolve_key(key) + out.append(self._get(key_absolute, prototype=prototype, byte_range=byte_range)) return out def _set(self, key: str, value: Buffer) -> None: @@ -213,21 +212,22 @@ async def set(self, key: str, value: Buffer) -> None: # docstring inherited self._check_writable() assert isinstance(key, str) + key_absolute = self.resolve_key(key) if not isinstance(value, Buffer): raise TypeError("ZipStore.set(): `value` must a Buffer instance") with self._lock: - self._set(self.resolve_key(key), value) + self._set(key_absolute, value) async def set_partial_values(self, key_start_values: Iterable[tuple[str, int, bytes]]) -> None: raise NotImplementedError async def set_if_not_exists(self, key: str, default: Buffer) -> None: - key_abs = self.resolve_key(key) + key_absolute = self.resolve_key(key) self._check_writable() with self._lock: members = self._zf.namelist() - if key_abs not in members: - self._set(key_abs, default) + if key_absolute not in members: + self._set(key_absolute, default) async def delete(self, key: str) -> None: # docstring inherited @@ -235,9 +235,10 @@ async def delete(self, key: str) -> None: async def exists(self, key: str) -> bool: # docstring inherited + key_absolute = self.resolve_key(key) with self._lock: try: - self._zf.getinfo(self.resolve_key(key)) + self._zf.getinfo(key_absolute) except KeyError: return False else: @@ -245,23 +246,24 @@ async def exists(self, key: str) -> bool: async def list(self) -> AsyncGenerator[str, None]: # docstring inherited - with self._lock: - for key in self._zf.namelist(): - yield key.lstrip("/") + async for result in self.list_prefix(""): + yield result async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - async for key in self.list(): - if key.startswith(prefix): - yield key.removeprefix(prefix) + prefix_absolute = self.resolve_key(prefix) + with self._lock: + for key in self._zf.namelist(): + if key.startswith(prefix_absolute): + yield key.removeprefix(prefix_absolute).lstrip("/") async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - prefix_abs = self.resolve_key(prefix) + prefix_absolute = self.resolve_key(prefix) keys = self._zf.namelist() seen = set() - if prefix_abs == "": + if prefix_absolute == "": keys_unique = {k.split("/")[0] for k in keys} for key in keys_unique: if key not in seen: @@ -269,8 +271,8 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: yield key else: for key in keys: - if key.startswith(prefix_abs + "/") and key != prefix_abs: - k = key.removeprefix(prefix_abs + "/").split("/")[0] + if key.startswith(prefix_absolute): + k = key.removeprefix(prefix_absolute).lstrip("/").split("/")[0] if k not in seen: seen.add(k) yield k From d04934d8270324e0e4b12f44c0e7455fe7d51467 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:44:10 +0200 Subject: [PATCH 14/24] use path in with_mode for memorystore --- src/zarr/storage/memory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index a00a30a6f5..f45d278a80 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -67,7 +67,7 @@ async def clear(self) -> None: def with_mode(self, mode: AccessModeLiteral) -> Self: # docstring inherited - return type(self)(store_dict=self._store_dict, mode=mode) + return type(self)(store_dict=self._store_dict, mode=mode, path=self.path) def __str__(self) -> str: return f"memory://{id(self._store_dict)}" From 9e65afb6dcfc71a536b60fdd2b77cd12e7caa1d5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:45:49 +0200 Subject: [PATCH 15/24] call resolve_path at the top of store routines for memorystore --- src/zarr/storage/memory.py | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index f45d278a80..3af67abbd4 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -114,7 +114,8 @@ async def _get(key: str, byte_range: ByteRangeRequest) -> Buffer | None: async def exists(self, key: str) -> bool: # docstring inherited - return self.resolve_key(key) in self._store_dict + key_absolute = self.resolve_key(key) + return key_absolute in self._store_dict async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: # docstring inherited @@ -123,25 +124,27 @@ async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None assert isinstance(key, str) if not isinstance(value, Buffer): raise TypeError(f"Expected Buffer. Got {type(value)}.") - key_abs = self.resolve_key(key) + key_absolute = self.resolve_key(key) if byte_range is not None: - buf = self._store_dict[key_abs] + buf = self._store_dict[key_absolute] buf[byte_range[0] : byte_range[1]] = value - self._store_dict[key_abs] = buf + self._store_dict[key_absolute] = buf else: - self._store_dict[key_abs] = value + self._store_dict[key_absolute] = value async def set_if_not_exists(self, key: str, value: Buffer) -> None: # docstring inherited self._check_writable() await self._ensure_open() - self._store_dict.setdefault(self.resolve_key(key), value) + key_absolute = self.resolve_key(key) + self._store_dict.setdefault(key_absolute, value) async def delete(self, key: str) -> None: # docstring inherited self._check_writable() + key_absolute = self.resolve_key(key) try: - del self._store_dict[self.resolve_key(key)] + del self._store_dict[key_absolute] except KeyError: pass @@ -155,25 +158,25 @@ async def list(self) -> AsyncGenerator[str, None]: async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - prefix_abs = self.resolve_key(prefix) + prefix_absolute = self.resolve_key(prefix) for key in self._store_dict: - if key.startswith(prefix_abs): - yield key.removeprefix(prefix_abs).lstrip("/") + if key.startswith(prefix_absolute): + yield key.removeprefix(prefix_absolute).lstrip("/") async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - prefix = self.resolve_key(prefix) + prefix_absolute = self.resolve_key(prefix) - if prefix == "": + if prefix_absolute == "": keys_unique = {k.split("/")[0] for k in self._store_dict} else: # Our dictionary doesn't contain directory markers, but we want to include # a pseudo directory when there's a nested item and we're listing an # intermediate level. keys_unique = { - key.removeprefix(prefix + "/").split("/")[0] + key.removeprefix(prefix_absolute + "/").split("/")[0] for key in self._store_dict - if key.startswith(prefix + "/") and key != prefix + if key.startswith(prefix_absolute + "/") and key != prefix_absolute } for key in keys_unique: From a5b186b147c8dc8dd55779110faf207cc669f873 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:46:19 +0200 Subject: [PATCH 16/24] use path for gpumemorystore --- src/zarr/storage/memory.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 3af67abbd4..ff67b074a3 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -206,18 +206,19 @@ def __init__( self, store_dict: MutableMapping[str, gpu.Buffer] | None = None, *, + path: str = "", mode: AccessModeLiteral = "r", ) -> None: - super().__init__(store_dict=store_dict, mode=mode) # type: ignore[arg-type] + super().__init__(store_dict=store_dict, path=path, mode=mode) # type: ignore[arg-type] def __str__(self) -> str: - return f"gpumemory://{id(self._store_dict)}" + return f"gpumemory://{id(self._store_dict)}/{self.path}" def __repr__(self) -> str: return f"GpuMemoryStore({str(self)!r})" @classmethod - def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: + def from_dict(cls, store_dict: MutableMapping[str, Buffer], path: str = "") -> Self: """ Create a GpuMemoryStore from a dictionary of buffers at any location. @@ -235,7 +236,7 @@ def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: GpuMemoryStore """ gpu_store_dict = {k: gpu.Buffer.from_buffer(v) for k, v in store_dict.items()} - return cls(gpu_store_dict) + return cls(gpu_store_dict, path=path) async def set(self, key: str, value: Buffer, byte_range: tuple[int, int] | None = None) -> None: # docstring inherited From aab41385170ffb0464db0eb66fbb2bdeffd18275 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:47:07 +0200 Subject: [PATCH 17/24] use relative key in test_list_dir --- src/zarr/testing/store.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/testing/store.py b/src/zarr/testing/store.py index c954562a04..cb42cadd75 100644 --- a/src/zarr/testing/store.py +++ b/src/zarr/testing/store.py @@ -272,7 +272,7 @@ async def test_list_prefix(self, store: S) -> None: assert observed == expected async def test_list_dir(self, store: S) -> None: - root = "foo" + root = store.resolve_key("foo") store_dict = { root + "/zarr.json": self.buffer_cls.from_bytes(b"bar"), root + "/c/1": self.buffer_cls.from_bytes(b"\x01"), From 346d291b24a4fb4ebf3af732b5f437ce91582768 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:47:43 +0200 Subject: [PATCH 18/24] fix warning test --- tests/test_store/test_zip.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index a2c8a1987b..9c2a83baf6 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -80,7 +80,7 @@ def test_api_integration(self, store: ZipStore) -> None: assert np.array_equal(data, z[:]) # you can overwrite existing chunks but zipfile will issue a warning - with pytest.warns(UserWarning, match="Duplicate name: 'foo/c/0/0'"): + with pytest.warns(UserWarning, match=f"Duplicate name: '{store.resolve_key('foo/c/0/0')}'"): z[0, 0] = 100 # TODO: assigning an entire chunk to fill value ends up deleting the chunk which is not supported From e68a3416644c3db0e857463fe725a68f3579b456 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 19:48:56 +0200 Subject: [PATCH 19/24] parameterize over path in memorystore tests --- tests/test_store/test_memory.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/test_store/test_memory.py b/tests/test_store/test_memory.py index c038944a6a..bfd84769e3 100644 --- a/tests/test_store/test_memory.py +++ b/tests/test_store/test_memory.py @@ -1,5 +1,7 @@ from __future__ import annotations +import itertools + import pytest from zarr.core.buffer import Buffer, cpu, gpu @@ -7,6 +9,8 @@ from zarr.testing.store import StoreTests from zarr.testing.utils import gpu_test +memory_store_kwargs = tuple(itertools.product((None, True), ("", "foo"))) + class TestMemoryStore(StoreTests[MemoryStore, cpu.Buffer]): store_cls = MemoryStore @@ -18,19 +22,17 @@ async def set(self, store: MemoryStore, key: str, value: Buffer) -> None: async def get(self, store: MemoryStore, key: str) -> Buffer: return store._store_dict[store.resolve_key(key)] - @pytest.fixture(params=[None, True]) + @pytest.fixture(params=memory_store_kwargs) def store_kwargs( self, request: pytest.FixtureRequest ) -> dict[str, str | None | dict[str, Buffer]]: - kwargs = {"store_dict": None, "mode": "r+", "path": ""} - if request.param is True: + store_dict_req, path = request.param + kwargs = {"store_dict": store_dict_req, "mode": "r+", "path": path} + if store_dict_req is True: + # use a new empty dict each invocation of the function kwargs["store_dict"] = {} return kwargs - @pytest.fixture - def store(self, store_kwargs: str | None | dict[str, Buffer]) -> MemoryStore: - return self.store_cls(**store_kwargs) - def test_store_repr(self, store: MemoryStore) -> None: assert str(store) == f"memory://{id(store._store_dict)}" From 59ac9edd255c79d45e7c5bb9d372576e5dfcb1e5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 21:54:49 +0200 Subject: [PATCH 20/24] lint docstring --- src/zarr/storage/local.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index ae03a61225..899dd2f59d 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -7,7 +7,7 @@ from pathlib import Path from typing import TYPE_CHECKING -from zarr.abc.store import ByteRangeRequest, Store +from zarr.abc.store import Store from zarr.core.buffer import Buffer from zarr.core.common import concurrent_map @@ -15,21 +15,23 @@ from collections.abc import AsyncGenerator, Iterable from typing import Self + from zarr.abc.store import ByteRangeRequest from zarr.core.buffer import BufferPrototype from zarr.core.common import AccessModeLiteral -def _get( - path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None -) -> Buffer: +def _get(path: Path, prototype: BufferPrototype, byte_range: ByteRangeRequest | None) -> Buffer: """ Fetch a contiguous region of bytes from a file. Parameters ---------- - path: Path + + path : Path The file to read bytes from. - byte_range: tuple[int, int | None] | None = None + prototype : BufferPrototype + The buffer prototype to use when reading the bytes. + byte_range : tuple[int | None, int | None] | None = None The range of bytes to read. If `byte_range` is `None`, then the entire file will be read. If `byte_range` is a tuple, the first value specifies the index of the first byte to read, and the second value specifies the total number of bytes to read. If the total value is @@ -86,7 +88,7 @@ class LocalStore(Store): Parameters ---------- - root : str or Path + path : str or Path Directory to use as root of store. mode : str Mode in which to open the store. Either 'r', 'r+', 'a', 'w', 'w-'. @@ -97,7 +99,7 @@ class LocalStore(Store): supports_deletes supports_partial_writes supports_listing - root + path """ supports_writes: bool = True From fc4a780aba70f15d794bcc4e76c37f4d0704fa66 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 22:03:30 +0200 Subject: [PATCH 21/24] put path in memorystore and gpu memorystore reprs --- src/zarr/storage/memory.py | 2 +- tests/test_store/test_memory.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 30a8d03f9c..bd8b01ae6b 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -70,7 +70,7 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: return type(self)(store_dict=self._store_dict, mode=mode, path=self.path) def __str__(self) -> str: - return f"memory://{id(self._store_dict)}" + return f"memory://{id(self._store_dict)}/{self.path}" def __repr__(self) -> str: return f"MemoryStore({str(self)!r})" diff --git a/tests/test_store/test_memory.py b/tests/test_store/test_memory.py index bfd84769e3..3325f292a1 100644 --- a/tests/test_store/test_memory.py +++ b/tests/test_store/test_memory.py @@ -34,7 +34,7 @@ def store_kwargs( return kwargs def test_store_repr(self, store: MemoryStore) -> None: - assert str(store) == f"memory://{id(store._store_dict)}" + assert str(store) == f"memory://{id(store._store_dict)}/{store.path}" def test_store_supports_writes(self, store: MemoryStore) -> None: assert store.supports_writes @@ -74,7 +74,7 @@ def store(self, store_kwargs: str | None | dict[str, gpu.Buffer]) -> GpuMemorySt return self.store_cls(**store_kwargs) def test_store_repr(self, store: GpuMemoryStore) -> None: - assert str(store) == f"gpumemory://{id(store._store_dict)}" + assert str(store) == f"gpumemory://{id(store._store_dict)}/{store.path}" def test_store_supports_writes(self, store: GpuMemoryStore) -> None: assert store.supports_writes From a92cd4ae0b2eef48de975fce8d65d838374a93c5 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 22:05:11 +0200 Subject: [PATCH 22/24] update localstore repr --- src/zarr/storage/local.py | 2 +- tests/test_store/test_local.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/local.py b/src/zarr/storage/local.py index 899dd2f59d..c34e7a2a55 100644 --- a/src/zarr/storage/local.py +++ b/src/zarr/storage/local.py @@ -141,7 +141,7 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: return type(self)(path=self.path, mode=mode) def __str__(self) -> str: - return f"file://{self.path}" + return f"file:///{self.path}" def __repr__(self) -> str: return f"LocalStore({str(self)!r})" diff --git a/tests/test_store/test_local.py b/tests/test_store/test_local.py index 5df60844c9..225bdc647a 100644 --- a/tests/test_store/test_local.py +++ b/tests/test_store/test_local.py @@ -33,7 +33,7 @@ def store_kwargs(self, tmpdir) -> dict[str, str]: return {"path": str(tmpdir), "mode": "r+"} def test_store_repr(self, store: LocalStore) -> None: - assert str(store) == f"file://{store.path!s}" + assert str(store) == f"file:///{store.path}" def test_store_supports_writes(self, store: LocalStore) -> None: assert store.supports_writes From 1bd1714ec6bf0c20decbd4571ae2251ab57fdd82 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Sun, 20 Oct 2024 22:18:07 +0200 Subject: [PATCH 23/24] update zipstore repr --- src/zarr/storage/zip.py | 2 +- tests/test_store/test_zip.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index 0d2268b4d1..b04a6012a8 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -133,7 +133,7 @@ def with_mode(self, mode: ZipStoreAccessModeLiteral) -> Self: # type: ignore[ov def __str__(self) -> str: # lets try https://github.com/zarr-developers/zeps/pull/48/files - return f"file://{self.file_path}|zip://{self.path}" + return f"file:///{self.file_path}|zip://{self.path}" def __repr__(self) -> str: return f"ZipStore({str(self)!r})" diff --git a/tests/test_store/test_zip.py b/tests/test_store/test_zip.py index 9c2a83baf6..74910af7ba 100644 --- a/tests/test_store/test_zip.py +++ b/tests/test_store/test_zip.py @@ -54,7 +54,7 @@ async def test_not_writable_store_raises(self, store_kwargs: dict[str, Any]) -> await store.set("foo", cpu.Buffer.from_bytes(b"bar")) def test_store_repr(self, store: ZipStore) -> None: - assert str(store) == f"file://{store.file_path!s}|zip://{store.path}" + assert str(store) == f"file:///{store.file_path!s}|zip://{store.path}" def test_store_supports_writes(self, store: ZipStore) -> None: assert store.supports_writes From 808e9ff2c654f4e97b86e5b1cbf50ee816626a4c Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Mon, 21 Oct 2024 23:15:54 +0200 Subject: [PATCH 24/24] change default path for remotestore to '' --- src/zarr/storage/remote.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 30f2970797..47181bde59 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -62,7 +62,7 @@ def __init__( self, fs: AsyncFileSystem, mode: AccessModeLiteral = "r", - path: str = "/", + path: str = "", allowed_exceptions: tuple[type[Exception], ...] = ALLOWED_EXCEPTIONS, ) -> None: super().__init__(mode=mode)