Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feat/store paths #2272

Open
wants to merge 32 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
736ef8a
add path attribute to stores; migrate localstore to the new protocol
d-v-b Sep 27, 2024
8189cc7
add path kwarg to memory test fixture
d-v-b Sep 28, 2024
9696222
Merge branch 'v3' of https://github.com/zarr-developers/zarr-python i…
d-v-b Sep 28, 2024
fa13860
add path to stores
d-v-b Sep 30, 2024
c5586ce
Merge branch 'v3' of https://github.com/zarr-developers/zarr-python i…
d-v-b Sep 30, 2024
b4b9c28
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 17, 2024
c6c8843
add _path attribute to localstore that caches a pathlib.Path version …
d-v-b Oct 17, 2024
b5dd50c
remove some old roots
d-v-b Oct 17, 2024
d7fbbdc
make store test class get and set async
d-v-b Oct 17, 2024
0b1c454
remove an old root
d-v-b Oct 17, 2024
d378242
resolve get before _getting in zipstore tests
d-v-b Oct 17, 2024
669e258
Merge branch 'main' into feat/store-paths
d-v-b Oct 17, 2024
9d0d04b
add path kwarg to gpu memorystore test fixture
d-v-b Oct 17, 2024
471740b
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 18, 2024
9a83b86
zipstore store_kwargs propagates path
d-v-b Oct 20, 2024
84c545f
storetests store_kwargs defaults to not implemented
d-v-b Oct 20, 2024
bd912ef
memory store clear is scoped to the path
d-v-b Oct 20, 2024
0c7ab00
use relative path for zipfile listing
d-v-b Oct 20, 2024
d04934d
use path in with_mode for memorystore
d-v-b Oct 20, 2024
9e65afb
call resolve_path at the top of store routines for memorystore
d-v-b Oct 20, 2024
a5b186b
use path for gpumemorystore
d-v-b Oct 20, 2024
aab4138
use relative key in test_list_dir
d-v-b Oct 20, 2024
346d291
fix warning test
d-v-b Oct 20, 2024
e68a341
parameterize over path in memorystore tests
d-v-b Oct 20, 2024
eb93c24
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 20, 2024
59ac9ed
lint docstring
d-v-b Oct 20, 2024
fc4a780
put path in memorystore and gpu memorystore reprs
d-v-b Oct 20, 2024
a92cd4a
update localstore repr
d-v-b Oct 20, 2024
1bd1714
update zipstore repr
d-v-b Oct 20, 2024
808e9ff
change default path for remotestore to ''
d-v-b Oct 21, 2024
3d1d043
Merge branch 'main' of github.com:zarr-developers/zarr-python into fe…
d-v-b Oct 28, 2024
b5ef33c
Merge branch 'feat/store-paths' of github.com:d-v-b/zarr-python into …
d-v-b Oct 28, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 44 additions & 4 deletions src/zarr/abc/store.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,26 @@ def from_literal(cls, mode: AccessModeLiteral) -> Self:
class Store(ABC):
_mode: AccessMode
_is_open: bool

def __init__(self, mode: AccessModeLiteral = "r", *args: Any, **kwargs: Any) -> None:
self._is_open = False
self._mode = AccessMode.from_literal(mode)
path: str

# TODO: Make store immutable
# def __setattr__(self, *args: Any, **kwargs: Any) -> None:
# msg = (
# 'Stores are immutable. To modify a Store object, create a new one with the desired'
# 'attributes')
# raise NotImplementedError(msg)

def __init__(self, path: str = "", mode: AccessModeLiteral = "r") -> None:
object.__setattr__(self, "_is_open", False)
object.__setattr__(self, "_mode", AccessMode.from_literal(mode))
object.__setattr__(self, "path", validate_path(path))

def resolve_key(self, key: str) -> str:
key = parse_path(key)
if self.path == "":
return key
else:
return f"{self.path}/{key}"

@classmethod
async def open(cls, *args: Any, **kwargs: Any) -> Self:
Expand Down Expand Up @@ -320,6 +336,13 @@ async def _get_many(
for req in requests:
yield (req[0], await self.get(*req))

def with_path(self, path: str) -> Self:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def with_path(self, path: str) -> Self:
@abstractmethod
def with_path(self, path: str) -> Self:

Or do you think think we can do this in a generic way that applies to all stores?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we can make it generic unfortunately, because each store class might have its own extra attributes. maybe we could use __getstate__ to get around this?

"""
Return a copy of this store with a new path attribute
"""
# TODO: implement me
raise NotImplementedError


@runtime_checkable
class ByteGetter(Protocol):
Expand All @@ -346,3 +369,20 @@ async def set_or_delete(byte_setter: ByteSetter, value: Buffer | None) -> None:
await byte_setter.delete()
else:
await byte_setter.set(value)


def validate_path(path: str) -> str:
"""
Ensure that the input string is a valid relative path in the abstract zarr object storage scheme.
"""
if path.endswith("/"):
raise ValueError(f"Invalid path: {path} ends with '/'.")
if "//" in path:
raise ValueError(f"Invalid path: {path} contains '//'.")
if "\\" in path:
raise ValueError(f"Invalid path: {path} contains '\"'.")
return path


def parse_path(path: str) -> str:
return path.rstrip("/").lstrip("/")
4 changes: 2 additions & 2 deletions src/zarr/store/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ async def make_store_path(
mode = "w" # exception to the default mode = 'r'
result = StorePath(await MemoryStore.open(mode=mode))
elif isinstance(store_like, Path):
result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r"))
result = StorePath(await LocalStore.open(path=store_like, mode=mode or "r"))
elif isinstance(store_like, str):
storage_options = storage_options or {}

Expand All @@ -111,7 +111,7 @@ async def make_store_path(
RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r")
)
else:
result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r"))
result = StorePath(await LocalStore.open(path=store_like, mode=mode or "r"))
elif isinstance(store_like, dict):
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
Expand Down
65 changes: 31 additions & 34 deletions src/zarr/store/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@
import os
import shutil
from pathlib import Path
from typing import TYPE_CHECKING, Self
from typing import TYPE_CHECKING

from zarr.abc.store import ByteRangeRequest, Store
from zarr.core.buffer import Buffer
from zarr.core.common import concurrent_map, to_thread

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Iterable
from typing import Self

from zarr.core.buffer import BufferPrototype
from zarr.core.common import AccessModeLiteral


def _get(
path: Path, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None
path: str, prototype: BufferPrototype, byte_range: tuple[int | None, int | None] | None
) -> Buffer:
"""
Fetch a contiguous region of bytes from a file.
Expand All @@ -33,6 +34,7 @@ def _get(
and the second value specifies the total number of bytes to read. If the total value is
`None`, then the entire file after the first byte will be read.
"""
target = Path(path)
if byte_range is not None:
if byte_range[0] is None:
start = 0
Expand All @@ -41,8 +43,8 @@ def _get(

end = (start + byte_range[1]) if byte_range[1] is not None else None
else:
return prototype.buffer.from_bytes(path.read_bytes())
with path.open("rb") as f:
return prototype.buffer.from_bytes(target.read_bytes())
with target.open("rb") as f:
size = f.seek(0, io.SEEK_END)
if start is not None:
if start >= 0:
Expand Down Expand Up @@ -84,23 +86,17 @@ class LocalStore(Store):
supports_partial_writes: bool = True
supports_listing: bool = True

root: Path

def __init__(self, root: Path | str, *, mode: AccessModeLiteral = "r") -> None:
super().__init__(mode=mode)
if isinstance(root, str):
root = Path(root)
assert isinstance(root, Path)
self.root = root
def __init__(self, path: Path | str, *, mode: AccessModeLiteral = "r") -> None:
super().__init__(mode=mode, path=str(path))

async def clear(self) -> None:
self._check_writable()
shutil.rmtree(self.root)
self.root.mkdir()
shutil.rmtree(self.path)
os.mkdir(self.path)

async def empty(self) -> bool:
try:
with os.scandir(self.root) as it:
with os.scandir(self.path) as it:
for entry in it:
if entry.is_file():
# stop once a file is found
Expand All @@ -111,16 +107,16 @@ async def empty(self) -> bool:
return True

def with_mode(self, mode: AccessModeLiteral) -> Self:
return type(self)(root=self.root, mode=mode)
return type(self)(path=self.path, mode=mode)

def __str__(self) -> str:
return f"file://{self.root}"
return f"file://{self.path}"

def __repr__(self) -> str:
return f"LocalStore({str(self)!r})"

def __eq__(self, other: object) -> bool:
return isinstance(other, type(self)) and self.root == other.root
return isinstance(other, type(self)) and self.path == other.path

async def get(
self,
Expand All @@ -130,8 +126,8 @@ async def get(
) -> Buffer | None:
if not self._is_open:
await self._open()
assert isinstance(key, str)
path = self.root / key

path = os.path.join(self.path, key)

try:
return await to_thread(_get, path, prototype, byte_range)
Expand All @@ -157,7 +153,7 @@ async def get_partial_values(
args = []
for key, byte_range in key_ranges:
assert isinstance(key, str)
path = self.root / key
path = os.path.join(self.path, key)
args.append((_get, path, prototype, byte_range))
return await concurrent_map(args, to_thread, limit=None) # TODO: fix limit

Expand All @@ -177,8 +173,8 @@ async def _set(self, key: str, value: Buffer, exclusive: bool = False) -> None:
assert isinstance(key, str)
if not isinstance(value, Buffer):
raise TypeError("LocalStore.set(): `value` must a Buffer instance")
path = self.root / key
await to_thread(_put, path, value, start=None, exclusive=exclusive)
path = Path(self.path) / key
await to_thread(_put, path, value, exclusive=exclusive)

async def set_partial_values(
self, key_start_values: Iterable[tuple[str, int, bytes | bytearray | memoryview]]
Expand All @@ -187,20 +183,20 @@ async def set_partial_values(
args = []
for key, start, value in key_start_values:
assert isinstance(key, str)
path = self.root / key
path = os.path.join(self.path, key)
args.append((_put, path, value, start))
await concurrent_map(args, to_thread, limit=None) # TODO: fix limit

async def delete(self, key: str) -> None:
self._check_writable()
path = self.root / key
path = Path(self.path) / key
if path.is_dir(): # TODO: support deleting directories? shutil.rmtree?
shutil.rmtree(path)
else:
await to_thread(path.unlink, True) # Q: we may want to raise if path is missing

async def exists(self, key: str) -> bool:
path = self.root / key
path = Path(self.path) / key
return await to_thread(path.is_file)

async def list(self) -> AsyncGenerator[str, None]:
Expand All @@ -210,10 +206,11 @@ async def list(self) -> AsyncGenerator[str, None]:
-------
AsyncGenerator[str, None]
"""
to_strip = str(self.root) + "/"
for p in list(self.root.rglob("*")):
# TODO: just invoke list_prefix with the prefix "/"
to_strip = self.path + "/"
for p in Path(self.path).rglob("*"):
if p.is_file():
yield str(p).replace(to_strip, "")
yield str(p.relative_to(to_strip))

async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
"""
Expand All @@ -228,8 +225,8 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]:
-------
AsyncGenerator[str, None]
"""
to_strip = os.path.join(str(self.root / prefix))
for p in (self.root / prefix).rglob("*"):
to_strip = os.path.join(self.path, prefix)
for p in (Path(self.path) / prefix).rglob("*"):
if p.is_file():
yield str(p.relative_to(to_strip))

Expand All @@ -247,12 +244,12 @@ async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]:
AsyncGenerator[str, None]
"""

base = self.root / prefix
base = os.path.join(self.path, prefix)
to_strip = str(base) + "/"

try:
key_iter = base.iterdir()
key_iter = Path(base).iterdir()
for key in key_iter:
yield str(key).replace(to_strip, "")
yield str(key.relative_to(to_strip))
except (FileNotFoundError, NotADirectoryError):
pass
3 changes: 2 additions & 1 deletion src/zarr/store/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,14 @@
import time
from collections import defaultdict
from contextlib import contextmanager
from typing import TYPE_CHECKING, Self
from typing import TYPE_CHECKING

from zarr.abc.store import AccessMode, ByteRangeRequest, Store
from zarr.core.buffer import Buffer

if TYPE_CHECKING:
from collections.abc import AsyncGenerator, Generator, Iterable
from typing import Self

from zarr.core.buffer import Buffer, BufferPrototype
from zarr.core.common import AccessModeLiteral
Expand Down
Loading
Loading