Skip to content

Commit

Permalink
simplify storepath creation
Browse files Browse the repository at this point in the history
  • Loading branch information
d-v-b committed Oct 15, 2024
1 parent 85af0bd commit e331a3b
Show file tree
Hide file tree
Showing 5 changed files with 74 additions and 41 deletions.
33 changes: 9 additions & 24 deletions src/zarr/api/asynchronous.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,7 @@ async def consolidate_metadata(
The group, with the ``consolidated_metadata`` field set to include
the metadata of each child node.
"""
store_path = await make_store_path(store)

if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, path=path)

group = await AsyncGroup.open(store_path, zarr_format=zarr_format, use_consolidated=False)
group.store_path.store._check_writable()
Expand Down Expand Up @@ -291,10 +288,7 @@ async def open(
"""
zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

store_path = await make_store_path(store, mode=mode, storage_options=storage_options)

if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, mode=mode, path=path, storage_options=storage_options)

if "shape" not in kwargs and mode in {"a", "w", "w-"}:
try:
Expand Down Expand Up @@ -401,9 +395,7 @@ async def save_array(
)

mode = kwargs.pop("mode", None)
store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
new = await AsyncArray.create(
store_path,
zarr_format=zarr_format,
Expand Down Expand Up @@ -582,9 +574,7 @@ async def group(

mode = None if isinstance(store, Store) else cast(AccessModeLiteral, "a")

store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

if chunk_store is not None:
warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2)
Expand Down Expand Up @@ -697,9 +687,7 @@ async def open_group(
if chunk_store is not None:
warnings.warn("chunk_store is not yet implemented", RuntimeWarning, stacklevel=2)

store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, mode=mode, storage_options=storage_options, path=path)

if attributes is None:
attributes = {}
Expand Down Expand Up @@ -883,9 +871,7 @@ async def create(
if not isinstance(store, Store | StorePath):
mode = "a"

store_path = await make_store_path(store, mode=mode, storage_options=storage_options)
if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)

return await AsyncArray.create(
store_path,
Expand Down Expand Up @@ -925,6 +911,7 @@ async def empty(
retrieve data from an empty Zarr array, any values may be returned,
and these are not guaranteed to be stable from one access to the next.
"""

return await create(shape=shape, fill_value=None, **kwargs)


Expand Down Expand Up @@ -1044,7 +1031,7 @@ async def open_array(
store: StoreLike | None = None,
zarr_version: ZarrFormat | None = None, # deprecated
zarr_format: ZarrFormat | None = None,
path: PathLike | None = None,
path: PathLike = "",
storage_options: dict[str, Any] | None = None,
**kwargs: Any, # TODO: type kwargs as valid args to save
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
Expand All @@ -1071,9 +1058,7 @@ async def open_array(
"""

mode = kwargs.pop("mode", None)
store_path = await make_store_path(store, mode=mode)
if path is not None:
store_path = store_path / path
store_path = await make_store_path(store, path=path, mode=mode)

zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format)

Expand Down
19 changes: 13 additions & 6 deletions src/zarr/storage/_utils.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
from __future__ import annotations

from pathlib import Path
from typing import TYPE_CHECKING

from upath import UPath

if TYPE_CHECKING:
from zarr.core.buffer import Buffer


def normalize_path(path: str | bytes | object) -> str:
def normalize_path(path: str | bytes | Path | UPath | None) -> str:
# handle bytes
if isinstance(path, bytes):
if path is None:
result = ""
elif isinstance(path, bytes):
result = str(path, "ascii")

# ensure str
elif not isinstance(path, str):
elif isinstance(path, Path | UPath):
result = str(path)

else:
elif isinstance(path, str):
result = path

else:
raise TypeError(f'Object {path} has an invalid type for "path": {type(path).__name__}')

# convert backslash to forward slash
result = result.replace("\\", "/")

Expand All @@ -44,7 +51,7 @@ def normalize_path(path: str | bytes | object) -> str:
segments = result.split("/")
if any(s in {".", ".."} for s in segments):
raise ValueError(
f"The path {path} is invalid because its string representation contains '.' or '..' segments."
f"The path {path!r} is invalid because its string representation contains '.' or '..' segments."
)

return result
Expand Down
34 changes: 23 additions & 11 deletions src/zarr/storage/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from zarr.core.buffer import Buffer, default_buffer_prototype
from zarr.core.common import ZARR_JSON, ZARRAY_JSON, ZGROUP_JSON, ZarrFormat
from zarr.errors import ContainsArrayAndGroupError, ContainsArrayError, ContainsGroupError
from zarr.storage._utils import normalize_path
from zarr.storage.local import LocalStore
from zarr.storage.memory import MemoryStore

Expand Down Expand Up @@ -41,9 +42,9 @@ class StorePath:
store: Store
path: str

def __init__(self, store: Store, path: str | None = None) -> None:
def __init__(self, store: Store, path: str = "") -> None:
self.store = store
self.path = path or ""
self.path = path

async def get(
self,
Expand Down Expand Up @@ -159,6 +160,7 @@ def __eq__(self, other: object) -> bool:
async def make_store_path(
store_like: StoreLike | None,
*,
path: str | None = "",
mode: AccessModeLiteral | None = None,
storage_options: dict[str, Any] | None = None,
) -> StorePath:
Expand Down Expand Up @@ -189,6 +191,9 @@ async def make_store_path(
----------
store_like : StoreLike | None
The object to convert to a `StorePath` object.
path: str | None, optional
The path to use when creating the `StorePath` object. If None, the
default path is the empty string.
mode : AccessModeLiteral | None, optional
The mode to use when creating the `StorePath` object. If None, the
default mode is 'r'.
Expand All @@ -209,37 +214,44 @@ async def make_store_path(
from zarr.storage.remote import RemoteStore # circular import

used_storage_options = False

path_normalized = normalize_path(path)
if isinstance(store_like, StorePath):
if mode is not None and mode != store_like.store.mode.str:
_store = store_like.store.with_mode(mode)
await _store._ensure_open()
store_like = StorePath(_store)
result = store_like
store_like = StorePath(_store, path=store_like.path) / path_normalized
result = store_like / path_normalized
elif isinstance(store_like, Store):
if mode is not None and mode != store_like.mode.str:
store_like = store_like.with_mode(mode)
await store_like._ensure_open()
result = StorePath(store_like)
result = StorePath(store_like, path=path_normalized)
elif store_like is None:
# mode = "w" is an exception to the default mode = 'r'
result = StorePath(await MemoryStore.open(mode=mode or "w"))
result = StorePath(await MemoryStore.open(mode=mode or "w"), path=path_normalized)
elif isinstance(store_like, Path):
result = StorePath(await LocalStore.open(root=store_like, mode=mode or "r"))
result = StorePath(
await LocalStore.open(root=store_like, mode=mode or "r"), path=path_normalized
)
elif isinstance(store_like, str):
storage_options = storage_options or {}

if _is_fsspec_uri(store_like):
used_storage_options = True
result = StorePath(
RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r")
RemoteStore.from_url(store_like, storage_options=storage_options, mode=mode or "r"),
path=path_normalized,
)
else:
result = StorePath(await LocalStore.open(root=Path(store_like), mode=mode or "r"))
result = StorePath(
await LocalStore.open(root=Path(store_like), mode=mode or "r"), path=path_normalized
)
elif isinstance(store_like, dict):
# We deliberate only consider dict[str, Buffer] here, and not arbitrary mutable mappings.
# By only allowing dictionaries, which are in-memory, we know that MemoryStore appropriate.
result = StorePath(await MemoryStore.open(store_dict=store_like, mode=mode or "r"))
result = StorePath(
await MemoryStore.open(store_dict=store_like, mode=mode or "r"), path=path_normalized
)
else:
msg = f"Unsupported type for store_like: '{type(store_like).__name__}'" # type: ignore[unreachable]
raise TypeError(msg)
Expand Down
1 change: 1 addition & 0 deletions tests/v3/test_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ def test_group_array_creation(
assert empty_array.fill_value == 0
assert empty_array.shape == shape
assert empty_array.store_path.store == store
assert empty_array.store_path.path == "empty"

empty_like_array = group.empty_like(name="empty_like", data=empty_array)
assert isinstance(empty_like_array, Array)
Expand Down
28 changes: 28 additions & 0 deletions tests/v3/test_store/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@
from pathlib import Path

import pytest
from upath import UPath

from zarr.storage._utils import normalize_path
from zarr.storage.common import StoreLike, StorePath, make_store_path
from zarr.storage.local import LocalStore
from zarr.storage.memory import MemoryStore
Expand Down Expand Up @@ -65,3 +67,29 @@ async def test_make_store_path_storage_options_raises(store_like: StoreLike) ->
async def test_unsupported() -> None:
with pytest.raises(TypeError, match="Unsupported type for store_like: 'int'"):
await make_store_path(1) # type: ignore[arg-type]


@pytest.mark.parametrize(
"path",
[
"/foo/bar",
"//foo/bar",
"foo///bar",
"foo/bar///",
Path("foo/bar"),
b"foo/bar",
UPath("foo/bar"),
],
)
def test_normalize_path_valid(path: str | bytes | Path | UPath) -> None:
assert normalize_path(path) == "foo/bar"


def test_normalize_path_none():
assert normalize_path(None) == ""


@pytest.mark.parametrize("path", [".", ".."])
def test_normalize_path_invalid(path: str):
with pytest.raises(ValueError):
normalize_path(path)

0 comments on commit e331a3b

Please sign in to comment.