From e492be23f8c36095a29902953e19a0892dd65c89 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Tue, 26 Mar 2024 21:04:57 +0100 Subject: [PATCH 01/15] feat: functional .children method for groups --- src/zarr/v3/group.py | 37 ++++++++++++++++++++++++++++++++++--- tests/test_group_v3.py | 4 +++- 2 files changed, 37 insertions(+), 4 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index acd5ca0d62..4a84cbed80 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -4,7 +4,17 @@ import asyncio import json import logging -from typing import Any, Dict, Literal, Optional, Union, AsyncIterator, Iterator, List +from typing import ( + Any, + AsyncGenerator, + Dict, + Literal, + Optional, + Union, + AsyncIterator, + Iterator, + List, +) from zarr.v3.abc.metadata import Metadata from zarr.v3.array import AsyncArray, Array @@ -271,8 +281,29 @@ def __repr__(self): async def nchildren(self) -> int: raise NotImplementedError - async def children(self) -> AsyncIterator[AsyncArray, AsyncGroup]: - raise NotImplementedError + async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: + """ + Returns an async iterator over the arrays and groups contained in this group. + """ + if not self.store_path.store.supports_listing: + msg = ( + f"The store associated with this group ({type(self.store_path.store)}) " + "does not support listing, " + "specifically the `list_dir` method. " + "This function requires a store that supports listing." + ) + + raise ValueError(msg) + subkeys = await self.store_path.store.list_dir(self.store_path.path) + # would be nice to make these special keys accessible programmatically, + # and scoped to specific zarr versions + subkeys_filtered = filter(lambda v: v not in ("zarr.json", ".zgroup", ".zattrs"), subkeys) + # might be smarter to wrap this in asyncio gather + for subkey in subkeys_filtered: + try: + yield await self.getitem(subkey) + except ValueError: + pass async def contains(self, child: str) -> bool: raise NotImplementedError diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 1498d6779b..01859bb7ae 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -21,13 +21,15 @@ def test_group(store_path) -> None: runtime_configuration=RuntimeConfiguration(), ) group = Group(agroup) - assert agroup.metadata is group.metadata # create two groups foo = group.create_group("foo") bar = foo.create_group("bar", attributes={"baz": "qux"}) + # check that bar is in the children of foo + assert foo.children == [bar] + # create an array from the "bar" group data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) arr = bar.create_array( From b7f66c7ecea7d79fb911da1a4d48d01f67b225ec Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 12:21:48 +0100 Subject: [PATCH 02/15] changes necessary for correctly generating list of children --- src/zarr/v3/group.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 4a84cbed80..84e0100d81 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -80,7 +80,7 @@ def to_dict(self) -> Dict[str, Any]: class AsyncGroup: metadata: GroupMetadata store_path: StorePath - runtime_configuration: RuntimeConfiguration + runtime_configuration: RuntimeConfiguration = RuntimeConfiguration() @classmethod async def create( @@ -164,11 +164,21 @@ async def getitem( store_path = self.store_path / key + # Note: + # in zarr-python v2, we first check if `key` references an Array, else if `key` references + # a group,using standalone `contains_array` and `contains_group` functions. These functions + # are reusable, but for v3 they would perform redundant I/O operations. + # Not clear how much of that strategy we want to keep here. + + # if `key` names an object in storage, it cannot be an array or group + if await store_path.exists(): + raise KeyError(key) + if self.metadata.zarr_format == 3: zarr_json_bytes = await (store_path / ZARR_JSON).get() if zarr_json_bytes is None: # implicit group? - logger.warning("group at {} is an implicit group", store_path) + logger.warning("group at %s is an implicit group", store_path) zarr_json = { "zarr_format": self.metadata.zarr_format, "node_type": "group", @@ -205,7 +215,7 @@ async def getitem( else: if zgroup_bytes is None: # implicit group? - logger.warning("group at {} is an implicit group", store_path) + logger.warning("group at %s is an implicit group", store_path) zgroup = ( json.loads(zgroup_bytes) if zgroup_bytes is not None @@ -283,13 +293,14 @@ async def nchildren(self) -> int: async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: """ - Returns an async iterator over the arrays and groups contained in this group. + Returns an AsyncGenerator over the arrays and groups contained in this group. + This method requires that `store_path.store` supports directory listing. """ if not self.store_path.store.supports_listing: msg = ( f"The store associated with this group ({type(self.store_path.store)}) " "does not support listing, " - "specifically the `list_dir` method. " + "specifically via the `list_dir` method. " "This function requires a store that supports listing." ) @@ -298,11 +309,13 @@ async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: # would be nice to make these special keys accessible programmatically, # and scoped to specific zarr versions subkeys_filtered = filter(lambda v: v not in ("zarr.json", ".zgroup", ".zattrs"), subkeys) - # might be smarter to wrap this in asyncio gather + # is there a better way to schedule this? for subkey in subkeys_filtered: try: yield await self.getitem(subkey) - except ValueError: + except KeyError: + # keyerror is raised when `subkey``names an object in the store + # in which case `subkey` cannot be the name of a sub-array or sub-group. pass async def contains(self, child: str) -> bool: @@ -436,7 +449,7 @@ def nchildren(self) -> int: return self._sync(self._async_group.nchildren) @property - def children(self) -> List[Array, Group]: + def children(self) -> List[Array | Group]: _children = self._sync_iter(self._async_group.children) return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] From c7b333a3064de462d5c590581f6fb70836202a56 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 12:22:18 +0100 Subject: [PATCH 03/15] add stand-alone test for group.children --- tests/test_group_v3.py | 58 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 54 insertions(+), 4 deletions(-) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 01859bb7ae..3c8e33db8d 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -4,6 +4,8 @@ from zarr.v3.group import AsyncGroup, Group, GroupMetadata from zarr.v3.store import LocalStore, StorePath from zarr.v3.config import RuntimeConfiguration +from zarr.v3.store.remote import RemoteStore +from zarr.v3.sync import sync @pytest.fixture @@ -13,8 +15,59 @@ def store_path(tmpdir): return p -def test_group(store_path) -> None: +@pytest.fixture(scope="function") +def local_store(tmpdir): + return LocalStore(str(tmpdir)) + +@pytest.fixture(scope="function") +def remote_store(): + return RemoteStore() + + +@pytest.mark.parametrize("store_type", ("local_store",)) +def test_group_children(store_type, request): + store: LocalStore | RemoteStore = request.getfixturevalue(store_type) + path = "group" + agroup = AsyncGroup( + metadata=GroupMetadata(), + store_path=StorePath(store=store, path=path), + ) + group = Group(agroup) + + subgroup = group.create_group("subgroup") + # make a sub-sub-subgroup, to ensure that the children calculation doesn't go + # too deep in the hierarchy + _ = subgroup.create_group("subsubgroup") + subarray = group.create_array( + "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True + ) + + # add an extra object to the domain of the group. + # the list of children should ignore this object. + sync(store.set(f"{path}/extra_object", b"000000")) + # add an extra object under a directory-like prefix in the domain of the group. + # this creates an implicit group called implicit_subgroup + sync(store.set(f"{path}/implicit_subgroup/extra_object", b"000000")) + # make the implicit subgroup + implicit_subgroup = Group( + AsyncGroup( + metadata=GroupMetadata(), + store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), + ) + ) + # note: this assertion is order-specific, but it is not clear + # if group.children guarantees a particular order for the children. + # If order is not guaranteed, then the better test is + # to compare two sets, but presently neither the group nor array classes are hashable. + observed = group.children + assert observed == [subarray, implicit_subgroup, subgroup] + + +@pytest.mark.parametrize("store_type", (("local_store",))) +def test_group(store_type, request) -> None: + store = request.getfixturevalue(store_type) + store_path = StorePath(store) agroup = AsyncGroup( metadata=GroupMetadata(), store_path=store_path, @@ -27,9 +80,6 @@ def test_group(store_path) -> None: foo = group.create_group("foo") bar = foo.create_group("bar", attributes={"baz": "qux"}) - # check that bar is in the children of foo - assert foo.children == [bar] - # create an array from the "bar" group data = np.arange(0, 4 * 4, dtype="uint16").reshape((4, 4)) arr = bar.create_array( From a64b3420523165d86946316e60dd46893d909846 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 12:32:35 +0100 Subject: [PATCH 04/15] give type hints a glow-up --- src/zarr/v3/group.py | 59 ++++++++++++++++++++++---------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 84e0100d81..5bce87376c 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -1,20 +1,19 @@ from __future__ import annotations +from typing import TYPE_CHECKING from dataclasses import asdict, dataclass, field, replace import asyncio import json import logging -from typing import ( - Any, - AsyncGenerator, - Dict, - Literal, - Optional, - Union, - AsyncIterator, - Iterator, - List, -) + +if TYPE_CHECKING: + from typing import ( + Any, + AsyncGenerator, + Literal, + AsyncIterator, + Iterator, + ) from zarr.v3.abc.metadata import Metadata from zarr.v3.array import AsyncArray, Array @@ -35,7 +34,7 @@ def parse_zarr_format(data: Any) -> Literal[2, 3]: # todo: convert None to empty dict -def parse_attributes(data: Any) -> Dict[str, Any]: +def parse_attributes(data: Any) -> dict[str, Any]: if data is None: return {} elif isinstance(data, dict) and all(map(lambda v: isinstance(v, str), data.keys())): @@ -46,12 +45,12 @@ def parse_attributes(data: Any) -> Dict[str, Any]: @dataclass(frozen=True) class GroupMetadata(Metadata): - attributes: Dict[str, Any] = field(default_factory=dict) + attributes: dict[str, Any] = field(default_factory=dict) zarr_format: Literal[2, 3] = 3 node_type: Literal["group"] = field(default="group", init=False) # todo: rename this, since it doesn't return bytes - def to_bytes(self) -> Dict[str, bytes]: + def to_bytes(self) -> dict[str, bytes]: if self.zarr_format == 3: return {ZARR_JSON: json.dumps(self.to_dict()).encode()} else: @@ -60,7 +59,7 @@ def to_bytes(self) -> Dict[str, bytes]: ZATTRS_JSON: json.dumps(self.attributes).encode(), } - def __init__(self, attributes: Dict[str, Any] = None, zarr_format: Literal[2, 3] = 3): + def __init__(self, attributes: dict[str, Any] = {}, zarr_format: Literal[2, 3] = 3): attributes_parsed = parse_attributes(attributes) zarr_format_parsed = parse_zarr_format(zarr_format) @@ -68,11 +67,11 @@ def __init__(self, attributes: Dict[str, Any] = None, zarr_format: Literal[2, 3] object.__setattr__(self, "zarr_format", zarr_format_parsed) @classmethod - def from_dict(cls, data: Dict[str, Any]) -> GroupMetadata: + def from_dict(cls, data: dict[str, Any]) -> GroupMetadata: assert data.pop("node_type", None) in ("group", None) return cls(**data) - def to_dict(self) -> Dict[str, Any]: + def to_dict(self) -> dict[str, Any]: return asdict(self) @@ -87,7 +86,7 @@ async def create( cls, store: StoreLike, *, - attributes: Optional[Dict[str, Any]] = None, + attributes: dict[str, Any] = {}, exists_ok: bool = False, zarr_format: Literal[2, 3] = 3, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), @@ -99,7 +98,7 @@ async def create( elif zarr_format == 2: assert not await (store_path / ZGROUP_JSON).exists() group = cls( - metadata=GroupMetadata(attributes=attributes or {}, zarr_format=zarr_format), + metadata=GroupMetadata(attributes=attributes, zarr_format=zarr_format), store_path=store_path, runtime_configuration=runtime_configuration, ) @@ -147,7 +146,7 @@ async def open( def from_dict( cls, store_path: StorePath, - data: Dict[str, Any], + data: dict[str, Any], runtime_configuration: RuntimeConfiguration, ) -> Group: group = cls( @@ -160,7 +159,7 @@ def from_dict( async def getitem( self, key: str, - ) -> Union[AsyncArray, AsyncGroup]: + ) -> AsyncArray | AsyncGroup: store_path = self.store_path / key @@ -267,7 +266,7 @@ async def create_array(self, path: str, **kwargs) -> AsyncArray: **kwargs, ) - async def update_attributes(self, new_attributes: Dict[str, Any]): + async def update_attributes(self, new_attributes: dict[str, Any]): # metadata.attributes is "frozen" so we simply clear and update the dict self.metadata.attributes.clear() self.metadata.attributes.update(new_attributes) @@ -374,7 +373,7 @@ def create( cls, store: StoreLike, *, - attributes: Optional[Dict[str, Any]] = None, + attributes: dict[str, Any] = {}, exists_ok: bool = False, runtime_configuration: RuntimeConfiguration = RuntimeConfiguration(), ) -> Group: @@ -401,7 +400,7 @@ def open( ) return cls(obj) - def __getitem__(self, path: str) -> Union[Array, Group]: + def __getitem__(self, path: str) -> Array | Group: obj = self._sync(self._async_group.getitem(path)) if isinstance(obj, AsyncArray): return Array(obj) @@ -421,7 +420,7 @@ def __setitem__(self, key, value): """__setitem__ is not supported in v3""" raise NotImplementedError - async def update_attributes_async(self, new_attributes: Dict[str, Any]) -> Group: + async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group: new_metadata = replace(self.metadata, attributes=new_attributes) # Write new metadata @@ -440,7 +439,7 @@ def attrs(self) -> Attributes: def info(self): return self._async_group.info - def update_attributes(self, new_attributes: Dict[str, Any]): + def update_attributes(self, new_attributes: dict[str, Any]): self._sync(self._async_group.update_attributes(new_attributes)) return self @@ -449,7 +448,7 @@ def nchildren(self) -> int: return self._sync(self._async_group.nchildren) @property - def children(self) -> List[Array | Group]: + def children(self) -> list[Array | Group]: _children = self._sync_iter(self._async_group.children) return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] @@ -459,14 +458,14 @@ def __contains__(self, child) -> bool: def group_keys(self) -> Iterator[str]: return self._sync_iter(self._async_group.group_keys) - def groups(self) -> List[Group]: + def groups(self) -> list[Group]: # TODO: in v2 this was a generator that return key: Group return [Group(obj) for obj in self._sync_iter(self._async_group.groups)] - def array_keys(self) -> List[str]: + def array_keys(self) -> list[str]: return self._sync_iter(self._async_group.array_keys) - def arrays(self) -> List[Array]: + def arrays(self) -> list[Array]: return [Array(obj) for obj in self._sync_iter(self._async_group.arrays)] def tree(self, expand=False, level=None) -> Any: From 3d11fc0edc175748e7268397dcb14a92bba8c07d Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 14:03:01 +0100 Subject: [PATCH 05/15] test: use separate assert statements to avoid platform-dependent ordering issues --- tests/test_group_v3.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 3c8e33db8d..37f90543b1 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -56,12 +56,15 @@ def test_group_children(store_type, request): store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), ) ) - # note: this assertion is order-specific, but it is not clear + # note: these assertions are order-independent, because it is not clear # if group.children guarantees a particular order for the children. - # If order is not guaranteed, then the better test is + # If order is not guaranteed, then the better version of this test is # to compare two sets, but presently neither the group nor array classes are hashable. observed = group.children - assert observed == [subarray, implicit_subgroup, subgroup] + assert len(observed) == 3 + assert subarray in observed + assert implicit_subgroup in observed + assert subgroup in observed @pytest.mark.parametrize("store_type", (("local_store",))) From cf34afc77cb766fa1d3a3d4c778b63f4d113199c Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 14:39:38 +0100 Subject: [PATCH 06/15] test: put fixtures in conftest, add MemoryStore fixture --- tests/conftest.py | 70 +++++++++++++++++++++++++++++++++++++++++- tests/test_group_v3.py | 36 +++++++++------------- 2 files changed, 83 insertions(+), 23 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index aa73b8691e..38f87a0d9d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,8 +1,76 @@ import pathlib - import pytest +import os + +from zarr.v3.store import LocalStore, StorePath, MemoryStore +from zarr.v3.store.remote import RemoteStore @pytest.fixture(params=[str, pathlib.Path]) def path_type(request): return request.param + + +@pytest.fixture() +def mock_s3(request): + # writable local S3 system + import shlex + import subprocess + import time + + if "BOTO_CONFIG" not in os.environ: # pragma: no cover + os.environ["BOTO_CONFIG"] = "/dev/null" + if "AWS_ACCESS_KEY_ID" not in os.environ: # pragma: no cover + os.environ["AWS_ACCESS_KEY_ID"] = "foo" + if "AWS_SECRET_ACCESS_KEY" not in os.environ: # pragma: no cover + os.environ["AWS_SECRET_ACCESS_KEY"] = "bar" + requests = pytest.importorskip("requests") + s3fs = pytest.importorskip("s3fs") + pytest.importorskip("moto") + port = 5555 + endpoint_uri = "http://127.0.0.1:%d/" % port + proc = subprocess.Popen( + shlex.split("moto_server s3 -p %d" % port), + stderr=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + ) + timeout = 5 + while timeout > 0: + try: + r = requests.get(endpoint_uri) + if r.ok: + break + except Exception: # pragma: no cover + pass + timeout -= 0.1 # pragma: no cover + time.sleep(0.1) # pragma: no cover + s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) + s3 = s3fs.S3FileSystem(anon=False, **s3so) + s3.mkdir("test") + request.cls.s3so = s3so + yield + proc.terminate() + proc.wait() + + +# todo: harmonize this with local_store fixture +@pytest.fixture +def store_path(tmpdir): + store = LocalStore(str(tmpdir)) + p = StorePath(store) + return p + + +@pytest.fixture(scope="function") +def local_store(tmpdir): + return LocalStore(str(tmpdir)) + + +@pytest.fixture(scope="function") +def remote_store(): + return RemoteStore() + + +@pytest.fixture(scope="function") +def memory_store(): + return MemoryStore() diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 37f90543b1..555374f5b3 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -1,33 +1,25 @@ +from __future__ import annotations +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from zarr.v3.store.remote import MemoryStore, LocalStore import pytest import numpy as np from zarr.v3.group import AsyncGroup, Group, GroupMetadata -from zarr.v3.store import LocalStore, StorePath +from zarr.v3.store import StorePath from zarr.v3.config import RuntimeConfiguration -from zarr.v3.store.remote import RemoteStore from zarr.v3.sync import sync - -@pytest.fixture -def store_path(tmpdir): - store = LocalStore(str(tmpdir)) - p = StorePath(store) - return p - - -@pytest.fixture(scope="function") -def local_store(tmpdir): - return LocalStore(str(tmpdir)) - - -@pytest.fixture(scope="function") -def remote_store(): - return RemoteStore() - - -@pytest.mark.parametrize("store_type", ("local_store",)) +# todo: put RemoteStore in here +@pytest.mark.parametrize("store_type", ("local_store", "memory_store")) def test_group_children(store_type, request): - store: LocalStore | RemoteStore = request.getfixturevalue(store_type) + """ + Test that `Group.children` returns correct values, i.e. the arrays and groups + (explicit and implicit) contained in that group. + """ + + store: LocalStore | MemoryStore = request.getfixturevalue(store_type) path = "group" agroup = AsyncGroup( metadata=GroupMetadata(), From 16cb22610781775c96b2d843b7c85ba5ce8e41e3 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 27 Mar 2024 14:48:08 +0100 Subject: [PATCH 07/15] docs: release notes --- docs/release.rst | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/release.rst b/docs/release.rst index 3ed47ff9f5..b78e709c0e 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -18,6 +18,12 @@ Release notes Unreleased (v3) --------------- +Enhancements +~~~~~~~~~~~~ + +* Implement listing of the sub-arrays and sub-groups for a V3 ``Group``. + By :user:`Davis Bennett ` :issue:`1726`. + Maintenance ~~~~~~~~~~~ From b28eaee2f328e8e9ee2aed9f51d14edf9d49630b Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Sat, 30 Mar 2024 18:14:14 +0100 Subject: [PATCH 08/15] test: remove prematurely-added mock s3 fixture --- tests/conftest.py | 43 ------------------------------------------- 1 file changed, 43 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 38f87a0d9d..40275ba62c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,5 @@ import pathlib import pytest -import os from zarr.v3.store import LocalStore, StorePath, MemoryStore from zarr.v3.store.remote import RemoteStore @@ -11,48 +10,6 @@ def path_type(request): return request.param -@pytest.fixture() -def mock_s3(request): - # writable local S3 system - import shlex - import subprocess - import time - - if "BOTO_CONFIG" not in os.environ: # pragma: no cover - os.environ["BOTO_CONFIG"] = "/dev/null" - if "AWS_ACCESS_KEY_ID" not in os.environ: # pragma: no cover - os.environ["AWS_ACCESS_KEY_ID"] = "foo" - if "AWS_SECRET_ACCESS_KEY" not in os.environ: # pragma: no cover - os.environ["AWS_SECRET_ACCESS_KEY"] = "bar" - requests = pytest.importorskip("requests") - s3fs = pytest.importorskip("s3fs") - pytest.importorskip("moto") - port = 5555 - endpoint_uri = "http://127.0.0.1:%d/" % port - proc = subprocess.Popen( - shlex.split("moto_server s3 -p %d" % port), - stderr=subprocess.DEVNULL, - stdout=subprocess.DEVNULL, - ) - timeout = 5 - while timeout > 0: - try: - r = requests.get(endpoint_uri) - if r.ok: - break - except Exception: # pragma: no cover - pass - timeout -= 0.1 # pragma: no cover - time.sleep(0.1) # pragma: no cover - s3so = dict(client_kwargs={"endpoint_url": endpoint_uri}, use_listings_cache=False) - s3 = s3fs.S3FileSystem(anon=False, **s3so) - s3.mkdir("test") - request.cls.s3so = s3so - yield - proc.terminate() - proc.wait() - - # todo: harmonize this with local_store fixture @pytest.fixture def store_path(tmpdir): From b762fa47da38d3a1664e3b74dc8f53c72c2e52fa Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 11:15:29 +0200 Subject: [PATCH 09/15] fix: Rename children to members; AsyncGroup.members yields tuples of (name, AsyncArray / AsyncGroup) pairs; Group.members repackages these into a dict. --- src/zarr/v3/group.py | 39 ++++++++++++++++++++++++++------------- tests/test_group_v3.py | 26 +++++++++++--------------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 5bce87376c..a30d7d1702 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -287,13 +287,15 @@ async def update_attributes(self, new_attributes: dict[str, Any]): def __repr__(self): return f"" - async def nchildren(self) -> int: + async def nmembers(self) -> int: raise NotImplementedError - async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: + async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], None]: """ Returns an AsyncGenerator over the arrays and groups contained in this group. This method requires that `store_path.store` supports directory listing. + + The results are not guaranteed to be ordered. """ if not self.store_path.store.supports_listing: msg = ( @@ -311,13 +313,17 @@ async def children(self) -> AsyncGenerator[AsyncArray, AsyncGroup]: # is there a better way to schedule this? for subkey in subkeys_filtered: try: - yield await self.getitem(subkey) + yield (subkey, await self.getitem(subkey)) except KeyError: - # keyerror is raised when `subkey``names an object in the store + # keyerror is raised when `subkey` names an object (in the object storage sense), + # as opposed to a prefix, in the store under the prefix associated with this group # in which case `subkey` cannot be the name of a sub-array or sub-group. + logger.warning( + "Object at %s is not recognized as a component of a Zarr hierarchy.", subkey + ) pass - async def contains(self, child: str) -> bool: + async def contains(self, member: str) -> bool: raise NotImplementedError async def group_keys(self) -> AsyncIterator[str]: @@ -444,16 +450,23 @@ def update_attributes(self, new_attributes: dict[str, Any]): return self @property - def nchildren(self) -> int: - return self._sync(self._async_group.nchildren) + def nmembers(self) -> int: + return self._sync(self._async_group.nmembers) @property - def children(self) -> list[Array | Group]: - _children = self._sync_iter(self._async_group.children) - return [Array(obj) if isinstance(obj, AsyncArray) else Group(obj) for obj in _children] - - def __contains__(self, child) -> bool: - return self._sync(self._async_group.contains(child)) + def members(self) -> dict[str, Array | Group]: + """ + Return the sub-arrays and sub-groups of this group as a `dict` of (name, array | group) + pairs + """ + _members = self._sync_iter(self._async_group.members) + return { + key: Array(value) if isinstance(value, AsyncArray) else Group(value) + for key, value in _members + } + + def __contains__(self, member) -> bool: + return self._sync(self._async_group.contains(member)) def group_keys(self) -> Iterator[str]: return self._sync_iter(self._async_group.group_keys) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 555374f5b3..204c255064 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -13,9 +13,9 @@ # todo: put RemoteStore in here @pytest.mark.parametrize("store_type", ("local_store", "memory_store")) -def test_group_children(store_type, request): +def test_group_members(store_type, request): """ - Test that `Group.children` returns correct values, i.e. the arrays and groups + Test that `Group.members` returns correct values, i.e. the arrays and groups (explicit and implicit) contained in that group. """ @@ -26,12 +26,14 @@ def test_group_children(store_type, request): store_path=StorePath(store=store, path=path), ) group = Group(agroup) + members_expected = {} - subgroup = group.create_group("subgroup") + members_expected["subgroup"] = group.create_group("subgroup") # make a sub-sub-subgroup, to ensure that the children calculation doesn't go # too deep in the hierarchy - _ = subgroup.create_group("subsubgroup") - subarray = group.create_array( + _ = members_expected["subgroup"].create_group("subsubgroup") + + members_expected["subarray"] = group.create_array( "subarray", shape=(100,), dtype="uint8", chunk_shape=(10,), exists_ok=True ) @@ -42,21 +44,15 @@ def test_group_children(store_type, request): # this creates an implicit group called implicit_subgroup sync(store.set(f"{path}/implicit_subgroup/extra_object", b"000000")) # make the implicit subgroup - implicit_subgroup = Group( + members_expected["implicit_subgroup"] = Group( AsyncGroup( metadata=GroupMetadata(), store_path=StorePath(store=store, path=f"{path}/implicit_subgroup"), ) ) - # note: these assertions are order-independent, because it is not clear - # if group.children guarantees a particular order for the children. - # If order is not guaranteed, then the better version of this test is - # to compare two sets, but presently neither the group nor array classes are hashable. - observed = group.children - assert len(observed) == 3 - assert subarray in observed - assert implicit_subgroup in observed - assert subgroup in observed + members_observed = group.members + # members are not guaranteed to be ordered, so sort before comparing + assert sorted(members_observed) == sorted(members_expected) @pytest.mark.parametrize("store_type", (("local_store",))) From 55742269a77f20f19114a7e077b675edf7332d77 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 13:20:32 +0200 Subject: [PATCH 10/15] fix: make Group.members return a tuple of str, Array | Group pairs --- src/zarr/v3/group.py | 10 +++++----- tests/test_group_v3.py | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index a30d7d1702..f2158d3b28 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -454,16 +454,16 @@ def nmembers(self) -> int: return self._sync(self._async_group.nmembers) @property - def members(self) -> dict[str, Array | Group]: + def members(self) -> tuple[tuple[str, Array | Group], ...]: """ - Return the sub-arrays and sub-groups of this group as a `dict` of (name, array | group) + Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ _members = self._sync_iter(self._async_group.members) - return { - key: Array(value) if isinstance(value, AsyncArray) else Group(value) + return tuple( + (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) for key, value in _members - } + ) def __contains__(self, member) -> bool: return self._sync(self._async_group.contains(member)) diff --git a/tests/test_group_v3.py b/tests/test_group_v3.py index 204c255064..6b1f78df60 100644 --- a/tests/test_group_v3.py +++ b/tests/test_group_v3.py @@ -52,7 +52,7 @@ def test_group_members(store_type, request): ) members_observed = group.members # members are not guaranteed to be ordered, so sort before comparing - assert sorted(members_observed) == sorted(members_expected) + assert sorted(dict(members_observed)) == sorted(members_expected) @pytest.mark.parametrize("store_type", (("local_store",))) From d634cbf211f24290f8d282b2479df1566e317f85 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Thu, 11 Apr 2024 20:47:28 +0200 Subject: [PATCH 11/15] fix: revert changes to synchronization code; this is churn that we need to deal with --- src/zarr/v3/group.py | 6 +++++- src/zarr/v3/sync.py | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index ce5a3a3e58..a93f8404e9 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -437,6 +437,10 @@ async def update_attributes_async(self, new_attributes: dict[str, Any]) -> Group async_group = replace(self._async_group, metadata=new_metadata) return replace(self, _async_group=async_group) + @property + def store_path(self) -> StorePath: + return self._async_group.store_path + @property def metadata(self) -> GroupMetadata: return self._async_group.metadata @@ -463,7 +467,7 @@ def members(self) -> tuple[tuple[str, Array | Group], ...]: Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ - _members = self._sync_iter(self._async_group.members) + _members: list[AsyncArray | AsyncGroup] = self._sync_iter(self._async_group.members) return tuple( (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) for key, value in _members diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index 2e94a815cc..592ce8b75b 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -113,7 +113,7 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: async def iter_to_list() -> List[T]: # TODO: replace with generators so we don't materialize the entire iterator at once - async_iterator = await coroutine - return [item async for item in async_iterator] + # async_iterator = await coroutine + return [item async for item in coroutine()] return self._sync(iter_to_list()) From 66f71cc53a4eff414268267562d0b987d5fb55c8 Mon Sep 17 00:00:00 2001 From: Joseph Hamman Date: Mon, 22 Apr 2024 06:01:39 -0700 Subject: [PATCH 12/15] make mypy happy --- src/zarr/v3/group.py | 31 ++++++++++++++++++++++--------- src/zarr/v3/sync.py | 8 ++++---- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index a93f8404e9..12073f8878 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -459,7 +459,7 @@ def update_attributes(self, new_attributes: dict[str, Any]): @property def nmembers(self) -> int: - return self._sync(self._async_group.nmembers) + return self._sync(self._async_group.nmembers()) @property def members(self) -> tuple[tuple[str, Array | Group], ...]: @@ -467,27 +467,40 @@ def members(self) -> tuple[tuple[str, Array | Group], ...]: Return the sub-arrays and sub-groups of this group as a `tuple` of (name, array | group) pairs """ - _members: list[AsyncArray | AsyncGroup] = self._sync_iter(self._async_group.members) - return tuple( - (key, Array(value)) if isinstance(value, AsyncArray) else (key, Group(value)) - for key, value in _members + _members: list[tuple[str, AsyncArray | AsyncGroup]] = self._sync_iter( + self._async_group.members() ) + ret: list[tuple[str, Array | Group]] = [] + for key, value in _members: + if isinstance(value, AsyncArray): + ret.append((key, Array(value))) + else: + ret.append((key, Group(value))) + return tuple(ret) def __contains__(self, member) -> bool: return self._sync(self._async_group.contains(member)) def group_keys(self) -> list[str]: - return self._sync_iter(self._async_group.group_keys()) + # uncomment with AsyncGroup implements this method + # return self._sync_iter(self._async_group.group_keys()) + raise NotImplementedError def groups(self) -> list[Group]: # TODO: in v2 this was a generator that return key: Group - return [Group(obj) for obj in self._sync_iter(self._async_group.groups())] + # uncomment with AsyncGroup implements this method + # return [Group(obj) for obj in self._sync_iter(self._async_group.groups())] + raise NotImplementedError def array_keys(self) -> list[str]: - return self._sync_iter(self._async_group.array_keys) + # uncomment with AsyncGroup implements this method + # return self._sync_iter(self._async_group.array_keys) + raise NotImplementedError def arrays(self) -> list[Array]: - return [Array(obj) for obj in self._sync_iter(self._async_group.arrays)] + # uncomment with AsyncGroup implements this method + # return [Array(obj) for obj in self._sync_iter(self._async_group.arrays)] + raise NotImplementedError def tree(self, expand=False, level=None) -> Any: return self._sync(self._async_group.tree(expand=expand, level=level)) diff --git a/src/zarr/v3/sync.py b/src/zarr/v3/sync.py index d8374feaa5..4ee27707f5 100644 --- a/src/zarr/v3/sync.py +++ b/src/zarr/v3/sync.py @@ -111,10 +111,10 @@ def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: # this should allow us to better type the sync wrapper return sync(coroutine, loop=self._sync_configuration.asyncio_loop) - def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: - async def iter_to_list() -> List[T]: + def _sync_iter(self, async_gen: AsyncIterator[T]) -> List[T]: + async def iter_to_list(gen) -> list[T]: # TODO: replace with generators so we don't materialize the entire iterator at once # async_iterator = await coroutine - return [item async for item in coroutine()] + return [item async for item in gen] - return self._sync(iter_to_list()) + return self._sync(iter_to_list(async_gen)) From ad55c3f460f4c371ccc91f4a2513143b7ad27a19 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 24 Apr 2024 13:14:21 +0200 Subject: [PATCH 13/15] feat: implement member-specific iteration methods in asyncgroup --- src/zarr/v3/group.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 12073f8878..d31d5acfea 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -326,17 +326,29 @@ async def members(self) -> AsyncGenerator[tuple[str, AsyncArray | AsyncGroup], N async def contains(self, member: str) -> bool: raise NotImplementedError - async def group_keys(self) -> AsyncIterator[str]: - raise NotImplementedError - - async def groups(self) -> AsyncIterator[AsyncGroup]: - raise NotImplementedError - - async def array_keys(self) -> AsyncIterator[str]: - raise NotImplementedError + # todo: decide if this method should be separate from `groups` + async def group_keys(self) -> AsyncGenerator[str, None]: + async for key, value in self.members(): + if isinstance(value, AsyncGroup): + yield key + + # todo: decide if this method should be separate from `group_keys` + async def groups(self) -> AsyncGenerator[AsyncGroup, None]: + async for key, value in self.members(): + if isinstance(value, AsyncGroup): + yield value + + # todo: decide if this method should be separate from `arrays` + async def array_keys(self) -> AsyncGenerator[str, None]: + async for key, value in self.members(): + if isinstance(value, AsyncArray): + yield key + # todo: decide if this method should be separate from `array_keys` async def arrays(self) -> AsyncIterator[AsyncArray]: - raise NotImplementedError + async for key, value in self.members(): + if isinstance(value, AsyncArray): + yield value async def tree(self, expand=False, level=None) -> Any: raise NotImplementedError From 7637624473137a03436fa9ac0f7f7458e7006132 Mon Sep 17 00:00:00 2001 From: Davis Bennett Date: Wed, 24 Apr 2024 14:32:31 +0200 Subject: [PATCH 14/15] chore: clean up some post-merge issues --- src/zarr/fixture/flat/.zarray | 23 ++++++++++++++++++ src/zarr/fixture/flat/0.0 | Bin 0 -> 48 bytes src/zarr/fixture/flat_legacy/.zarray | 22 +++++++++++++++++ src/zarr/fixture/flat_legacy/0.0 | Bin 0 -> 48 bytes src/zarr/fixture/nested/.zarray | 23 ++++++++++++++++++ src/zarr/fixture/nested/0/0 | Bin 0 -> 48 bytes src/zarr/fixture/nested_legacy/.zarray | 23 ++++++++++++++++++ src/zarr/fixture/nested_legacy/0/0 | Bin 0 -> 48 bytes tests/v2/conftest.py | 25 ------------------- tests/v3/conftest.py | 32 +++++++++++++++++++++++++ 10 files changed, 123 insertions(+), 25 deletions(-) create mode 100644 src/zarr/fixture/flat/.zarray create mode 100644 src/zarr/fixture/flat/0.0 create mode 100644 src/zarr/fixture/flat_legacy/.zarray create mode 100644 src/zarr/fixture/flat_legacy/0.0 create mode 100644 src/zarr/fixture/nested/.zarray create mode 100644 src/zarr/fixture/nested/0/0 create mode 100644 src/zarr/fixture/nested_legacy/.zarray create mode 100644 src/zarr/fixture/nested_legacy/0/0 create mode 100644 tests/v3/conftest.py diff --git a/src/zarr/fixture/flat/.zarray b/src/zarr/fixture/flat/.zarray new file mode 100644 index 0000000000..d1acce7665 --- /dev/null +++ b/src/zarr/fixture/flat/.zarray @@ -0,0 +1,23 @@ +{ + "chunks": [ + 2, + 2 + ], + "compressor": { + "blocksize": 0, + "clevel": 5, + "cname": "lz4", + "id": "blosc", + "shuffle": 1 + }, + "dimension_separator": ".", + "dtype": " Date: Wed, 24 Apr 2024 15:19:11 +0200 Subject: [PATCH 15/15] chore: remove extra directory added by test code --- src/zarr/fixture/flat/.zarray | 23 ----------------------- src/zarr/fixture/flat/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/flat_legacy/.zarray | 22 ---------------------- src/zarr/fixture/flat_legacy/0.0 | Bin 48 -> 0 bytes src/zarr/fixture/nested/.zarray | 23 ----------------------- src/zarr/fixture/nested/0/0 | Bin 48 -> 0 bytes src/zarr/fixture/nested_legacy/.zarray | 23 ----------------------- src/zarr/fixture/nested_legacy/0/0 | Bin 48 -> 0 bytes 8 files changed, 91 deletions(-) delete mode 100644 src/zarr/fixture/flat/.zarray delete mode 100644 src/zarr/fixture/flat/0.0 delete mode 100644 src/zarr/fixture/flat_legacy/.zarray delete mode 100644 src/zarr/fixture/flat_legacy/0.0 delete mode 100644 src/zarr/fixture/nested/.zarray delete mode 100644 src/zarr/fixture/nested/0/0 delete mode 100644 src/zarr/fixture/nested_legacy/.zarray delete mode 100644 src/zarr/fixture/nested_legacy/0/0 diff --git a/src/zarr/fixture/flat/.zarray b/src/zarr/fixture/flat/.zarray deleted file mode 100644 index d1acce7665..0000000000 --- a/src/zarr/fixture/flat/.zarray +++ /dev/null @@ -1,23 +0,0 @@ -{ - "chunks": [ - 2, - 2 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dimension_separator": ".", - "dtype": "