
Commit

Merge branch 'main' of github.com:zarr-developers/zarr-python into feat/store-paths
d-v-b committed Oct 18, 2024
2 parents 9d0d04b + 5f3a512 commit 471740b
Showing 44 changed files with 201 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.11', '3.12']
+        python-version: ['3.11', '3.12', '3.13']
         numpy-version: ['1.25', '1.26', '2.0']
         dependency-set: ["minimal", "optional"]

36 changes: 18 additions & 18 deletions pyproject.toml
@@ -49,6 +49,7 @@ classifiers = [
     'Programming Language :: Python :: 3',
     'Programming Language :: Python :: 3.11',
     'Programming Language :: Python :: 3.12',
+    'Programming Language :: Python :: 3.13',
 ]
 license = {text = "MIT License"}
 keywords = ["Python", "compressed", "ndimensional-arrays", "zarr"]
@@ -132,17 +133,17 @@ dependencies = [
 features = ["test", "extra"]

 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 version = ["minimal"]

 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 features = ["optional"]

 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 features = ["gpu"]

@@ -152,7 +153,7 @@ run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pypro
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*"
+run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
 list-env = "pip list"

 [tool.hatch.envs.gputest]
@@ -163,7 +164,7 @@ dependencies = [
 features = ["test", "extra", "gpu"]

 [[tool.hatch.envs.gputest.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 version = ["minimal"]

@@ -172,7 +173,7 @@ run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*"
+run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
 list-env = "pip list"

 [tool.hatch.envs.docs]
@@ -281,18 +282,17 @@ ignore_errors = true

 [[tool.mypy.overrides]]
 module = [
-    "tests.v2.*",
-    "tests.v3.package_with_entrypoint.*",
-    "tests.v3.test_codecs.test_codecs",
-    "tests.v3.test_codecs.test_transpose",
-    "tests.v3.test_metadata.*",
-    "tests.v3.test_store.*",
-    "tests.v3.test_config",
-    "tests.v3.test_group",
-    "tests.v3.test_indexing",
-    "tests.v3.test_properties",
-    "tests.v3.test_sync",
-    "tests.v3.test_v2",
+    "tests.package_with_entrypoint.*",
+    "tests.test_codecs.test_codecs",
+    "tests.test_codecs.test_transpose",
+    "tests.test_metadata.*",
+    "tests.test_store.*",
+    "tests.test_config",
+    "tests.test_group",
+    "tests.test_indexing",
+    "tests.test_properties",
+    "tests.test_sync",
+    "tests.test_v2",
 ]
 ignore_errors = true

1 change: 0 additions & 1 deletion src/zarr/codecs/pipeline.py
@@ -162,7 +162,6 @@ async def decode_batch(
     ) -> Iterable[NDBuffer | None]:
         chunk_bytes_batch: Iterable[Buffer | None]
         chunk_bytes_batch, chunk_specs = _unzip2(chunk_bytes_and_specs)
-
         (
             aa_codecs_with_spec,
             ab_codec_with_spec,
7 changes: 4 additions & 3 deletions src/zarr/codecs/sharding.py
@@ -129,7 +129,7 @@ def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None:
         if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64):
             return None
         else:
-            return (int(chunk_start), int(chunk_start) + int(chunk_len))
+            return (int(chunk_start), int(chunk_len))

     def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: slice | None) -> None:
         localized_chunk = self._localize_chunk(chunk_coords)
@@ -203,7 +203,7 @@ def create_empty(
     def __getitem__(self, chunk_coords: ChunkCoords) -> Buffer:
         chunk_byte_slice = self.index.get_chunk_slice(chunk_coords)
         if chunk_byte_slice:
-            return self.buf[chunk_byte_slice[0] : chunk_byte_slice[1]]
+            return self.buf[chunk_byte_slice[0] : (chunk_byte_slice[0] + chunk_byte_slice[1])]
         raise KeyError

     def __len__(self) -> int:
@@ -265,7 +265,8 @@ async def finalize(
     ) -> Buffer:
         index_bytes = await index_encoder(self.index)
         if index_location == ShardingCodecIndexLocation.start:
-            self.index.offsets_and_lengths[..., 0] += len(index_bytes)
+            empty_chunks_mask = self.index.offsets_and_lengths[..., 0] == MAX_UINT_64
+            self.index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes)
             index_bytes = await index_encoder(self.index)  # encode again with corrected offsets
             out_buf = index_bytes + self.buf
         else:
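A note on the two sharding fixes above: `get_chunk_slice` now returns `(start, length)` rather than `(start, stop)`, with `__getitem__` computing `start + length` at the point of use, and `finalize` no longer shifts the offsets of empty chunks, which are flagged with MAX_UINT_64 sentinels in the index. A minimal standalone sketch of the masking idea (illustrative values, not the library's code):

```python
import numpy as np

MAX_UINT_64 = 2**64 - 1

# offsets_and_lengths[..., 0] holds each chunk's byte offset, [..., 1] its
# length; empty chunks carry MAX_UINT_64 sentinels in both slots.
offsets_and_lengths = np.array(
    [[0, 100], [MAX_UINT_64, MAX_UINT_64], [100, 50]], dtype=np.uint64
)
index_len = 16  # illustrative: size of the encoded index placed at the shard start

# Shift only the real chunks; adding to the sentinel would wrap the uint64
# and destroy the "empty chunk" marker.
empty_chunks_mask = offsets_and_lengths[..., 0] == MAX_UINT_64
offsets_and_lengths[~empty_chunks_mask, 0] += index_len

assert offsets_and_lengths[1, 0] == MAX_UINT_64  # sentinel preserved
```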
88 changes: 50 additions & 38 deletions src/zarr/core/group.py
@@ -572,7 +572,10 @@ def _from_bytes_v2(

     @classmethod
     def _from_bytes_v3(
-        cls, store_path: StorePath, zarr_json_bytes: Buffer, use_consolidated: bool | None
+        cls,
+        store_path: StorePath,
+        zarr_json_bytes: Buffer,
+        use_consolidated: bool | None,
     ) -> AsyncGroup:
         group_metadata = json.loads(zarr_json_bytes.to_bytes())
         if use_consolidated and group_metadata.get("consolidated_metadata") is None:
@@ -666,14 +669,33 @@ def _getitem_consolidated(
         # the caller needs to verify this!
         assert self.metadata.consolidated_metadata is not None

-        try:
-            metadata = self.metadata.consolidated_metadata.metadata[key]
-        except KeyError as e:
-            # The Group Metadata has consolidated metadata, but the key
-            # isn't present. We trust this to mean that the key isn't in
-            # the hierarchy, and *don't* fall back to checking the store.
-            msg = f"'{key}' not found in consolidated metadata."
-            raise KeyError(msg) from e
+        # we support nested getitems like group/subgroup/array
+        indexers = key.split("/")
+        indexers.reverse()
+        metadata: ArrayV2Metadata | ArrayV3Metadata | GroupMetadata = self.metadata
+
+        while indexers:
+            indexer = indexers.pop()
+            if isinstance(metadata, ArrayV2Metadata | ArrayV3Metadata):
+                # we've indexed into an array with group["array/subarray"]. Invalid.
+                raise KeyError(key)
+            if metadata.consolidated_metadata is None:
+                # we've indexed into a group without consolidated metadata.
+                # This isn't normal; typically, consolidated metadata
+                # will include explicit markers for when there are no child
+                # nodes as metadata={}.
+                # We have some freedom in exactly how we interpret this case.
+                # For now, we treat None as the same as {}, i.e. we don't
+                # have any children.
+                raise KeyError(key)
+            try:
+                metadata = metadata.consolidated_metadata.metadata[indexer]
+            except KeyError as e:
+                # The Group Metadata has consolidated metadata, but the key
+                # isn't present. We trust this to mean that the key isn't in
+                # the hierarchy, and *don't* fall back to checking the store.
+                msg = f"'{key}' not found in consolidated metadata."
+                raise KeyError(msg) from e

         # update store_path to ensure that AsyncArray/Group.name is correct
         if prefix != "/":
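The loop above generalizes what was previously a single dict lookup, so nested keys like `"subgroup/array"` resolve entirely from consolidated metadata, still without falling back to the store. A self-contained sketch of the traversal logic, using simplified stand-in types rather than the real metadata classes:

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class FakeArrayMeta:
    """Stand-in for ArrayV2Metadata / ArrayV3Metadata."""


@dataclass
class FakeGroupMeta:
    """Stand-in for GroupMetadata; `children` mimics consolidated metadata."""

    children: dict[str, FakeArrayMeta | FakeGroupMeta] | None = None


def resolve(root: FakeGroupMeta, key: str) -> FakeArrayMeta | FakeGroupMeta:
    # Walk "a/b/c" one segment at a time, descending through consolidated children.
    indexers = key.split("/")
    indexers.reverse()
    node: FakeArrayMeta | FakeGroupMeta = root
    while indexers:
        indexer = indexers.pop()
        if isinstance(node, FakeArrayMeta):
            raise KeyError(key)  # e.g. "array/subarray" indexes into an array: invalid
        if node.children is None:
            raise KeyError(key)  # no consolidated children: treated as empty
        try:
            node = node.children[indexer]
        except KeyError as e:
            raise KeyError(f"'{key}' not found in consolidated metadata.") from e
    return node


root = FakeGroupMeta({"subgroup": FakeGroupMeta({"array": FakeArrayMeta()})})
assert isinstance(resolve(root, "subgroup/array"), FakeArrayMeta)
```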
@@ -932,11 +954,7 @@ async def create_array(

     @deprecated("Use AsyncGroup.create_array instead.")
     async def create_dataset(
-        self,
-        name: str,
-        *,
-        shape: ShapeLike,
-        **kwargs: Any,
+        self, name: str, **kwargs: Any
     ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
         """Create an array.
@@ -947,8 +965,6 @@ async def create_dataset(
         ----------
         name : str
             Array name.
-        shape : int or tuple of ints
-            Array shape.
         kwargs : dict
             Additional arguments passed to :func:`zarr.AsyncGroup.create_array`.
@@ -959,7 +975,7 @@ async def create_dataset(
         .. deprecated:: 3.0.0
             The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead.
         """
-        return await self.create_array(name, shape=shape, **kwargs)
+        return await self.create_array(name, **kwargs)

     @deprecated("Use AsyncGroup.require_array instead.")
     async def require_dataset(
@@ -1081,6 +1097,8 @@ async def nmembers(
         -------
         count : int
         """
+        # check if we can use consolidated metadata, which requires that we have non-None
+        # consolidated metadata at all points in the hierarchy.
         if self.metadata.consolidated_metadata is not None:
             return len(self.metadata.consolidated_metadata.flattened_metadata)
         # TODO: consider using aioitertools.builtins.sum for this
@@ -1094,7 +1112,8 @@ async def members(
         self,
         max_depth: int | None = 0,
     ) -> AsyncGenerator[
-        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None
+        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup],
+        None,
     ]:
         """
         Returns an AsyncGenerator over the arrays and groups contained in this group.
@@ -1125,12 +1144,12 @@ async def members(
     async def _members(
         self, max_depth: int | None, current_depth: int
     ) -> AsyncGenerator[
-        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None
+        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup],
+        None,
     ]:
         if self.metadata.consolidated_metadata is not None:
             # we should be able to do members without any additional I/O
             members = self._members_consolidated(max_depth, current_depth)
-
             for member in members:
                 yield member
             return
@@ -1186,7 +1205,8 @@ async def _members(
     def _members_consolidated(
         self, max_depth: int | None, current_depth: int, prefix: str = ""
     ) -> Generator[
-        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None
+        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup],
+        None,
     ]:
         consolidated_metadata = self.metadata.consolidated_metadata

@@ -1271,7 +1291,11 @@ async def full(
         self, *, name: str, shape: ChunkCoords, fill_value: Any | None, **kwargs: Any
     ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
         return await async_api.full(
-            shape=shape, fill_value=fill_value, store=self.store_path, path=name, **kwargs
+            shape=shape,
+            fill_value=fill_value,
+            store=self.store_path,
+            path=name,
+            **kwargs,
         )

     async def empty_like(
@@ -1627,13 +1651,7 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array:
         return Array(self._sync(self._async_group.create_dataset(name, **kwargs)))

     @deprecated("Use Group.require_array instead.")
-    def require_dataset(
-        self,
-        name: str,
-        *,
-        shape: ShapeLike,
-        **kwargs: Any,
-    ) -> Array:
+    def require_dataset(self, name: str, **kwargs: Any) -> Array:
         """Obtain an array, creating if it doesn't exist.

         Arrays are known as "datasets" in HDF5 terminology. For compatibility
@@ -1660,15 +1678,9 @@ def require_dataset(
         .. deprecated:: 3.0.0
             The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead.
         """
-        return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs)))
+        return Array(self._sync(self._async_group.require_array(name, **kwargs)))

-    def require_array(
-        self,
-        name: str,
-        *,
-        shape: ShapeLike,
-        **kwargs: Any,
-    ) -> Array:
+    def require_array(self, name: str, **kwargs: Any) -> Array:
         """Obtain an array, creating if it doesn't exist.
@@ -1690,7 +1702,7 @@ def require_array(
         -------
         a : Array
         """
-        return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs)))
+        return Array(self._sync(self._async_group.require_array(name, **kwargs)))

     def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array:
         return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs)))
17 changes: 13 additions & 4 deletions src/zarr/testing/utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

-from typing import TYPE_CHECKING, Any, cast
+from collections.abc import Callable, Coroutine
+from typing import TYPE_CHECKING, Any, TypeVar, cast

 import pytest

@@ -37,8 +38,16 @@ def has_cupy() -> bool:
         return False


+T_Callable = TypeVar("T_Callable", bound=Callable[[], Coroutine[Any, Any, None]])
+
+
 # Decorator for GPU tests
-def gpu_test(func: Any) -> Any:
-    return pytest.mark.gpu(
-        pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(func)
+def gpu_test(func: T_Callable) -> T_Callable:
+    return cast(
+        T_Callable,
+        pytest.mark.gpu(
+            pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(
+                func
+            )
+        ),
     )
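With the tightened signature, type checkers see the decorated test as its original coroutine type rather than `Any`. A hedged usage sketch (the import path comes from the file above; the test body is illustrative):

```python
from zarr.testing.utils import gpu_test


@gpu_test
async def test_gpu_roundtrip() -> None:
    # Collected with the `gpu` mark; skipped unless CuPy imports and a GPU
    # is available, per the skipif applied by gpu_test.
    import cupy as cp

    assert int(cp.arange(10).sum()) == 45
```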
13 files renamed without changes.