
Commit

Merge branch 'main' of github.com:zarr-developers/zarr-python into feat/store-paths
d-v-b committed Oct 18, 2024
2 parents 9d0d04b + 5f3a512 commit 471740b
Showing 44 changed files with 201 additions and 67 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -21,7 +21,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: ['3.11', '3.12']
+        python-version: ['3.11', '3.12', '3.13']
         numpy-version: ['1.25', '1.26', '2.0']
         dependency-set: ["minimal", "optional"]

36 changes: 18 additions & 18 deletions pyproject.toml
@@ -49,6 +49,7 @@ classifiers = [
     'Programming Language :: Python :: 3',
     'Programming Language :: Python :: 3.11',
     'Programming Language :: Python :: 3.12',
+    'Programming Language :: Python :: 3.13',
 ]
 license = {text = "MIT License"}
 keywords = ["Python", "compressed", "ndimensional-arrays", "zarr"]
@@ -132,17 +133,17 @@ dependencies = [
 features = ["test", "extra"]

 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 version = ["minimal"]

 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 features = ["optional"]

 [[tool.hatch.envs.test.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 features = ["gpu"]

@@ -152,7 +153,7 @@ run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pypro
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*"
+run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
 list-env = "pip list"

 [tool.hatch.envs.gputest]
@@ -163,7 +164,7 @@ dependencies = [
 features = ["test", "extra", "gpu"]

 [[tool.hatch.envs.gputest.matrix]]
-python = ["3.11", "3.12"]
+python = ["3.11", "3.12", "3.13"]
 numpy = ["1.25", "1.26", "2.0"]
 version = ["minimal"]

@@ -172,7 +173,7 @@ run-coverage = "pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov=tests"
 run = "run-coverage --no-cov"
 run-verbose = "run-coverage --verbose"
 run-mypy = "mypy src"
-run-hypothesis = "pytest --hypothesis-profile ci tests/v3/test_properties.py tests/v3/test_store/test_stateful*"
+run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
 list-env = "pip list"

 [tool.hatch.envs.docs]
@@ -281,18 +282,17 @@ ignore_errors = true

 [[tool.mypy.overrides]]
 module = [
-    "tests.v2.*",
-    "tests.v3.package_with_entrypoint.*",
-    "tests.v3.test_codecs.test_codecs",
-    "tests.v3.test_codecs.test_transpose",
-    "tests.v3.test_metadata.*",
-    "tests.v3.test_store.*",
-    "tests.v3.test_config",
-    "tests.v3.test_group",
-    "tests.v3.test_indexing",
-    "tests.v3.test_properties",
-    "tests.v3.test_sync",
-    "tests.v3.test_v2",
+    "tests.package_with_entrypoint.*",
+    "tests.test_codecs.test_codecs",
+    "tests.test_codecs.test_transpose",
+    "tests.test_metadata.*",
+    "tests.test_store.*",
+    "tests.test_config",
+    "tests.test_group",
+    "tests.test_indexing",
+    "tests.test_properties",
+    "tests.test_sync",
+    "tests.test_v2",
 ]
 ignore_errors = true

1 change: 0 additions & 1 deletion src/zarr/codecs/pipeline.py
@@ -162,7 +162,6 @@ async def decode_batch(
     ) -> Iterable[NDBuffer | None]:
         chunk_bytes_batch: Iterable[Buffer | None]
         chunk_bytes_batch, chunk_specs = _unzip2(chunk_bytes_and_specs)
-
         (
             aa_codecs_with_spec,
             ab_codec_with_spec,
7 changes: 4 additions & 3 deletions src/zarr/codecs/sharding.py
@@ -129,7 +129,7 @@ def get_chunk_slice(self, chunk_coords: ChunkCoords) -> tuple[int, int] | None:
         if (chunk_start, chunk_len) == (MAX_UINT_64, MAX_UINT_64):
             return None
         else:
-            return (int(chunk_start), int(chunk_start) + int(chunk_len))
+            return (int(chunk_start), int(chunk_len))

     def set_chunk_slice(self, chunk_coords: ChunkCoords, chunk_slice: slice | None) -> None:
         localized_chunk = self._localize_chunk(chunk_coords)
@@ -203,7 +203,7 @@ def create_empty(
     def __getitem__(self, chunk_coords: ChunkCoords) -> Buffer:
         chunk_byte_slice = self.index.get_chunk_slice(chunk_coords)
         if chunk_byte_slice:
-            return self.buf[chunk_byte_slice[0] : chunk_byte_slice[1]]
+            return self.buf[chunk_byte_slice[0] : (chunk_byte_slice[0] + chunk_byte_slice[1])]
         raise KeyError

     def __len__(self) -> int:
@@ -265,7 +265,8 @@ async def finalize(
     ) -> Buffer:
         index_bytes = await index_encoder(self.index)
         if index_location == ShardingCodecIndexLocation.start:
-            self.index.offsets_and_lengths[..., 0] += len(index_bytes)
+            empty_chunks_mask = self.index.offsets_and_lengths[..., 0] == MAX_UINT_64
+            self.index.offsets_and_lengths[~empty_chunks_mask, 0] += len(index_bytes)
             index_bytes = await index_encoder(self.index)  # encode again with corrected offsets
             out_buf = index_bytes + self.buf
         else:
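A note on the two sharding fixes above: `get_chunk_slice` now returns `(start, length)` rather than `(start, stop)`, with `__getitem__` computing `start + length` at the point of use, and `finalize` no longer shifts the offsets of empty chunks, which are flagged with MAX_UINT_64 sentinels in the index. A minimal standalone sketch of the masking idea (illustrative values, not the library's code):

```python
import numpy as np

MAX_UINT_64 = 2**64 - 1

# offsets_and_lengths[..., 0] holds each chunk's byte offset, [..., 1] its
# length; empty chunks carry MAX_UINT_64 sentinels in both slots.
offsets_and_lengths = np.array(
    [[0, 100], [MAX_UINT_64, MAX_UINT_64], [100, 50]], dtype=np.uint64
)
index_len = 16  # illustrative: size of the encoded index placed at the shard start

# Shift only the real chunks; adding to the sentinel would wrap the uint64
# and destroy the "empty chunk" marker.
empty_chunks_mask = offsets_and_lengths[..., 0] == MAX_UINT_64
offsets_and_lengths[~empty_chunks_mask, 0] += index_len

assert offsets_and_lengths[1, 0] == MAX_UINT_64  # sentinel preserved
```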
88 changes: 50 additions & 38 deletions src/zarr/core/group.py
@@ -572,7 +572,10 @@ def _from_bytes_v2(

     @classmethod
     def _from_bytes_v3(
-        cls, store_path: StorePath, zarr_json_bytes: Buffer, use_consolidated: bool | None
+        cls,
+        store_path: StorePath,
+        zarr_json_bytes: Buffer,
+        use_consolidated: bool | None,
     ) -> AsyncGroup:
         group_metadata = json.loads(zarr_json_bytes.to_bytes())
         if use_consolidated and group_metadata.get("consolidated_metadata") is None:
@@ -666,14 +669,33 @@ def _getitem_consolidated(
         # the caller needs to verify this!
         assert self.metadata.consolidated_metadata is not None

-        try:
-            metadata = self.metadata.consolidated_metadata.metadata[key]
-        except KeyError as e:
-            # The Group Metadata has consolidated metadata, but the key
-            # isn't present. We trust this to mean that the key isn't in
-            # the hierarchy, and *don't* fall back to checking the store.
-            msg = f"'{key}' not found in consolidated metadata."
-            raise KeyError(msg) from e
+        # we support nested getitems like group/subgroup/array
+        indexers = key.split("/")
+        indexers.reverse()
+        metadata: ArrayV2Metadata | ArrayV3Metadata | GroupMetadata = self.metadata
+
+        while indexers:
+            indexer = indexers.pop()
+            if isinstance(metadata, ArrayV2Metadata | ArrayV3Metadata):
+                # we've indexed into an array with group["array/subarray"]. Invalid.
+                raise KeyError(key)
+            if metadata.consolidated_metadata is None:
+                # we've indexed into a group without consolidated metadata.
+                # This isn't normal; typically, consolidated metadata
+                # will include explicit markers for when there are no child
+                # nodes as metadata={}.
+                # We have some freedom in exactly how we interpret this case.
+                # For now, we treat None as the same as {}, i.e. we don't
+                # have any children.
+                raise KeyError(key)
+            try:
+                metadata = metadata.consolidated_metadata.metadata[indexer]
+            except KeyError as e:
+                # The Group Metadata has consolidated metadata, but the key
+                # isn't present. We trust this to mean that the key isn't in
+                # the hierarchy, and *don't* fall back to checking the store.
+                msg = f"'{key}' not found in consolidated metadata."
+                raise KeyError(msg) from e

         # update store_path to ensure that AsyncArray/Group.name is correct
         if prefix != "/":
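The loop above generalizes what was previously a single dict lookup, so nested keys like `"subgroup/array"` resolve entirely from consolidated metadata, still without falling back to the store. A self-contained sketch of the traversal logic, using simplified stand-in types rather than the real metadata classes:

```python
from __future__ import annotations

from dataclasses import dataclass


@dataclass
class FakeArrayMeta:
    """Stand-in for ArrayV2Metadata / ArrayV3Metadata."""


@dataclass
class FakeGroupMeta:
    """Stand-in for GroupMetadata; `children` mimics consolidated metadata."""

    children: dict[str, FakeArrayMeta | FakeGroupMeta] | None = None


def resolve(root: FakeGroupMeta, key: str) -> FakeArrayMeta | FakeGroupMeta:
    # Walk "a/b/c" one segment at a time, descending through consolidated children.
    indexers = key.split("/")
    indexers.reverse()
    node: FakeArrayMeta | FakeGroupMeta = root
    while indexers:
        indexer = indexers.pop()
        if isinstance(node, FakeArrayMeta):
            raise KeyError(key)  # e.g. "array/subarray" indexes into an array: invalid
        if node.children is None:
            raise KeyError(key)  # no consolidated children: treated as empty
        try:
            node = node.children[indexer]
        except KeyError as e:
            raise KeyError(f"'{key}' not found in consolidated metadata.") from e
    return node


root = FakeGroupMeta({"subgroup": FakeGroupMeta({"array": FakeArrayMeta()})})
assert isinstance(resolve(root, "subgroup/array"), FakeArrayMeta)
```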
@@ -932,11 +954,7 @@ async def create_array(

     @deprecated("Use AsyncGroup.create_array instead.")
     async def create_dataset(
-        self,
-        name: str,
-        *,
-        shape: ShapeLike,
-        **kwargs: Any,
+        self, name: str, **kwargs: Any
     ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
         """Create an array.
@@ -947,8 +965,6 @@ async def create_dataset(
         ----------
         name : str
             Array name.
-        shape : int or tuple of ints
-            Array shape.
         kwargs : dict
             Additional arguments passed to :func:`zarr.AsyncGroup.create_array`.
@@ -959,7 +975,7 @@ async def create_dataset(
         .. deprecated:: 3.0.0
             The h5py compatibility methods will be removed in 3.1.0. Use `AsyncGroup.create_array` instead.
         """
-        return await self.create_array(name, shape=shape, **kwargs)
+        return await self.create_array(name, **kwargs)

     @deprecated("Use AsyncGroup.require_array instead.")
     async def require_dataset(
@@ -1081,6 +1097,8 @@ async def nmembers(
         -------
         count : int
         """
+        # check if we can use consolidated metadata, which requires that we have non-None
+        # consolidated metadata at all points in the hierarchy.
         if self.metadata.consolidated_metadata is not None:
             return len(self.metadata.consolidated_metadata.flattened_metadata)
         # TODO: consider using aioitertools.builtins.sum for this
@@ -1094,7 +1112,8 @@ async def members(
         self,
         max_depth: int | None = 0,
     ) -> AsyncGenerator[
-        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None
+        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup],
+        None,
     ]:
         """
         Returns an AsyncGenerator over the arrays and groups contained in this group.
@@ -1125,12 +1144,12 @@ async def members(
     async def _members(
         self, max_depth: int | None, current_depth: int
     ) -> AsyncGenerator[
-        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None
+        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup],
+        None,
     ]:
         if self.metadata.consolidated_metadata is not None:
             # we should be able to do members without any additional I/O
             members = self._members_consolidated(max_depth, current_depth)
-
             for member in members:
                 yield member
             return
@@ -1186,7 +1205,8 @@ async def _members(
     def _members_consolidated(
         self, max_depth: int | None, current_depth: int, prefix: str = ""
     ) -> Generator[
-        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup], None
+        tuple[str, AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | AsyncGroup],
+        None,
     ]:
         consolidated_metadata = self.metadata.consolidated_metadata

@@ -1271,7 +1291,11 @@ async def full(
         self, *, name: str, shape: ChunkCoords, fill_value: Any | None, **kwargs: Any
     ) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
         return await async_api.full(
-            shape=shape, fill_value=fill_value, store=self.store_path, path=name, **kwargs
+            shape=shape,
+            fill_value=fill_value,
+            store=self.store_path,
+            path=name,
+            **kwargs,
         )

     async def empty_like(
@@ -1627,13 +1651,7 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array:
         return Array(self._sync(self._async_group.create_dataset(name, **kwargs)))

     @deprecated("Use Group.require_array instead.")
-    def require_dataset(
-        self,
-        name: str,
-        *,
-        shape: ShapeLike,
-        **kwargs: Any,
-    ) -> Array:
+    def require_dataset(self, name: str, **kwargs: Any) -> Array:
         """Obtain an array, creating if it doesn't exist.

         Arrays are known as "datasets" in HDF5 terminology. For compatibility
@@ -1660,15 +1678,9 @@ def require_dataset(
         .. deprecated:: 3.0.0
             The h5py compatibility methods will be removed in 3.1.0. Use `Group.require_array` instead.
         """
-        return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs)))
+        return Array(self._sync(self._async_group.require_array(name, **kwargs)))

-    def require_array(
-        self,
-        name: str,
-        *,
-        shape: ShapeLike,
-        **kwargs: Any,
-    ) -> Array:
+    def require_array(self, name: str, **kwargs: Any) -> Array:
         """Obtain an array, creating if it doesn't exist.
@@ -1690,7 +1702,7 @@ def require_array(
         -------
         a : Array
         """
-        return Array(self._sync(self._async_group.require_array(name, shape=shape, **kwargs)))
+        return Array(self._sync(self._async_group.require_array(name, **kwargs)))

     def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array:
         return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs)))
17 changes: 13 additions & 4 deletions src/zarr/testing/utils.py
@@ -1,6 +1,7 @@
 from __future__ import annotations

-from typing import TYPE_CHECKING, Any, cast
+from collections.abc import Callable, Coroutine
+from typing import TYPE_CHECKING, Any, TypeVar, cast

 import pytest

@@ -37,8 +38,16 @@ def has_cupy() -> bool:
         return False


+T_Callable = TypeVar("T_Callable", bound=Callable[[], Coroutine[Any, Any, None]])
+
+
 # Decorator for GPU tests
-def gpu_test(func: Any) -> Any:
-    return pytest.mark.gpu(
-        pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(func)
+def gpu_test(func: T_Callable) -> T_Callable:
+    return cast(
+        T_Callable,
+        pytest.mark.gpu(
+            pytest.mark.skipif(not has_cupy(), reason="CuPy not installed or no GPU available")(
+                func
+            )
+        ),
     )
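With the tightened signature, type checkers see the decorated test as its original coroutine type rather than `Any`. A hedged usage sketch (the import path comes from the file above; the test body is illustrative):

```python
from zarr.testing.utils import gpu_test


@gpu_test
async def test_gpu_roundtrip() -> None:
    # Collected with the `gpu` mark; skipped unless CuPy imports and a GPU
    # is available, per the skipif applied by gpu_test.
    import cupy as cp

    assert int(cp.arange(10).sum()) == 45
```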
13 files renamed without changes.