diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c85648e0ff..7d82a95662 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -102,6 +102,11 @@ jobs:
- name: Run Tests
run: |
hatch env run --env ${{ matrix.dependency-set }} run
+ - name: Upload coverage
+ uses: codecov/codecov-action@v5
+ with:
+ token: ${{ secrets.CODECOV_TOKEN }}
+ verbose: true # optional (default = false)
doctests:
name: doctests
diff --git a/changes/2755.bugfix.rst b/changes/2755.bugfix.rst
deleted file mode 100644
index 2555369544..0000000000
--- a/changes/2755.bugfix.rst
+++ /dev/null
@@ -1,3 +0,0 @@
-The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the
-specified fill value, or with zeros if no fill value is provided.
-This fixes a bug where Zarr format 2 data with no fill value was written with un-predictable chunk sizes.
\ No newline at end of file
diff --git a/changes/2758.bugfix.rst b/changes/2758.bugfix.rst
deleted file mode 100644
index 6b80f8a626..0000000000
--- a/changes/2758.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Fix zip-store path checking for stores with directories listed as files.
\ No newline at end of file
diff --git a/changes/2778.bugfix.rst b/changes/2778.bugfix.rst
deleted file mode 100644
index 2968c4441c..0000000000
--- a/changes/2778.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Use removeprefix rather than replace when removing filename prefixes in `FsspecStore.list`
\ No newline at end of file
diff --git a/changes/2781.bugfix.rst b/changes/2781.bugfix.rst
deleted file mode 100644
index 3673eeece7..0000000000
--- a/changes/2781.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Enable automatic removal of `needs release notes` with labeler action
\ No newline at end of file
diff --git a/changes/2785.bugfix.rst b/changes/2785.bugfix.rst
deleted file mode 100644
index 3f2b3111ea..0000000000
--- a/changes/2785.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Use the proper label config
\ No newline at end of file
diff --git a/changes/2799.bugfix.rst b/changes/2799.bugfix.rst
deleted file mode 100644
index f22b7074bb..0000000000
--- a/changes/2799.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Enitialise empty chunks to the default fill value during writing and add default fill values for datetime, timedelta, structured, and other (void* fixed size) data types
\ No newline at end of file
diff --git a/changes/2801.bugfix.rst b/changes/2801.bugfix.rst
deleted file mode 100644
index 294934aacf..0000000000
--- a/changes/2801.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Ensure utf8 compliant strings are used to construct numpy arrays in property-based tests
diff --git a/changes/2804.feature.rst b/changes/2804.feature.rst
deleted file mode 100644
index 5a707752a0..0000000000
--- a/changes/2804.feature.rst
+++ /dev/null
@@ -1 +0,0 @@
-:py:class:`LocalStore` learned to ``delete_dir``. This makes array and group deletes more efficient.
diff --git a/changes/2807.bugfix.rst b/changes/2807.bugfix.rst
deleted file mode 100644
index ae0eb2f6ac..0000000000
--- a/changes/2807.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Fix pickling for ZipStore
diff --git a/changes/2811.bugfix.rst b/changes/2811.bugfix.rst
deleted file mode 100644
index ef4e8eb7ed..0000000000
--- a/changes/2811.bugfix.rst
+++ /dev/null
@@ -1 +0,0 @@
-Update numcodecs to not overwrite codec configuration ever. Closes :issue:`2800`.
diff --git a/docs/developers/contributing.rst b/docs/developers/contributing.rst
index 220e24eced..de10fab2c6 100644
--- a/docs/developers/contributing.rst
+++ b/docs/developers/contributing.rst
@@ -230,6 +230,27 @@ during development at `http://0.0.0.0:8000/ <http://0.0.0.0:8000/>`_. This can b
$ hatch --env docs run serve
+.. _changelog:
+
+Changelog
+~~~~~~~~~
+
+zarr-python uses `towncrier`_ to manage release notes. Most pull requests should
+include at least one news fragment describing the changes. To add a release
+note, you'll need the GitHub issue or pull request number and the type of your
+change (``feature``, ``bugfix``, ``doc``, ``removal``, ``misc``). With that, run
+``towncrier create`` from within your development environment, which will prompt you
+for the issue number, change type, and the news text::
+
+ towncrier create
+
+Alternatively, you can manually create the files in the ``changes`` directory
+using the naming convention ``{issue-number}.{change-type}.rst``.
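+
+For example, the news fragment for pull request 2807 lives at
+``changes/2807.bugfix.rst`` and contains a single line of text::
+
+   Fix pickling for ZipStore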
+
+See the `towncrier`_ docs for more details.
+
+.. _towncrier: https://towncrier.readthedocs.io/en/stable/tutorial.html
+
Development best practices, policies and procedures
---------------------------------------------------
diff --git a/docs/release-notes.rst b/docs/release-notes.rst
index 08c64eb899..93466a0992 100644
--- a/docs/release-notes.rst
+++ b/docs/release-notes.rst
@@ -3,6 +3,45 @@ Release notes
.. towncrier release notes start
+3.0.3 (2025-02-14)
+------------------
+
+Features
+~~~~~~~~
+
+- Improves performance of ``FsspecStore.delete_dir`` for remote filesystems supporting concurrent/batched deletes, e.g., s3fs. (:issue:`2661`)
+- Added :meth:`zarr.config.enable_gpu` to update Zarr's configuration to use GPUs. (:issue:`2751`)
+- Avoid reading chunks during writes where possible. :issue:`757` (:issue:`2784`)
+- :py:class:`LocalStore` learned to ``delete_dir``. This makes array and group deletes more efficient. (:issue:`2804`)
+- Add ``zarr.testing.strategies.array_metadata`` to generate ``ArrayV2Metadata`` and ``ArrayV3Metadata`` instances. (:issue:`2813`)
+- Add arbitrary ``shards`` to the Hypothesis strategy for generating arrays. (:issue:`2822`)
+
+
+Bugfixes
+~~~~~~~~
+
+- Fixed bug with Zarr using device memory, instead of host memory, for storing metadata when using GPUs. (:issue:`2751`)
+- The array returned by ``zarr.empty`` and an empty ``zarr.core.buffer.cpu.NDBuffer`` will now be filled with the
+ specified fill value, or with zeros if no fill value is provided.
+  This fixes a bug where Zarr format 2 data with no fill value was written with unpredictable chunk sizes. (:issue:`2755`)
+- Fix zip-store path checking for stores with directories listed as files. (:issue:`2758`)
+- Use ``removeprefix`` rather than ``replace`` when removing filename prefixes in ``FsspecStore.list``. (:issue:`2778`)
+- Enable automatic removal of the ``needs release notes`` label with the labeler action. (:issue:`2781`)
+- Use the proper label config (:issue:`2785`)
+- Alters the behavior of ``create_array`` to ensure that any groups implied by the array's name are created if they do not already exist. Also simplifies the type signature for any function that takes an ArrayConfig-like object. (:issue:`2795`)
+- Initialise empty chunks to the default fill value during writing, and add default fill values for datetime, timedelta, structured, and other (void* fixed-size) data types. (:issue:`2799`)
+- Ensure UTF-8-compliant strings are used to construct NumPy arrays in property-based tests. (:issue:`2801`)
+- Fix pickling for ZipStore (:issue:`2807`)
+- Update numcodecs to never overwrite codec configuration. Closes :issue:`2800`. (:issue:`2811`)
+- Fix fancy indexing (e.g. ``arr[5, [0, 1]]``) with the sharding codec. (:issue:`2817`)
+
+
+Improved Documentation
+~~~~~~~~~~~~~~~~~~~~~~
+
+- Added new user guide on :ref:`user-guide-gpu`. (:issue:`2751`)
+
+
3.0.2 (2025-01-31)
------------------
diff --git a/docs/user-guide/config.rst b/docs/user-guide/config.rst
index 3662f75dff..91ffe50b91 100644
--- a/docs/user-guide/config.rst
+++ b/docs/user-guide/config.rst
@@ -32,6 +32,7 @@ Configuration options include the following:
- Whether empty chunks are written to storage ``array.write_empty_chunks``
- Async and threading options, e.g. ``async.concurrency`` and ``threading.max_workers``
- Selections of implementations of codecs, codec pipelines and buffers
+- Enabling GPU support with ``zarr.config.enable_gpu()``. See :ref:`user-guide-gpu` for more details.
For selecting custom implementations of codecs, pipelines, buffers and ndbuffers,
first register the implementations in the registry and then select them in the config.
diff --git a/docs/user-guide/gpu.rst b/docs/user-guide/gpu.rst
new file mode 100644
index 0000000000..4d3492f8bd
--- /dev/null
+++ b/docs/user-guide/gpu.rst
@@ -0,0 +1,37 @@
+.. _user-guide-gpu:
+
+Using GPUs with Zarr
+====================
+
+Zarr can use GPUs to accelerate your workload. To enable GPU support, call
+:meth:`zarr.config.enable_gpu`.
+
+.. note::
+
+ `zarr-python` currently supports reading the ndarray data into device (GPU)
+ memory as the final stage of the codec pipeline. Data will still be read into
+ or copied to host (CPU) memory for encoding and decoding.
+
+   In the future, codecs will be available for compressing and decompressing data on
+ the GPU, avoiding the need to move data between the host and device for
+ compression and decompression.
+
+Reading data into device memory
+-------------------------------
+
+:meth:`zarr.config.enable_gpu` configures Zarr to use GPU memory for the data
+buffers used internally by Zarr.
+
+.. code-block:: python
+
+ >>> import zarr
+ >>> import cupy as cp # doctest: +SKIP
+ >>> zarr.config.enable_gpu() # doctest: +SKIP
+ >>> store = zarr.storage.MemoryStore() # doctest: +SKIP
+ >>> z = zarr.create_array( # doctest: +SKIP
+ ... store=store, shape=(100, 100), chunks=(10, 10), dtype="float32",
+ ... )
+ >>> type(z[:10, :10]) # doctest: +SKIP
+ cupy.ndarray
+
+Note that the output type is a ``cupy.ndarray`` rather than a NumPy array.
diff --git a/docs/user-guide/index.rst b/docs/user-guide/index.rst
index a7bbd12453..c50713332b 100644
--- a/docs/user-guide/index.rst
+++ b/docs/user-guide/index.rst
@@ -23,6 +23,7 @@ Advanced Topics
performance
consolidated_metadata
extending
+ gpu
.. Coming soon
diff --git a/pyproject.toml b/pyproject.toml
index ab285ff7ff..0137927039 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -212,11 +212,7 @@ dependencies = [
'typing_extensions @ git+https://github.com/python/typing_extensions',
'donfig @ git+https://github.com/pytroll/donfig',
# test deps
- 'hypothesis',
- 'pytest',
- 'pytest-cov',
- 'pytest-asyncio',
- 'moto[s3]',
+ 'zarr[test]',
]
[tool.hatch.envs.upstream.env-vars]
@@ -228,6 +224,9 @@ PIP_PRE = "1"
run = "pytest --verbose"
run-mypy = "mypy src"
run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
+run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
+run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
+run-coverage-html = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src"
list-env = "pip list"
[tool.hatch.envs.min_deps]
@@ -247,18 +246,16 @@ dependencies = [
'typing_extensions==4.9.*',
'donfig==0.8.*',
# test deps
- 'hypothesis',
- 'pytest',
- 'pytest-cov',
- 'pytest-asyncio',
- 'moto[s3]',
+ 'zarr[test]',
]
[tool.hatch.envs.min_deps.scripts]
run = "pytest --verbose"
run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*"
list-env = "pip list"
-
+run-coverage = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
+run-coverage-gpu = "pip install cupy-cuda12x && pytest -m gpu --cov-config=pyproject.toml --cov=pkg --cov-report xml --cov=src --junitxml=junit.xml -o junit_family=legacy"
+run-coverage-html = "pytest --cov-config=pyproject.toml --cov=pkg --cov-report html --cov=src"
[tool.ruff]
line-length = 100
diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py
index fabd042dbe..16400f5f4b 100644
--- a/src/zarr/abc/codec.py
+++ b/src/zarr/abc/codec.py
@@ -357,7 +357,7 @@ async def encode(
@abstractmethod
async def read(
self,
- batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
+ batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
out: NDBuffer,
drop_axes: tuple[int, ...] = (),
) -> None:
@@ -379,7 +379,7 @@ async def read(
@abstractmethod
async def write(
self,
- batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
+ batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
value: NDBuffer,
drop_axes: tuple[int, ...] = (),
) -> None:
diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py
index f562a995ce..8c032a7805 100644
--- a/src/zarr/api/asynchronous.py
+++ b/src/zarr/api/asynchronous.py
@@ -10,7 +10,7 @@
from typing_extensions import deprecated
from zarr.core.array import Array, AsyncArray, create_array, get_array_metadata
-from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
+from zarr.core.array_spec import ArrayConfig, ArrayConfigLike, ArrayConfigParams
from zarr.core.buffer import NDArrayLike
from zarr.core.common import (
JSON,
@@ -857,7 +857,7 @@ async def create(
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
**kwargs: Any,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Create an array.
@@ -1019,7 +1019,7 @@ async def create(
mode = "a"
store_path = await make_store_path(store, path=path, mode=mode, storage_options=storage_options)
- config_dict: ArrayConfigLike = {}
+ config_dict: ArrayConfigParams = {}
if write_empty_chunks is not None:
if config is not None:
diff --git a/src/zarr/api/synchronous.py b/src/zarr/api/synchronous.py
index 20c016ab51..b9f8ac0ae7 100644
--- a/src/zarr/api/synchronous.py
+++ b/src/zarr/api/synchronous.py
@@ -25,7 +25,7 @@
SerializerLike,
ShardsLike,
)
- from zarr.core.array_spec import ArrayConfig, ArrayConfigLike
+ from zarr.core.array_spec import ArrayConfigLike
from zarr.core.buffer import NDArrayLike, NDArrayLikeOrScalar
from zarr.core.chunk_key_encodings import ChunkKeyEncoding, ChunkKeyEncodingLike
from zarr.core.common import (
@@ -625,7 +625,7 @@ def create(
codecs: Iterable[Codec | dict[str, JSON]] | None = None,
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
**kwargs: Any,
) -> Array:
"""Create an array.
@@ -695,7 +695,7 @@ def create(
storage_options : dict
If using an fsspec URL to create the store, these will be passed to
the backend implementation. Ignored otherwise.
- config : ArrayConfig or ArrayConfigLike, optional
+ config : ArrayConfigLike, optional
Runtime configuration of the array. If provided, will override the
default values from `zarr.config.array`.
@@ -761,7 +761,7 @@ def create_array(
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
overwrite: bool = False,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> Array:
"""Create an array.
@@ -853,7 +853,7 @@ def create_array(
Ignored otherwise.
overwrite : bool, default False
Whether to overwrite an array with the same name in the store, if one exists.
- config : ArrayConfig or ArrayConfigLike, optional
+ config : ArrayConfigLike, optional
Runtime configuration for the array.
Returns
diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py
index e8730c86dd..42b1313fac 100644
--- a/src/zarr/codecs/sharding.py
+++ b/src/zarr/codecs/sharding.py
@@ -455,8 +455,9 @@ async def _decode_single(
chunk_spec,
chunk_selection,
out_selection,
+ is_complete_shard,
)
- for chunk_coords, chunk_selection, out_selection in indexer
+ for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
],
out,
)
@@ -486,7 +487,7 @@ async def _decode_partial_single(
)
indexed_chunks = list(indexer)
- all_chunk_coords = {chunk_coords for chunk_coords, _, _ in indexed_chunks}
+ all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks}
# reading bytes of all requested chunks
shard_dict: ShardMapping = {}
@@ -524,12 +525,17 @@ async def _decode_partial_single(
chunk_spec,
chunk_selection,
out_selection,
+ is_complete_shard,
)
- for chunk_coords, chunk_selection, out_selection in indexer
+ for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
],
out,
)
- return out
+
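+    # Fancy/orthogonal indexing flattens the selection; when the indexer
+    # carries a selection shape, reshape the output buffer to match it
+    # (see issue 2817).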
+ if hasattr(indexer, "sel_shape"):
+ return out.reshape(indexer.sel_shape)
+ else:
+ return out
async def _encode_single(
self,
@@ -558,8 +564,9 @@ async def _encode_single(
chunk_spec,
chunk_selection,
out_selection,
+ is_complete_shard,
)
- for chunk_coords, chunk_selection, out_selection in indexer
+ for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
],
shard_array,
)
@@ -601,8 +608,9 @@ async def _encode_partial_single(
chunk_spec,
chunk_selection,
out_selection,
+ is_complete_shard,
)
- for chunk_coords, chunk_selection, out_selection in indexer
+ for chunk_coords, chunk_selection, out_selection, is_complete_shard in indexer
],
shard_array,
)
diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py
index d6a580320d..0e9d74ba8a 100644
--- a/src/zarr/core/array.py
+++ b/src/zarr/core/array.py
@@ -39,6 +39,7 @@
NDBuffer,
default_buffer_prototype,
)
+from zarr.core.buffer.cpu import buffer_prototype as cpu_buffer_prototype
from zarr.core.chunk_grids import RegularChunkGrid, _auto_partition, normalize_chunks
from zarr.core.chunk_key_encodings import (
ChunkKeyEncoding,
@@ -164,19 +165,20 @@ async def get_array_metadata(
) -> dict[str, JSON]:
if zarr_format == 2:
zarray_bytes, zattrs_bytes = await gather(
- (store_path / ZARRAY_JSON).get(), (store_path / ZATTRS_JSON).get()
+ (store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype),
+ (store_path / ZATTRS_JSON).get(prototype=cpu_buffer_prototype),
)
if zarray_bytes is None:
raise FileNotFoundError(store_path)
elif zarr_format == 3:
- zarr_json_bytes = await (store_path / ZARR_JSON).get()
+ zarr_json_bytes = await (store_path / ZARR_JSON).get(prototype=cpu_buffer_prototype)
if zarr_json_bytes is None:
raise FileNotFoundError(store_path)
elif zarr_format is None:
zarr_json_bytes, zarray_bytes, zattrs_bytes = await gather(
- (store_path / ZARR_JSON).get(),
- (store_path / ZARRAY_JSON).get(),
- (store_path / ZATTRS_JSON).get(),
+ (store_path / ZARR_JSON).get(prototype=cpu_buffer_prototype),
+ (store_path / ZARRAY_JSON).get(prototype=cpu_buffer_prototype),
+ (store_path / ZATTRS_JSON).get(prototype=cpu_buffer_prototype),
)
if zarr_json_bytes is not None and zarray_bytes is not None:
# warn and favor v3
@@ -220,7 +222,7 @@ class AsyncArray(Generic[T_ArrayMetadata]):
The metadata of the array.
store_path : StorePath
The path to the Zarr store.
- config : ArrayConfig, optional
+ config : ArrayConfigLike, optional
The runtime configuration of the array, by default None.
Attributes
@@ -245,7 +247,7 @@ def __init__(
self: AsyncArray[ArrayV2Metadata],
metadata: ArrayV2Metadata | ArrayV2MetadataDict,
store_path: StorePath,
- config: ArrayConfig | None = None,
+ config: ArrayConfigLike | None = None,
) -> None: ...
@overload
@@ -253,14 +255,14 @@ def __init__(
self: AsyncArray[ArrayV3Metadata],
metadata: ArrayV3Metadata | ArrayV3MetadataDict,
store_path: StorePath,
- config: ArrayConfig | None = None,
+ config: ArrayConfigLike | None = None,
) -> None: ...
def __init__(
self,
metadata: ArrayMetadata | ArrayMetadataDict,
store_path: StorePath,
- config: ArrayConfig | None = None,
+ config: ArrayConfigLike | None = None,
) -> None:
if isinstance(metadata, dict):
zarr_format = metadata["zarr_format"]
@@ -274,12 +276,11 @@ def __init__(
raise ValueError(f"Invalid zarr_format: {zarr_format}. Expected 2 or 3")
metadata_parsed = parse_array_metadata(metadata)
-
- config = ArrayConfig.from_dict({}) if config is None else config
+ config_parsed = parse_array_config(config)
object.__setattr__(self, "metadata", metadata_parsed)
object.__setattr__(self, "store_path", store_path)
- object.__setattr__(self, "_config", config)
+ object.__setattr__(self, "_config", config_parsed)
object.__setattr__(self, "codec_pipeline", create_codec_pipeline(metadata=metadata_parsed))
# this overload defines the function signature when zarr_format is 2
@@ -303,7 +304,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata]: ...
# this overload defines the function signature when zarr_format is 3
@@ -332,7 +333,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV3Metadata]: ...
@overload
@@ -360,7 +361,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV3Metadata]: ...
@overload
@@ -394,7 +395,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]: ...
@classmethod
@@ -429,7 +430,7 @@ async def create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Method to create a new asynchronous array instance.
@@ -507,7 +508,7 @@ async def create(
Whether to raise an error if the store already exists (default is False).
data : npt.ArrayLike, optional
The data to be inserted into the array (default is None).
- config : ArrayConfig or ArrayConfigLike, optional
+ config : ArrayConfigLike, optional
Runtime configuration for the array.
Returns
@@ -570,7 +571,7 @@ async def _create(
# runtime
overwrite: bool = False,
data: npt.ArrayLike | None = None,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Method to create a new asynchronous array instance.
See :func:`AsyncArray.create` for more details.
@@ -1291,8 +1292,9 @@ async def _get_selection(
self.metadata.get_chunk_spec(chunk_coords, _config, prototype=prototype),
chunk_selection,
out_selection,
+ is_complete_chunk,
)
- for chunk_coords, chunk_selection, out_selection in indexer
+ for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
],
out_buffer,
drop_axes=indexer.drop_axes,
@@ -1350,7 +1352,7 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F
"""
Asynchronously save the array metadata.
"""
- to_save = metadata.to_buffer_dict(default_buffer_prototype())
+ to_save = metadata.to_buffer_dict(cpu_buffer_prototype)
awaitables = [set_or_delete(self.store_path / key, value) for key, value in to_save.items()]
if ensure_parents:
@@ -1362,7 +1364,7 @@ async def _save_metadata(self, metadata: ArrayMetadata, ensure_parents: bool = F
[
(parent.store_path / key).set_if_not_exists(value)
for key, value in parent.metadata.to_buffer_dict(
- default_buffer_prototype()
+ cpu_buffer_prototype
).items()
]
)
@@ -1420,8 +1422,9 @@ async def _set_selection(
self.metadata.get_chunk_spec(chunk_coords, _config, prototype),
chunk_selection,
out_selection,
+ is_complete_chunk,
)
- for chunk_coords, chunk_selection, out_selection in indexer
+ for chunk_coords, chunk_selection, out_selection, is_complete_chunk in indexer
],
value_buffer,
drop_axes=indexer.drop_axes,
@@ -1744,7 +1747,7 @@ def create(
compressor: dict[str, JSON] | None = None,
# runtime
overwrite: bool = False,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> Array:
"""Creates a new Array instance from an initialized store.
@@ -1873,7 +1876,7 @@ def _create(
compressor: dict[str, JSON] | None = None,
# runtime
overwrite: bool = False,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
) -> Array:
"""Creates a new Array instance from an initialized store.
See :func:`Array.create` for more details.
@@ -3814,7 +3817,8 @@ async def init_array(
chunk_key_encoding: ChunkKeyEncodingLike | None = None,
dimension_names: Iterable[str] | None = None,
overwrite: bool = False,
-) -> ArrayV3Metadata | ArrayV2Metadata:
+ config: ArrayConfigLike | None,
+) -> AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata]:
"""Create and persist an array metadata document.
Parameters
@@ -3893,11 +3897,13 @@ async def init_array(
Zarr format 3 only. Zarr format 2 arrays should not use this parameter.
overwrite : bool, default False
Whether to overwrite an array with the same name in the store, if one exists.
+ config : ArrayConfigLike or None, optional
+ Configuration for this array.
Returns
-------
- ArrayV3Metadata | ArrayV2Metadata
- The array metadata document.
+ AsyncArray
+ The AsyncArray.
"""
if zarr_format is None:
@@ -3997,14 +4003,9 @@ async def init_array(
attributes=attributes,
)
- # save the metadata to disk
- # TODO: make this easier -- it should be a simple function call that takes a {key: buffer}
- coros = (
- (store_path / key).set(value)
- for key, value in meta.to_buffer_dict(default_buffer_prototype()).items()
- )
- await gather(*coros)
- return meta
+ arr = AsyncArray(metadata=meta, store_path=store_path, config=config)
+ await arr._save_metadata(meta, ensure_parents=True)
+ return arr
async def create_array(
@@ -4027,7 +4028,7 @@ async def create_array(
dimension_names: Iterable[str] | None = None,
storage_options: dict[str, Any] | None = None,
overwrite: bool = False,
- config: ArrayConfig | ArrayConfigLike | None = None,
+ config: ArrayConfigLike | None = None,
write_data: bool = True,
) -> AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata]:
"""Create an array.
@@ -4117,7 +4118,7 @@ async def create_array(
Ignored otherwise.
overwrite : bool, default False
Whether to overwrite an array with the same name in the store, if one exists.
- config : ArrayConfig or ArrayConfigLike, optional
+ config : ArrayConfigLike, optional
Runtime configuration for the array.
write_data : bool
If a pre-existing array-like object was provided to this function via the ``data`` parameter
@@ -4143,13 +4144,12 @@ async def create_array(
"""
mode: Literal["a"] = "a"
- config_parsed = parse_array_config(config)
store_path = await make_store_path(store, path=name, mode=mode, storage_options=storage_options)
data_parsed, shape_parsed, dtype_parsed = _parse_data_params(
data=data, shape=shape, dtype=dtype
)
- meta = await init_array(
+ result = await init_array(
store_path=store_path,
shape=shape_parsed,
dtype=dtype_parsed,
@@ -4165,9 +4165,9 @@ async def create_array(
chunk_key_encoding=chunk_key_encoding,
dimension_names=dimension_names,
overwrite=overwrite,
+ config=config,
)
- result = AsyncArray(metadata=meta, store_path=store_path, config=config_parsed)
if write_data is True and data_parsed is not None:
await result._set_selection(
BasicIndexer(..., shape=result.shape, chunk_grid=result.metadata.chunk_grid),
diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py
index b1a6a3cad0..59d3cc6b40 100644
--- a/src/zarr/core/array_spec.py
+++ b/src/zarr/core/array_spec.py
@@ -21,7 +21,7 @@
from zarr.core.common import ChunkCoords
-class ArrayConfigLike(TypedDict):
+class ArrayConfigParams(TypedDict):
"""
A TypedDict model of the attributes of an ArrayConfig class, but with no required fields.
This allows for partial construction of an ArrayConfig, with the assumption that the unset
@@ -56,13 +56,13 @@ def __init__(self, order: MemoryOrder, write_empty_chunks: bool) -> None:
object.__setattr__(self, "write_empty_chunks", write_empty_chunks_parsed)
@classmethod
- def from_dict(cls, data: ArrayConfigLike) -> Self:
+ def from_dict(cls, data: ArrayConfigParams) -> Self:
"""
Create an ArrayConfig from a dict. The keys of that dict are a subset of the
attributes of the ArrayConfig class. Any keys missing from that dict will be set to the
the values in the ``array`` namespace of ``zarr.config``.
"""
- kwargs_out: ArrayConfigLike = {}
+ kwargs_out: ArrayConfigParams = {}
for f in fields(ArrayConfig):
field_name = cast(Literal["order", "write_empty_chunks"], f.name)
if field_name not in data:
@@ -72,7 +72,10 @@ def from_dict(cls, data: ArrayConfigLike) -> Self:
return cls(**kwargs_out)
-def parse_array_config(data: ArrayConfig | ArrayConfigLike | None) -> ArrayConfig:
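+# Anything that can be coerced to an ArrayConfig: either a fully-formed
+# ArrayConfig, or a partial dict of its fields (ArrayConfigParams).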
+ArrayConfigLike = ArrayConfig | ArrayConfigParams
+
+
+def parse_array_config(data: ArrayConfigLike | None) -> ArrayConfig:
"""
Convert various types of data to an ArrayConfig.
"""
diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py
index 6941c8897e..aac6792cff 100644
--- a/src/zarr/core/buffer/gpu.py
+++ b/src/zarr/core/buffer/gpu.py
@@ -13,6 +13,10 @@
from zarr.core.buffer import core
from zarr.core.buffer.core import ArrayLike, BufferPrototype, NDArrayLike
+from zarr.registry import (
+ register_buffer,
+ register_ndbuffer,
+)
if TYPE_CHECKING:
from collections.abc import Iterable
@@ -215,3 +219,6 @@ def __setitem__(self, key: Any, value: Any) -> None:
buffer_prototype = BufferPrototype(buffer=Buffer, nd_buffer=NDBuffer)
+
+register_buffer(Buffer)
+register_ndbuffer(NDBuffer)
diff --git a/src/zarr/core/codec_pipeline.py b/src/zarr/core/codec_pipeline.py
index a35c5ca210..0c53cda96c 100644
--- a/src/zarr/core/codec_pipeline.py
+++ b/src/zarr/core/codec_pipeline.py
@@ -16,7 +16,7 @@
)
from zarr.core.common import ChunkCoords, concurrent_map
from zarr.core.config import config
-from zarr.core.indexing import SelectorTuple, is_scalar, is_total_slice
+from zarr.core.indexing import SelectorTuple, is_scalar
from zarr.core.metadata.v2 import _default_fill_value
from zarr.registry import register_pipeline
@@ -243,7 +243,7 @@ async def encode_partial_batch(
async def read_batch(
self,
- batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
+ batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
out: NDBuffer,
drop_axes: tuple[int, ...] = (),
) -> None:
@@ -251,10 +251,10 @@ async def read_batch(
chunk_array_batch = await self.decode_partial_batch(
[
(byte_getter, chunk_selection, chunk_spec)
- for byte_getter, chunk_spec, chunk_selection, _ in batch_info
+ for byte_getter, chunk_spec, chunk_selection, *_ in batch_info
]
)
- for chunk_array, (_, chunk_spec, _, out_selection) in zip(
+ for chunk_array, (_, chunk_spec, _, out_selection, _) in zip(
chunk_array_batch, batch_info, strict=False
):
if chunk_array is not None:
@@ -263,22 +263,19 @@ async def read_batch(
out[out_selection] = fill_value_or_default(chunk_spec)
else:
chunk_bytes_batch = await concurrent_map(
- [
- (byte_getter, array_spec.prototype)
- for byte_getter, array_spec, _, _ in batch_info
- ],
+ [(byte_getter, array_spec.prototype) for byte_getter, array_spec, *_ in batch_info],
lambda byte_getter, prototype: byte_getter.get(prototype),
config.get("async.concurrency"),
)
chunk_array_batch = await self.decode_batch(
[
(chunk_bytes, chunk_spec)
- for chunk_bytes, (_, chunk_spec, _, _) in zip(
+ for chunk_bytes, (_, chunk_spec, *_) in zip(
chunk_bytes_batch, batch_info, strict=False
)
],
)
- for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip(
+ for chunk_array, (_, chunk_spec, chunk_selection, out_selection, _) in zip(
chunk_array_batch, batch_info, strict=False
):
if chunk_array is not None:
@@ -296,9 +293,10 @@ def _merge_chunk_array(
out_selection: SelectorTuple,
chunk_spec: ArraySpec,
chunk_selection: SelectorTuple,
+ is_complete_chunk: bool,
drop_axes: tuple[int, ...],
) -> NDBuffer:
- if is_total_slice(chunk_selection, chunk_spec.shape) and value.shape == chunk_spec.shape:
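+    # When the selection covers the whole chunk, the incoming value can
+    # replace the chunk outright; no read-modify-write of existing data is needed.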
+ if is_complete_chunk and value.shape == chunk_spec.shape:
return value
if existing_chunk_array is None:
chunk_array = chunk_spec.prototype.nd_buffer.create(
@@ -327,7 +325,7 @@ def _merge_chunk_array(
async def write_batch(
self,
- batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
+ batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
value: NDBuffer,
drop_axes: tuple[int, ...] = (),
) -> None:
@@ -337,14 +335,14 @@ async def write_batch(
await self.encode_partial_batch(
[
(byte_setter, value, chunk_selection, chunk_spec)
- for byte_setter, chunk_spec, chunk_selection, out_selection in batch_info
+ for byte_setter, chunk_spec, chunk_selection, out_selection, _ in batch_info
],
)
else:
await self.encode_partial_batch(
[
(byte_setter, value[out_selection], chunk_selection, chunk_spec)
- for byte_setter, chunk_spec, chunk_selection, out_selection in batch_info
+ for byte_setter, chunk_spec, chunk_selection, out_selection, _ in batch_info
],
)
@@ -361,10 +359,10 @@ async def _read_key(
chunk_bytes_batch = await concurrent_map(
[
(
- None if is_total_slice(chunk_selection, chunk_spec.shape) else byte_setter,
+ None if is_complete_chunk else byte_setter,
chunk_spec.prototype,
)
- for byte_setter, chunk_spec, chunk_selection, _ in batch_info
+ for byte_setter, chunk_spec, chunk_selection, _, is_complete_chunk in batch_info
],
_read_key,
config.get("async.concurrency"),
@@ -372,7 +370,7 @@ async def _read_key(
chunk_array_decoded = await self.decode_batch(
[
(chunk_bytes, chunk_spec)
- for chunk_bytes, (_, chunk_spec, _, _) in zip(
+ for chunk_bytes, (_, chunk_spec, *_) in zip(
chunk_bytes_batch, batch_info, strict=False
)
],
@@ -380,14 +378,24 @@ async def _read_key(
chunk_array_merged = [
self._merge_chunk_array(
- chunk_array, value, out_selection, chunk_spec, chunk_selection, drop_axes
- )
- for chunk_array, (_, chunk_spec, chunk_selection, out_selection) in zip(
- chunk_array_decoded, batch_info, strict=False
+ chunk_array,
+ value,
+ out_selection,
+ chunk_spec,
+ chunk_selection,
+ is_complete_chunk,
+ drop_axes,
)
+ for chunk_array, (
+ _,
+ chunk_spec,
+ chunk_selection,
+ out_selection,
+ is_complete_chunk,
+ ) in zip(chunk_array_decoded, batch_info, strict=False)
]
chunk_array_batch: list[NDBuffer | None] = []
- for chunk_array, (_, chunk_spec, _, _) in zip(
+ for chunk_array, (_, chunk_spec, *_) in zip(
chunk_array_merged, batch_info, strict=False
):
if chunk_array is None:
@@ -403,7 +411,7 @@ async def _read_key(
chunk_bytes_batch = await self.encode_batch(
[
(chunk_array, chunk_spec)
- for chunk_array, (_, chunk_spec, _, _) in zip(
+ for chunk_array, (_, chunk_spec, *_) in zip(
chunk_array_batch, batch_info, strict=False
)
],
@@ -418,7 +426,7 @@ async def _write_key(byte_setter: ByteSetter, chunk_bytes: Buffer | None) -> Non
await concurrent_map(
[
(byte_setter, chunk_bytes)
- for chunk_bytes, (byte_setter, _, _, _) in zip(
+ for chunk_bytes, (byte_setter, *_) in zip(
chunk_bytes_batch, batch_info, strict=False
)
],
@@ -446,7 +454,7 @@ async def encode(
async def read(
self,
- batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple]],
+ batch_info: Iterable[tuple[ByteGetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
out: NDBuffer,
drop_axes: tuple[int, ...] = (),
) -> None:
@@ -461,7 +469,7 @@ async def read(
async def write(
self,
- batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple]],
+ batch_info: Iterable[tuple[ByteSetter, ArraySpec, SelectorTuple, SelectorTuple, bool]],
value: NDBuffer,
drop_axes: tuple[int, ...] = (),
) -> None:
diff --git a/src/zarr/core/config.py b/src/zarr/core/config.py
index 051e8c68e1..c565cb0708 100644
--- a/src/zarr/core/config.py
+++ b/src/zarr/core/config.py
@@ -29,10 +29,13 @@
from __future__ import annotations
-from typing import Any, Literal, cast
+from typing import TYPE_CHECKING, Any, Literal, cast
from donfig import Config as DConfig
+if TYPE_CHECKING:
+ from donfig.config_obj import ConfigSet
+
class BadConfigError(ValueError):
_msg = "bad Config: %r"
@@ -56,6 +59,14 @@ def reset(self) -> None:
self.clear()
self.refresh()
+ def enable_gpu(self) -> ConfigSet:
+ """
+ Configure Zarr to use GPUs where possible.
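+
+        The returned ``ConfigSet`` can be used as a context manager to scope
+        GPU usage to a block, e.g. ``with zarr.config.enable_gpu(): ...``.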
+ """
+ return self.set(
+ {"buffer": "zarr.core.buffer.gpu.Buffer", "ndbuffer": "zarr.core.buffer.gpu.NDBuffer"}
+ )
+
# The default configuration for zarr
config = Config(
diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py
index 41b6c26bb6..998fe156a1 100644
--- a/src/zarr/core/indexing.py
+++ b/src/zarr/core/indexing.py
@@ -321,12 +321,12 @@ class ChunkDimProjection(NamedTuple):
Selection of items from chunk array.
dim_out_sel
Selection of items in target (output) array.
-
"""
dim_chunk_ix: int
dim_chunk_sel: Selector
dim_out_sel: Selector | None
+ is_complete_chunk: bool
@dataclass(frozen=True)
@@ -346,7 +346,8 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
dim_offset = dim_chunk_ix * self.dim_chunk_len
dim_chunk_sel = self.dim_sel - dim_offset
dim_out_sel = None
- yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
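+        # An integer index selects the full chunk only when the chunk spans a
+        # single element along this dimension.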
+ is_complete_chunk = self.dim_chunk_len == 1
+ yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
@dataclass(frozen=True)
@@ -420,7 +421,10 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems)
- yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
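+            # The chunk is fully covered only if the slice starts at the chunk
+            # boundary, runs past its end, and has unit step.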
+ is_complete_chunk = (
+ dim_chunk_sel_start == 0 and (self.stop >= dim_limit) and self.step in [1, None]
+ )
+ yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
def check_selection_length(selection: SelectionNormalized, shape: ChunkCoords) -> None:
@@ -493,12 +497,14 @@ class ChunkProjection(NamedTuple):
Selection of items from chunk array.
out_selection
Selection of items in target (output) array.
-
+    is_complete_chunk
+        True if a complete chunk is indexed.
"""
chunk_coords: ChunkCoords
chunk_selection: tuple[Selector, ...] | npt.NDArray[np.intp]
out_selection: tuple[Selector, ...] | npt.NDArray[np.intp] | slice
+ is_complete_chunk: bool
def is_slice(s: Any) -> TypeGuard[slice]:
@@ -574,8 +580,8 @@ def __iter__(self) -> Iterator[ChunkProjection]:
out_selection = tuple(
p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None
)
-
- yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
+ is_complete_chunk = all(p.is_complete_chunk for p in dim_projections)
+ yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
@dataclass(frozen=True)
@@ -643,8 +649,9 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
start = self.chunk_nitems_cumsum[dim_chunk_ix - 1]
stop = self.chunk_nitems_cumsum[dim_chunk_ix]
dim_out_sel = slice(start, stop)
+ is_complete_chunk = False # TODO
- yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
+ yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
class Order(Enum):
@@ -783,8 +790,8 @@ def __iter__(self) -> Iterator[ChunkDimProjection]:
# find region in chunk
dim_offset = dim_chunk_ix * self.dim_chunk_len
dim_chunk_sel = self.dim_sel[start:stop] - dim_offset
-
- yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel)
+ is_complete_chunk = False # TODO
+ yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel, is_complete_chunk)
def slice_to_range(s: slice, length: int) -> range:
@@ -921,7 +928,8 @@ def __iter__(self) -> Iterator[ChunkProjection]:
if not is_basic_selection(out_selection):
out_selection = ix_(out_selection, self.shape)
- yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
+ is_complete_chunk = all(p.is_complete_chunk for p in dim_projections)
+ yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
@dataclass(frozen=True)
@@ -1030,8 +1038,8 @@ def __iter__(self) -> Iterator[ChunkProjection]:
out_selection = tuple(
p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None
)
-
- yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
+ is_complete_chunk = all(p.is_complete_chunk for p in dim_projections)
+ yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
@dataclass(frozen=True)
@@ -1198,7 +1206,8 @@ def __iter__(self) -> Iterator[ChunkProjection]:
for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets, strict=True)
)
- yield ChunkProjection(chunk_coords, chunk_selection, out_selection)
+ is_complete_chunk = False # TODO
+ yield ChunkProjection(chunk_coords, chunk_selection, out_selection, is_complete_chunk)
@dataclass(frozen=True)
@@ -1363,32 +1372,6 @@ def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]:
return itertools.product(*(range(x) for x in chunks_per_shard))
-def is_total_slice(item: Selection, shape: ChunkCoords) -> bool:
- """Determine whether `item` specifies a complete slice of array with the
- given `shape`. Used to optimize __setitem__ operations on the Chunk
- class."""
-
- # N.B., assume shape is normalized
- if item == slice(None):
- return True
- if isinstance(item, slice):
- item = (item,)
- if isinstance(item, tuple):
- return all(
- (isinstance(dim_sel, int) and dim_len == 1)
- or (
- isinstance(dim_sel, slice)
- and (
- (dim_sel == slice(None))
- or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None]))
- )
- )
- for dim_sel, dim_len in zip(item, shape, strict=False)
- )
- else:
- raise TypeError(f"expected slice or tuple of slices, found {item!r}")
-
-
def get_indexer(
selection: SelectionWithFields, shape: ChunkCoords, chunk_grid: ChunkGrid
) -> Indexer:
diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py
index 25697c4545..3d292c81b4 100644
--- a/src/zarr/core/metadata/v2.py
+++ b/src/zarr/core/metadata/v2.py
@@ -353,7 +353,7 @@ def _default_fill_value(dtype: np.dtype[Any]) -> Any:
return dtype.type("nat")
elif dtype.kind == "V":
if dtype.fields is not None:
- default = tuple([_default_fill_value(field[0]) for field in dtype.fields.values()])
+ default = tuple(_default_fill_value(field[0]) for field in dtype.fields.values())
return np.array([default], dtype=dtype)
else:
return np.zeros(1, dtype=dtype)
diff --git a/src/zarr/storage/_fsspec.py b/src/zarr/storage/_fsspec.py
index 92c14fcc76..1cc7039e68 100644
--- a/src/zarr/storage/_fsspec.py
+++ b/src/zarr/storage/_fsspec.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import warnings
+from contextlib import suppress
from typing import TYPE_CHECKING, Any
from zarr.abc.store import (
@@ -286,6 +287,19 @@ async def delete(self, key: str) -> None:
except self.allowed_exceptions:
pass
+ async def delete_dir(self, prefix: str) -> None:
+ # docstring inherited
+ if not self.supports_deletes:
+ raise NotImplementedError(
+ "This method is only available for stores that support deletes."
+ )
+ self._check_writable()
+
+ path_to_delete = _dereference_path(self.path, prefix)
+
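+        # A single recursive rm lets filesystems that support concurrent or
+        # batched deletes (e.g. s3fs) remove all keys under the prefix at once.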
+ with suppress(*self.allowed_exceptions):
+ await self.fs._rm(path_to_delete, recursive=True)
+
async def exists(self, key: str) -> bool:
# docstring inherited
path = _dereference_path(self.path, key)
diff --git a/src/zarr/storage/_logging.py b/src/zarr/storage/_logging.py
index e9d6211588..5f1a97acd9 100644
--- a/src/zarr/storage/_logging.py
+++ b/src/zarr/storage/_logging.py
@@ -88,7 +88,7 @@ def log(self, hint: Any = "") -> Generator[None, None, None]:
op = f"{type(self._store).__name__}.{method}"
if hint:
op = f"{op}({hint})"
- self.logger.info("Calling %s", op)
+ self.logger.info(" Calling %s", op)
start_time = time.time()
try:
self.counter[method] += 1
diff --git a/src/zarr/testing/strategies.py b/src/zarr/testing/strategies.py
index 5722f3c99e..8847b49020 100644
--- a/src/zarr/testing/strategies.py
+++ b/src/zarr/testing/strategies.py
@@ -1,5 +1,5 @@
import sys
-from typing import Any
+from typing import Any, Literal
import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
@@ -8,9 +8,13 @@
from hypothesis.strategies import SearchStrategy
import zarr
-from zarr.abc.store import RangeByteRequest
+from zarr.abc.store import RangeByteRequest, Store
+from zarr.codecs.bytes import BytesCodec
from zarr.core.array import Array
+from zarr.core.chunk_grids import RegularChunkGrid
+from zarr.core.chunk_key_encodings import DefaultChunkKeyEncoding
from zarr.core.common import ZarrFormat
+from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
from zarr.core.sync import sync
from zarr.storage import MemoryStore, StoreLike
from zarr.storage._common import _dereference_path
@@ -67,6 +71,11 @@ def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
)
+def clear_store(x: Store) -> Store:
+ sync(x.clear())
+ return x
+
+
# From https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html#node-names
# 1. must not be the empty string ("")
# 2. must not include the character "/"
@@ -85,12 +94,59 @@ def safe_unicode_for_dtype(dtype: np.dtype[np.str_]) -> st.SearchStrategy[str]:
# st.builds will only call a new store constructor for different keyword arguments
# i.e. stores.examples() will always return the same object per Store class.
# So we map a clear to reset the store.
-stores = st.builds(MemoryStore, st.just({})).map(lambda x: sync(x.clear()))
+stores = st.builds(MemoryStore, st.just({})).map(clear_store)
compressors = st.sampled_from([None, "default"])
-zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([2, 3])
+zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2])
array_shapes = npst.array_shapes(max_dims=4, min_side=0)
+@st.composite # type: ignore[misc]
+def dimension_names(draw: st.DrawFn, *, ndim: int | None = None) -> list[None | str] | None:
+ simple_text = st.text(zarr_key_chars, min_size=0)
+ return draw(st.none() | st.lists(st.none() | simple_text, min_size=ndim, max_size=ndim)) # type: ignore[no-any-return]
+
+
+@st.composite # type: ignore[misc]
+def array_metadata(
+ draw: st.DrawFn,
+ *,
+ array_shapes: st.SearchStrategy[tuple[int, ...]] = npst.array_shapes,
+ zarr_formats: st.SearchStrategy[Literal[2, 3]] = zarr_formats,
+ attributes: st.SearchStrategy[dict[str, Any]] = attrs,
+) -> ArrayV2Metadata | ArrayV3Metadata:
+ zarr_format = draw(zarr_formats)
+ # separator = draw(st.sampled_from(['/', '\\']))
+ shape = draw(array_shapes())
+ ndim = len(shape)
+ chunk_shape = draw(array_shapes(min_dims=ndim, max_dims=ndim))
+ dtype = draw(v3_dtypes())
+ fill_value = draw(npst.from_dtype(dtype))
+ if zarr_format == 2:
+ return ArrayV2Metadata(
+ shape=shape,
+ chunks=chunk_shape,
+ dtype=dtype,
+ fill_value=fill_value,
+ order=draw(st.sampled_from(["C", "F"])),
+ attributes=draw(attributes),
+ dimension_separator=draw(st.sampled_from([".", "/"])),
+ filters=None,
+ compressor=None,
+ )
+ else:
+ return ArrayV3Metadata(
+ shape=shape,
+ data_type=dtype,
+ chunk_grid=RegularChunkGrid(chunk_shape=chunk_shape),
+ fill_value=fill_value,
+ attributes=draw(attributes),
+ dimension_names=draw(dimension_names(ndim=ndim)),
+ chunk_key_encoding=DefaultChunkKeyEncoding(separator="/"), # FIXME
+ codecs=[BytesCodec()],
+ storage_transformers=(),
+ )
+
+
@st.composite # type: ignore[misc]
def numpy_arrays(
draw: st.DrawFn,
@@ -110,6 +166,32 @@ def numpy_arrays(
return draw(npst.arrays(dtype=dtype, shape=shapes))
+@st.composite # type: ignore[misc]
+def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]:
+ # We want this strategy to shrink towards arrays with smaller number of chunks
+ # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
+ numchunks = draw(
+ st.tuples(*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in shape])
+ )
+ # 2. and now generate the chunks tuple
+ return tuple(
+ size // nchunks if nchunks > 0 else 0
+ for size, nchunks in zip(shape, numchunks, strict=True)
+ )
+
+
+@st.composite # type: ignore[misc]
+def shard_shapes(
+ draw: st.DrawFn, *, shape: tuple[int, ...], chunk_shape: tuple[int, ...]
+) -> tuple[int, ...]:
+ # We want this strategy to shrink towards arrays with smaller number of shards
+ # shards must be an integral number of chunks
+ assert all(c != 0 for c in chunk_shape)
+ numchunks = tuple(s // c for s, c in zip(shape, chunk_shape, strict=True))
+ multiples = tuple(draw(st.integers(min_value=1, max_value=nc)) for nc in numchunks)
+ return tuple(m * c for m, c in zip(multiples, chunk_shape, strict=True))
+
+
@st.composite # type: ignore[misc]
def np_array_and_chunks(
draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays
@@ -119,19 +201,7 @@ def np_array_and_chunks(
Returns: a tuple of the array and a suitable random chunking for it.
"""
array = draw(arrays)
- # We want this strategy to shrink towards arrays with smaller number of chunks
- # 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
- numchunks = draw(
- st.tuples(
- *[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in array.shape]
- )
- )
- # 2. and now generate the chunks tuple
- chunks = tuple(
- size // nchunks if nchunks > 0 else 0
- for size, nchunks in zip(array.shape, numchunks, strict=True)
- )
- return (array, chunks)
+ return (array, draw(chunk_shapes(shape=array.shape)))
@st.composite # type: ignore[misc]
@@ -154,7 +224,12 @@ def arrays(
zarr_format = draw(zarr_formats)
if arrays is None:
arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format))
- nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
+ nparray = draw(arrays)
+ chunk_shape = draw(chunk_shapes(shape=nparray.shape))
+ if zarr_format == 3 and all(c > 0 for c in chunk_shape):
+ shard_shape = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunk_shape))
+ else:
+ shard_shape = None
# test that None works too.
fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))
# compressor = draw(compressors)
@@ -167,7 +242,8 @@ def arrays(
a = root.create_array(
array_path,
shape=nparray.shape,
- chunks=chunks,
+ chunks=chunk_shape,
+ shards=shard_shape,
dtype=nparray.dtype,
attributes=attributes,
# compressor=compressor, # FIXME
@@ -180,7 +256,8 @@ def arrays(
assert a.name is not None
assert isinstance(root[array_path], Array)
assert nparray.shape == a.shape
- assert chunks == a.chunks
+ assert chunk_shape == a.chunks
+ assert shard_shape == a.shards
assert array_path == a.path, (path, name, array_path, a.name, a.path)
assert a.basename == name, (a.basename, name)
assert dict(a.attrs) == expected_attrs
@@ -209,6 +286,43 @@ def basic_indices(draw: st.DrawFn, *, shape: tuple[int], **kwargs: Any) -> Any:
)
+@st.composite # type: ignore[misc]
+def orthogonal_indices(
+ draw: st.DrawFn, *, shape: tuple[int]
+) -> tuple[tuple[np.ndarray[Any, Any], ...], tuple[np.ndarray[Any, Any], ...]]:
+ """
+ Strategy that returns
+ (1) a tuple of integer arrays used for orthogonal indexing of Zarr arrays.
+    (2) a tuple of integer arrays that can be used for equivalent indexing of numpy arrays
+ """
+ zindexer = []
+ npindexer = []
+ ndim = len(shape)
+ for axis, size in enumerate(shape):
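+        # Draw either an integer-array index or a basic index (int/slice) for
+        # this axis; the branches below normalise each to an integer array for
+        # the equivalent numpy indexer.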
+ val = draw(
+ npst.integer_array_indices(
+ shape=(size,), result_shape=npst.array_shapes(min_side=1, max_side=size, max_dims=1)
+ )
+ | basic_indices(min_dims=1, shape=(size,), allow_ellipsis=False)
+ .map(lambda x: (x,) if not isinstance(x, tuple) else x) # bare ints, slices
+ .filter(lambda x: bool(x)) # skip empty tuple
+ )
+ (idxr,) = val
+ if isinstance(idxr, int):
+ idxr = np.array([idxr])
+ zindexer.append(idxr)
+ if isinstance(idxr, slice):
+ idxr = np.arange(*idxr.indices(size))
+ elif isinstance(idxr, (tuple, int)):
+ idxr = np.array(idxr)
+ newshape = [1] * ndim
+ newshape[axis] = idxr.size
+ npindexer.append(idxr.reshape(newshape))
+
+ # casting the output of broadcast_arrays is needed for numpy 1.25
+ return tuple(zindexer), tuple(np.broadcast_arrays(*npindexer))
+
+
def key_ranges(
keys: SearchStrategy = node_names, max_size: int = sys.maxsize
) -> SearchStrategy[list[int]]:
diff --git a/src/zarr/testing/utils.py b/src/zarr/testing/utils.py
index c7b6e7939c..0a93b93fdb 100644
--- a/src/zarr/testing/utils.py
+++ b/src/zarr/testing/utils.py
@@ -38,7 +38,7 @@ def has_cupy() -> bool:
return False
-T_Callable = TypeVar("T_Callable", bound=Callable[[], Coroutine[Any, Any, None]])
+T_Callable = TypeVar("T_Callable", bound=Callable[..., Coroutine[Any, Any, None] | None])
# Decorator for GPU tests
diff --git a/tests/test_api.py b/tests/test_api.py
index 2cfed2c52a..341245404c 100644
--- a/tests/test_api.py
+++ b/tests/test_api.py
@@ -28,6 +28,7 @@
from zarr.errors import MetadataValidationError
from zarr.storage import MemoryStore
from zarr.storage._utils import normalize_path
+from zarr.testing.utils import gpu_test
def test_create(memory_store: Store) -> None:
@@ -1132,3 +1133,40 @@ def test_open_array_with_mode_r_plus(store: Store) -> None:
assert isinstance(result, NDArrayLike)
assert (result == 1).all()
z2[:] = 3
+
+
+@gpu_test
+@pytest.mark.parametrize(
+ "store",
+ ["local", "memory", "zip"],
+ indirect=True,
+)
+@pytest.mark.parametrize("zarr_format", [None, 2, 3])
+def test_gpu_basic(store: Store, zarr_format: ZarrFormat | None) -> None:
+ import cupy as cp
+
+ if zarr_format == 2:
+ # Without this, the zstd codec attempts to convert the cupy
+ # array to bytes.
+ compressors = None
+ else:
+ compressors = "auto"
+
+ with zarr.config.enable_gpu():
+ src = cp.random.uniform(size=(100, 100)) # allocate on the device
+ z = zarr.create_array(
+ store,
+ name="a",
+ shape=src.shape,
+ chunks=(10, 10),
+ dtype=src.dtype,
+ overwrite=True,
+ zarr_format=zarr_format,
+ compressors=compressors,
+ )
+ z[:10, :10] = src[:10, :10]
+
+ result = z[:10, :10]
+ # assert_array_equal doesn't check the type
+ assert isinstance(result, type(src))
+ cp.testing.assert_array_equal(result, src[:10, :10])
diff --git a/tests/test_array.py b/tests/test_array.py
index 8ff39de70a..c9b2cbce91 100644
--- a/tests/test_array.py
+++ b/tests/test_array.py
@@ -849,66 +849,6 @@ def test_append_bad_shape(store: MemoryStore, zarr_format: ZarrFormat) -> None:
z.append(b)
-@pytest.mark.parametrize("order", ["C", "F", None])
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-def test_array_create_metadata_order_v2(
- order: MemoryOrder | None, zarr_format: int, store: MemoryStore
-) -> None:
- """
- Test that the ``order`` attribute in zarr v2 array metadata is set correctly via the ``order``
- keyword argument to ``Array.create``. When ``order`` is ``None``, the value of the
- ``array.order`` config is used.
- """
- arr = zarr.create_array(store=store, shape=(2, 2), order=order, zarr_format=2, dtype="i4")
-
- expected = order or zarr.config.get("array.order")
- assert arr.metadata.zarr_format == 2 # guard for mypy
- assert arr.metadata.order == expected
-
-
-@pytest.mark.parametrize("order_config", ["C", "F", None])
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-def test_array_create_order(
- order_config: MemoryOrder | None,
- zarr_format: ZarrFormat,
- store: MemoryStore,
-) -> None:
- """
- Test that the arrays generated by array indexing have a memory order defined by the config order
- value
- """
- config: ArrayConfigLike = {}
- if order_config is None:
- config = {}
- expected = zarr.config.get("array.order")
- else:
- config = {"order": order_config}
- expected = order_config
-
- arr = zarr.create_array(
- store=store, shape=(2, 2), zarr_format=zarr_format, dtype="i4", config=config
- )
-
- vals = np.asarray(arr)
- if expected == "C":
- assert vals.flags.c_contiguous
- elif expected == "F":
- assert vals.flags.f_contiguous
- else:
- raise AssertionError
-
-
-@pytest.mark.parametrize("write_empty_chunks", [True, False])
-def test_write_empty_chunks_config(write_empty_chunks: bool) -> None:
- """
- Test that the value of write_empty_chunks is sensitive to the global config when not set
- explicitly
- """
- with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
- arr = zarr.create_array({}, shape=(2, 2), dtype="i4")
- assert arr._async_array._config.write_empty_chunks == write_empty_chunks
-
-
@pytest.mark.parametrize("store", ["memory"], indirect=True)
@pytest.mark.parametrize("write_empty_chunks", [True, False])
@pytest.mark.parametrize("fill_value", [0, 5])
@@ -1012,339 +952,396 @@ def test_auto_partition_auto_shards(
assert auto_shards == expected_shards
-def test_chunks_and_shards() -> None:
- store = StorePath(MemoryStore())
- shape = (100, 100)
- chunks = (5, 5)
- shards = (10, 10)
-
- arr_v3 = zarr.create_array(store=store / "v3", shape=shape, chunks=chunks, dtype="i4")
- assert arr_v3.chunks == chunks
- assert arr_v3.shards is None
-
- arr_v3_sharding = zarr.create_array(
- store=store / "v3_sharding",
- shape=shape,
- chunks=chunks,
- shards=shards,
- dtype="i4",
- )
- assert arr_v3_sharding.chunks == chunks
- assert arr_v3_sharding.shards == shards
-
- arr_v2 = zarr.create_array(
- store=store / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4"
- )
- assert arr_v2.chunks == chunks
- assert arr_v2.shards is None
-
-
-def test_create_array_default_fill_values() -> None:
-    a = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype="<U4")
-    assert a.fill_value == ""
-
-    b = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype="<S4")
-    assert b.fill_value == b""
-
-    c = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype="i")
-    assert c.fill_value == 0
-
-    d = zarr.create_array(MemoryStore(), shape=(5,), chunks=(5,), dtype="f")
-    assert d.fill_value == 0.0
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
-@pytest.mark.parametrize("empty_value", [None, ()])
-async def test_create_array_no_filters_compressors(
-    store: MemoryStore, dtype: str, empty_value: Any
-) -> None:
- """
- Test that the default ``filters`` and ``compressors`` are removed when ``create_array`` is invoked.
- """
+class TestCreateArray:
+ @staticmethod
+ def test_chunks_and_shards(store: Store) -> None:
+ spath = StorePath(store)
+ shape = (100, 100)
+ chunks = (5, 5)
+ shards = (10, 10)
+
+ arr_v3 = zarr.create_array(store=spath / "v3", shape=shape, chunks=chunks, dtype="i4")
+ assert arr_v3.chunks == chunks
+ assert arr_v3.shards is None
+
+ arr_v3_sharding = zarr.create_array(
+ store=spath / "v3_sharding",
+ shape=shape,
+ chunks=chunks,
+ shards=shards,
+ dtype="i4",
+ )
+ assert arr_v3_sharding.chunks == chunks
+ assert arr_v3_sharding.shards == shards
- # v2
- arr = await create_array(
- store=store,
- dtype=dtype,
- shape=(10,),
- zarr_format=2,
- compressors=empty_value,
- filters=empty_value,
+ arr_v2 = zarr.create_array(
+ store=spath / "v2", shape=shape, chunks=chunks, zarr_format=2, dtype="i4"
+ )
+ assert arr_v2.chunks == chunks
+ assert arr_v2.shards is None
+
+ @staticmethod
+ @pytest.mark.parametrize(
+ ("dtype", "fill_value_expected"), [(" None:
+ a = zarr.create_array(store, shape=(5,), chunks=(5,), dtype=dtype)
+ assert a.fill_value == fill_value_expected
+
+ @staticmethod
+ @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
+ @pytest.mark.parametrize("empty_value", [None, ()])
+ async def test_no_filters_compressors(store: MemoryStore, dtype: str, empty_value: Any) -> None:
+ """
+        Test that the default ``filters`` and ``compressors`` are removed when ``create_array`` is
+        invoked with empty values (``None`` or ``()``) for ``filters`` and ``compressors``.
+ """
+
+ # v2
+ arr = await create_array(
+ store=store,
+ dtype=dtype,
+ shape=(10,),
+ zarr_format=2,
+ compressors=empty_value,
+ filters=empty_value,
+ )
+ # Test metadata explicitly
+ assert arr.metadata.zarr_format == 2 # guard for mypy
+ # The v2 metadata stores None and () separately
+ assert arr.metadata.filters == empty_value
+        # The v2 metadata does not allow a tuple for the compressor, so it is normalized to None
+ assert arr.metadata.compressor is None
+
+ assert arr.filters == ()
+ assert arr.compressors == ()
+
+ # v3
+ arr = await create_array(
+ store=store,
+ dtype=dtype,
+ shape=(10,),
+ compressors=empty_value,
+ filters=empty_value,
+ )
+ assert arr.metadata.zarr_format == 3 # guard for mypy
+ if dtype == "str":
+ assert arr.metadata.codecs == (VLenUTF8Codec(),)
+ assert arr.serializer == VLenUTF8Codec()
+ else:
+ assert arr.metadata.codecs == (BytesCodec(),)
+ assert arr.serializer == BytesCodec()
+
+ @staticmethod
+ @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
+ @pytest.mark.parametrize(
+ "compressors",
+ [
+ "auto",
+ None,
+ (),
+ (ZstdCodec(level=3),),
+ (ZstdCodec(level=3), GzipCodec(level=0)),
+ ZstdCodec(level=3),
+ {"name": "zstd", "configuration": {"level": 3}},
+ ({"name": "zstd", "configuration": {"level": 3}},),
+ ],
)
- assert arr.metadata.zarr_format == 3 # guard for mypy
- if dtype == "str":
- assert arr.metadata.codecs == (VLenUTF8Codec(),)
- assert arr.serializer == VLenUTF8Codec()
- else:
- assert arr.metadata.codecs == (BytesCodec(),)
- assert arr.serializer == BytesCodec()
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
-@pytest.mark.parametrize(
- "compressors",
- [
- "auto",
- None,
- (),
- (ZstdCodec(level=3),),
- (ZstdCodec(level=3), GzipCodec(level=0)),
- ZstdCodec(level=3),
- {"name": "zstd", "configuration": {"level": 3}},
- ({"name": "zstd", "configuration": {"level": 3}},),
- ],
-)
-@pytest.mark.parametrize(
- "filters",
- [
- "auto",
- None,
- (),
- (
- TransposeCodec(
- order=[
- 0,
- ]
+ @pytest.mark.parametrize(
+ "filters",
+ [
+ "auto",
+ None,
+ (),
+ (
+ TransposeCodec(
+ order=[
+ 0,
+ ]
+ ),
),
- ),
- (
- TransposeCodec(
- order=[
- 0,
- ]
+ (
+ TransposeCodec(
+ order=[
+ 0,
+ ]
+ ),
+ TransposeCodec(
+ order=[
+ 0,
+ ]
+ ),
),
TransposeCodec(
order=[
0,
]
),
- ),
- TransposeCodec(
- order=[
- 0,
- ]
- ),
- {"name": "transpose", "configuration": {"order": [0]}},
- ({"name": "transpose", "configuration": {"order": [0]}},),
- ],
-)
-@pytest.mark.parametrize(("chunks", "shards"), [((6,), None), ((3,), (6,))])
-async def test_create_array_v3_chunk_encoding(
- store: MemoryStore,
- compressors: CompressorsLike,
- filters: FiltersLike,
- dtype: str,
- chunks: tuple[int, ...],
- shards: tuple[int, ...] | None,
-) -> None:
- """
- Test various possibilities for the compressors and filters parameter to create_array
- """
- arr = await create_array(
- store=store,
- dtype=dtype,
- shape=(12,),
- chunks=chunks,
- shards=shards,
- zarr_format=3,
- filters=filters,
- compressors=compressors,
- )
- filters_expected, _, compressors_expected = _parse_chunk_encoding_v3(
- filters=filters, compressors=compressors, serializer="auto", dtype=np.dtype(dtype)
- )
- assert arr.filters == filters_expected
- assert arr.compressors == compressors_expected
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
-@pytest.mark.parametrize(
- "compressors",
- [
- "auto",
- None,
- numcodecs.Zstd(level=3),
- (),
- (numcodecs.Zstd(level=3),),
- ],
-)
-@pytest.mark.parametrize(
- "filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)]
-)
-async def test_create_array_v2_chunk_encoding(
- store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str
-) -> None:
- arr = await create_array(
- store=store,
- dtype=dtype,
- shape=(10,),
- zarr_format=2,
- compressors=compressors,
- filters=filters,
- )
- filters_expected, compressor_expected = _parse_chunk_encoding_v2(
- filters=filters, compressor=compressors, dtype=np.dtype(dtype)
- )
- assert arr.metadata.zarr_format == 2 # guard for mypy
- assert arr.metadata.compressor == compressor_expected
- assert arr.metadata.filters == filters_expected
-
- # Normalize for property getters
- compressor_expected = () if compressor_expected is None else (compressor_expected,)
- filters_expected = () if filters_expected is None else filters_expected
-
- assert arr.compressors == compressor_expected
- assert arr.filters == filters_expected
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
-async def test_create_array_v3_default_filters_compressors(store: MemoryStore, dtype: str) -> None:
- """
- Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with
- ``zarr_format`` = 3 and ``filters`` and ``compressors`` are not specified.
- """
- arr = await create_array(
- store=store,
- dtype=dtype,
- shape=(10,),
- zarr_format=3,
+ {"name": "transpose", "configuration": {"order": [0]}},
+ ({"name": "transpose", "configuration": {"order": [0]}},),
+ ],
)
- expected_filters, expected_serializer, expected_compressors = _get_default_chunk_encoding_v3(
- np_dtype=np.dtype(dtype)
- )
- assert arr.filters == expected_filters
- assert arr.serializer == expected_serializer
- assert arr.compressors == expected_compressors
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
-async def test_create_array_v2_default_filters_compressors(store: MemoryStore, dtype: str) -> None:
- """
- Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with
- ``zarr_format`` = 2 and ``filters`` and ``compressors`` are not specified.
- """
- arr = await create_array(
- store=store,
- dtype=dtype,
- shape=(10,),
- zarr_format=2,
- )
- expected_filters, expected_compressors = _get_default_chunk_encoding_v2(
- np_dtype=np.dtype(dtype)
+ @pytest.mark.parametrize(("chunks", "shards"), [((6,), None), ((3,), (6,))])
+ async def test_v3_chunk_encoding(
+ store: MemoryStore,
+ compressors: CompressorsLike,
+ filters: FiltersLike,
+ dtype: str,
+ chunks: tuple[int, ...],
+ shards: tuple[int, ...] | None,
+ ) -> None:
+ """
+        Test the various forms accepted by the ``compressors`` and ``filters`` parameters to ``create_array``.
+ """
+ arr = await create_array(
+ store=store,
+ dtype=dtype,
+ shape=(12,),
+ chunks=chunks,
+ shards=shards,
+ zarr_format=3,
+ filters=filters,
+ compressors=compressors,
+ )
+ filters_expected, _, compressors_expected = _parse_chunk_encoding_v3(
+ filters=filters, compressors=compressors, serializer="auto", dtype=np.dtype(dtype)
+ )
+ assert arr.filters == filters_expected
+ assert arr.compressors == compressors_expected
+
+ @staticmethod
+ @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
+ @pytest.mark.parametrize(
+ "compressors",
+ [
+ "auto",
+ None,
+ numcodecs.Zstd(level=3),
+ (),
+ (numcodecs.Zstd(level=3),),
+ ],
)
- assert arr.metadata.zarr_format == 2 # guard for mypy
- assert arr.metadata.filters == expected_filters
- assert arr.metadata.compressor == expected_compressors
-
- # Normalize for property getters
- expected_filters = () if expected_filters is None else expected_filters
- expected_compressors = () if expected_compressors is None else (expected_compressors,)
- assert arr.filters == expected_filters
- assert arr.compressors == expected_compressors
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-async def test_create_array_v2_no_shards(store: MemoryStore) -> None:
- """
- Test that creating a Zarr v2 array with ``shard_shape`` set to a non-None value raises an error.
- """
- msg = re.escape(
- "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. Got `shard_shape=(5,)` instead."
+ @pytest.mark.parametrize(
+ "filters", ["auto", None, numcodecs.GZip(level=1), (numcodecs.GZip(level=1),)]
)
- with pytest.raises(ValueError, match=msg):
- _ = await create_array(
+ async def test_v2_chunk_encoding(
+ store: MemoryStore, compressors: CompressorsLike, filters: FiltersLike, dtype: str
+ ) -> None:
+ arr = await create_array(
store=store,
- dtype="uint8",
+ dtype=dtype,
shape=(10,),
- shards=(5,),
zarr_format=2,
+ compressors=compressors,
+ filters=filters,
)
-
-
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-@pytest.mark.parametrize("impl", ["sync", "async"])
-async def test_create_array_data(impl: Literal["sync", "async"], store: Store) -> None:
- """
- Test that we can invoke ``create_array`` with a ``data`` parameter.
- """
- data = np.arange(10)
- name = "foo"
- arr: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array
- if impl == "sync":
- arr = sync_api.create_array(store, name=name, data=data)
- stored = arr[:]
- elif impl == "async":
- arr = await create_array(store, name=name, data=data, zarr_format=3)
- stored = await arr._get_selection(
- BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid),
- prototype=default_buffer_prototype(),
+ filters_expected, compressor_expected = _parse_chunk_encoding_v2(
+ filters=filters, compressor=compressors, dtype=np.dtype(dtype)
)
- else:
- raise ValueError(f"Invalid impl: {impl}")
-
- assert np.array_equal(stored, data)
-
+ assert arr.metadata.zarr_format == 2 # guard for mypy
+ assert arr.metadata.compressor == compressor_expected
+ assert arr.metadata.filters == filters_expected
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-async def test_create_array_data_invalid_params(store: Store) -> None:
- """
- Test that failing to specify data AND shape / dtype results in a ValueError
- """
- with pytest.raises(ValueError, match="shape was not specified"):
- await create_array(store, data=None, shape=None, dtype=None)
+ # Normalize for property getters
+ compressor_expected = () if compressor_expected is None else (compressor_expected,)
+ filters_expected = () if filters_expected is None else filters_expected
- # we catch shape=None first, so specifying a dtype should raise the same exception as before
- with pytest.raises(ValueError, match="shape was not specified"):
- await create_array(store, data=None, shape=None, dtype="uint8")
-
- with pytest.raises(ValueError, match="dtype was not specified"):
- await create_array(store, data=None, shape=(10, 10))
+ assert arr.compressors == compressor_expected
+ assert arr.filters == filters_expected
+ @staticmethod
+ @pytest.mark.parametrize("dtype", ["uint8", "float32", "str"])
+ async def test_default_filters_compressors(
+ store: MemoryStore, dtype: str, zarr_format: ZarrFormat
+ ) -> None:
+ """
+ Test that the default ``filters`` and ``compressors`` are used when ``create_array`` is invoked with ``filters`` and ``compressors`` unspecified.
+ """
+ arr = await create_array(
+ store=store,
+ dtype=dtype,
+ shape=(10,),
+ zarr_format=zarr_format,
+ )
+ if zarr_format == 3:
+ expected_filters, expected_serializer, expected_compressors = (
+ _get_default_chunk_encoding_v3(np_dtype=np.dtype(dtype))
+ )
-@pytest.mark.parametrize("store", ["memory"], indirect=True)
-async def test_create_array_data_ignored_params(store: Store) -> None:
- """
- Test that specify data AND shape AND dtype results in a warning
- """
- data = np.arange(10)
- with pytest.raises(
- ValueError, match="The data parameter was used, but the shape parameter was also used."
- ):
- await create_array(store, data=data, shape=data.shape, dtype=None, overwrite=True)
+ elif zarr_format == 2:
+ default_filters, default_compressors = _get_default_chunk_encoding_v2(
+ np_dtype=np.dtype(dtype)
+ )
+ if default_filters is None:
+ expected_filters = ()
+ else:
+ expected_filters = default_filters
+ if default_compressors is None:
+ expected_compressors = ()
+ else:
+ expected_compressors = (default_compressors,)
+ expected_serializer = None
+ else:
+ raise ValueError(f"Invalid zarr_format: {zarr_format}")
+
+ assert arr.filters == expected_filters
+ assert arr.serializer == expected_serializer
+ assert arr.compressors == expected_compressors
+
+ @staticmethod
+ async def test_v2_no_shards(store: Store) -> None:
+ """
+ Test that creating a Zarr v2 array with ``shard_shape`` set to a non-None value raises an error.
+ """
+ msg = re.escape(
+ "Zarr format 2 arrays can only be created with `shard_shape` set to `None`. Got `shard_shape=(5,)` instead."
+ )
+ with pytest.raises(ValueError, match=msg):
+ _ = await create_array(
+ store=store,
+ dtype="uint8",
+ shape=(10,),
+ shards=(5,),
+ zarr_format=2,
+ )
- # we catch shape first, so specifying a dtype should raise the same warning as before
- with pytest.raises(
- ValueError, match="The data parameter was used, but the shape parameter was also used."
- ):
- await create_array(store, data=data, shape=data.shape, dtype=data.dtype, overwrite=True)
+ @staticmethod
+ @pytest.mark.parametrize("impl", ["sync", "async"])
+ async def test_with_data(impl: Literal["sync", "async"], store: Store) -> None:
+ """
+ Test that we can invoke ``create_array`` with a ``data`` parameter.
+ """
+ data = np.arange(10)
+ name = "foo"
+ arr: AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata] | Array
+ if impl == "sync":
+ arr = sync_api.create_array(store, name=name, data=data)
+ stored = arr[:]
+ elif impl == "async":
+ arr = await create_array(store, name=name, data=data, zarr_format=3)
+ stored = await arr._get_selection(
+ BasicIndexer(..., shape=arr.shape, chunk_grid=arr.metadata.chunk_grid),
+ prototype=default_buffer_prototype(),
+ )
+ else:
+ raise ValueError(f"Invalid impl: {impl}")
+
+ assert np.array_equal(stored, data)
+
+ @staticmethod
+ async def test_with_data_invalid_params(store: Store) -> None:
+ """
+        Test that omitting ``data`` and failing to provide ``shape`` / ``dtype`` results in a ValueError
+ """
+ with pytest.raises(ValueError, match="shape was not specified"):
+ await create_array(store, data=None, shape=None, dtype=None)
+
+ # we catch shape=None first, so specifying a dtype should raise the same exception as before
+ with pytest.raises(ValueError, match="shape was not specified"):
+ await create_array(store, data=None, shape=None, dtype="uint8")
+
+ with pytest.raises(ValueError, match="dtype was not specified"):
+ await create_array(store, data=None, shape=(10, 10))
+
+ @staticmethod
+ async def test_data_ignored_params(store: Store) -> None:
+ """
+ Test that specifying data AND shape AND dtype results in a ValueError
+ """
+ data = np.arange(10)
+ with pytest.raises(
+ ValueError, match="The data parameter was used, but the shape parameter was also used."
+ ):
+ await create_array(store, data=data, shape=data.shape, dtype=None, overwrite=True)
+
+        # we catch shape first, so specifying a dtype should raise the same error as before
+ with pytest.raises(
+ ValueError, match="The data parameter was used, but the shape parameter was also used."
+ ):
+ await create_array(store, data=data, shape=data.shape, dtype=data.dtype, overwrite=True)
+
+ with pytest.raises(
+ ValueError, match="The data parameter was used, but the dtype parameter was also used."
+ ):
+ await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True)
+
+ @staticmethod
+ @pytest.mark.parametrize("order_config", ["C", "F", None])
+ def test_order(
+ order_config: MemoryOrder | None,
+ zarr_format: ZarrFormat,
+ store: MemoryStore,
+ ) -> None:
+ """
+        Test that the arrays generated by array indexing have the memory order defined by the
+        ``array.order`` config value, and that for Zarr v2 arrays the ``order`` field in the array
+        metadata is set correctly.
+ """
+ config: ArrayConfigLike = {}
+ if order_config is None:
+ config = {}
+ expected = zarr.config.get("array.order")
+ else:
+ config = {"order": order_config}
+ expected = order_config
+ if zarr_format == 2:
+ arr = zarr.create_array(
+ store=store,
+ shape=(2, 2),
+ zarr_format=zarr_format,
+ dtype="i4",
+ order=expected,
+ config=config,
+ )
+ # guard for type checking
+ assert arr.metadata.zarr_format == 2
+ assert arr.metadata.order == expected
+ else:
+ arr = zarr.create_array(
+ store=store, shape=(2, 2), zarr_format=zarr_format, dtype="i4", config=config
+ )
+ vals = np.asarray(arr)
+ if expected == "C":
+ assert vals.flags.c_contiguous
+ elif expected == "F":
+ assert vals.flags.f_contiguous
+ else:
+ raise AssertionError
+
+ @staticmethod
+ @pytest.mark.parametrize("write_empty_chunks", [True, False])
+ async def test_write_empty_chunks_config(write_empty_chunks: bool, store: Store) -> None:
+ """
+        Test that the value of ``write_empty_chunks`` falls back to the global config when it is
+        not set explicitly
+ """
+ with zarr.config.set({"array.write_empty_chunks": write_empty_chunks}):
+ arr = await create_array(store, shape=(2, 2), dtype="i4")
+ assert arr._config.write_empty_chunks == write_empty_chunks
+
+ @staticmethod
+ @pytest.mark.parametrize("path", [None, "", "/", "/foo", "foo", "foo/bar"])
+ async def test_name(store: Store, zarr_format: ZarrFormat, path: str | None) -> None:
+ arr = await create_array(
+ store, shape=(2, 2), dtype="i4", name=path, zarr_format=zarr_format
+ )
+ if path is None:
+ expected_path = ""
+ elif path.startswith("/"):
+ expected_path = path.lstrip("/")
+ else:
+ expected_path = path
+ assert arr.path == expected_path
+ assert arr.name == "/" + expected_path
- with pytest.raises(
- ValueError, match="The data parameter was used, but the dtype parameter was also used."
- ):
- await create_array(store, data=data, shape=None, dtype=data.dtype, overwrite=True)
+ # test that implicit groups were created
+ path_parts = expected_path.split("/")
+ if len(path_parts) > 1:
+ *parents, _ = ["", *accumulate(path_parts, lambda x, y: "/".join([x, y]))] # noqa: FLY002
+ for parent_path in parents:
+ # this will raise if these groups were not created
+ _ = await zarr.api.asynchronous.open_group(
+ store=store, path=parent_path, mode="r", zarr_format=zarr_format
+ )
@pytest.mark.parametrize("value", [1, 1.4, "a", b"a", np.array(1)])
@@ -1361,9 +1358,18 @@ async def test_orthogonal_set_total_slice() -> None:
"""Ensure that a whole chunk overwrite does not read chunks"""
store = MemoryStore()
array = zarr.create_array(store, shape=(20, 20), chunks=(1, 2), dtype=int, fill_value=-1)
- with mock.patch("zarr.storage.MemoryStore.get", side_effect=ValueError):
+ with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
array[0, slice(4, 10)] = np.arange(6)
+ array = zarr.create_array(
+ store, shape=(20, 21), chunks=(1, 2), dtype=int, fill_value=-1, overwrite=True
+ )
+ with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
+ array[0, :] = np.arange(21)
+
+ with mock.patch("zarr.storage.MemoryStore.get", side_effect=RuntimeError):
+ array[:] = 1
+
@pytest.mark.skipif(
Version(numcodecs.__version__) < Version("0.15.1"),
@@ -1443,3 +1449,18 @@ def test_multiprocessing(store: Store, method: Literal["fork", "spawn", "forkser
results = pool.starmap(_index_array, [(arr, slice(len(data)))])
assert all(np.array_equal(r, data) for r in results)
+
+
+async def test_sharding_coordinate_selection() -> None:
+ store = MemoryStore()
+ g = zarr.open_group(store, mode="w")
+ arr = g.create_array(
+ name="a",
+ shape=(2, 3, 4),
+ chunks=(1, 2, 2),
+ overwrite=True,
+ dtype=np.float32,
+ shards=(2, 4, 4),
+ )
+ arr[:] = np.arange(2 * 3 * 4).reshape((2, 3, 4))
+ assert (arr[1, [0, 1]] == np.array([[12, 13, 14, 15], [16, 17, 18, 19]])).all() # type: ignore[index]
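
The sharding tests above rest on the split between ``chunks`` (the unit of
encoding and decoding) and ``shards`` (the unit of storage, each shard packing
a contiguous grid of chunks into one stored object). A minimal sketch of that
relationship; the shapes here are illustrative:

    import numpy as np

    import zarr

    arr = zarr.create_array(
        zarr.storage.MemoryStore(),
        shape=(100, 100),
        chunks=(5, 5),    # each chunk is encoded/decoded independently
        shards=(10, 10),  # each shard stores a 2x2 grid of chunks
        dtype="i4",
    )
    arr[:] = np.arange(100 * 100, dtype="i4").reshape(100, 100)
    assert arr.chunks == (5, 5)
    assert arr.shards == (10, 10)
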
diff --git a/tests/test_indexing.py b/tests/test_indexing.py
index 932c32f1ae..30d0d75f22 100644
--- a/tests/test_indexing.py
+++ b/tests/test_indexing.py
@@ -19,7 +19,6 @@
OrthogonalSelection,
Selection,
_iter_grid,
- is_total_slice,
make_slice_selection,
normalize_integer_selection,
oindex,
@@ -1954,8 +1953,3 @@ def test_vectorized_indexing_incompatible_shape(store) -> None:
)
with pytest.raises(ValueError, match="Attempting to set"):
arr[np.array([1, 2]), np.array([1, 2])] = np.array([[-1, -2], [-3, -4]])
-
-
-def test_is_total_slice():
- assert is_total_slice((0, slice(4, 6)), (1, 2))
- assert is_total_slice((slice(0, 1, None), slice(4, 6)), (1, 2))
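
For orientation, ``oindex`` and ``vindex`` on zarr arrays implement orthogonal
(outer-product) and vectorized (pointwise) selection respectively, following
the NumPy conventions. A small sketch of the difference, with illustrative
shapes and values:

    import numpy as np

    import zarr

    arr = zarr.create_array(
        zarr.storage.MemoryStore(), shape=(4, 4), chunks=(2, 2), dtype="i4"
    )
    arr[:] = np.arange(16, dtype="i4").reshape(4, 4)

    # orthogonal: rows {0, 2} crossed with columns {1, 3} -> a 2x2 block
    assert arr.oindex[[0, 2], [1, 3]].shape == (2, 2)

    # vectorized: the coordinate pairs (0, 1) and (2, 3) -> two scalars
    assert arr.vindex[np.array([0, 2]), np.array([1, 3])].shape == (2,)
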
diff --git a/tests/test_properties.py b/tests/test_properties.py
index 2e60c951dd..acecd44810 100644
--- a/tests/test_properties.py
+++ b/tests/test_properties.py
@@ -1,14 +1,26 @@
-import numpy as np
import pytest
from numpy.testing import assert_array_equal
+from zarr.core.buffer import default_buffer_prototype
+
pytest.importorskip("hypothesis")
import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
-from hypothesis import given
-
-from zarr.testing.strategies import arrays, basic_indices, numpy_arrays, zarr_formats
+from hypothesis import assume, given
+
+from zarr.abc.store import Store
+from zarr.core.metadata import ArrayV2Metadata, ArrayV3Metadata
+from zarr.core.sync import sync
+from zarr.testing.strategies import (
+ array_metadata,
+ arrays,
+ basic_indices,
+ numpy_arrays,
+ orthogonal_indices,
+ stores,
+ zarr_formats,
+)
@given(data=st.data(), zarr_format=zarr_formats)
@@ -18,6 +30,24 @@ def test_roundtrip(data: st.DataObject, zarr_format: int) -> None:
assert_array_equal(nparray, zarray[:])
+@given(array=arrays())
+def test_array_creates_implicit_groups(array):
+ path = array.path
+ ancestry = path.split("/")[:-1]
+ for i in range(len(ancestry)):
+ parent = "/".join(ancestry[: i + 1])
+ if array.metadata.zarr_format == 2:
+ assert (
+ sync(array.store.get(f"{parent}/.zgroup", prototype=default_buffer_prototype()))
+ is not None
+ )
+ elif array.metadata.zarr_format == 3:
+ assert (
+ sync(array.store.get(f"{parent}/zarr.json", prototype=default_buffer_prototype()))
+ is not None
+ )
+
+
@given(data=st.data())
def test_basic_indexing(data: st.DataObject) -> None:
zarray = data.draw(arrays())
@@ -26,12 +56,29 @@ def test_basic_indexing(data: st.DataObject) -> None:
actual = zarray[indexer]
assert_array_equal(nparray[indexer], actual)
- new_data = np.ones_like(actual)
+ new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
zarray[indexer] = new_data
nparray[indexer] = new_data
assert_array_equal(nparray, zarray[:])
+@given(data=st.data())
+def test_oindex(data: st.DataObject) -> None:
+ # integer_array_indices can't handle 0-size dimensions.
+ zarray = data.draw(arrays(shapes=npst.array_shapes(max_dims=4, min_side=1)))
+ nparray = zarray[:]
+
+ zindexer, npindexer = data.draw(orthogonal_indices(shape=nparray.shape))
+ actual = zarray.oindex[zindexer]
+ assert_array_equal(nparray[npindexer], actual)
+
+ assume(zarray.shards is None) # GH2834
+ new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
+ nparray[npindexer] = new_data
+ zarray.oindex[zindexer] = new_data
+ assert_array_equal(nparray, zarray[:])
+
+
@given(data=st.data())
def test_vindex(data: st.DataObject) -> None:
# integer_array_indices can't handle 0-size dimensions.
@@ -46,6 +93,25 @@ def test_vindex(data: st.DataObject) -> None:
actual = zarray.vindex[indexer]
assert_array_equal(nparray[indexer], actual)
+ # FIXME!
+ # when the indexer is such that a value gets overwritten multiple times,
+ # I think the output depends on chunking.
+ # new_data = data.draw(npst.arrays(shape=st.just(actual.shape), dtype=nparray.dtype))
+ # nparray[indexer] = new_data
+ # zarray.vindex[indexer] = new_data
+ # assert_array_equal(nparray, zarray[:])
+
+
+@given(store=stores, meta=array_metadata()) # type: ignore[misc]
+async def test_roundtrip_array_metadata(
+ store: Store, meta: ArrayV2Metadata | ArrayV3Metadata
+) -> None:
+ asdict = meta.to_buffer_dict(prototype=default_buffer_prototype())
+ for key, expected in asdict.items():
+ await store.set(f"0/{key}", expected)
+ actual = await store.get(f"0/{key}", prototype=default_buffer_prototype())
+ assert actual == expected
+
# @st.composite
# def advanced_indices(draw, *, shape):
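
The property tests above draw random zarr arrays and indexers from
``zarr.testing.strategies`` and compare against an in-memory numpy copy. A
minimal sketch of the pattern, assuming hypothesis is installed (the test name
is illustrative):

    import hypothesis.strategies as st
    from hypothesis import given
    from numpy.testing import assert_array_equal

    from zarr.testing.strategies import arrays, basic_indices


    @given(data=st.data())
    def test_reads_match_numpy(data: st.DataObject) -> None:
        zarray = data.draw(arrays())  # a randomly generated zarr array
        nparray = zarray[:]           # its in-memory numpy counterpart
        indexer = data.draw(basic_indices(shape=nparray.shape))
        assert_array_equal(nparray[indexer], zarray[indexer])
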
diff --git a/tests/test_store/test_fsspec.py b/tests/test_store/test_fsspec.py
index 929de37869..2e9620f29d 100644
--- a/tests/test_store/test_fsspec.py
+++ b/tests/test_store/test_fsspec.py
@@ -110,7 +110,12 @@ class TestFsspecStoreS3(StoreTests[FsspecStore, cpu.Buffer]):
@pytest.fixture
def store_kwargs(self, request) -> dict[str, str | bool]:
- fs, path = fsspec.url_to_fs(
+ try:
+ from fsspec import url_to_fs
+ except ImportError:
+ # before fsspec==2024.3.1
+ from fsspec.core import url_to_fs
+ fs, path = url_to_fs(
f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=True
)
return {"fs": fs, "path": path}
@@ -182,6 +187,10 @@ async def test_fsspec_store_from_uri(self, store: FsspecStore) -> None:
)
assert dict(group.attrs) == {"key": "value-3"}
+ @pytest.mark.skipif(
+ parse_version(fsspec.__version__) < parse_version("2024.03.01"),
+ reason="Prior bug in from_upath",
+ )
def test_from_upath(self) -> None:
upath = pytest.importorskip("upath")
path = upath.UPath(
@@ -204,7 +213,12 @@ def test_init_raises_if_path_has_scheme(self, store_kwargs) -> None:
self.store_cls(**store_kwargs)
def test_init_warns_if_fs_asynchronous_is_false(self) -> None:
- fs, path = fsspec.url_to_fs(
+ try:
+ from fsspec import url_to_fs
+ except ImportError:
+ # before fsspec==2024.3.1
+ from fsspec.core import url_to_fs
+ fs, path = url_to_fs(
f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=False
)
store_kwargs = {"fs": fs, "path": path}
@@ -217,6 +231,14 @@ async def test_empty_nonexistent_path(self, store_kwargs) -> None:
store = await self.store_cls.open(**store_kwargs)
assert await store.is_empty("")
+ async def test_delete_dir_unsupported_deletes(self, store: FsspecStore) -> None:
+ store.supports_deletes = False
+ with pytest.raises(
+ NotImplementedError,
+ match="This method is only available for stores that support deletes.",
+ ):
+ await store.delete_dir("test_prefix")
+
@pytest.mark.skipif(
parse_version(fsspec.__version__) < parse_version("2024.12.0"),
@@ -244,3 +266,28 @@ def test_no_wrap_async_filesystem():
assert not isinstance(store.fs, AsyncFileSystemWrapper)
assert store.fs.async_impl
+
+
+@pytest.mark.skipif(
+ parse_version(fsspec.__version__) < parse_version("2024.12.0"),
+ reason="No AsyncFileSystemWrapper",
+)
+async def test_delete_dir_wrapped_filesystem(tmpdir) -> None:
+ from fsspec.implementations.asyn_wrapper import AsyncFileSystemWrapper
+ from fsspec.implementations.local import LocalFileSystem
+
+ wrapped_fs = AsyncFileSystemWrapper(LocalFileSystem(auto_mkdir=True))
+ store = FsspecStore(wrapped_fs, read_only=False, path=f"{tmpdir}/test/path")
+
+ assert isinstance(store.fs, AsyncFileSystemWrapper)
+ assert store.fs.asynchronous
+
+ await store.set("zarr.json", cpu.Buffer.from_bytes(b"root"))
+ await store.set("foo-bar/zarr.json", cpu.Buffer.from_bytes(b"root"))
+ await store.set("foo/zarr.json", cpu.Buffer.from_bytes(b"bar"))
+ await store.set("foo/c/0", cpu.Buffer.from_bytes(b"chunk"))
+ await store.delete_dir("foo")
+ assert await store.exists("zarr.json")
+ assert await store.exists("foo-bar/zarr.json")
+ assert not await store.exists("foo/zarr.json")
+ assert not await store.exists("foo/c/0")