diff --git a/docs/api.rst b/docs/api.rst index e200dd908d..2b6e7ea516 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -13,7 +13,6 @@ API reference api/codecs api/attrs api/sync - api/v3 Indices and tables ------------------ diff --git a/docs/api/attrs.rst b/docs/api/attrs.rst index f95e63af3a..067b45fac6 100644 --- a/docs/api/attrs.rst +++ b/docs/api/attrs.rst @@ -1,6 +1,6 @@ -The Attributes class (``zarr.attrs``) -===================================== -.. module:: zarr.attrs +The Attributes class (``zarr.v2.attrs``) +======================================== +.. module:: zarr.v2.attrs .. autoclass:: Attributes diff --git a/docs/api/codecs.rst b/docs/api/codecs.rst index b50f747d74..454c5ccd20 100644 --- a/docs/api/codecs.rst +++ b/docs/api/codecs.rst @@ -1,5 +1,5 @@ -Compressors and filters (``zarr.codecs``) -========================================= -.. module:: zarr.codecs +Compressors and filters (``zarr.v2.codecs``) +============================================ +.. module:: zarr.v2.codecs This module contains compressor and filter classes for use with Zarr. Please note that this module @@ -8,9 +8,9 @@ onwards, all codec classes have been moved to a separate package called Numcodec packages (Zarr and Numcodecs_) are designed to be used together. For example, a Numcodecs_ codec class can be used as a compressor for a Zarr array:: - >>> import zarr + >>> import zarr.v2 >>> from numcodecs import Blosc - >>> z = zarr.zeros(1000000, compressor=Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)) + >>> z = zarr.v2.zeros(1000000, compressor=Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE)) Codec classes can also be used as filters. See the tutorial section on :ref:`tutorial_filters` for more information. diff --git a/docs/api/convenience.rst b/docs/api/convenience.rst index a70a90ce7c..1ff26452fa 100644 --- a/docs/api/convenience.rst +++ b/docs/api/convenience.rst @@ -1,6 +1,6 @@ -Convenience functions (``zarr.convenience``) -============================================ -.. automodule:: zarr.convenience +Convenience functions (``zarr.v2.convenience``) +=============================================== +.. automodule:: zarr.v2.convenience .. autofunction:: open .. autofunction:: save .. autofunction:: load diff --git a/docs/api/core.rst b/docs/api/core.rst index b310460e51..aacd03e2a5 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -1,5 +1,5 @@ -The Array class (``zarr.core``) -=============================== +The Array class (``zarr.v2.core``) +================================== -.. automodapi:: zarr.core +.. automodapi:: zarr.v2.core :no-heading: diff --git a/docs/api/creation.rst b/docs/api/creation.rst index 66422c0670..ad0a2ead49 100644 --- a/docs/api/creation.rst +++ b/docs/api/creation.rst @@ -1,6 +1,6 @@ -Array creation (``zarr.creation``) -================================== -.. module:: zarr.creation +Array creation (``zarr.v2.creation``) +===================================== +.. module:: zarr.v2.creation .. autofunction:: create .. autofunction:: empty .. autofunction:: zeros diff --git a/docs/api/hierarchy.rst b/docs/api/hierarchy.rst index 11a5575144..5d9280af1e 100644 --- a/docs/api/hierarchy.rst +++ b/docs/api/hierarchy.rst @@ -1,6 +1,6 @@ -Groups (``zarr.hierarchy``) -=========================== -.. module:: zarr.hierarchy +Groups (``zarr.v2.hierarchy``) +============================== +.. module:: zarr.v2.hierarchy .. autofunction:: group .. 
autofunction:: open_group diff --git a/docs/api/n5.rst b/docs/api/n5.rst index b6a8d8c61e..22e490bad4 100644 --- a/docs/api/n5.rst +++ b/docs/api/n5.rst @@ -1,5 +1,5 @@ -N5 (``zarr.n5``) -================ -.. automodule:: zarr.n5 +N5 (``zarr.v2.n5``) +=================== +.. automodule:: zarr.v2.n5 .. autoclass:: N5Store diff --git a/docs/api/storage.rst b/docs/api/storage.rst index 4321837449..d0ebd8a429 100644 --- a/docs/api/storage.rst +++ b/docs/api/storage.rst @@ -1,6 +1,6 @@ -Storage (``zarr.storage``) -========================== -.. automodule:: zarr.storage +Storage (``zarr.v2.storage``) +============================= +.. automodule:: zarr.v2.storage .. autoclass:: MemoryStore .. autoclass:: DirectoryStore diff --git a/docs/api/sync.rst b/docs/api/sync.rst index a139805e78..ff961543af 100644 --- a/docs/api/sync.rst +++ b/docs/api/sync.rst @@ -1,6 +1,6 @@ -Synchronization (``zarr.sync``) -=============================== -.. module:: zarr.sync +Synchronization (``zarr.v2.sync``) +================================== +.. module:: zarr.v2.sync .. autoclass:: ThreadSynchronizer .. autoclass:: ProcessSynchronizer diff --git a/docs/api/v3.rst b/docs/api/v3.rst deleted file mode 100644 index 7665b2ddd1..0000000000 --- a/docs/api/v3.rst +++ /dev/null @@ -1,77 +0,0 @@ -V3 Specification Implementation(``zarr._storage.v3``) -===================================================== - -This module contains the implementation of the `Zarr V3 Specification `_. - -.. warning:: - Since Zarr Python 2.12 release, this module provides experimental infrastructure for reading and - writing the upcoming V3 spec of the Zarr format. Users wishing to prepare for the migration can set - the environment variable ``ZARR_V3_EXPERIMENTAL_API=1`` to begin experimenting, however data - written with this API should be expected to become stale, as the implementation will still change. - -The new ``zarr._store.v3`` package has the necessary classes and functions for evaluating Zarr V3. -Since the design is not finalised, the classes and functions are not automatically imported into -the regular Zarr namespace. - -Code snippet for creating Zarr V3 arrays:: - - >>> import zarr - >>> z = zarr.create((10000, 10000), - >>> chunks=(100, 100), - >>> dtype='f8', - >>> compressor='default', - >>> path='path-where-you-want-zarr-v3-array', - >>> zarr_version=3) - -Further, you can use `z.info` to see details about the array you just created:: - - >>> z.info - Name : path-where-you-want-zarr-v3-array - Type : zarr.core.Array - Data type : float64 - Shape : (10000, 10000) - Chunk shape : (100, 100) - Order : C - Read-only : False - Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : zarr._storage.v3.KVStoreV3 - No. bytes : 800000000 (762.9M) - No. bytes stored : 557 - Storage ratio : 1436265.7 - Chunks initialized : 0/10000 - -You can also check ``Store type`` here (which indicates Zarr V3). - -.. module:: zarr._storage.v3 - -.. autoclass:: RmdirV3 -.. autoclass:: KVStoreV3 -.. autoclass:: FSStoreV3 -.. autoclass:: MemoryStoreV3 -.. autoclass:: DirectoryStoreV3 -.. autoclass:: ZipStoreV3 -.. autoclass:: RedisStoreV3 -.. autoclass:: MongoDBStoreV3 -.. autoclass:: DBMStoreV3 -.. autoclass:: LMDBStoreV3 -.. autoclass:: SQLiteStoreV3 -.. autoclass:: LRUStoreCacheV3 -.. autoclass:: ConsolidatedMetadataStoreV3 - -In v3 `storage transformers `_ -can be set via ``zarr.create(…, storage_transformers=[…])``. 
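
The ``docs/api/codecs.rst`` hunk above keeps the note that codec classes can also be used as filters. A minimal sketch of that pattern under the renamed ``zarr.v2`` namespace, assuming the classic creation API (``zarr.v2.array`` with the ``filters`` and ``compressor`` keywords) is re-exported there, as the documentation edits in this diff imply::

    >>> import numpy as np
    >>> import zarr.v2  # assumes zarr.v2 re-exports the classic zarr.array API
    >>> from numcodecs import Blosc, Delta
    >>> data = np.arange(100_000, dtype='i4')
    >>> # Delta runs first as a filter; Blosc then compresses the deltas
    >>> z = zarr.v2.array(data, chunks=10_000,
    ...                   filters=[Delta(dtype='i4')],
    ...                   compressor=Blosc(cname='zstd', clevel=1, shuffle=Blosc.SHUFFLE))
    >>> bool((z[:] == data).all())
    True
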
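Later in this diff, ``src/zarr/__init__.py`` gains ``open_auto``/``open_auto_async``, which first try ``AsyncArray.open`` and fall back to ``AsyncGroup.open`` on ``KeyError``, so a single call handles either kind of node. A usage sketch; only the signature shown in the diff is taken as given, and passing a filesystem path string as the ``StoreLike`` is an assumption based on ``make_store_path`` accepting path-like inputs::

    >>> import zarr
    >>> # assumes a local path is a valid StoreLike; adjust to your store
    >>> node = zarr.open_auto('data/example.zarr')
    >>> isinstance(node, (zarr.Array, zarr.Group))
    True
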
-The experimental sharding storage transformer can be tested by setting -the environment variable ``ZARR_V3_SHARDING=1``. Data written with this flag -enabled should be expected to become stale until -`ZEP 2 `_ is approved -and fully implemented. - -.. module:: zarr._storage.v3_storage_transformers - -.. autoclass:: ShardingStorageTransformer - -The abstract base class for storage transformers is - -.. module:: zarr._storage.store - -.. autoclass:: StorageTransformer diff --git a/pyproject.toml b/pyproject.toml index 00c6333aa5..3dcda98980 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,10 @@ docs = [ 'pydata-sphinx-theme', 'numpydoc', 'numcodecs[msgpack]', + "msgpack", + "lmdb", + "zstandard", + "crc32c", ] extra = [ 'attrs', @@ -164,31 +168,31 @@ disallow_any_generics = true [[tool.mypy.overrides]] module = [ - "zarr._storage.store", - "zarr._storage.v3_storage_transformers", - "zarr.v3.group", - "zarr.core", - "zarr.hierarchy", - "zarr.indexing", - "zarr.storage", - "zarr.sync", - "zarr.util", + "zarr.v2._storage.store", + "zarr.v2._storage.v3_storage_transformers", + "zarr.group", + "zarr.v2.core", + "zarr.v2.hierarchy", + "zarr.v2.indexing", + "zarr.v2.storage", + "zarr.v2.sync", + "zarr.v2.util", "tests.*", ] check_untyped_defs = false [[tool.mypy.overrides]] module = [ - "zarr.v3.abc.codec", - "zarr.v3.codecs.bytes", - "zarr.v3.codecs.pipeline", - "zarr.v3.codecs.sharding", - "zarr.v3.codecs.transpose", - "zarr.v3.array_v2", - "zarr.v3.array", - "zarr.v3.sync", - "zarr.convenience", - "zarr.meta", + "zarr.abc.codec", + "zarr.codecs.bytes", + "zarr.codecs.pipeline", + "zarr.codecs.sharding", + "zarr.codecs.transpose", + "zarr.array_v2", + "zarr.array", + "zarr.sync", + "zarr.v2.convenience", + "zarr.v2.meta", ] disallow_any_generics = false diff --git a/src/zarr/__init__.py b/src/zarr/__init__.py index 601b1295ab..9ae9dc54c4 100644 --- a/src/zarr/__init__.py +++ b/src/zarr/__init__.py @@ -1,54 +1,45 @@ -# flake8: noqa -from zarr.codecs import * -from zarr.convenience import ( - consolidate_metadata, - copy, - copy_all, - copy_store, - load, - open, - open_consolidated, - save, - save_array, - save_group, - tree, -) -from zarr.core import Array -from zarr.creation import ( - array, - create, - empty, - empty_like, - full, - full_like, - ones, - ones_like, - open_array, - open_like, - zeros, - zeros_like, -) -from zarr.errors import CopyError, MetadataError -from zarr.hierarchy import Group, group, open_group -from zarr.n5 import N5Store, N5FSStore -from zarr.storage import ( - ABSStore, - DBMStore, - DictStore, - DirectoryStore, - KVStore, - LMDBStore, - LRUStoreCache, - MemoryStore, - MongoDBStore, - NestedDirectoryStore, - RedisStore, - SQLiteStore, - TempStore, - ZipStore, +from __future__ import annotations + +from typing import Union + +import zarr.codecs # noqa: F401 +from zarr.array import Array, AsyncArray # noqa: F401 +from zarr.array_v2 import ArrayV2 +from zarr.config import RuntimeConfiguration # noqa: F401 +from zarr.group import AsyncGroup, Group # noqa: F401 +from zarr.metadata import runtime_configuration # noqa: F401 +from zarr.store import ( # noqa: F401 + StoreLike, + make_store_path, ) -from zarr.sync import ProcessSynchronizer, ThreadSynchronizer +from zarr.sync import sync as _sync from zarr._version import version as __version__ # in case setuptools scm screw up and find version to be 0.0.0 assert not __version__.startswith("0.0.0") + + +async def open_auto_async( + store: StoreLike, + runtime_configuration_: RuntimeConfiguration = 
RuntimeConfiguration(), +) -> Union[AsyncArray, AsyncGroup]: + store_path = make_store_path(store) + try: + return await AsyncArray.open(store_path, runtime_configuration=runtime_configuration_) + except KeyError: + return await AsyncGroup.open(store_path, runtime_configuration=runtime_configuration_) + + +def open_auto( + store: StoreLike, + runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(), +) -> Union[Array, ArrayV2, Group]: + object = _sync( + open_auto_async(store, runtime_configuration_), + runtime_configuration_.asyncio_loop, + ) + if isinstance(object, AsyncArray): + return Array(object) + if isinstance(object, AsyncGroup): + return Group(object) + raise TypeError(f"Unexpected object type. Got {type(object)}.") diff --git a/src/zarr/_storage/__init__.py b/src/zarr/abc/__init__.py similarity index 100% rename from src/zarr/_storage/__init__.py rename to src/zarr/abc/__init__.py diff --git a/src/zarr/v3/abc/codec.py b/src/zarr/abc/codec.py similarity index 90% rename from src/zarr/v3/abc/codec.py rename to src/zarr/abc/codec.py index d0e51ff894..1abc21b30b 100644 --- a/src/zarr/v3/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -4,17 +4,17 @@ from typing import TYPE_CHECKING, Optional import numpy as np -from zarr.v3.abc.metadata import Metadata +from zarr.abc.metadata import Metadata -from zarr.v3.common import ArraySpec -from zarr.v3.store import StorePath +from zarr.common import ArraySpec +from zarr.store import StorePath if TYPE_CHECKING: from typing_extensions import Self - from zarr.v3.common import BytesLike, SliceSelection - from zarr.v3.metadata import ArrayMetadata - from zarr.v3.config import RuntimeConfiguration + from zarr.common import BytesLike, SliceSelection + from zarr.metadata import ArrayMetadata + from zarr.config import RuntimeConfiguration class Codec(Metadata): diff --git a/src/zarr/v3/abc/metadata.py b/src/zarr/abc/metadata.py similarity index 97% rename from src/zarr/v3/abc/metadata.py rename to src/zarr/abc/metadata.py index 4fcabf72a1..f27b37cba4 100644 --- a/src/zarr/v3/abc/metadata.py +++ b/src/zarr/abc/metadata.py @@ -7,7 +7,7 @@ from dataclasses import fields, dataclass -from zarr.v3.common import JSON +from zarr.common import JSON @dataclass(frozen=True) diff --git a/src/zarr/v3/abc/store.py b/src/zarr/abc/store.py similarity index 100% rename from src/zarr/v3/abc/store.py rename to src/zarr/abc/store.py diff --git a/src/zarr/v3/array.py b/src/zarr/array.py similarity index 97% rename from src/zarr/v3/array.py rename to src/zarr/array.py index c0a00a624e..b739b310d4 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/array.py @@ -16,12 +16,12 @@ from typing import Any, Dict, Iterable, Literal, Optional, Tuple, Union import numpy as np -from zarr.v3.abc.codec import Codec +from zarr.abc.codec import Codec -# from zarr.v3.array_v2 import ArrayV2 -from zarr.v3.codecs import BytesCodec -from zarr.v3.common import ( +# from zarr.array_v2 import ArrayV2 +from zarr.codecs import BytesCodec +from zarr.common import ( ZARR_JSON, ArraySpec, ChunkCoords, @@ -29,14 +29,14 @@ SliceSelection, concurrent_map, ) -from zarr.v3.config import RuntimeConfiguration - -from zarr.v3.indexing import BasicIndexer, all_chunk_coords, is_total_slice -from zarr.v3.chunk_grids import RegularChunkGrid -from zarr.v3.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding -from zarr.v3.metadata import ArrayMetadata -from zarr.v3.store import StoreLike, StorePath, make_store_path -from zarr.v3.sync import sync +from zarr.config import RuntimeConfiguration 
+ +from zarr.indexing import BasicIndexer, all_chunk_coords, is_total_slice +from zarr.chunk_grids import RegularChunkGrid +from zarr.chunk_key_encodings import DefaultChunkKeyEncoding, V2ChunkKeyEncoding +from zarr.metadata import ArrayMetadata +from zarr.store import StoreLike, StorePath, make_store_path +from zarr.sync import sync def parse_array_metadata(data: Any): diff --git a/src/zarr/v3/array_v2.py b/src/zarr/array_v2.py similarity index 96% rename from src/zarr/v3/array_v2.py rename to src/zarr/array_v2.py index f150d2dbd2..8c2cd3faec 100644 --- a/src/zarr/v3/array_v2.py +++ b/src/zarr/array_v2.py @@ -10,7 +10,7 @@ from numcodecs.compat import ensure_bytes, ensure_ndarray -from zarr.v3.common import ( +from zarr.common import ( ZARRAY_JSON, ZATTRS_JSON, BytesLike, @@ -20,14 +20,14 @@ concurrent_map, to_thread, ) -from zarr.v3.config import RuntimeConfiguration -from zarr.v3.indexing import BasicIndexer, all_chunk_coords, is_total_slice -from zarr.v3.metadata import ArrayV2Metadata -from zarr.v3.store import StoreLike, StorePath, make_store_path -from zarr.v3.sync import sync +from zarr.config import RuntimeConfiguration +from zarr.indexing import BasicIndexer, all_chunk_coords, is_total_slice +from zarr.metadata import ArrayV2Metadata +from zarr.store import StoreLike, StorePath, make_store_path +from zarr.sync import sync if TYPE_CHECKING: - from zarr.v3.array import Array + from zarr.array import Array @dataclass(frozen=True) @@ -441,14 +441,14 @@ def resize(self, new_shape: ChunkCoords) -> ArrayV2: async def convert_to_v3_async(self) -> Array: from sys import byteorder as sys_byteorder - from zarr.v3.abc.codec import Codec - from zarr.v3.array import Array - from zarr.v3.common import ZARR_JSON - from zarr.v3.chunk_grids import RegularChunkGrid - from zarr.v3.chunk_key_encodings import V2ChunkKeyEncoding - from zarr.v3.metadata import ArrayMetadata, DataType + from zarr.abc.codec import Codec + from zarr.array import Array + from zarr.common import ZARR_JSON + from zarr.chunk_grids import RegularChunkGrid + from zarr.chunk_key_encodings import V2ChunkKeyEncoding + from zarr.metadata import ArrayMetadata, DataType - from zarr.v3.codecs import ( + from zarr.codecs import ( BloscCodec, BloscShuffle, BytesCodec, diff --git a/src/zarr/v3/attributes.py b/src/zarr/attributes.py similarity index 92% rename from src/zarr/v3/attributes.py rename to src/zarr/attributes.py index edbc84d8aa..8086e18d7b 100644 --- a/src/zarr/v3/attributes.py +++ b/src/zarr/attributes.py @@ -3,8 +3,8 @@ from typing import TYPE_CHECKING, Any, Union if TYPE_CHECKING: - from zarr.v3.group import Group - from zarr.v3.array import Array + from zarr.group import Group + from zarr.array import Array class Attributes(MutableMapping[str, Any]): diff --git a/src/zarr/v3/chunk_grids.py b/src/zarr/chunk_grids.py similarity index 94% rename from src/zarr/v3/chunk_grids.py rename to src/zarr/chunk_grids.py index b0a2a7bb36..73557f6e4b 100644 --- a/src/zarr/v3/chunk_grids.py +++ b/src/zarr/chunk_grids.py @@ -1,9 +1,9 @@ from __future__ import annotations from typing import TYPE_CHECKING, Any, Dict from dataclasses import dataclass -from zarr.v3.abc.metadata import Metadata +from zarr.abc.metadata import Metadata -from zarr.v3.common import ( +from zarr.common import ( JSON, ChunkCoords, ChunkCoordsLike, diff --git a/src/zarr/v3/chunk_key_encodings.py b/src/zarr/chunk_key_encodings.py similarity index 97% rename from src/zarr/v3/chunk_key_encodings.py rename to src/zarr/chunk_key_encodings.py index 9889a2f04a..ebc7654dde 
100644 --- a/src/zarr/v3/chunk_key_encodings.py +++ b/src/zarr/chunk_key_encodings.py @@ -2,9 +2,9 @@ from abc import abstractmethod from typing import TYPE_CHECKING, Dict, Literal, cast from dataclasses import dataclass -from zarr.v3.abc.metadata import Metadata +from zarr.abc.metadata import Metadata -from zarr.v3.common import ( +from zarr.common import ( JSON, ChunkCoords, parse_named_configuration, diff --git a/src/zarr/codecs/__init__.py b/src/zarr/codecs/__init__.py new file mode 100644 index 0000000000..8fa0c9f7b0 --- /dev/null +++ b/src/zarr/codecs/__init__.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from zarr.codecs.blosc import BloscCodec, BloscCname, BloscShuffle # noqa: F401 +from zarr.codecs.bytes import BytesCodec, Endian # noqa: F401 +from zarr.codecs.crc32c_ import Crc32cCodec # noqa: F401 +from zarr.codecs.gzip import GzipCodec # noqa: F401 +from zarr.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation # noqa: F401 +from zarr.codecs.transpose import TransposeCodec # noqa: F401 +from zarr.codecs.zstd import ZstdCodec # noqa: F401 diff --git a/src/zarr/v3/codecs/blosc.py b/src/zarr/codecs/blosc.py similarity index 95% rename from src/zarr/v3/codecs/blosc.py rename to src/zarr/codecs/blosc.py index 479865241f..374375e6c2 100644 --- a/src/zarr/v3/codecs/blosc.py +++ b/src/zarr/codecs/blosc.py @@ -9,15 +9,15 @@ import numpy as np from numcodecs.blosc import Blosc -from zarr.v3.abc.codec import BytesBytesCodec -from zarr.v3.codecs.registry import register_codec -from zarr.v3.common import parse_enum, parse_named_configuration, to_thread +from zarr.abc.codec import BytesBytesCodec +from zarr.codecs.registry import register_codec +from zarr.common import parse_enum, parse_named_configuration, to_thread if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions import Self - from zarr.v3.common import JSON, ArraySpec, BytesLike - from zarr.v3.config import RuntimeConfiguration + from zarr.common import JSON, ArraySpec, BytesLike + from zarr.config import RuntimeConfiguration class BloscShuffle(Enum): diff --git a/src/zarr/v3/codecs/bytes.py b/src/zarr/codecs/bytes.py similarity index 92% rename from src/zarr/v3/codecs/bytes.py rename to src/zarr/codecs/bytes.py index f92fe5606d..1b872ac6c6 100644 --- a/src/zarr/v3/codecs/bytes.py +++ b/src/zarr/codecs/bytes.py @@ -7,13 +7,13 @@ import numpy as np -from zarr.v3.abc.codec import ArrayBytesCodec -from zarr.v3.codecs.registry import register_codec -from zarr.v3.common import parse_enum, parse_named_configuration +from zarr.abc.codec import ArrayBytesCodec +from zarr.codecs.registry import register_codec +from zarr.common import parse_enum, parse_named_configuration if TYPE_CHECKING: - from zarr.v3.common import JSON, ArraySpec, BytesLike - from zarr.v3.config import RuntimeConfiguration + from zarr.common import JSON, ArraySpec, BytesLike + from zarr.config import RuntimeConfiguration from typing_extensions import Self diff --git a/src/zarr/v3/codecs/crc32c_.py b/src/zarr/codecs/crc32c_.py similarity index 86% rename from src/zarr/v3/codecs/crc32c_.py rename to src/zarr/codecs/crc32c_.py index 555bdeae3b..04d5b88d70 100644 --- a/src/zarr/v3/codecs/crc32c_.py +++ b/src/zarr/codecs/crc32c_.py @@ -7,15 +7,15 @@ from crc32c import crc32c -from zarr.v3.abc.codec import BytesBytesCodec -from zarr.v3.codecs.registry import register_codec -from zarr.v3.common import parse_named_configuration +from zarr.abc.codec import BytesBytesCodec +from zarr.codecs.registry import register_codec +from zarr.common 
import parse_named_configuration if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions import Self - from zarr.v3.common import JSON, BytesLike, ArraySpec - from zarr.v3.config import RuntimeConfiguration + from zarr.common import JSON, BytesLike, ArraySpec + from zarr.config import RuntimeConfiguration @dataclass(frozen=True) diff --git a/src/zarr/v3/codecs/gzip.py b/src/zarr/codecs/gzip.py similarity index 87% rename from src/zarr/v3/codecs/gzip.py rename to src/zarr/codecs/gzip.py index 478eee90c1..f75f5b743e 100644 --- a/src/zarr/v3/codecs/gzip.py +++ b/src/zarr/codecs/gzip.py @@ -4,15 +4,15 @@ from typing import TYPE_CHECKING from numcodecs.gzip import GZip -from zarr.v3.abc.codec import BytesBytesCodec -from zarr.v3.codecs.registry import register_codec -from zarr.v3.common import parse_named_configuration, to_thread +from zarr.abc.codec import BytesBytesCodec +from zarr.codecs.registry import register_codec +from zarr.common import parse_named_configuration, to_thread if TYPE_CHECKING: from typing import Optional, Dict from typing_extensions import Self - from zarr.v3.common import JSON, ArraySpec, BytesLike - from zarr.v3.config import RuntimeConfiguration + from zarr.common import JSON, ArraySpec, BytesLike + from zarr.config import RuntimeConfiguration def parse_gzip_level(data: JSON) -> int: diff --git a/src/zarr/v3/codecs/pipeline.py b/src/zarr/codecs/pipeline.py similarity index 95% rename from src/zarr/v3/codecs/pipeline.py rename to src/zarr/codecs/pipeline.py index 7bb872eb79..4908ee8057 100644 --- a/src/zarr/v3/codecs/pipeline.py +++ b/src/zarr/codecs/pipeline.py @@ -5,7 +5,7 @@ from dataclasses import dataclass from warnings import warn -from zarr.v3.abc.codec import ( +from zarr.abc.codec import ( ArrayArrayCodec, ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, @@ -13,16 +13,16 @@ BytesBytesCodec, Codec, ) -from zarr.v3.abc.metadata import Metadata -from zarr.v3.codecs.registry import get_codec_class -from zarr.v3.common import parse_named_configuration +from zarr.abc.metadata import Metadata +from zarr.codecs.registry import get_codec_class +from zarr.common import parse_named_configuration if TYPE_CHECKING: from typing import Iterator, List, Optional, Tuple, Union - from zarr.v3.store import StorePath - from zarr.v3.metadata import ArrayMetadata - from zarr.v3.config import RuntimeConfiguration - from zarr.v3.common import JSON, ArraySpec, BytesLike, SliceSelection + from zarr.store import StorePath + from zarr.metadata import ArrayMetadata + from zarr.config import RuntimeConfiguration + from zarr.common import JSON, ArraySpec, BytesLike, SliceSelection @dataclass(frozen=True) @@ -53,7 +53,7 @@ def evolve(self, array_spec: ArraySpec) -> CodecPipeline: @classmethod def from_list(cls, codecs: List[Codec]) -> CodecPipeline: - from zarr.v3.codecs.sharding import ShardingCodec + from zarr.codecs.sharding import ShardingCodec if not any(isinstance(codec, ArrayBytesCodec) for codec in codecs): raise ValueError("Exactly one array-to-bytes codec is required.") diff --git a/src/zarr/v3/codecs/registry.py b/src/zarr/codecs/registry.py similarity index 96% rename from src/zarr/v3/codecs/registry.py rename to src/zarr/codecs/registry.py index 4cf2736685..140e1372ef 100644 --- a/src/zarr/v3/codecs/registry.py +++ b/src/zarr/codecs/registry.py @@ -3,7 +3,7 @@ if TYPE_CHECKING: from typing import Dict, Type - from zarr.v3.abc.codec import Codec + from zarr.abc.codec import Codec from importlib.metadata import EntryPoint, entry_points as get_entry_points diff 
--git a/src/zarr/v3/codecs/sharding.py b/src/zarr/codecs/sharding.py similarity index 97% rename from src/zarr/v3/codecs/sharding.py rename to src/zarr/codecs/sharding.py index 0385154c0f..948e46f132 100644 --- a/src/zarr/v3/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -6,17 +6,17 @@ import numpy as np -from zarr.v3.abc.codec import ( +from zarr.abc.codec import ( Codec, ArrayBytesCodec, ArrayBytesCodecPartialDecodeMixin, ArrayBytesCodecPartialEncodeMixin, ) -from zarr.v3.codecs.bytes import BytesCodec -from zarr.v3.codecs.crc32c_ import Crc32cCodec -from zarr.v3.codecs.pipeline import CodecPipeline -from zarr.v3.codecs.registry import register_codec -from zarr.v3.common import ( +from zarr.codecs.bytes import BytesCodec +from zarr.codecs.crc32c_ import Crc32cCodec +from zarr.codecs.pipeline import CodecPipeline +from zarr.codecs.registry import register_codec +from zarr.common import ( ArraySpec, ChunkCoordsLike, concurrent_map, @@ -25,14 +25,14 @@ parse_shapelike, product, ) -from zarr.v3.chunk_grids import RegularChunkGrid -from zarr.v3.indexing import ( +from zarr.chunk_grids import RegularChunkGrid +from zarr.indexing import ( BasicIndexer, c_order_iter, is_total_slice, morton_order_iter, ) -from zarr.v3.metadata import ( +from zarr.metadata import ( ArrayMetadata, runtime_configuration as make_runtime_configuration, parse_codecs, @@ -42,14 +42,14 @@ from typing import Awaitable, Callable, Dict, Iterator, List, Optional, Set, Tuple from typing_extensions import Self - from zarr.v3.store import StorePath - from zarr.v3.common import ( + from zarr.store import StorePath + from zarr.common import ( JSON, ChunkCoords, BytesLike, SliceSelection, ) - from zarr.v3.config import RuntimeConfiguration + from zarr.config import RuntimeConfiguration MAX_UINT_64 = 2**64 - 1 diff --git a/src/zarr/v3/codecs/transpose.py b/src/zarr/codecs/transpose.py similarity index 92% rename from src/zarr/v3/codecs/transpose.py rename to src/zarr/codecs/transpose.py index b663230e35..c63327f6fc 100644 --- a/src/zarr/v3/codecs/transpose.py +++ b/src/zarr/codecs/transpose.py @@ -3,17 +3,17 @@ from dataclasses import dataclass, replace -from zarr.v3.common import JSON, ArraySpec, ChunkCoordsLike, parse_named_configuration +from zarr.common import JSON, ArraySpec, ChunkCoordsLike, parse_named_configuration if TYPE_CHECKING: - from zarr.v3.config import RuntimeConfiguration + from zarr.config import RuntimeConfiguration from typing import TYPE_CHECKING, Optional, Tuple from typing_extensions import Self import numpy as np -from zarr.v3.abc.codec import ArrayArrayCodec -from zarr.v3.codecs.registry import register_codec +from zarr.abc.codec import ArrayArrayCodec +from zarr.codecs.registry import register_codec def parse_transpose_order(data: Union[JSON, Iterable[int]]) -> Tuple[int, ...]: @@ -65,7 +65,7 @@ def evolve(self, array_spec: ArraySpec) -> Self: return self def resolve_metadata(self, chunk_spec: ArraySpec) -> ArraySpec: - from zarr.v3.common import ArraySpec + from zarr.common import ArraySpec return ArraySpec( shape=tuple(chunk_spec.shape[self.order[i]] for i in range(chunk_spec.ndim)), diff --git a/src/zarr/v3/codecs/zstd.py b/src/zarr/codecs/zstd.py similarity index 89% rename from src/zarr/v3/codecs/zstd.py rename to src/zarr/codecs/zstd.py index 774bb8bdbb..41db850ab6 100644 --- a/src/zarr/v3/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -5,15 +5,15 @@ from zstandard import ZstdCompressor, ZstdDecompressor -from zarr.v3.abc.codec import BytesBytesCodec -from zarr.v3.codecs.registry import 
register_codec -from zarr.v3.common import parse_named_configuration, to_thread +from zarr.abc.codec import BytesBytesCodec +from zarr.codecs.registry import register_codec +from zarr.common import parse_named_configuration, to_thread if TYPE_CHECKING: from typing import Dict, Optional from typing_extensions import Self - from zarr.v3.config import RuntimeConfiguration - from zarr.v3.common import BytesLike, JSON, ArraySpec + from zarr.config import RuntimeConfiguration + from zarr.common import BytesLike, JSON, ArraySpec def parse_zstd_level(data: JSON) -> int: diff --git a/src/zarr/v3/common.py b/src/zarr/common.py similarity index 100% rename from src/zarr/v3/common.py rename to src/zarr/common.py diff --git a/src/zarr/v3/config.py b/src/zarr/config.py similarity index 100% rename from src/zarr/v3/config.py rename to src/zarr/config.py diff --git a/src/zarr/v3/group.py b/src/zarr/group.py similarity index 97% rename from src/zarr/v3/group.py rename to src/zarr/group.py index fcd2fea215..aff24ed0d9 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/group.py @@ -5,14 +5,14 @@ import json import logging from typing import Any, Dict, Literal, Optional, Union, AsyncIterator, List -from zarr.v3.abc.metadata import Metadata - -from zarr.v3.array import AsyncArray, Array -from zarr.v3.attributes import Attributes -from zarr.v3.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON -from zarr.v3.config import RuntimeConfiguration, SyncConfiguration -from zarr.v3.store import StoreLike, StorePath, make_store_path -from zarr.v3.sync import SyncMixin, sync +from zarr.abc.metadata import Metadata + +from zarr.array import AsyncArray, Array +from zarr.attributes import Attributes +from zarr.common import ZARR_JSON, ZARRAY_JSON, ZATTRS_JSON, ZGROUP_JSON +from zarr.config import RuntimeConfiguration, SyncConfiguration +from zarr.store import StoreLike, StorePath, make_store_path +from zarr.sync import SyncMixin, sync logger = logging.getLogger("zarr.group") diff --git a/src/zarr/indexing.py b/src/zarr/indexing.py index b72d5a255d..7c1a4df226 100644 --- a/src/zarr/indexing.py +++ b/src/zarr/indexing.py @@ -1,187 +1,83 @@ -import collections +from __future__ import annotations + import itertools import math -import numbers - -import numpy as np - - -from zarr.errors import ( - ArrayIndexError, - NegativeStepError, - err_too_many_indices, - VindexInvalidSelectionError, - BoundsCheckError, -) - - -def is_integer(x): - """True if x is an integer (both pure Python or NumPy). - - Note that Python's bool is considered an integer too. - """ - return isinstance(x, numbers.Integral) - - -def is_integer_list(x): - """True if x is a list of integers. +from typing import Iterator, List, NamedTuple, Optional, Tuple - This function assumes ie *does not check* that all elements of the list - have the same type. Mixed type lists will result in other errors that will - bubble up anyway. 
- """ - return isinstance(x, list) and len(x) > 0 and is_integer(x[0]) - - -def is_integer_array(x, ndim=None): - t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" - if ndim is not None: - t = t and len(x.shape) == ndim - return t - - -def is_bool_array(x, ndim=None): - t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool - if ndim is not None: - t = t and len(x.shape) == ndim - return t - - -def is_scalar(value, dtype): - if np.isscalar(value): - return True - if isinstance(value, tuple) and dtype.names and len(value) == len(dtype.names): - return True - return False +from zarr.common import ChunkCoords, Selection, SliceSelection, product -def is_pure_fancy_indexing(selection, ndim): - """Check whether a selection contains only scalars or integer array-likes. - - Parameters - ---------- - selection : tuple, slice, or scalar - A valid selection value for indexing into arrays. - - Returns - ------- - is_pure : bool - True if the selection is a pure fancy indexing expression (ie not mixed - with boolean or slices). - """ - if ndim == 1: - if is_integer_list(selection) or is_integer_array(selection): - return True - # if not, we go through the normal path below, because a 1-tuple - # of integers is also allowed. - no_slicing = ( - isinstance(selection, tuple) - and len(selection) == ndim - and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) - ) - return ( - no_slicing - and all( - is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) - for elem in selection - ) - and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) - ) - - -def is_pure_orthogonal_indexing(selection, ndim): - if not ndim: - return False +def _ensure_tuple(v: Selection) -> SliceSelection: + if not isinstance(v, tuple): + v = (v,) + return v - # Case 1: Selection is a single iterable of integers - if is_integer_list(selection) or is_integer_array(selection, ndim=1): - return True - # Case two: selection contains either zero or one integer iterables. - # All other selection elements are slices or integers - return ( - isinstance(selection, tuple) - and len(selection) == ndim - and sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 - and all( - is_integer_list(elem) or is_integer_array(elem) or isinstance(elem, (int, slice)) - for elem in selection - ) +def _err_too_many_indices(selection: SliceSelection, shape: ChunkCoords): + raise IndexError( + "too many indices for array; expected {}, got {}".format(len(shape), len(selection)) ) -def normalize_integer_selection(dim_sel, dim_len): - # normalize type to int - dim_sel = int(dim_sel) +def _err_negative_step(): + raise IndexError("only slices with step >= 1 are supported") - # handle wraparound - if dim_sel < 0: - dim_sel = dim_len + dim_sel - # handle out of bounds - if dim_sel >= dim_len or dim_sel < 0: - raise BoundsCheckError(dim_len) +def _check_selection_length(selection: SliceSelection, shape: ChunkCoords): + if len(selection) > len(shape): + _err_too_many_indices(selection, shape) - return dim_sel +def _ensure_selection( + selection: Selection, + shape: ChunkCoords, +) -> SliceSelection: + selection = _ensure_tuple(selection) -ChunkDimProjection = collections.namedtuple( - "ChunkDimProjection", ("dim_chunk_ix", "dim_chunk_sel", "dim_out_sel") -) -"""A mapping from chunk to output array for a single dimension. 
+ # fill out selection if not completely specified + if len(selection) < len(shape): + selection += (slice(None),) * (len(shape) - len(selection)) -Parameters ----------- -dim_chunk_ix - Index of chunk. -dim_chunk_sel - Selection of items from chunk array. -dim_out_sel - Selection of items in target (output) array. + # check selection not too long + _check_selection_length(selection, shape) -""" + return selection -class IntDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize - dim_sel = normalize_integer_selection(dim_sel, dim_len) +class _ChunkDimProjection(NamedTuple): + dim_chunk_ix: int + dim_chunk_sel: slice + dim_out_sel: Optional[slice] - # store attributes - self.dim_sel = dim_sel - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nitems = 1 - def __iter__(self): - dim_chunk_ix = self.dim_sel // self.dim_chunk_len - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel - dim_offset - dim_out_sel = None - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) +def _ceildiv(a, b): + return math.ceil(a / b) -def ceildiv(a, b): - return math.ceil(a / b) +class _SliceDimIndexer: + dim_sel: slice + dim_len: int + dim_chunk_len: int + nitems: int + start: int + stop: int + step: int -class SliceDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): - # normalize + def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int): self.start, self.stop, self.step = dim_sel.indices(dim_len) if self.step < 1: - raise NegativeStepError() + _err_negative_step() - # store attributes self.dim_len = dim_len self.dim_chunk_len = dim_chunk_len - self.nitems = max(0, ceildiv((self.stop - self.start), self.step)) - self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) + self.nitems = max(0, _ceildiv((self.stop - self.start), self.step)) + self.nchunks = _ceildiv(self.dim_len, self.dim_chunk_len) - def __iter__(self): + def __iter__(self) -> Iterator[_ChunkDimProjection]: # figure out the range of chunks we need to visit dim_chunk_ix_from = self.start // self.dim_chunk_len - dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) + dim_chunk_ix_to = _ceildiv(self.stop, self.dim_chunk_len) # iterate over chunks in range for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): @@ -199,7 +95,7 @@ def __iter__(self): if remainder: dim_chunk_sel_start += self.step - remainder # compute number of previous items, provides offset into output array - dim_out_offset = ceildiv((dim_offset - self.start), self.step) + dim_out_offset = _ceildiv((dim_offset - self.start), self.step) else: # selection starts within current chunk @@ -215,522 +111,38 @@ def __iter__(self): dim_chunk_sel_stop = self.stop - dim_offset dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) - dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) - - # If there are no elements on the selection within this chunk, then skip - if dim_chunk_nitems == 0: - continue - + dim_chunk_nitems = _ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems) - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -def check_selection_length(selection, shape): - if len(selection) > len(shape): - err_too_many_indices(selection, shape) - - -def replace_ellipsis(selection, shape): - selection = ensure_tuple(selection) - - # count number of ellipsis present - n_ellipsis = sum(1 for i in selection if i is Ellipsis) - - 
if n_ellipsis > 1: - # more than 1 is an error - raise IndexError("an index can only have a single ellipsis ('...')") + yield _ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - elif n_ellipsis == 1: - # locate the ellipsis, count how many items to left and right - n_items_l = selection.index(Ellipsis) # items to left of ellipsis - n_items_r = len(selection) - (n_items_l + 1) # items to right of ellipsis - n_items = len(selection) - 1 # all non-ellipsis items - if n_items >= len(shape): - # ellipsis does nothing, just remove it - selection = tuple(i for i in selection if i != Ellipsis) +class _ChunkProjection(NamedTuple): + chunk_coords: ChunkCoords + chunk_selection: SliceSelection + out_selection: SliceSelection - else: - # replace ellipsis with as many slices are needed for number of dims - new_item = selection[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) - if n_items_r: - new_item += selection[-n_items_r:] - selection = new_item - - # fill out selection if not completely specified - if len(selection) < len(shape): - selection += (slice(None),) * (len(shape) - len(selection)) - - # check selection not too long - check_selection_length(selection, shape) - - return selection - - -def replace_lists(selection): - return tuple( - np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection - ) - - -def ensure_tuple(v): - if not isinstance(v, tuple): - v = (v,) - return v - -ChunkProjection = collections.namedtuple( - "ChunkProjection", ("chunk_coords", "chunk_selection", "out_selection") -) -"""A mapping of items from chunk to output array. Can be used to extract items from the -chunk array for loading into an output array. Can also be used to extract items from a -value array for setting/updating in a chunk array. - -Parameters ----------- -chunk_coords - Indices of chunk. -chunk_selection - Selection of items from chunk array. -out_selection - Selection of items in target (output) array. 
- -""" - - -def is_slice(s): - return isinstance(s, slice) - - -def is_contiguous_slice(s): - return is_slice(s) and (s.step is None or s.step == 1) - - -def is_positive_slice(s): - return is_slice(s) and (s.step is None or s.step >= 1) - - -def is_contiguous_selection(selection): - selection = ensure_tuple(selection) - return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) - - -def is_basic_selection(selection): - selection = ensure_tuple(selection) - return all(is_integer(s) or is_positive_slice(s) for s in selection) - - -# noinspection PyProtectedMember class BasicIndexer: - def __init__(self, selection, array): - # handle ellipsis - selection = replace_ellipsis(selection, array._shape) - - # setup per-dimension indexers - dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): - dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif is_slice(dim_sel): - dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) - - else: - raise IndexError( - "unsupported selection item for basic indexing; " - "expected integer or slice, got {!r}".format(type(dim_sel)) - ) - - dim_indexers.append(dim_indexer) - - self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) - self.drop_axes = None - - def __iter__(self): - for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) - chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple( - p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None - ) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -class BoolArrayDimIndexer: - def __init__(self, dim_sel, dim_len, dim_chunk_len): - # check number of dimensions - if not is_bool_array(dim_sel, 1): - raise IndexError( - "Boolean arrays in an orthogonal selection must " "be 1-dimensional only" - ) - - # check shape - if dim_sel.shape[0] != dim_len: - raise IndexError( - "Boolean array has the wrong length for dimension; " "expected {}, got {}".format( - dim_len, dim_sel.shape[0] - ) - ) - - # store attributes - self.dim_sel = dim_sel - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) - - # precompute number of selected items for each chunk - self.chunk_nitems = np.zeros(self.nchunks, dtype="i8") - for dim_chunk_ix in range(self.nchunks): - dim_offset = dim_chunk_ix * self.dim_chunk_len - self.chunk_nitems[dim_chunk_ix] = np.count_nonzero( - self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] - ) - self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) - self.nitems = self.chunk_nitems_cumsum[-1] - self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] - - def __iter__(self): - # iterate over chunks with at least one item - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in chunk - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] - - # pad out if final chunk - if dim_chunk_sel.shape[0] < self.dim_chunk_len: - tmp = np.zeros(self.dim_chunk_len, dtype=bool) - tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel - dim_chunk_sel = tmp - - # find region in output - if dim_chunk_ix == 0: - start = 0 - else: - start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] - stop = self.chunk_nitems_cumsum[dim_chunk_ix] - dim_out_sel = 
slice(start, stop) - - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -class Order: - UNKNOWN = 0 - INCREASING = 1 - DECREASING = 2 - UNORDERED = 3 - - @staticmethod - def check(a): - diff = np.diff(a) - diff_positive = diff >= 0 - n_diff_positive = np.count_nonzero(diff_positive) - all_increasing = n_diff_positive == len(diff_positive) - any_increasing = n_diff_positive > 0 - if all_increasing: - order = Order.INCREASING - elif any_increasing: - order = Order.UNORDERED - else: - order = Order.DECREASING - return order - - -def wraparound_indices(x, dim_len): - loc_neg = x < 0 - if np.any(loc_neg): - x[loc_neg] = x[loc_neg] + dim_len - - -def boundscheck_indices(x, dim_len): - if np.any(x < 0) or np.any(x >= dim_len): - raise BoundsCheckError(dim_len) - - -class IntArrayDimIndexer: - """Integer array selection against a single dimension.""" + dim_indexers: List[_SliceDimIndexer] + shape: ChunkCoords def __init__( self, - dim_sel, - dim_len, - dim_chunk_len, - wraparound=True, - boundscheck=True, - order=Order.UNKNOWN, + selection: Selection, + shape: Tuple[int, ...], + chunk_shape: Tuple[int, ...], ): - # ensure 1d array - dim_sel = np.asanyarray(dim_sel) - if not is_integer_array(dim_sel, 1): - raise IndexError( - "integer arrays in an orthogonal selection must be " "1-dimensional only" - ) - - # handle wraparound - if wraparound: - wraparound_indices(dim_sel, dim_len) - - # handle out of bounds - if boundscheck: - boundscheck_indices(dim_sel, dim_len) - - # store attributes - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) - self.nitems = len(dim_sel) - - # determine which chunk is needed for each selection item - # note: for dense integer selections, the division operation here is the - # bottleneck - dim_sel_chunk = dim_sel // dim_chunk_len - - # determine order of indices - if order == Order.UNKNOWN: - order = Order.check(dim_sel) - self.order = order - - if self.order == Order.INCREASING: - self.dim_sel = dim_sel - self.dim_out_sel = None - elif self.order == Order.DECREASING: - self.dim_sel = dim_sel[::-1] - # TODO should be possible to do this without creating an arange - self.dim_out_sel = np.arange(self.nitems - 1, -1, -1) - else: - # sort indices to group by chunk - self.dim_out_sel = np.argsort(dim_sel_chunk) - self.dim_sel = np.take(dim_sel, self.dim_out_sel) - - # precompute number of selected items for each chunk - self.chunk_nitems = np.bincount(dim_sel_chunk, minlength=self.nchunks) - - # find chunks that we need to visit - self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] - - # compute offsets into the output array - self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) - - def __iter__(self): - for dim_chunk_ix in self.dim_chunk_ixs: - # find region in output - if dim_chunk_ix == 0: - start = 0 - else: - start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] - stop = self.chunk_nitems_cumsum[dim_chunk_ix] - if self.order == Order.INCREASING: - dim_out_sel = slice(start, stop) - else: - dim_out_sel = self.dim_out_sel[start:stop] - - # find region in chunk - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_chunk_sel = self.dim_sel[start:stop] - dim_offset - - yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -def slice_to_range(s: slice, l: int): # noqa: E741 - return range(*s.indices(l)) - - -def ix_(selection, shape): - """Convert an orthogonal selection to a numpy advanced (fancy) selection, like numpy.ix_ - but with support for slices and single 
ints.""" - - # normalisation - selection = replace_ellipsis(selection, shape) - - # replace slice and int as these are not supported by numpy.ix_ - selection = [ - slice_to_range(dim_sel, dim_len) - if isinstance(dim_sel, slice) - else [dim_sel] - if is_integer(dim_sel) - else dim_sel - for dim_sel, dim_len in zip(selection, shape) - ] - - # now get numpy to convert to a coordinate selection - selection = np.ix_(*selection) - - return selection - - -def oindex(a, selection): - """Implementation of orthogonal indexing with slices and ints.""" - selection = replace_ellipsis(selection, a.shape) - drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) - selection = ix_(selection, a.shape) - result = a[selection] - if drop_axes: - result = result.squeeze(axis=drop_axes) - return result - - -def oindex_set(a, selection, value): - selection = replace_ellipsis(selection, a.shape) - drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) - selection = ix_(selection, a.shape) - if not np.isscalar(value) and drop_axes: - value = np.asanyarray(value) - value_selection = [slice(None)] * len(a.shape) - for i in drop_axes: - value_selection[i] = np.newaxis - value_selection = tuple(value_selection) - value = value[value_selection] - a[selection] = value - - -# noinspection PyProtectedMember -class OrthogonalIndexer: - def __init__(self, selection, array): - # handle ellipsis - selection = replace_ellipsis(selection, array._shape) - - # normalize list to array - selection = replace_lists(selection) - # setup per-dimension indexers - dim_indexers = [] - for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): - if is_integer(dim_sel): - dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif isinstance(dim_sel, slice): - dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif is_integer_array(dim_sel): - dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) - - elif is_bool_array(dim_sel): - dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) - - else: - raise IndexError( - "unsupported selection item for orthogonal indexing; " - "expected integer, slice, integer array or Boolean " - "array, got {!r}".format(type(dim_sel)) - ) - - dim_indexers.append(dim_indexer) - - self.array = array - self.dim_indexers = dim_indexers - self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) - self.is_advanced = not is_basic_selection(selection) - if self.is_advanced: - self.drop_axes = tuple( - i - for i, dim_indexer in enumerate(self.dim_indexers) - if isinstance(dim_indexer, IntDimIndexer) - ) - else: - self.drop_axes = None - - def __iter__(self): - for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) - chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple( - p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + self.dim_indexers = [ + _SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + for dim_sel, dim_len, dim_chunk_len in zip( + _ensure_selection(selection, shape), shape, chunk_shape ) - - # handle advanced indexing arrays orthogonally - if self.is_advanced: - # N.B., numpy doesn't support orthogonal indexing directly as yet, - # so need to work around via np.ix_. Also np.ix_ does not support a - # mixture of arrays and slices or integers, so need to convert slices - # and integers into ranges. 
- chunk_selection = ix_(chunk_selection, self.array._chunks) - - # special case for non-monotonic indices - if not is_basic_selection(out_selection): - out_selection = ix_(out_selection, self.shape) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -class OIndex: - def __init__(self, array): - self.array = array - - def __getitem__(self, selection): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.get_orthogonal_selection(selection, fields=fields) - - def __setitem__(self, selection, value): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.set_orthogonal_selection(selection, value, fields=fields) - - -# noinspection PyProtectedMember -class BlockIndexer: - def __init__(self, selection, array): - # handle ellipsis - selection = replace_ellipsis(selection, array._shape) - - # normalize list to array - selection = replace_lists(selection) - - # setup per-dimension indexers - dim_indexers = [] - for dim_sel, dim_len, dim_chunk_size in zip(selection, array._shape, array._chunks): - dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) - - if is_integer(dim_sel): - if dim_sel < 0: - dim_sel = dim_numchunks + dim_sel - - start = dim_sel * dim_chunk_size - stop = start + dim_chunk_size - slice_ = slice(start, stop) - - elif is_slice(dim_sel): - start = dim_sel.start if dim_sel.start is not None else 0 - stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks - - if dim_sel.step not in {1, None}: - raise IndexError( - "unsupported selection item for block indexing; " - "expected integer or slice with step=1, got {!r}".format(type(dim_sel)) - ) - - # Can't reuse wraparound_indices because it expects a numpy array - # We have integers here. 
- if start < 0: - start = dim_numchunks + start - if stop < 0: - stop = dim_numchunks + stop - - start = start * dim_chunk_size - stop = stop * dim_chunk_size - slice_ = slice(start, stop) - - else: - raise IndexError( - "unsupported selection item for block indexing; " - "expected integer or slice, got {!r}".format(type(dim_sel)) - ) - - dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) - dim_indexers.append(dim_indexer) - - if start >= dim_len or start < 0: - raise BoundsCheckError(dim_len) - - self.dim_indexers = dim_indexers + ] self.shape = tuple(s.nitems for s in self.dim_indexers) - self.drop_axes = None - def __iter__(self): + def __iter__(self) -> Iterator[_ChunkProjection]: for dim_projections in itertools.product(*self.dim_indexers): chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) @@ -738,343 +150,59 @@ def __iter__(self): p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None ) - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -class BlockIndex: - def __init__(self, array): - self.array = array - - def __getitem__(self, selection): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.get_block_selection(selection, fields=fields) + yield _ChunkProjection(chunk_coords, chunk_selection, out_selection) - def __setitem__(self, selection, value): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - return self.array.set_block_selection(selection, value, fields=fields) +def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: + def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: + # Inspired by compressed morton code as implemented in Neuroglancer + # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code + bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) + max_coords_bits = max(*bits) + input_bit = 0 + input_value = z + out = [0 for _ in range(len(chunk_shape))] -# noinspection PyProtectedMember -def is_coordinate_selection(selection, array): - return (len(selection) == len(array._shape)) and all( - is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection - ) - - -# noinspection PyProtectedMember -def is_mask_selection(selection, array): - return ( - len(selection) == 1 and is_bool_array(selection[0]) and selection[0].shape == array._shape - ) - - -# noinspection PyProtectedMember -class CoordinateIndexer: - def __init__(self, selection, array): - # some initial normalization - selection = ensure_tuple(selection) - selection = tuple([i] if is_integer(i) else i for i in selection) - selection = replace_lists(selection) - - # validation - if not is_coordinate_selection(selection, array): - raise IndexError( - "invalid coordinate selection; expected one integer " - "(coordinate) array per dimension of the target array, " - "got {!r}".format(selection) - ) - - # handle wraparound, boundscheck - for dim_sel, dim_len in zip(selection, array.shape): - # handle wraparound - wraparound_indices(dim_sel, dim_len) - - # handle out of bounds - boundscheck_indices(dim_sel, dim_len) - - # compute chunk index for each point in the selection - chunks_multi_index = tuple( - dim_sel // dim_chunk_len for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) - ) + for coord_bit in 
range(max_coords_bits): + for dim in range(len(chunk_shape)): + if coord_bit < bits[dim]: + bit = (input_value >> input_bit) & 1 + out[dim] |= bit << coord_bit + input_bit += 1 + return tuple(out) - # broadcast selection - this will raise error if array dimensions don't match - selection = np.broadcast_arrays(*selection) - chunks_multi_index = np.broadcast_arrays(*chunks_multi_index) + for i in range(product(chunk_shape)): + yield decode_morton(i, chunk_shape) - # remember shape of selection, because we will flatten indices for processing - self.sel_shape = selection[0].shape if selection[0].shape else (1,) - # flatten selection - selection = [dim_sel.reshape(-1) for dim_sel in selection] - chunks_multi_index = [dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index] - - # ravel chunk indices - chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, dims=array._cdata_shape) - - # group points by chunk - if np.any(np.diff(chunks_raveled_indices) < 0): - # optimisation, only sort if needed - sel_sort = np.argsort(chunks_raveled_indices) - selection = tuple(dim_sel[sel_sort] for dim_sel in selection) - else: - sel_sort = None - - # store attributes - self.selection = selection - self.sel_sort = sel_sort - self.shape = selection[0].shape if selection[0].shape else (1,) - self.drop_axes = None - self.array = array - - # precompute number of selected items for each chunk - self.chunk_nitems = np.bincount(chunks_raveled_indices, minlength=array.nchunks) - self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) - # locate the chunks we need to process - self.chunk_rixs = np.nonzero(self.chunk_nitems)[0] - - # unravel chunk indices - self.chunk_mixs = np.unravel_index(self.chunk_rixs, array._cdata_shape) - - def __iter__(self): - # iterate over chunks - for i, chunk_rix in enumerate(self.chunk_rixs): - chunk_coords = tuple(m[i] for m in self.chunk_mixs) - if chunk_rix == 0: - start = 0 - else: - start = self.chunk_nitems_cumsum[chunk_rix - 1] - stop = self.chunk_nitems_cumsum[chunk_rix] - if self.sel_sort is None: - out_selection = slice(start, stop) - else: - out_selection = self.sel_sort[start:stop] - - chunk_offsets = tuple( - dim_chunk_ix * dim_chunk_len - for dim_chunk_ix, dim_chunk_len in zip(chunk_coords, self.array._chunks) - ) - chunk_selection = tuple( - dim_sel[start:stop] - dim_chunk_offset - for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets) - ) - - yield ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -# noinspection PyProtectedMember -class MaskIndexer(CoordinateIndexer): - def __init__(self, selection, array): - # some initial normalization - selection = ensure_tuple(selection) - selection = replace_lists(selection) - - # validation - if not is_mask_selection(selection, array): - raise IndexError( - "invalid mask selection; expected one Boolean (mask)" - "array with the same shape as the target array, got {!r}".format(selection) - ) +def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]: + return itertools.product(*(range(x) for x in chunks_per_shard)) - # convert to indices - selection = np.nonzero(selection[0]) - # delegate the rest to superclass - super().__init__(selection, array) +def is_total_slice(item: Selection, shape: ChunkCoords): + """Determine whether `item` specifies a complete slice of array with the + given `shape`. 
Used to optimize __setitem__ operations on the Chunk + class.""" - -class VIndex: - def __init__(self, array): - self.array = array - - def __getitem__(self, selection): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - if is_coordinate_selection(selection, self.array): - return self.array.get_coordinate_selection(selection, fields=fields) - elif is_mask_selection(selection, self.array): - return self.array.get_mask_selection(selection, fields=fields) - else: - raise VindexInvalidSelectionError(selection) - - def __setitem__(self, selection, value): - fields, selection = pop_fields(selection) - selection = ensure_tuple(selection) - selection = replace_lists(selection) - if is_coordinate_selection(selection, self.array): - self.array.set_coordinate_selection(selection, value, fields=fields) - elif is_mask_selection(selection, self.array): - self.array.set_mask_selection(selection, value, fields=fields) - else: - raise VindexInvalidSelectionError(selection) - - -def check_fields(fields, dtype): - # early out - if fields is None: - return dtype - # check type - if not isinstance(fields, (str, list, tuple)): - raise IndexError( - "'fields' argument must be a string or list of strings; found " "{!r}".format( - type(fields) + # N.B., assume shape is normalized + if item == slice(None): + return True + if isinstance(item, slice): + item = (item,) + if isinstance(item, tuple): + return all( + ( + isinstance(dim_sel, slice) + and ( + (dim_sel == slice(None)) + or ((dim_sel.stop - dim_sel.start == dim_len) and (dim_sel.step in [1, None])) + ) ) + for dim_sel, dim_len in zip(item, shape) ) - if fields: - if dtype.names is None: - raise IndexError("invalid 'fields' argument, array does not have any fields") - try: - if isinstance(fields, str): - # single field selection - out_dtype = dtype[fields] - else: - # multiple field selection - out_dtype = np.dtype([(f, dtype[f]) for f in fields]) - except KeyError as e: - raise IndexError("invalid 'fields' argument, field not found: {!r}".format(e)) - else: - return out_dtype else: - return dtype - - -def check_no_multi_fields(fields): - if isinstance(fields, list): - if len(fields) == 1: - return fields[0] - elif len(fields) > 1: - raise IndexError("multiple fields are not supported for this operation") - return fields - - -def pop_fields(selection): - if isinstance(selection, str): - # single field selection - fields = selection - selection = () - elif not isinstance(selection, tuple): - # single selection item, no fields - fields = None - # leave selection as-is - else: - # multiple items, split fields from selection items - fields = [f for f in selection if isinstance(f, str)] - fields = fields[0] if len(fields) == 1 else fields - selection = tuple(s for s in selection if not isinstance(s, str)) - selection = selection[0] if len(selection) == 1 else selection - return fields, selection - - -def make_slice_selection(selection): - ls = [] - for dim_selection in selection: - if is_integer(dim_selection): - ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) - elif isinstance(dim_selection, np.ndarray): - if len(dim_selection) == 1: - ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) - else: - raise ArrayIndexError() - else: - ls.append(dim_selection) - return ls + raise TypeError("expected slice or tuple of slices, found %r" % item) -class PartialChunkIterator: - """Iterator to retrieve the specific coordinates of requested data - from within a compressed 
chunk. - - Parameters - ---------- - selection : tuple - tuple of slice objects to take from the chunk - arr_shape : shape of chunk to select data from - - Attributes - ----------- - arr_shape - selection - - Returns - ------- - Tuple with 3 elements: - - start: int - elements offset in the chunk to read from - nitems: int - number of elements to read in the chunk from start - partial_out_selection: list of slices - indices of a temporary empty array of size `Array._chunks` to assign - the decompressed data to after the partial read. - - Notes - ----- - An array is flattened when compressed with blosc, so this iterator takes - the wanted selection of an array and determines the wanted coordinates - of the flattened, compressed data to be read and then decompressed. The - decompressed data is then placed in a temporary empty array of size - `Array._chunks` at the indices yielded as partial_out_selection. - Once all the slices yielded by this iterator have been read, decompressed - and written to the temporary array, the wanted slice of the chunk can be - indexed from the temporary array and written to the out_selection slice - of the out array. - - """ - - def __init__(self, selection, arr_shape): - selection = make_slice_selection(selection) - self.arr_shape = arr_shape - - # number of selection dimensions can't be greater than the number of chunk dimensions - if len(selection) > len(self.arr_shape): - raise ValueError( - "Selection has more dimensions then the array:\n" - f"selection dimensions = {len(selection)}\n" - f"array dimensions = {len(self.arr_shape)}" - ) - - # any selection can not be out of the range of the chunk - selection_shape = np.empty(self.arr_shape)[tuple(selection)].shape - if any( - selection_dim < 0 or selection_dim > arr_dim - for selection_dim, arr_dim in zip(selection_shape, self.arr_shape) - ): - raise IndexError( - "a selection index is out of range for the dimension" - ) # pragma: no cover - - for i, dim_size in enumerate(self.arr_shape[::-1]): - index = len(self.arr_shape) - (i + 1) - if index <= len(selection) - 1: - slice_size = selection_shape[index] - if slice_size == dim_size and index > 0: - selection.pop() - else: - break - - chunk_loc_slices = [] - last_dim_slice = None if selection[-1].step > 1 else selection.pop() - for arr_shape_i, sl in zip(arr_shape, selection): - dim_chunk_loc_slices = [] - assert isinstance(sl, slice) - for x in slice_to_range(sl, arr_shape_i): - dim_chunk_loc_slices.append(slice(x, x + 1, 1)) - chunk_loc_slices.append(dim_chunk_loc_slices) - if last_dim_slice: - chunk_loc_slices.append([last_dim_slice]) - self.chunk_loc_slices = list(itertools.product(*chunk_loc_slices)) - - def __iter__(self): - chunk1 = self.chunk_loc_slices[0] - nitems = (chunk1[-1].stop - chunk1[-1].start) * np.prod( - self.arr_shape[len(chunk1) :], dtype=int - ) - for partial_out_selection in self.chunk_loc_slices: - start = 0 - for i, sl in enumerate(partial_out_selection): - start += sl.start * np.prod(self.arr_shape[i + 1 :], dtype=int) - yield start, nitems, partial_out_selection +def all_chunk_coords(shape: ChunkCoords, chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: + return itertools.product(*(range(0, _ceildiv(s, c)) for s, c in zip(shape, chunk_shape))) diff --git a/src/zarr/v3/metadata.py b/src/zarr/metadata.py similarity index 96% rename from src/zarr/v3/metadata.py rename to src/zarr/metadata.py index 573b8484f0..8eba9a0b5a 100644 --- a/src/zarr/v3/metadata.py +++ b/src/zarr/metadata.py @@ -6,19 +6,19 @@ import numpy as np import 
numpy.typing as npt -from zarr.v3.chunk_grids import ChunkGrid, RegularChunkGrid -from zarr.v3.chunk_key_encodings import ChunkKeyEncoding, parse_separator +from zarr.chunk_grids import ChunkGrid, RegularChunkGrid +from zarr.chunk_key_encodings import ChunkKeyEncoding, parse_separator if TYPE_CHECKING: from typing import Literal, Union, List, Optional, Tuple - from zarr.v3.codecs.pipeline import CodecPipeline + from zarr.codecs.pipeline import CodecPipeline -from zarr.v3.abc.codec import Codec -from zarr.v3.abc.metadata import Metadata +from zarr.abc.codec import Codec +from zarr.abc.metadata import Metadata -from zarr.v3.common import ( +from zarr.common import ( JSON, ArraySpec, ChunkCoords, @@ -26,7 +26,7 @@ parse_fill_value, parse_shapelike, ) -from zarr.v3.config import RuntimeConfiguration, parse_indexing_order +from zarr.config import RuntimeConfiguration, parse_indexing_order def runtime_configuration( @@ -370,7 +370,7 @@ def parse_v2_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: def parse_codecs(data: Iterable[Union[Codec, JSON]]) -> CodecPipeline: - from zarr.v3.codecs.pipeline import CodecPipeline + from zarr.codecs.pipeline import CodecPipeline if not isinstance(data, Iterable): raise TypeError(f"Expected iterable, got {type(data)}") diff --git a/src/zarr/store/__init__.py b/src/zarr/store/__init__.py new file mode 100644 index 0000000000..b1c3a5f720 --- /dev/null +++ b/src/zarr/store/__init__.py @@ -0,0 +1,5 @@ +# flake8: noqa +from zarr.store.core import StorePath, StoreLike, make_store_path +from zarr.store.remote import RemoteStore +from zarr.store.local import LocalStore +from zarr.store.memory import MemoryStore diff --git a/src/zarr/v3/store/core.py b/src/zarr/store/core.py similarity index 94% rename from src/zarr/v3/store/core.py rename to src/zarr/store/core.py index 16714d9e30..29506aa619 100644 --- a/src/zarr/v3/store/core.py +++ b/src/zarr/store/core.py @@ -3,9 +3,9 @@ from pathlib import Path from typing import Any, Optional, Tuple, Union -from zarr.v3.common import BytesLike -from zarr.v3.abc.store import Store -from zarr.v3.store.local import LocalStore +from zarr.common import BytesLike +from zarr.abc.store import Store +from zarr.store.local import LocalStore def _dereference_path(root: str, path: str) -> str: diff --git a/src/zarr/v3/store/local.py b/src/zarr/store/local.py similarity index 98% rename from src/zarr/v3/store/local.py rename to src/zarr/store/local.py index 8f02b904c0..1e9e880875 100644 --- a/src/zarr/v3/store/local.py +++ b/src/zarr/store/local.py @@ -5,8 +5,8 @@ from pathlib import Path from typing import Union, Optional, List, Tuple -from zarr.v3.abc.store import Store -from zarr.v3.common import BytesLike, concurrent_map, to_thread +from zarr.abc.store import Store +from zarr.common import BytesLike, concurrent_map, to_thread def _get(path: Path, byte_range: Optional[Tuple[int, Optional[int]]] = None) -> bytes: diff --git a/src/zarr/v3/store/memory.py b/src/zarr/store/memory.py similarity index 97% rename from src/zarr/v3/store/memory.py rename to src/zarr/store/memory.py index afacfa4321..9661b6cea7 100644 --- a/src/zarr/v3/store/memory.py +++ b/src/zarr/store/memory.py @@ -2,8 +2,8 @@ from typing import Optional, MutableMapping, List, Tuple -from zarr.v3.common import BytesLike -from zarr.v3.abc.store import Store +from zarr.common import BytesLike +from zarr.abc.store import Store # TODO: this store could easily be extended to wrap any MutuableMapping store from v2 diff --git a/src/zarr/v3/store/remote.py b/src/zarr/store/remote.py 
similarity index 96% rename from src/zarr/v3/store/remote.py rename to src/zarr/store/remote.py index 0e6fc84e08..c42cf3f56d 100644 --- a/src/zarr/v3/store/remote.py +++ b/src/zarr/store/remote.py @@ -2,9 +2,9 @@ from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union -from zarr.v3.abc.store import Store -from zarr.v3.store.core import _dereference_path -from zarr.v3.common import BytesLike +from zarr.abc.store import Store +from zarr.store.core import _dereference_path +from zarr.common import BytesLike if TYPE_CHECKING: diff --git a/src/zarr/sync.py b/src/zarr/sync.py index 49684a51ee..d9665b4c58 100644 --- a/src/zarr/sync.py +++ b/src/zarr/sync.py @@ -1,48 +1,120 @@ -import os -from collections import defaultdict -from threading import Lock +from __future__ import annotations -import fasteners +import asyncio +import threading +from typing import ( + Any, + AsyncIterator, + Coroutine, + List, + Optional, + TypeVar, +) +from typing_extensions import ParamSpec +from zarr.config import SyncConfiguration -class ThreadSynchronizer: - """Provides synchronization using thread locks.""" - def __init__(self): - self.mutex = Lock() - self.locks = defaultdict(Lock) +# From https://github.com/fsspec/filesystem_spec/blob/master/fsspec/asyn.py - def __getitem__(self, item): - with self.mutex: - return self.locks[item] +iothread: List[Optional[threading.Thread]] = [None] # dedicated IO thread +loop: List[Optional[asyncio.AbstractEventLoop]] = [ + None +] # global event loop for any non-async instance +_lock: Optional[threading.Lock] = None # global lock placeholder +get_running_loop = asyncio.get_running_loop - def __getstate__(self): - return True - def __setstate__(self, *args): - # reinitialize from scratch - self.__init__() +def _get_lock() -> threading.Lock: + """Allocate or return a threading lock. + + The lock is allocated on first use to allow setting one lock per forked process. + """ + global _lock + if not _lock: + _lock = threading.Lock() + return _lock -class ProcessSynchronizer: - """Provides synchronization using file locks via the - `fasteners `_ - package. +async def _runner(event: threading.Event, coro: Coroutine, result_box: List[Optional[Any]]): + try: + result_box[0] = await coro + except Exception as ex: + result_box[0] = ex + finally: + event.set() - Parameters - ---------- - path : string - Path to a directory on a file system that is shared by all processes. - N.B., this should be a *different* path to where you store the array. +def sync(coro: Coroutine, loop: Optional[asyncio.AbstractEventLoop] = None): + """ + Make loop run coroutine until it returns. 
Runs in another thread. + + Examples + -------- + >>> sync(async_function(), existing_loop) """ + if loop is None: + # NB: if the loop is not running *yet*, it is OK to submit work + # and we will wait for it + loop = _get_loop() + if loop is None or loop.is_closed(): + raise RuntimeError("Loop is not running") + try: + loop0 = asyncio.events.get_running_loop() + if loop0 is loop: + raise NotImplementedError("Calling sync() from within a running loop") + except RuntimeError: + pass + result_box: List[Optional[Any]] = [None] + event = threading.Event() + asyncio.run_coroutine_threadsafe(_runner(event, coro, result_box), loop) + while True: + # this loop allows the thread to be interrupted + if event.wait(1): + break + + return_result = result_box[0] + if isinstance(return_result, BaseException): + raise return_result + else: + return return_result + + +def _get_loop(): + """Create or return the default fsspec IO loop + + The loop will be running on a separate thread. + """ + if loop[0] is None: + with _get_lock(): + # repeat the check just in case the loop got filled between the + # previous two calls from another thread + if loop[0] is None: + new_loop = asyncio.new_event_loop() + loop[0] = new_loop + th = threading.Thread(target=new_loop.run_forever, name="zarrIO") + th.daemon = True + th.start() + iothread[0] = th + return loop[0] + + +P = ParamSpec("P") +T = TypeVar("T") + + +class SyncMixin: + _sync_configuration: SyncConfiguration - def __init__(self, path): - self.path = path + def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: + # TODO: refactor this to take *args and **kwargs and pass those to the method + # this should allow us to better type the sync wrapper return sync(coroutine, loop=self._sync_configuration.asyncio_loop) - def __getitem__(self, item): - path = os.path.join(self.path, item) - lock = fasteners.InterProcessLock(path) - return lock + def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: + async def iter_to_list() -> List[T]: + # TODO: replace with generators so we don't materialize the entire iterator at once + async_iterator = await coroutine + return [item async for item in async_iterator] - # pickling and unpickling should be handled automatically + return self._sync(iter_to_list()) diff --git a/src/zarr/v2/__init__.py b/src/zarr/v2/__init__.py new file mode 100644 index 0000000000..27c7595580 --- /dev/null +++ b/src/zarr/v2/__init__.py @@ -0,0 +1,54 @@ +# flake8: noqa +from zarr.v2.codecs import * +from zarr.v2.convenience import ( + consolidate_metadata, + copy, + copy_all, + copy_store, + load, + open, + open_consolidated, + save, + save_array, + save_group, + tree, +) +from zarr.v2.core import Array +from zarr.v2.creation import ( + array, + create, + empty, + empty_like, + full, + full_like, + ones, + ones_like, + open_array, + open_like, + zeros, + zeros_like, +) +from zarr.v2.errors import CopyError, MetadataError +from zarr.v2.hierarchy import Group, group, open_group +from zarr.v2.n5 import N5Store, N5FSStore +from zarr.v2.storage import ( + ABSStore, + DBMStore, + DictStore, + DirectoryStore, + KVStore, + LMDBStore, + LRUStoreCache, + MemoryStore, + MongoDBStore, + NestedDirectoryStore, + RedisStore, + SQLiteStore, + TempStore, + ZipStore, +) +from zarr.v2.sync import ProcessSynchronizer, ThreadSynchronizer +from zarr._version import version as __version__ + +# in case setuptools scm screws up and finds the version to be 0.0.0 +assert not __version__.startswith("0.0.0") diff --git a/src/zarr/v3/abc/__init__.py 
b/src/zarr/v2/_storage/__init__.py similarity index 100% rename from src/zarr/v3/abc/__init__.py rename to src/zarr/v2/_storage/__init__.py diff --git a/src/zarr/_storage/absstore.py b/src/zarr/v2/_storage/absstore.py similarity index 98% rename from src/zarr/_storage/absstore.py rename to src/zarr/v2/_storage/absstore.py index d8e292535c..ee03d44bd4 100644 --- a/src/zarr/_storage/absstore.py +++ b/src/zarr/v2/_storage/absstore.py @@ -2,8 +2,8 @@ import warnings from numcodecs.compat import ensure_bytes -from zarr.util import normalize_storage_path -from zarr._storage.store import Store +from zarr.v2.util import normalize_storage_path +from zarr.v2._storage.store import Store __doctest_requires__ = { ("ABSStore", "ABSStore.*"): ["azure.storage.blob"], diff --git a/src/zarr/_storage/store.py b/src/zarr/v2/_storage/store.py similarity index 96% rename from src/zarr/_storage/store.py rename to src/zarr/v2/_storage/store.py index 6e13b08cc7..ec1dbf0565 100644 --- a/src/zarr/_storage/store.py +++ b/src/zarr/v2/_storage/store.py @@ -1,9 +1,9 @@ from collections.abc import MutableMapping from typing import Any, List, Mapping, Optional, Sequence, Union -from zarr.meta import Metadata2 -from zarr.util import normalize_storage_path -from zarr.context import Context +from zarr.v2.meta import Metadata2 +from zarr.v2.util import normalize_storage_path +from zarr.v2.context import Context # v2 store keys @@ -83,7 +83,7 @@ def _ensure_store(store: Any): We'll do this conversion in a few places automatically """ - from zarr.storage import KVStore # avoid circular import + from zarr.v2.storage import KVStore # avoid circular import if isinstance(store, BaseStore): if not store._store_version == 2: @@ -111,7 +111,7 @@ def _ensure_store(store: Any): raise ValueError( "Starting with Zarr 2.11.0, stores must be subclasses of " "BaseStore, if your store exposes the MutableMapping interface " - f"wrap it in Zarr.storage.KVStore. Got {store}" + f"wrap it in zarr.v2.storage.KVStore. 
Got {store}" ) def getitems( diff --git a/src/zarr/attrs.py b/src/zarr/v2/attrs.py similarity index 98% rename from src/zarr/attrs.py rename to src/zarr/v2/attrs.py index 89cfefc22e..af23d43b9e 100644 --- a/src/zarr/attrs.py +++ b/src/zarr/v2/attrs.py @@ -2,8 +2,8 @@ import warnings from collections.abc import MutableMapping -from zarr._storage.store import Store -from zarr.util import json_dumps +from zarr.v2._storage.store import Store +from zarr.v2.util import json_dumps class Attributes(MutableMapping[str, Any]): diff --git a/src/zarr/codecs.py b/src/zarr/v2/codecs.py similarity index 100% rename from src/zarr/codecs.py rename to src/zarr/v2/codecs.py diff --git a/src/zarr/context.py b/src/zarr/v2/context.py similarity index 100% rename from src/zarr/context.py rename to src/zarr/v2/context.py diff --git a/src/zarr/convenience.py b/src/zarr/v2/convenience.py similarity index 91% rename from src/zarr/convenience.py rename to src/zarr/v2/convenience.py index e4bbade527..6355a11af9 100644 --- a/src/zarr/convenience.py +++ b/src/zarr/v2/convenience.py @@ -4,22 +4,22 @@ import os import re from collections.abc import Mapping, MutableMapping -from zarr.core import Array -from zarr.creation import array as _create_array -from zarr.creation import open_array -from zarr.errors import CopyError, PathNotFoundError -from zarr.hierarchy import Group -from zarr.hierarchy import group as _create_group -from zarr.hierarchy import open_group -from zarr.meta import json_dumps, json_loads -from zarr.storage import ( +from zarr.v2.core import Array +from zarr.v2.creation import array as _create_array +from zarr.v2.creation import open_array +from zarr.v2.errors import CopyError, PathNotFoundError +from zarr.v2.hierarchy import Group +from zarr.v2.hierarchy import group as _create_group +from zarr.v2.hierarchy import open_group +from zarr.v2.meta import json_dumps, json_loads +from zarr.v2.storage import ( contains_array, contains_group, normalize_store_arg, BaseStore, ConsolidatedMetadataStore, ) -from zarr.util import TreeViewer, buffer_size, normalize_storage_path +from zarr.v2.util import TreeViewer, buffer_size, normalize_storage_path from typing import Any, Union @@ -50,17 +50,17 @@ def open(store: StoreLike = None, mode: str = "a", *, path=None, **kwargs): path : str or None, optional The path within the store to open. **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. + Additional parameters are passed through to :func:`zarr.v2.creation.open_array` or + :func:`zarr.v2.hierarchy.open_group`. Returns ------- - z : :class:`zarr.core.Array` or :class:`zarr.hierarchy.Group` + z : :class:`zarr.v2.core.Array` or :class:`zarr.v2.hierarchy.Group` Array or group, depending on what exists in the given store. 
See Also -------- - zarr.creation.open_array, zarr.hierarchy.open_group + zarr.v2.creation.open_array, zarr.v2.hierarchy.open_group Examples -------- @@ -69,24 +69,24 @@ def open(store: StoreLike = None, mode: str = "a", *, path=None, **kwargs): >>> import zarr >>> store = 'data/example.zarr' - >>> zw = zarr.open(store, mode='w', shape=100, dtype='i4') # open new array + >>> zw = zarr.v2.open(store, mode='w', shape=100, dtype='i4') # open new array >>> zw - - >>> za = zarr.open(store, mode='a') # open existing array for reading and writing + + >>> za = zarr.v2.open(store, mode='a') # open existing array for reading and writing >>> za - - >>> zr = zarr.open(store, mode='r') # open existing array read-only + + >>> zr = zarr.v2.open(store, mode='r') # open existing array read-only >>> zr - - >>> gw = zarr.open(store, mode='w') # open new group, overwriting previous data + + >>> gw = zarr.v2.open(store, mode='w') # open new group, overwriting previous data >>> gw - - >>> ga = zarr.open(store, mode='a') # open existing group for reading and writing + + >>> ga = zarr.v2.open(store, mode='a') # open existing group for reading and writing >>> ga - - >>> gr = zarr.open(store, mode='r') # open existing group read-only + + >>> gr = zarr.v2.open(store, mode='r') # open existing group read-only >>> gr - + """ @@ -147,14 +147,14 @@ def save_array(store: StoreLike, arr, *, path=None, **kwargs): >>> import zarr >>> import numpy as np >>> arr = np.arange(10000) - >>> zarr.save_array('data/example.zarr', arr) - >>> zarr.load('data/example.zarr') + >>> zarr.v2.save_array('data/example.zarr', arr) + >>> zarr.v2.load('data/example.zarr') array([ 0, 1, 2, ..., 9997, 9998, 9999]) Save an array to a single file (uses a :class:`ZipStore`):: - >>> zarr.save_array('data/example.zip', arr) - >>> zarr.load('data/example.zip') + >>> zarr.v2.save_array('data/example.zip', arr) + >>> zarr.v2.load('data/example.zip') array([ 0, 1, 2, ..., 9997, 9998, 9999]) """ @@ -193,8 +193,8 @@ def save_group(store: StoreLike, *args, path=None, **kwargs): >>> import numpy as np >>> a1 = np.arange(10000) >>> a2 = np.arange(10000, 0, -1) - >>> zarr.save_group('data/example.zarr', a1, a2) - >>> loader = zarr.load('data/example.zarr') + >>> zarr.v2.save_group('data/example.zarr', a1, a2) + >>> loader = zarr.v2.load('data/example.zarr') >>> loader >>> loader['arr_0'] @@ -204,8 +204,8 @@ def save_group(store: StoreLike, *args, path=None, **kwargs): Save several arrays using named keyword arguments:: - >>> zarr.save_group('data/example.zarr', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zarr') + >>> zarr.v2.save_group('data/example.zarr', foo=a1, bar=a2) + >>> loader = zarr.v2.load('data/example.zarr') >>> loader >>> loader['foo'] @@ -215,8 +215,8 @@ def save_group(store: StoreLike, *args, path=None, **kwargs): Store several arrays in a single zip file (uses a :class:`ZipStore`):: - >>> zarr.save_group('data/example.zip', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zip') + >>> zarr.v2.save_group('data/example.zip', foo=a1, bar=a2) + >>> loader = zarr.v2.load('data/example.zip') >>> loader >>> loader['foo'] @@ -269,14 +269,14 @@ def save(store: StoreLike, *args, path=None, **kwargs): >>> import zarr >>> import numpy as np >>> arr = np.arange(10000) - >>> zarr.save('data/example.zarr', arr) - >>> zarr.load('data/example.zarr') + >>> zarr.v2.save('data/example.zarr', arr) + >>> zarr.v2.load('data/example.zarr') array([ 0, 1, 2, ..., 9997, 9998, 9999]) Save an array to a Zip file (uses a :class:`ZipStore`):: - >>> 
zarr.save('data/example.zip', arr) - >>> zarr.load('data/example.zip') + >>> zarr.v2.save('data/example.zip', arr) + >>> zarr.v2.load('data/example.zip') array([ 0, 1, 2, ..., 9997, 9998, 9999]) Save several arrays to a directory on the file system (uses a @@ -286,8 +286,8 @@ def save(store: StoreLike, *args, path=None, **kwargs): >>> import numpy as np >>> a1 = np.arange(10000) >>> a2 = np.arange(10000, 0, -1) - >>> zarr.save('data/example.zarr', a1, a2) - >>> loader = zarr.load('data/example.zarr') + >>> zarr.v2.save('data/example.zarr', a1, a2) + >>> loader = zarr.v2.load('data/example.zarr') >>> loader >>> loader['arr_0'] @@ -297,8 +297,8 @@ def save(store: StoreLike, *args, path=None, **kwargs): Save several arrays using named keyword arguments:: - >>> zarr.save('data/example.zarr', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zarr') + >>> zarr.v2.save('data/example.zarr', foo=a1, bar=a2) + >>> loader = zarr.v2.load('data/example.zarr') >>> loader >>> loader['foo'] @@ -308,8 +308,8 @@ def save(store: StoreLike, *args, path=None, **kwargs): Store several arrays in a single zip file (uses a :class:`ZipStore`):: - >>> zarr.save('data/example.zip', foo=a1, bar=a2) - >>> loader = zarr.load('data/example.zip') + >>> zarr.v2.save('data/example.zip', foo=a1, bar=a2) + >>> loader = zarr.v2.load('data/example.zip') >>> loader >>> loader['foo'] @@ -413,7 +413,7 @@ def tree(grp, expand=False, level=None): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> g4 = g3.create_group('baz') @@ -428,9 +428,9 @@ def tree(grp, expand=False, level=None): └── foo >>> import h5py >>> h5f = h5py.File('data/example.h5', mode='w') - >>> zarr.copy_all(g1, h5f) + >>> zarr.v2.copy_all(g1, h5f) (5, 0, 800) - >>> zarr.tree(h5f) + >>> zarr.v2.tree(h5f) / ├── bar │ ├── baz @@ -440,7 +440,7 @@ def tree(grp, expand=False, level=None): See Also -------- - zarr.hierarchy.Group.tree + zarr.v2.hierarchy.Group.tree Notes ----- @@ -564,8 +564,8 @@ def copy_store( -------- >>> import zarr - >>> store1 = zarr.DirectoryStore('data/example.zarr') - >>> root = zarr.group(store1, overwrite=True) + >>> store1 = zarr.v2.DirectoryStore('data/example.zarr') + >>> root = zarr.v2.group(store1, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.create_group('bar') >>> baz = bar.create_dataset('baz', shape=100, chunks=50, dtype='i8') @@ -577,8 +577,8 @@ def copy_store( └── bar └── baz (100,) int64 >>> from sys import stdout - >>> store2 = zarr.ZipStore('data/example.zip', mode='w') - >>> zarr.copy_store(store1, store2, log=stdout) + >>> store2 = zarr.v2.ZipStore('data/example.zip', mode='w') + >>> zarr.v2.copy_store(store1, store2, log=stdout) copy .zgroup copy foo/.zgroup copy foo/bar/.zgroup @@ -587,7 +587,7 @@ def copy_store( copy foo/bar/baz/1 all done: 6 copied, 0 skipped, 566 bytes copied (6, 0, 566) - >>> new_root = zarr.group(store2) + >>> new_root = zarr.v2.group(store2) >>> new_root.tree() / └── foo @@ -772,15 +772,15 @@ def copy( >>> foo = source.create_group('foo') >>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) >>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,)) - >>> zarr.tree(source) + >>> zarr.v2.tree(source) / ├── foo │ └── bar │ └── baz (100,) int64 └── spam (100,) int64 - >>> dest = zarr.group() + >>> dest = zarr.v2.group() >>> from sys import stdout - >>> zarr.copy(source['foo'], dest, log=stdout) + >>> zarr.v2.copy(source['foo'], dest, 
log=stdout) copy /foo copy /foo/bar copy /foo/bar/baz (100,) int64 @@ -797,29 +797,29 @@ def copy( the destination. Here are some examples of these options, also using ``dry_run=True`` to find out what would happen without actually copying anything:: - >>> source = zarr.group() - >>> dest = zarr.group() + >>> source = zarr.v2.group() + >>> dest = zarr.v2.group() >>> baz = source.create_dataset('foo/bar/baz', data=np.arange(100)) >>> spam = source.create_dataset('foo/spam', data=np.arange(1000)) >>> existing_spam = dest.create_dataset('foo/spam', data=np.arange(1000)) >>> from sys import stdout >>> try: - ... zarr.copy(source['foo'], dest, log=stdout, dry_run=True) - ... except zarr.CopyError as e: + ... zarr.v2.copy(source['foo'], dest, log=stdout, dry_run=True) + ... except zarr.v2.CopyError as e: ... print(e) ... copy /foo copy /foo/bar copy /foo/bar/baz (100,) int64 an object 'spam' already exists in destination '/foo' - >>> zarr.copy(source['foo'], dest, log=stdout, if_exists='replace', dry_run=True) + >>> zarr.v2.copy(source['foo'], dest, log=stdout, if_exists='replace', dry_run=True) copy /foo copy /foo/bar copy /foo/bar/baz (100,) int64 copy /foo/spam (1000,) int64 dry run: 4 copied, 0 skipped (4, 0, 0) - >>> zarr.copy(source['foo'], dest, log=stdout, if_exists='skip', dry_run=True) + >>> zarr.v2.copy(source['foo'], dest, log=stdout, if_exists='skip', dry_run=True) copy /foo copy /foo/bar copy /foo/bar/baz (100,) int64 @@ -1104,15 +1104,15 @@ def copy_all( >>> foo = source.create_group('foo') >>> baz = foo.create_dataset('bar/baz', data=np.arange(100), chunks=(50,)) >>> spam = source.create_dataset('spam', data=np.arange(100, 200), chunks=(30,)) - >>> zarr.tree(source) + >>> zarr.v2.tree(source) / ├── foo │ └── bar │ └── baz (100,) int64 └── spam (100,) int64 - >>> dest = zarr.group() + >>> dest = zarr.v2.group() >>> import sys - >>> zarr.copy_all(source, dest, log=sys.stdout) + >>> zarr.v2.copy_all(source, dest, log=sys.stdout) copy /foo copy /foo/bar copy /foo/bar/baz (100,) int64 @@ -1197,7 +1197,7 @@ def consolidate_metadata(store: BaseStore, metadata_key=".zmetadata", *, path="" Returns ------- - g : :class:`zarr.hierarchy.Group` + g : :class:`zarr.v2.hierarchy.Group` Group instance, opened with the new consolidated metadata. See Also @@ -1252,12 +1252,12 @@ def open_consolidated(store: StoreLike, metadata_key=".zmetadata", mode="r+", ** changes to metadata including creation of new arrays or group are not allowed. **kwargs - Additional parameters are passed through to :func:`zarr.creation.open_array` or - :func:`zarr.hierarchy.open_group`. + Additional parameters are passed through to :func:`zarr.v2.creation.open_array` or + :func:`zarr.v2.hierarchy.open_group`. Returns ------- - g : :class:`zarr.hierarchy.Group` + g : :class:`zarr.v2.hierarchy.Group` Group instance, opened with the consolidated metadata. 
See Also diff --git a/src/zarr/core.py b/src/zarr/v2/core.py similarity index 97% rename from src/zarr/core.py rename to src/zarr/v2/core.py index 06dcb32063..273d2857e8 100644 --- a/src/zarr/core.py +++ b/src/zarr/v2/core.py @@ -10,12 +10,12 @@ import numpy as np from numcodecs.compat import ensure_bytes -from zarr._storage.store import _prefix_to_attrs_key -from zarr.attrs import Attributes -from zarr.codecs import AsType, get_codec -from zarr.context import Context -from zarr.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError -from zarr.indexing import ( +from zarr.v2._storage.store import _prefix_to_attrs_key +from zarr.v2.attrs import Attributes +from zarr.v2.codecs import AsType, get_codec +from zarr.v2.context import Context +from zarr.v2.errors import ArrayNotFoundError, ReadOnlyError, ArrayIndexError +from zarr.v2.indexing import ( BasicIndexer, CoordinateIndexer, MaskIndexer, @@ -35,14 +35,14 @@ is_scalar, pop_fields, ) -from zarr.storage import ( +from zarr.v2.storage import ( _prefix_to_array_key, KVStore, getsize, listdir, normalize_store_arg, ) -from zarr.util import ( +from zarr.v2.util import ( ConstantMap, UncompressedPartialReadBufferV3, all_equal, @@ -535,7 +535,7 @@ def islice(self, start=None, end=None): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100)) + >>> z = zarr.v2.array(np.arange(100)) Iterate over part of the array: >>> for value in z.islice(25, 30): value; @@ -604,7 +604,7 @@ def __getitem__(self, selection): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100)) + >>> z = zarr.v2.array(np.arange(100)) Retrieve a single item:: @@ -631,7 +631,7 @@ def __getitem__(self, selection): Setup a 2-dimensional array:: - >>> z = zarr.array(np.arange(100).reshape(10, 10)) + >>> z = zarr.v2.array(np.arange(100).reshape(10, 10)) Retrieve an item:: @@ -688,7 +688,7 @@ def __getitem__(self, selection): ... (b'bbb', 2, 8.4), ... (b'ccc', 3, 12.6)], ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) + >>> z = zarr.v2.array(a) >>> z['foo'] array([b'aaa', b'bbb', b'ccc'], dtype='|S3') @@ -755,7 +755,7 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100)) + >>> z = zarr.v2.array(np.arange(100)) Retrieve a single item:: @@ -777,7 +777,7 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): Setup a 2-dimensional array:: - >>> z = zarr.array(np.arange(100).reshape(10, 10)) + >>> z = zarr.v2.array(np.arange(100).reshape(10, 10)) Retrieve an item:: @@ -820,7 +820,7 @@ def get_basic_selection(self, selection=Ellipsis, out=None, fields=None): ... (b'bbb', 2, 8.4), ... (b'ccc', 3, 12.6)], ... 
dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) + >>> z = zarr.v2.array(a) >>> z.get_basic_selection(slice(2), fields='foo') array([b'aaa', b'bbb'], dtype='|S3') @@ -926,7 +926,7 @@ def get_orthogonal_selection(self, selection, out=None, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10)) + >>> z = zarr.v2.array(np.arange(100).reshape(10, 10)) Retrieve rows and columns via any combination of int, slice, integer array and/or Boolean array:: @@ -1034,7 +1034,7 @@ def get_coordinate_selection(self, selection, out=None, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10)) + >>> z = zarr.v2.array(np.arange(100).reshape(10, 10)) Retrieve items by specifying their coordinates:: @@ -1115,7 +1115,7 @@ def get_block_selection(self, selection, out=None, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10), chunks=(3, 3)) + >>> z = zarr.v2.array(np.arange(100).reshape(10, 10), chunks=(3, 3)) Retrieve items by specifying their block coordinates:: @@ -1201,7 +1201,7 @@ def get_mask_selection(self, selection, out=None, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.array(np.arange(100).reshape(10, 10)) + >>> z = zarr.v2.array(np.arange(100).reshape(10, 10)) Retrieve items by specifying a mask:: @@ -1299,7 +1299,7 @@ def __setitem__(self, selection, value): Setup a 1-dimensional array:: >>> import zarr - >>> z = zarr.zeros(100, dtype=int) + >>> z = zarr.v2.zeros(100, dtype=int) Set all array elements to the same scalar value:: @@ -1316,7 +1316,7 @@ def __setitem__(self, selection, value): Setup a 2-dimensional array:: - >>> z = zarr.zeros((5, 5), dtype=int) + >>> z = zarr.v2.zeros((5, 5), dtype=int) Set all array elements to the same scalar value:: @@ -1339,7 +1339,7 @@ def __setitem__(self, selection, value): ... (b'bbb', 2, 8.4), ... (b'ccc', 3, 12.6)], ... dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) + >>> z = zarr.v2.array(a) >>> z['foo'] = b'zzz' >>> z[...] array([(b'zzz', 1, 4.2), (b'zzz', 2, 8.4), (b'zzz', 3, 12.6)], @@ -1401,7 +1401,7 @@ def set_basic_selection(self, selection, value, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.zeros(100, dtype=int) + >>> z = zarr.v2.zeros(100, dtype=int) Set all array elements to the same scalar value:: @@ -1418,7 +1418,7 @@ def set_basic_selection(self, selection, value, fields=None): Setup a 2-dimensional array:: - >>> z = zarr.zeros((5, 5), dtype=int) + >>> z = zarr.v2.zeros((5, 5), dtype=int) Set all array elements to the same scalar value:: @@ -1442,7 +1442,7 @@ def set_basic_selection(self, selection, value, fields=None): ... (b'bbb', 2, 8.4), ... (b'ccc', 3, 12.6)], ... 
dtype=[('foo', 'S3'), ('bar', 'i4'), ('baz', 'f8')]) - >>> z = zarr.array(a) + >>> z = zarr.v2.array(a) >>> z.set_basic_selection(slice(0, 2), b'zzz', fields='foo') >>> z[:] array([(b'zzz', 1, 4.2), (b'zzz', 2, 8.4), (b'ccc', 3, 12.6)], @@ -1497,7 +1497,7 @@ def set_orthogonal_selection(self, selection, value, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.zeros((5, 5), dtype=int) + >>> z = zarr.v2.zeros((5, 5), dtype=int) Set data for a selection of rows:: @@ -1588,7 +1588,7 @@ def set_coordinate_selection(self, selection, value, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.zeros((5, 5), dtype=int) + >>> z = zarr.v2.zeros((5, 5), dtype=int) Set data for a selection of items:: @@ -1671,7 +1671,7 @@ def set_block_selection(self, selection, value, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.zeros((6, 6), dtype=int, chunks=2) + >>> z = zarr.v2.zeros((6, 6), dtype=int, chunks=2) Set data for a selection of items:: @@ -1756,7 +1756,7 @@ def set_mask_selection(self, selection, value, fields=None): >>> import zarr >>> import numpy as np - >>> z = zarr.zeros((5, 5), dtype=int) + >>> z = zarr.v2.zeros((5, 5), dtype=int) Set data for a selection of items:: @@ -2323,16 +2323,16 @@ def info(self): Examples -------- >>> import zarr - >>> z = zarr.zeros(1000000, chunks=100000, dtype='i4') + >>> z = zarr.v2.zeros(1000000, chunks=100000, dtype='i4') >>> z.info - Type : zarr.core.Array + Type : zarr.v2.core.Array Data type : int32 Shape : (1000000,) Chunk shape : (100000,) Order : C Read-only : False Compressor : Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0) - Store type : zarr.storage.KVStore + Store type : zarr.v2.storage.KVStore No. bytes : 4000000 (3.8M) No. bytes stored : 320 Storage ratio : 12500.0 @@ -2402,13 +2402,13 @@ def digest(self, hashname="sha1"): -------- >>> import binascii >>> import zarr - >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.empty(shape=(10000, 10000), chunks=(1000, 1000)) >>> binascii.hexlify(z.digest()) b'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' - >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.zeros(shape=(10000, 10000), chunks=(1000, 1000)) >>> binascii.hexlify(z.digest()) b'7162d416d26a68063b66ed1f30e0a866e4abed60' - >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) + >>> z = zarr.v2.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) >>> binascii.hexlify(z.digest()) b'cb387af37410ae5a3222e893cf3373e4e4f22816' """ @@ -2434,13 +2434,13 @@ def hexdigest(self, hashname="sha1"): Examples -------- >>> import zarr - >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.empty(shape=(10000, 10000), chunks=(1000, 1000)) >>> z.hexdigest() '041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' - >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.zeros(shape=(10000, 10000), chunks=(1000, 1000)) >>> z.hexdigest() '7162d416d26a68063b66ed1f30e0a866e4abed60' - >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) + >>> z = zarr.v2.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) >>> z.hexdigest() 'cb387af37410ae5a3222e893cf3373e4e4f22816' """ @@ -2500,7 +2500,7 @@ def resize(self, *args): Examples -------- >>> import zarr - >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.zeros(shape=(10000, 10000), chunks=(1000, 1000)) >>> z.shape (10000, 10000) >>> z.resize(20000, 10000) @@ -2590,7 +2590,7 @@ def 
append(self, data, axis=0): >>> import numpy as np >>> import zarr >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000) - >>> z = zarr.array(a, chunks=(1000, 100)) + >>> z = zarr.v2.array(a, chunks=(1000, 100)) >>> z.shape (10000, 1000) >>> z.append(a) @@ -2686,10 +2686,10 @@ def view( >>> np.random.seed(42) >>> labels = ['female', 'male'] >>> data = np.random.choice(labels, size=10000) - >>> filters = [zarr.Categorize(labels=labels, + >>> filters = [zarr.v2.Categorize(labels=labels, ... dtype=data.dtype, ... astype='u1')] - >>> a = zarr.array(data, chunks=1000, filters=filters) + >>> a = zarr.v2.array(data, chunks=1000, filters=filters) >>> a[:] array(['female', 'male', 'female', ..., 'male', 'male', 'female'], dtype='>> data = np.random.randint(0, 2, size=10000, dtype='u1') - >>> a = zarr.array(data, chunks=1000) + >>> a = zarr.v2.array(data, chunks=1000) >>> a[:] array([0, 0, 1, ..., 1, 0, 0], dtype=uint8) >>> v = a.view(dtype=bool) @@ -2727,7 +2727,7 @@ def view( data is interpreted correctly: >>> data = np.arange(10000, dtype='u2') - >>> a = zarr.array(data, chunks=1000) + >>> a = zarr.v2.array(data, chunks=1000) >>> a[:10] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint16) >>> v = a.view(dtype='u1', shape=20000, chunks=2000) @@ -2738,7 +2738,7 @@ def view( Change fill value for uninitialized chunks: - >>> a = zarr.full(10000, chunks=1000, fill_value=-1, dtype='i1') + >>> a = zarr.v2.full(10000, chunks=1000, fill_value=-1, dtype='i1') >>> a[:] array([-1, -1, -1, ..., -1, -1, -1], dtype=int8) >>> v = a.view(fill_value=42) @@ -2747,7 +2747,7 @@ def view( Note that resizing or appending to views is not permitted: - >>> a = zarr.empty(10000) + >>> a = zarr.v2.empty(10000) >>> v = a.view() >>> try: ... v.resize(20000) @@ -2820,7 +2820,7 @@ def astype(self, dtype): >>> import zarr >>> import numpy as np >>> data = np.arange(100, dtype=np.uint8) - >>> a = zarr.array(data, chunks=10) + >>> a = zarr.v2.array(data, chunks=10) >>> a[:] array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, diff --git a/src/zarr/creation.py b/src/zarr/v2/creation.py similarity index 92% rename from src/zarr/creation.py rename to src/zarr/v2/creation.py index c93178c0e7..d0ba00603d 100644 --- a/src/zarr/creation.py +++ b/src/zarr/v2/creation.py @@ -4,13 +4,13 @@ import numpy as np from numcodecs.registry import codec_registry -from zarr.core import Array -from zarr.errors import ( +from zarr.v2.core import Array +from zarr.v2.errors import ( ArrayNotFoundError, ContainsArrayError, ContainsGroupError, ) -from zarr.storage import ( +from zarr.v2.storage import ( contains_array, contains_group, default_compressor, @@ -18,7 +18,7 @@ normalize_storage_path, normalize_store_arg, ) -from zarr.util import normalize_dimension_separator +from zarr.v2.util import normalize_dimension_separator def create( @@ -114,7 +114,7 @@ def create( Returns ------- - z : zarr.core.Array + z : zarr.v2.core.Array Examples -------- @@ -122,37 +122,37 @@ def create( Create an array with default settings:: >>> import zarr - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.create((10000, 10000), chunks=(1000, 1000)) >>> z - + Create an array with different some different configuration options:: >>> from numcodecs import Blosc >>> compressor = Blosc(cname='zstd', clevel=1, shuffle=Blosc.BITSHUFFLE) - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='i1', order='F', + >>> z = zarr.v2.create((10000, 10000), chunks=(1000, 1000), 
dtype='i1', order='F', ... compressor=compressor) >>> z - + To create an array with object dtype requires a filter that can handle Python object encoding, e.g., `MsgPack` or `Pickle` from `numcodecs`:: >>> from numcodecs import MsgPack - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype=object, + >>> z = zarr.v2.create((10000, 10000), chunks=(1000, 1000), dtype=object, ... object_codec=MsgPack()) >>> z - + Example with some filters, and also storing chunks separately from metadata:: >>> from numcodecs import Quantize, Adler32 >>> store, chunk_store = dict(), dict() - >>> z = zarr.create((10000, 10000), chunks=(1000, 1000), dtype='f8', + >>> z = zarr.v2.create((10000, 10000), chunks=(1000, 1000), dtype='f8', ... filters=[Quantize(digits=2, dtype='f8'), Adler32()], ... store=store, chunk_store=chunk_store) >>> z - + """ @@ -274,7 +274,7 @@ def _kwargs_compat(compressor, fill_value, kwargs): def empty(shape, **kwargs): """Create an empty array. - For parameter definitions see :func:`zarr.creation.create`. + For parameter definitions see :func:`zarr.v2.creation.create`. Notes ----- @@ -290,14 +290,14 @@ def zeros(shape, **kwargs): """Create an array, with zero being used as the default value for uninitialized portions of the array. - For parameter definitions see :func:`zarr.creation.create`. + For parameter definitions see :func:`zarr.v2.creation.create`. Examples -------- >>> import zarr - >>> z = zarr.zeros((10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.zeros((10000, 10000), chunks=(1000, 1000)) >>> z - + >>> z[:2, :2] array([[0., 0.], [0., 0.]]) @@ -311,14 +311,14 @@ def ones(shape, **kwargs): """Create an array, with one being used as the default value for uninitialized portions of the array. - For parameter definitions see :func:`zarr.creation.create`. + For parameter definitions see :func:`zarr.v2.creation.create`. Examples -------- >>> import zarr - >>> z = zarr.ones((10000, 10000), chunks=(1000, 1000)) + >>> z = zarr.v2.ones((10000, 10000), chunks=(1000, 1000)) >>> z - + >>> z[:2, :2] array([[1., 1.], [1., 1.]]) @@ -332,14 +332,14 @@ def full(shape, fill_value, **kwargs): """Create an array, with `fill_value` being used as the default value for uninitialized portions of the array. - For parameter definitions see :func:`zarr.creation.create`. + For parameter definitions see :func:`zarr.v2.creation.create`. Examples -------- >>> import zarr - >>> z = zarr.full((10000, 10000), chunks=(1000, 1000), fill_value=42) + >>> z = zarr.v2.full((10000, 10000), chunks=(1000, 1000), fill_value=42) >>> z - + >>> z[:2, :2] array([[42., 42.], [42., 42.]]) @@ -370,16 +370,16 @@ def array(data, **kwargs): """Create an array filled with `data`. The `data` argument should be a NumPy array or array-like object. For - other parameter definitions see :func:`zarr.creation.create`. + other parameter definitions see :func:`zarr.v2.creation.create`. Examples -------- >>> import numpy as np >>> import zarr >>> a = np.arange(100000000).reshape(10000, 10000) - >>> z = zarr.array(a, chunks=(1000, 1000)) + >>> z = zarr.v2.array(a, chunks=(1000, 1000)) >>> z - + """ @@ -517,20 +517,20 @@ def open_array( Returns ------- - z : zarr.core.Array + z : zarr.v2.core.Array Examples -------- >>> import numpy as np >>> import zarr - >>> z1 = zarr.open_array('data/example.zarr', mode='w', shape=(10000, 10000), + >>> z1 = zarr.v2.open_array('data/example.zarr', mode='w', shape=(10000, 10000), ... 
chunks=(1000, 1000), fill_value=0) >>> z1[:] = np.arange(100000000).reshape(10000, 10000) >>> z1 - - >>> z2 = zarr.open_array('data/example.zarr', mode='r') + + >>> z2 = zarr.v2.open_array('data/example.zarr', mode='r') >>> z2 - + >>> np.all(z1[:] == z2[:]) True diff --git a/src/zarr/errors.py b/src/zarr/v2/errors.py similarity index 100% rename from src/zarr/errors.py rename to src/zarr/v2/errors.py diff --git a/src/zarr/hierarchy.py b/src/zarr/v2/hierarchy.py similarity index 93% rename from src/zarr/hierarchy.py rename to src/zarr/v2/hierarchy.py index 9044c1681e..acd65750e3 100644 --- a/src/zarr/hierarchy.py +++ b/src/zarr/v2/hierarchy.py @@ -4,9 +4,9 @@ import numpy as np -from zarr.attrs import Attributes -from zarr.core import Array -from zarr.creation import ( +from zarr.v2.attrs import Attributes +from zarr.v2.core import Array +from zarr.v2.creation import ( array, create, empty, @@ -18,13 +18,13 @@ zeros, zeros_like, ) -from zarr.errors import ( +from zarr.v2.errors import ( ContainsArrayError, ContainsGroupError, GroupNotFoundError, ReadOnlyError, ) -from zarr.storage import ( +from zarr.v2.storage import ( _prefix_to_group_key, BaseStore, MemoryStore, @@ -39,7 +39,7 @@ rmdir, ) -from zarr.util import ( +from zarr.v2.util import ( InfoReporter, TreeViewer, is_valid_python_name, @@ -259,7 +259,7 @@ def __iter__(self): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) @@ -363,7 +363,7 @@ def __contains__(self, item): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> d1 = g1.create_dataset('bar', shape=100, chunks=10) >>> 'foo' in g1 @@ -390,14 +390,14 @@ def __getitem__(self, item): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> d1 = g1.create_dataset('foo/bar/baz', shape=100, chunks=10) >>> g1['foo'] - + >>> g1['foo/bar'] - + >>> g1['foo/bar/baz'] - + """ path = self._item_path(item) @@ -462,7 +462,7 @@ def group_keys(self): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) @@ -483,15 +483,15 @@ def groups(self): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) >>> for n, v in g1.groups(): ... print(n, type(v)) - bar - foo + bar + foo """ @@ -523,7 +523,7 @@ def array_keys(self, recurse=False): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) @@ -547,15 +547,15 @@ def arrays(self, recurse=False): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> d1 = g1.create_dataset('baz', shape=100, chunks=10) >>> d2 = g1.create_dataset('quux', shape=200, chunks=20) >>> for n, v in g1.arrays(): ... 
print(n, type(v)) - baz - quux + baz + quux """ return self._array_iter(keys_only=False, method="arrays", recurse=recurse) @@ -580,7 +580,7 @@ def visitvalues(self, func): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> g4 = g3.create_group('baz') @@ -588,13 +588,13 @@ def visitvalues(self, func): >>> def print_visitor(obj): ... print(obj) >>> g1.visitvalues(print_visitor) - - - - + + + + >>> g3.visitvalues(print_visitor) - - + + """ @@ -619,7 +619,7 @@ def visit(self, func): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> g4 = g3.create_group('baz') @@ -649,11 +649,11 @@ def visit(self, func): It is created as follows: - >>> root = zarr.group() + >>> root = zarr.v2.group() >>> foo = root.create_group("foo") >>> bar = root.create_group("bar") >>> root.create_group("aaa").create_group("bbb").create_group("ccc").create_group("aaa") - + For ``find``, the first path that matches a given pattern (for example "aaa") is returned. Note that a non-None value is returned in the visit @@ -725,7 +725,7 @@ def visititems(self, func): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> g4 = g3.create_group('baz') @@ -733,13 +733,13 @@ def visititems(self, func): >>> def print_visitor(name, obj): ... print((name, obj)) >>> g1.visititems(print_visitor) - ('bar', ) - ('bar/baz', ) - ('bar/quux', ) - ('foo', ) + ('bar', ) + ('bar/baz', ) + ('bar/quux', ) + ('foo', ) >>> g3.visititems(print_visitor) - ('baz', ) - ('quux', ) + ('baz', ) + ('quux', ) """ @@ -759,7 +759,7 @@ def tree(self, expand=False, level=None): Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> g4 = g3.create_group('baz') @@ -821,12 +821,12 @@ def create_group(self, name, overwrite=False): Returns ------- - g : zarr.hierarchy.Group + g : zarr.v2.hierarchy.Group Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.create_group('foo') >>> g3 = g1.create_group('bar') >>> g4 = g1.create_group('baz/quux') @@ -866,12 +866,12 @@ def require_group(self, name, overwrite=False): Returns ------- - g : zarr.hierarchy.Group + g : zarr.v2.hierarchy.Group Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> g2 = g1.require_group('foo') >>> g3 = g1.require_group('foo') >>> g2 == g3 @@ -929,7 +929,7 @@ def create_dataset(self, name, **kwargs): Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional Memory layout to be used within each chunk. - synchronizer : zarr.sync.ArraySynchronizer, optional + synchronizer : zarr.v2.sync.ArraySynchronizer, optional Array synchronizer. filters : sequence of Codecs, optional Sequence of filters to use to encode chunk data prior to @@ -946,20 +946,20 @@ def create_dataset(self, name, **kwargs): Returns ------- - a : zarr.core.Array + a : zarr.v2.core.Array Examples -------- >>> import zarr - >>> g1 = zarr.group() + >>> g1 = zarr.v2.group() >>> d1 = g1.create_dataset('foo', shape=(10000, 10000), ... chunks=(1000, 1000)) >>> d1 - + >>> d2 = g1.create_dataset('bar/baz/qux', shape=(100, 100, 100), ... 
chunks=(100, 10, 10)) >>> d2 - + """ assert "mode" not in kwargs @@ -989,7 +989,7 @@ def require_dataset(self, name, shape, dtype=None, exact=False, **kwargs): Arrays are known as "datasets" in HDF5 terminology. For compatibility with h5py, Zarr groups also implement the create_dataset() method. - Other `kwargs` are as per :func:`zarr.hierarchy.Group.create_dataset`. + Other `kwargs` are as per :func:`zarr.v2.hierarchy.Group.create_dataset`. Parameters ---------- @@ -1049,7 +1049,7 @@ def _require_dataset_nosync(self, name, shape, dtype=None, exact=False, **kwargs def create(self, name, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.create`.""" + :func:`zarr.v2.creation.create`.""" return self._write_op(self._create_nosync, name, **kwargs) def _create_nosync(self, name, **kwargs): @@ -1060,7 +1060,7 @@ def _create_nosync(self, name, **kwargs): def empty(self, name, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.empty`.""" + :func:`zarr.v2.creation.empty`.""" return self._write_op(self._empty_nosync, name, **kwargs) def _empty_nosync(self, name, **kwargs): @@ -1071,7 +1071,7 @@ def _empty_nosync(self, name, **kwargs): def zeros(self, name, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.zeros`.""" + :func:`zarr.v2.creation.zeros`.""" return self._write_op(self._zeros_nosync, name, **kwargs) def _zeros_nosync(self, name, **kwargs): @@ -1082,7 +1082,7 @@ def _zeros_nosync(self, name, **kwargs): def ones(self, name, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.ones`.""" + :func:`zarr.v2.creation.ones`.""" return self._write_op(self._ones_nosync, name, **kwargs) def _ones_nosync(self, name, **kwargs): @@ -1093,7 +1093,7 @@ def _ones_nosync(self, name, **kwargs): def full(self, name, fill_value, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.full`.""" + :func:`zarr.v2.creation.full`.""" return self._write_op(self._full_nosync, name, fill_value, **kwargs) def _full_nosync(self, name, fill_value, **kwargs): @@ -1110,7 +1110,7 @@ def _full_nosync(self, name, fill_value, **kwargs): def array(self, name, data, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.array`.""" + :func:`zarr.v2.creation.array`.""" return self._write_op(self._array_nosync, name, data, **kwargs) def _array_nosync(self, name, data, **kwargs): @@ -1121,7 +1121,7 @@ def _array_nosync(self, name, data, **kwargs): def empty_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.empty_like`.""" + :func:`zarr.v2.creation.empty_like`.""" return self._write_op(self._empty_like_nosync, name, data, **kwargs) def _empty_like_nosync(self, name, data, **kwargs): @@ -1134,7 +1134,7 @@ def _empty_like_nosync(self, name, data, **kwargs): def zeros_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.zeros_like`.""" + :func:`zarr.v2.creation.zeros_like`.""" return self._write_op(self._zeros_like_nosync, name, data, **kwargs) def _zeros_like_nosync(self, name, data, **kwargs): @@ -1147,7 +1147,7 @@ def _zeros_like_nosync(self, name, data, **kwargs): def ones_like(self, name, data, **kwargs): """Create an array. 
Keyword arguments as per - :func:`zarr.creation.ones_like`.""" + :func:`zarr.v2.creation.ones_like`.""" return self._write_op(self._ones_like_nosync, name, data, **kwargs) def _ones_like_nosync(self, name, data, **kwargs): @@ -1160,7 +1160,7 @@ def _ones_like_nosync(self, name, data, **kwargs): def full_like(self, name, data, **kwargs): """Create an array. Keyword arguments as per - :func:`zarr.creation.full_like`.""" + :func:`zarr.v2.creation.full_like`.""" return self._write_op(self._full_like_nosync, name, data, **kwargs) def _full_like_nosync(self, name, data, **kwargs):
@@ -1252,23 +1252,23 @@ def group( Returns ------- - g : zarr.hierarchy.Group + g : zarr.v2.hierarchy.Group Examples -------- Create a group in memory:: >>> import zarr - >>> g = zarr.group() + >>> g = zarr.v2.group() >>> g - <zarr.hierarchy.Group '/'> + <zarr.v2.hierarchy.Group '/'> Create a group with a different store:: - >>> store = zarr.DirectoryStore('data/example.zarr') - >>> g = zarr.group(store=store, overwrite=True) + >>> store = zarr.v2.DirectoryStore('data/example.zarr') + >>> g = zarr.v2.group(store=store, overwrite=True) >>> g - <zarr.hierarchy.Group '/'> + <zarr.v2.hierarchy.Group '/'> """
@@ -1336,19 +1336,19 @@ def open_group( Returns ------- - g : zarr.hierarchy.Group + g : zarr.v2.hierarchy.Group Examples -------- >>> import zarr - >>> root = zarr.open_group('data/example.zarr', mode='w') + >>> root = zarr.v2.open_group('data/example.zarr', mode='w') >>> foo = root.create_group('foo') >>> bar = root.create_group('bar') >>> root - <zarr.hierarchy.Group '/'> - >>> root2 = zarr.open_group('data/example.zarr', mode='a') + <zarr.v2.hierarchy.Group '/'> + >>> root2 = zarr.v2.open_group('data/example.zarr', mode='a') >>> root2 - <zarr.hierarchy.Group '/'> + <zarr.v2.hierarchy.Group '/'> >>> root == root2 True
diff --git a/src/zarr/v2/indexing.py b/src/zarr/v2/indexing.py new file mode 100644 index 0000000000..1c11409d05 --- /dev/null +++ b/src/zarr/v2/indexing.py @@ -0,0 +1,1080 @@ +import collections +import itertools +import math +import numbers + +import numpy as np + + +from zarr.v2.errors import ( + ArrayIndexError, + NegativeStepError, + err_too_many_indices, + VindexInvalidSelectionError, + BoundsCheckError, +) + + +def is_integer(x): + """True if x is an integer (both pure Python or NumPy). + + Note that Python's bool is considered an integer too. + """ + return isinstance(x, numbers.Integral) + + +def is_integer_list(x): + """True if x is a list of integers. + + This function assumes, i.e. *does not check*, that all elements of the list + have the same type. Mixed type lists will result in other errors that will + bubble up anyway. + """ + return isinstance(x, list) and len(x) > 0 and is_integer(x[0]) + + +def is_integer_array(x, ndim=None): + t = not np.isscalar(x) and hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype.kind in "ui" + if ndim is not None: + t = t and len(x.shape) == ndim + return t + + +def is_bool_array(x, ndim=None): + t = hasattr(x, "shape") and hasattr(x, "dtype") and x.dtype == bool + if ndim is not None: + t = t and len(x.shape) == ndim + return t + + +def is_scalar(value, dtype): + if np.isscalar(value): + return True + if isinstance(value, tuple) and dtype.names and len(value) == len(dtype.names): + return True + return False + + +def is_pure_fancy_indexing(selection, ndim): + """Check whether a selection contains only scalars or integer array-likes. + + Parameters + ---------- + selection : tuple, slice, or scalar + A valid selection value for indexing into arrays. + + Returns + ------- + is_pure : bool + True if the selection is a pure fancy indexing expression (i.e. not mixed + with Boolean arrays or slices).
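+
+    Examples
+    --------
+    A few illustrative cases of the rules above:
+
+    >>> is_pure_fancy_indexing([0, 2, 4], ndim=1)
+    True
+    >>> is_pure_fancy_indexing((0, [1, 2]), ndim=2)
+    True
+    >>> is_pure_fancy_indexing((slice(None), [1, 2]), ndim=2)
+    False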
+ """ + if ndim == 1: + if is_integer_list(selection) or is_integer_array(selection): + return True + # if not, we go through the normal path below, because a 1-tuple + # of integers is also allowed. + no_slicing = ( + isinstance(selection, tuple) + and len(selection) == ndim + and not (any(isinstance(elem, slice) or elem is Ellipsis for elem in selection)) + ) + return ( + no_slicing + and all( + is_integer(elem) or is_integer_list(elem) or is_integer_array(elem) + for elem in selection + ) + and any(is_integer_list(elem) or is_integer_array(elem) for elem in selection) + ) + + +def is_pure_orthogonal_indexing(selection, ndim): + if not ndim: + return False + + # Case 1: Selection is a single iterable of integers + if is_integer_list(selection) or is_integer_array(selection, ndim=1): + return True + + # Case two: selection contains either zero or one integer iterables. + # All other selection elements are slices or integers + return ( + isinstance(selection, tuple) + and len(selection) == ndim + and sum(is_integer_list(elem) or is_integer_array(elem) for elem in selection) <= 1 + and all( + is_integer_list(elem) or is_integer_array(elem) or isinstance(elem, (int, slice)) + for elem in selection + ) + ) + + +def normalize_integer_selection(dim_sel, dim_len): + # normalize type to int + dim_sel = int(dim_sel) + + # handle wraparound + if dim_sel < 0: + dim_sel = dim_len + dim_sel + + # handle out of bounds + if dim_sel >= dim_len or dim_sel < 0: + raise BoundsCheckError(dim_len) + + return dim_sel + + +ChunkDimProjection = collections.namedtuple( + "ChunkDimProjection", ("dim_chunk_ix", "dim_chunk_sel", "dim_out_sel") +) +"""A mapping from chunk to output array for a single dimension. + +Parameters +---------- +dim_chunk_ix + Index of chunk. +dim_chunk_sel + Selection of items from chunk array. +dim_out_sel + Selection of items in target (output) array. 
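+
+Notes
+-----
+A ``dim_out_sel`` of ``None`` means the dimension is dropped from the
+output array, as for a single integer selection (``IntDimIndexer`` below
+always yields ``dim_out_sel=None``).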
+ +""" + + +class IntDimIndexer: + def __init__(self, dim_sel, dim_len, dim_chunk_len): + # normalize + dim_sel = normalize_integer_selection(dim_sel, dim_len) + + # store attributes + self.dim_sel = dim_sel + self.dim_len = dim_len + self.dim_chunk_len = dim_chunk_len + self.nitems = 1 + + def __iter__(self): + dim_chunk_ix = self.dim_sel // self.dim_chunk_len + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_chunk_sel = self.dim_sel - dim_offset + dim_out_sel = None + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +def ceildiv(a, b): + return math.ceil(a / b) + + +class SliceDimIndexer: + def __init__(self, dim_sel, dim_len, dim_chunk_len): + # normalize + self.start, self.stop, self.step = dim_sel.indices(dim_len) + if self.step < 1: + raise NegativeStepError() + + # store attributes + self.dim_len = dim_len + self.dim_chunk_len = dim_chunk_len + self.nitems = max(0, ceildiv((self.stop - self.start), self.step)) + self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) + + def __iter__(self): + # figure out the range of chunks we need to visit + dim_chunk_ix_from = self.start // self.dim_chunk_len + dim_chunk_ix_to = ceildiv(self.stop, self.dim_chunk_len) + + # iterate over chunks in range + for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): + # compute offsets for chunk within overall array + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) + + # determine chunk length, accounting for trailing chunk + dim_chunk_len = dim_limit - dim_offset + + if self.start < dim_offset: + # selection starts before current chunk + dim_chunk_sel_start = 0 + remainder = (dim_offset - self.start) % self.step + if remainder: + dim_chunk_sel_start += self.step - remainder + # compute number of previous items, provides offset into output array + dim_out_offset = ceildiv((dim_offset - self.start), self.step) + + else: + # selection starts within current chunk + dim_chunk_sel_start = self.start - dim_offset + dim_out_offset = 0 + + if self.stop > dim_limit: + # selection ends after current chunk + dim_chunk_sel_stop = dim_chunk_len + + else: + # selection ends within current chunk + dim_chunk_sel_stop = self.stop - dim_offset + + dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) + dim_chunk_nitems = ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) + + # If there are no elements on the selection within this chunk, then skip + if dim_chunk_nitems == 0: + continue + + dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems) + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +def check_selection_length(selection, shape): + if len(selection) > len(shape): + err_too_many_indices(selection, shape) + + +def replace_ellipsis(selection, shape): + selection = ensure_tuple(selection) + + # count number of ellipsis present + n_ellipsis = sum(1 for i in selection if i is Ellipsis) + + if n_ellipsis > 1: + # more than 1 is an error + raise IndexError("an index can only have a single ellipsis ('...')") + + elif n_ellipsis == 1: + # locate the ellipsis, count how many items to left and right + n_items_l = selection.index(Ellipsis) # items to left of ellipsis + n_items_r = len(selection) - (n_items_l + 1) # items to right of ellipsis + n_items = len(selection) - 1 # all non-ellipsis items + + if n_items >= len(shape): + # ellipsis does nothing, just remove it + selection = tuple(i for i in selection if i != Ellipsis) + + else: + # replace 
ellipsis with as many slices are needed for number of dims + new_item = selection[:n_items_l] + ((slice(None),) * (len(shape) - n_items)) + if n_items_r: + new_item += selection[-n_items_r:] + selection = new_item + + # fill out selection if not completely specified + if len(selection) < len(shape): + selection += (slice(None),) * (len(shape) - len(selection)) + + # check selection not too long + check_selection_length(selection, shape) + + return selection + + +def replace_lists(selection): + return tuple( + np.asarray(dim_sel) if isinstance(dim_sel, list) else dim_sel for dim_sel in selection + ) + + +def ensure_tuple(v): + if not isinstance(v, tuple): + v = (v,) + return v + + +ChunkProjection = collections.namedtuple( + "ChunkProjection", ("chunk_coords", "chunk_selection", "out_selection") +) +"""A mapping of items from chunk to output array. Can be used to extract items from the +chunk array for loading into an output array. Can also be used to extract items from a +value array for setting/updating in a chunk array. + +Parameters +---------- +chunk_coords + Indices of chunk. +chunk_selection + Selection of items from chunk array. +out_selection + Selection of items in target (output) array. + +""" + + +def is_slice(s): + return isinstance(s, slice) + + +def is_contiguous_slice(s): + return is_slice(s) and (s.step is None or s.step == 1) + + +def is_positive_slice(s): + return is_slice(s) and (s.step is None or s.step >= 1) + + +def is_contiguous_selection(selection): + selection = ensure_tuple(selection) + return all((is_integer_array(s) or is_contiguous_slice(s) or s == Ellipsis) for s in selection) + + +def is_basic_selection(selection): + selection = ensure_tuple(selection) + return all(is_integer(s) or is_positive_slice(s) for s in selection) + + +# noinspection PyProtectedMember +class BasicIndexer: + def __init__(self, selection, array): + # handle ellipsis + selection = replace_ellipsis(selection, array._shape) + + # setup per-dimension indexers + dim_indexers = [] + for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): + if is_integer(dim_sel): + dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif is_slice(dim_sel): + dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + + else: + raise IndexError( + "unsupported selection item for basic indexing; " + "expected integer or slice, got {!r}".format(type(dim_sel)) + ) + + dim_indexers.append(dim_indexer) + + self.dim_indexers = dim_indexers + self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) + self.drop_axes = None + + def __iter__(self): + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +class BoolArrayDimIndexer: + def __init__(self, dim_sel, dim_len, dim_chunk_len): + # check number of dimensions + if not is_bool_array(dim_sel, 1): + raise IndexError( + "Boolean arrays in an orthogonal selection must " "be 1-dimensional only" + ) + + # check shape + if dim_sel.shape[0] != dim_len: + raise IndexError( + "Boolean array has the wrong length for dimension; " "expected {}, got {}".format( + dim_len, dim_sel.shape[0] + ) + ) + + # store attributes + self.dim_sel = dim_sel + self.dim_len = dim_len + self.dim_chunk_len = 
dim_chunk_len + self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) + + # precompute number of selected items for each chunk + self.chunk_nitems = np.zeros(self.nchunks, dtype="i8") + for dim_chunk_ix in range(self.nchunks): + dim_offset = dim_chunk_ix * self.dim_chunk_len + self.chunk_nitems[dim_chunk_ix] = np.count_nonzero( + self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] + ) + self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) + self.nitems = self.chunk_nitems_cumsum[-1] + self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] + + def __iter__(self): + # iterate over chunks with at least one item + for dim_chunk_ix in self.dim_chunk_ixs: + # find region in chunk + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_chunk_sel = self.dim_sel[dim_offset : dim_offset + self.dim_chunk_len] + + # pad out if final chunk + if dim_chunk_sel.shape[0] < self.dim_chunk_len: + tmp = np.zeros(self.dim_chunk_len, dtype=bool) + tmp[: dim_chunk_sel.shape[0]] = dim_chunk_sel + dim_chunk_sel = tmp + + # find region in output + if dim_chunk_ix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] + stop = self.chunk_nitems_cumsum[dim_chunk_ix] + dim_out_sel = slice(start, stop) + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +class Order: + UNKNOWN = 0 + INCREASING = 1 + DECREASING = 2 + UNORDERED = 3 + + @staticmethod + def check(a): + diff = np.diff(a) + diff_positive = diff >= 0 + n_diff_positive = np.count_nonzero(diff_positive) + all_increasing = n_diff_positive == len(diff_positive) + any_increasing = n_diff_positive > 0 + if all_increasing: + order = Order.INCREASING + elif any_increasing: + order = Order.UNORDERED + else: + order = Order.DECREASING + return order + + +def wraparound_indices(x, dim_len): + loc_neg = x < 0 + if np.any(loc_neg): + x[loc_neg] = x[loc_neg] + dim_len + + +def boundscheck_indices(x, dim_len): + if np.any(x < 0) or np.any(x >= dim_len): + raise BoundsCheckError(dim_len) + + +class IntArrayDimIndexer: + """Integer array selection against a single dimension.""" + + def __init__( + self, + dim_sel, + dim_len, + dim_chunk_len, + wraparound=True, + boundscheck=True, + order=Order.UNKNOWN, + ): + # ensure 1d array + dim_sel = np.asanyarray(dim_sel) + if not is_integer_array(dim_sel, 1): + raise IndexError( + "integer arrays in an orthogonal selection must be " "1-dimensional only" + ) + + # handle wraparound + if wraparound: + wraparound_indices(dim_sel, dim_len) + + # handle out of bounds + if boundscheck: + boundscheck_indices(dim_sel, dim_len) + + # store attributes + self.dim_len = dim_len + self.dim_chunk_len = dim_chunk_len + self.nchunks = ceildiv(self.dim_len, self.dim_chunk_len) + self.nitems = len(dim_sel) + + # determine which chunk is needed for each selection item + # note: for dense integer selections, the division operation here is the + # bottleneck + dim_sel_chunk = dim_sel // dim_chunk_len + + # determine order of indices + if order == Order.UNKNOWN: + order = Order.check(dim_sel) + self.order = order + + if self.order == Order.INCREASING: + self.dim_sel = dim_sel + self.dim_out_sel = None + elif self.order == Order.DECREASING: + self.dim_sel = dim_sel[::-1] + # TODO should be possible to do this without creating an arange + self.dim_out_sel = np.arange(self.nitems - 1, -1, -1) + else: + # sort indices to group by chunk + self.dim_out_sel = np.argsort(dim_sel_chunk) + self.dim_sel = np.take(dim_sel, self.dim_out_sel) + + # precompute number of selected items for each chunk + 
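# chunk_nitems[i] counts how many of the selected indices fall into chunk i
+ # along this dimension (np.bincount over the per-item chunk numbers); the
+ # cumulative sum below turns these counts into offsets into the output axis.
+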
self.chunk_nitems = np.bincount(dim_sel_chunk, minlength=self.nchunks) + + # find chunks that we need to visit + self.dim_chunk_ixs = np.nonzero(self.chunk_nitems)[0] + + # compute offsets into the output array + self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) + + def __iter__(self): + for dim_chunk_ix in self.dim_chunk_ixs: + # find region in output + if dim_chunk_ix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[dim_chunk_ix - 1] + stop = self.chunk_nitems_cumsum[dim_chunk_ix] + if self.order == Order.INCREASING: + dim_out_sel = slice(start, stop) + else: + dim_out_sel = self.dim_out_sel[start:stop] + + # find region in chunk + dim_offset = dim_chunk_ix * self.dim_chunk_len + dim_chunk_sel = self.dim_sel[start:stop] - dim_offset + + yield ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) + + +def slice_to_range(s: slice, l: int): # noqa: E741 + return range(*s.indices(l)) + + +def ix_(selection, shape): + """Convert an orthogonal selection to a numpy advanced (fancy) selection, like numpy.ix_ + but with support for slices and single ints.""" + + # normalisation + selection = replace_ellipsis(selection, shape) + + # replace slice and int as these are not supported by numpy.ix_ + selection = [ + slice_to_range(dim_sel, dim_len) + if isinstance(dim_sel, slice) + else [dim_sel] + if is_integer(dim_sel) + else dim_sel + for dim_sel, dim_len in zip(selection, shape) + ] + + # now get numpy to convert to a coordinate selection + selection = np.ix_(*selection) + + return selection + + +def oindex(a, selection): + """Implementation of orthogonal indexing with slices and ints.""" + selection = replace_ellipsis(selection, a.shape) + drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) + selection = ix_(selection, a.shape) + result = a[selection] + if drop_axes: + result = result.squeeze(axis=drop_axes) + return result + + +def oindex_set(a, selection, value): + selection = replace_ellipsis(selection, a.shape) + drop_axes = tuple(i for i, s in enumerate(selection) if is_integer(s)) + selection = ix_(selection, a.shape) + if not np.isscalar(value) and drop_axes: + value = np.asanyarray(value) + value_selection = [slice(None)] * len(a.shape) + for i in drop_axes: + value_selection[i] = np.newaxis + value_selection = tuple(value_selection) + value = value[value_selection] + a[selection] = value + + +# noinspection PyProtectedMember +class OrthogonalIndexer: + def __init__(self, selection, array): + # handle ellipsis + selection = replace_ellipsis(selection, array._shape) + + # normalize list to array + selection = replace_lists(selection) + + # setup per-dimension indexers + dim_indexers = [] + for dim_sel, dim_len, dim_chunk_len in zip(selection, array._shape, array._chunks): + if is_integer(dim_sel): + dim_indexer = IntDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif isinstance(dim_sel, slice): + dim_indexer = SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif is_integer_array(dim_sel): + dim_indexer = IntArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + + elif is_bool_array(dim_sel): + dim_indexer = BoolArrayDimIndexer(dim_sel, dim_len, dim_chunk_len) + + else: + raise IndexError( + "unsupported selection item for orthogonal indexing; " + "expected integer, slice, integer array or Boolean " + "array, got {!r}".format(type(dim_sel)) + ) + + dim_indexers.append(dim_indexer) + + self.array = array + self.dim_indexers = dim_indexers + self.shape = tuple(s.nitems for s in self.dim_indexers if not isinstance(s, IntDimIndexer)) + 
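# a selection is "advanced" when it contains an integer or Boolean array;
+ # np.ix_-style fancy indexing keeps integer-selected dimensions in the
+ # result, so they are recorded in drop_axes and squeezed out of the result
+ # afterwards (see oindex above for the same pattern).
+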
self.is_advanced = not is_basic_selection(selection) + if self.is_advanced: + self.drop_axes = tuple( + i + for i, dim_indexer in enumerate(self.dim_indexers) + if isinstance(dim_indexer, IntDimIndexer) + ) + else: + self.drop_axes = None + + def __iter__(self): + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) + + # handle advanced indexing arrays orthogonally + if self.is_advanced: + # N.B., numpy doesn't support orthogonal indexing directly as yet, + # so need to work around via np.ix_. Also np.ix_ does not support a + # mixture of arrays and slices or integers, so need to convert slices + # and integers into ranges. + chunk_selection = ix_(chunk_selection, self.array._chunks) + + # special case for non-monotonic indices + if not is_basic_selection(out_selection): + out_selection = ix_(out_selection, self.shape) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +class OIndex: + def __init__(self, array): + self.array = array + + def __getitem__(self, selection): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + return self.array.get_orthogonal_selection(selection, fields=fields) + + def __setitem__(self, selection, value): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + return self.array.set_orthogonal_selection(selection, value, fields=fields) + + +# noinspection PyProtectedMember +class BlockIndexer: + def __init__(self, selection, array): + # handle ellipsis + selection = replace_ellipsis(selection, array._shape) + + # normalize list to array + selection = replace_lists(selection) + + # setup per-dimension indexers + dim_indexers = [] + for dim_sel, dim_len, dim_chunk_size in zip(selection, array._shape, array._chunks): + dim_numchunks = int(np.ceil(dim_len / dim_chunk_size)) + + if is_integer(dim_sel): + if dim_sel < 0: + dim_sel = dim_numchunks + dim_sel + + start = dim_sel * dim_chunk_size + stop = start + dim_chunk_size + slice_ = slice(start, stop) + + elif is_slice(dim_sel): + start = dim_sel.start if dim_sel.start is not None else 0 + stop = dim_sel.stop if dim_sel.stop is not None else dim_numchunks + + if dim_sel.step not in {1, None}: + raise IndexError( + "unsupported selection item for block indexing; " + "expected integer or slice with step=1, got {!r}".format(type(dim_sel)) + ) + + # Can't reuse wraparound_indices because it expects a numpy array + # We have integers here. 
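+ # negative block indices wrap around relative to the number of chunks
+ # (dim_numchunks), mirroring negative indexing on ordinary sequences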
+ if start < 0: + start = dim_numchunks + start + if stop < 0: + stop = dim_numchunks + stop + + start = start * dim_chunk_size + stop = stop * dim_chunk_size + slice_ = slice(start, stop) + + else: + raise IndexError( + "unsupported selection item for block indexing; " + "expected integer or slice, got {!r}".format(type(dim_sel)) + ) + + dim_indexer = SliceDimIndexer(slice_, dim_len, dim_chunk_size) + dim_indexers.append(dim_indexer) + + if start >= dim_len or start < 0: + raise BoundsCheckError(dim_len) + + self.dim_indexers = dim_indexers + self.shape = tuple(s.nitems for s in self.dim_indexers) + self.drop_axes = None + + def __iter__(self): + for dim_projections in itertools.product(*self.dim_indexers): + chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) + chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) + out_selection = tuple( + p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None + ) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +class BlockIndex: + def __init__(self, array): + self.array = array + + def __getitem__(self, selection): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + return self.array.get_block_selection(selection, fields=fields) + + def __setitem__(self, selection, value): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + return self.array.set_block_selection(selection, value, fields=fields) + + +# noinspection PyProtectedMember +def is_coordinate_selection(selection, array): + return (len(selection) == len(array._shape)) and all( + is_integer(dim_sel) or is_integer_array(dim_sel) for dim_sel in selection + ) + + +# noinspection PyProtectedMember +def is_mask_selection(selection, array): + return ( + len(selection) == 1 and is_bool_array(selection[0]) and selection[0].shape == array._shape + ) + + +# noinspection PyProtectedMember +class CoordinateIndexer: + def __init__(self, selection, array): + # some initial normalization + selection = ensure_tuple(selection) + selection = tuple([i] if is_integer(i) else i for i in selection) + selection = replace_lists(selection) + + # validation + if not is_coordinate_selection(selection, array): + raise IndexError( + "invalid coordinate selection; expected one integer " + "(coordinate) array per dimension of the target array, " + "got {!r}".format(selection) + ) + + # handle wraparound, boundscheck + for dim_sel, dim_len in zip(selection, array.shape): + # handle wraparound + wraparound_indices(dim_sel, dim_len) + + # handle out of bounds + boundscheck_indices(dim_sel, dim_len) + + # compute chunk index for each point in the selection + chunks_multi_index = tuple( + dim_sel // dim_chunk_len for (dim_sel, dim_chunk_len) in zip(selection, array._chunks) + ) + + # broadcast selection - this will raise error if array dimensions don't match + selection = np.broadcast_arrays(*selection) + chunks_multi_index = np.broadcast_arrays(*chunks_multi_index) + + # remember shape of selection, because we will flatten indices for processing + self.sel_shape = selection[0].shape if selection[0].shape else (1,) + + # flatten selection + selection = [dim_sel.reshape(-1) for dim_sel in selection] + chunks_multi_index = [dim_chunks.reshape(-1) for dim_chunks in chunks_multi_index] + + # ravel chunk indices + chunks_raveled_indices = np.ravel_multi_index(chunks_multi_index, dims=array._cdata_shape) + + # group points by chunk + if 
np.any(np.diff(chunks_raveled_indices) < 0): + # optimisation, only sort if needed + sel_sort = np.argsort(chunks_raveled_indices) + selection = tuple(dim_sel[sel_sort] for dim_sel in selection) + else: + sel_sort = None + + # store attributes + self.selection = selection + self.sel_sort = sel_sort + self.shape = selection[0].shape if selection[0].shape else (1,) + self.drop_axes = None + self.array = array + + # precompute number of selected items for each chunk + self.chunk_nitems = np.bincount(chunks_raveled_indices, minlength=array.nchunks) + self.chunk_nitems_cumsum = np.cumsum(self.chunk_nitems) + # locate the chunks we need to process + self.chunk_rixs = np.nonzero(self.chunk_nitems)[0] + + # unravel chunk indices + self.chunk_mixs = np.unravel_index(self.chunk_rixs, array._cdata_shape) + + def __iter__(self): + # iterate over chunks + for i, chunk_rix in enumerate(self.chunk_rixs): + chunk_coords = tuple(m[i] for m in self.chunk_mixs) + if chunk_rix == 0: + start = 0 + else: + start = self.chunk_nitems_cumsum[chunk_rix - 1] + stop = self.chunk_nitems_cumsum[chunk_rix] + if self.sel_sort is None: + out_selection = slice(start, stop) + else: + out_selection = self.sel_sort[start:stop] + + chunk_offsets = tuple( + dim_chunk_ix * dim_chunk_len + for dim_chunk_ix, dim_chunk_len in zip(chunk_coords, self.array._chunks) + ) + chunk_selection = tuple( + dim_sel[start:stop] - dim_chunk_offset + for (dim_sel, dim_chunk_offset) in zip(self.selection, chunk_offsets) + ) + + yield ChunkProjection(chunk_coords, chunk_selection, out_selection) + + +# noinspection PyProtectedMember +class MaskIndexer(CoordinateIndexer): + def __init__(self, selection, array): + # some initial normalization + selection = ensure_tuple(selection) + selection = replace_lists(selection) + + # validation + if not is_mask_selection(selection, array): + raise IndexError( + "invalid mask selection; expected one Boolean (mask)" + "array with the same shape as the target array, got {!r}".format(selection) + ) + + # convert to indices + selection = np.nonzero(selection[0]) + + # delegate the rest to superclass + super().__init__(selection, array) + + +class VIndex: + def __init__(self, array): + self.array = array + + def __getitem__(self, selection): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + if is_coordinate_selection(selection, self.array): + return self.array.get_coordinate_selection(selection, fields=fields) + elif is_mask_selection(selection, self.array): + return self.array.get_mask_selection(selection, fields=fields) + else: + raise VindexInvalidSelectionError(selection) + + def __setitem__(self, selection, value): + fields, selection = pop_fields(selection) + selection = ensure_tuple(selection) + selection = replace_lists(selection) + if is_coordinate_selection(selection, self.array): + self.array.set_coordinate_selection(selection, value, fields=fields) + elif is_mask_selection(selection, self.array): + self.array.set_mask_selection(selection, value, fields=fields) + else: + raise VindexInvalidSelectionError(selection) + + +def check_fields(fields, dtype): + # early out + if fields is None: + return dtype + # check type + if not isinstance(fields, (str, list, tuple)): + raise IndexError( + "'fields' argument must be a string or list of strings; found " "{!r}".format( + type(fields) + ) + ) + if fields: + if dtype.names is None: + raise IndexError("invalid 'fields' argument, array does not have any fields") + try: + if isinstance(fields, 
str): + # single field selection + out_dtype = dtype[fields] + else: + # multiple field selection + out_dtype = np.dtype([(f, dtype[f]) for f in fields]) + except KeyError as e: + raise IndexError("invalid 'fields' argument, field not found: {!r}".format(e)) + else: + return out_dtype + else: + return dtype + + +def check_no_multi_fields(fields): + if isinstance(fields, list): + if len(fields) == 1: + return fields[0] + elif len(fields) > 1: + raise IndexError("multiple fields are not supported for this operation") + return fields + + +def pop_fields(selection): + if isinstance(selection, str): + # single field selection + fields = selection + selection = () + elif not isinstance(selection, tuple): + # single selection item, no fields + fields = None + # leave selection as-is + else: + # multiple items, split fields from selection items + fields = [f for f in selection if isinstance(f, str)] + fields = fields[0] if len(fields) == 1 else fields + selection = tuple(s for s in selection if not isinstance(s, str)) + selection = selection[0] if len(selection) == 1 else selection + return fields, selection + + +def make_slice_selection(selection): + ls = [] + for dim_selection in selection: + if is_integer(dim_selection): + ls.append(slice(int(dim_selection), int(dim_selection) + 1, 1)) + elif isinstance(dim_selection, np.ndarray): + if len(dim_selection) == 1: + ls.append(slice(int(dim_selection[0]), int(dim_selection[0]) + 1, 1)) + else: + raise ArrayIndexError() + else: + ls.append(dim_selection) + return ls + + +class PartialChunkIterator: + """Iterator to retrieve the specific coordinates of requested data + from within a compressed chunk. + + Parameters + ---------- + selection : tuple + tuple of slice objects to take from the chunk + arr_shape : shape of chunk to select data from + + Attributes + ----------- + arr_shape + selection + + Returns + ------- + Tuple with 3 elements: + + start: int + elements offset in the chunk to read from + nitems: int + number of elements to read in the chunk from start + partial_out_selection: list of slices + indices of a temporary empty array of size `Array._chunks` to assign + the decompressed data to after the partial read. + + Notes + ----- + An array is flattened when compressed with blosc, so this iterator takes + the wanted selection of an array and determines the wanted coordinates + of the flattened, compressed data to be read and then decompressed. The + decompressed data is then placed in a temporary empty array of size + `Array._chunks` at the indices yielded as partial_out_selection. + Once all the slices yielded by this iterator have been read, decompressed + and written to the temporary array, the wanted slice of the chunk can be + indexed from the temporary array and written to the out_selection slice + of the out array. 
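+
+    Examples
+    --------
+    An illustrative sketch of the yielded values (slices must carry an
+    explicit integer step, as produced by the indexers in this module).
+    Selecting all three rows of a (3, 3) chunk is one contiguous run of
+    9 items starting at offset 0:
+
+    >>> pci = PartialChunkIterator((slice(0, 3, 1),), (3, 3))
+    >>> [(int(start), int(nitems)) for start, nitems, _ in pci]
+    [(0, 9)]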
+ + """ + + def __init__(self, selection, arr_shape): + selection = make_slice_selection(selection) + self.arr_shape = arr_shape + + # number of selection dimensions can't be greater than the number of chunk dimensions + if len(selection) > len(self.arr_shape): + raise ValueError( + "Selection has more dimensions than the array:\n" + f"selection dimensions = {len(selection)}\n" + f"array dimensions = {len(self.arr_shape)}" + ) + + # no part of the selection may fall outside the bounds of the chunk + selection_shape = np.empty(self.arr_shape)[tuple(selection)].shape + if any( + selection_dim < 0 or selection_dim > arr_dim + for selection_dim, arr_dim in zip(selection_shape, self.arr_shape) + ): + raise IndexError( + "a selection index is out of range for the dimension" + ) # pragma: no cover + + for i, dim_size in enumerate(self.arr_shape[::-1]): + index = len(self.arr_shape) - (i + 1) + if index <= len(selection) - 1: + slice_size = selection_shape[index] + if slice_size == dim_size and index > 0: + selection.pop() + else: + break + + chunk_loc_slices = [] + last_dim_slice = None if selection[-1].step > 1 else selection.pop() + for arr_shape_i, sl in zip(arr_shape, selection): + dim_chunk_loc_slices = [] + assert isinstance(sl, slice) + for x in slice_to_range(sl, arr_shape_i): + dim_chunk_loc_slices.append(slice(x, x + 1, 1)) + chunk_loc_slices.append(dim_chunk_loc_slices) + if last_dim_slice: + chunk_loc_slices.append([last_dim_slice]) + self.chunk_loc_slices = list(itertools.product(*chunk_loc_slices)) + + def __iter__(self): + chunk1 = self.chunk_loc_slices[0] + nitems = (chunk1[-1].stop - chunk1[-1].start) * np.prod( + self.arr_shape[len(chunk1) :], dtype=int + ) + for partial_out_selection in self.chunk_loc_slices: + start = 0 + for i, sl in enumerate(partial_out_selection): + start += sl.start * np.prod(self.arr_shape[i + 1 :], dtype=int) + yield start, nitems, partial_out_selection
diff --git a/src/zarr/meta.py b/src/zarr/v2/meta.py similarity index 99% rename from src/zarr/meta.py rename to src/zarr/v2/meta.py index 7cca228a14..ee9cc57389 100644 --- a/src/zarr/meta.py +++ b/src/zarr/v2/meta.py @@ -4,8 +4,8 @@ import numpy as np -from zarr.errors import MetadataError -from zarr.util import json_dumps, json_loads +from zarr.v2.errors import MetadataError +from zarr.v2.util import json_dumps, json_loads from typing import cast, Union, Any, List, Mapping as MappingType, TYPE_CHECKING
diff --git a/src/zarr/meta_v1.py b/src/zarr/v2/meta_v1.py similarity index 97% rename from src/zarr/meta_v1.py rename to src/zarr/v2/meta_v1.py index 4ac381f2ca..881b9191eb 100644 --- a/src/zarr/meta_v1.py +++ b/src/zarr/v2/meta_v1.py @@ -2,7 +2,7 @@ import numpy as np -from zarr.errors import MetadataError +from zarr.v2.errors import MetadataError def decode_metadata(b):
diff --git a/src/zarr/n5.py b/src/zarr/v2/n5.py similarity index 98% rename from src/zarr/n5.py rename to src/zarr/v2/n5.py index 1293d1739b..92b0f37924 100644 --- a/src/zarr/n5.py +++ b/src/zarr/v2/n5.py @@ -11,12 +11,12 @@ from numcodecs.compat import ndarray_copy from numcodecs.registry import get_codec, register_codec -from .meta import ZARR_FORMAT, json_dumps, json_loads -from .storage import FSStore -from .storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path -from .storage import array_meta_key as zarr_array_meta_key -from .storage import attrs_key as zarr_attrs_key -from .storage import group_meta_key as zarr_group_meta_key +from zarr.v2.meta import ZARR_FORMAT, json_dumps, json_loads +from zarr.v2.storage
import FSStore +from zarr.v2.storage import NestedDirectoryStore, _prog_ckey, _prog_number, normalize_storage_path +from zarr.v2.storage import array_meta_key as zarr_array_meta_key +from zarr.v2.storage import attrs_key as zarr_attrs_key +from zarr.v2.storage import group_meta_key as zarr_group_meta_key N5_FORMAT = "2.0.0" diff --git a/src/zarr/storage.py b/src/zarr/v2/storage.py similarity index 96% rename from src/zarr/storage.py rename to src/zarr/v2/storage.py index ae596756f8..56deeeb555 100644 --- a/src/zarr/storage.py +++ b/src/zarr/v2/storage.py @@ -38,9 +38,9 @@ from numcodecs.compat import ensure_bytes, ensure_text, ensure_contiguous_ndarray_like from numcodecs.registry import codec_registry -from zarr.context import Context +from zarr.v2.context import Context -from zarr.errors import ( +from zarr.v2.errors import ( MetadataError, BadCompressorError, ContainsArrayError, @@ -48,8 +48,8 @@ FSPathExistNotDir, ReadOnlyError, ) -from zarr.meta import encode_array_metadata, encode_group_metadata -from zarr.util import ( +from zarr.v2.meta import encode_array_metadata, encode_group_metadata +from zarr.v2.util import ( buffer_size, json_loads, nolock, @@ -64,8 +64,8 @@ ensure_contiguous_ndarray_or_bytes, ) -from zarr._storage.absstore import ABSStore # noqa: F401 -from zarr._storage.store import ( # noqa: F401 +from zarr.v2._storage.absstore import ABSStore # noqa: F401 +from zarr.v2._storage.store import ( # noqa: F401 _listdir_from_keys, _rename_from_keys, _rmdir_from_keys, @@ -89,11 +89,11 @@ try: # noinspection PyUnresolvedReferences - from zarr.codecs import Blosc + from zarr.v2.codecs import Blosc default_compressor = Blosc() except ImportError: # pragma: no cover - from zarr.codecs import Zlib + from zarr.v2.codecs import Zlib default_compressor = Zlib() @@ -146,7 +146,7 @@ def normalize_store_arg(store: Any, storage_options=None, mode="r") -> BaseStore if store.endswith(".zip"): return ZipStore(store, mode=mode) elif store.endswith(".n5"): - from zarr.n5 import N5Store + from zarr.v2.n5 import N5Store return N5Store(store) else: @@ -310,7 +310,7 @@ def init_array( -------- Initialize an array store:: - >>> from zarr.storage import init_array, KVStore + >>> from zarr.v2.storage import init_array, KVStore >>> store = KVStore(dict()) >>> init_array(store, shape=(10000, 10000), chunks=(1000, 1000)) >>> sorted(store.keys()) @@ -649,16 +649,16 @@ class MemoryStore(Store): This is the default class used when creating a group. E.g.:: >>> import zarr - >>> g = zarr.group() + >>> g = zarr.v2.group() >>> type(g.store) - + Note that the default class when creating an array is the built-in :class:`KVStore` class, i.e.:: - >>> z = zarr.zeros(100) + >>> z = zarr.v2.zeros(100) >>> type(z.store) - + Notes ----- @@ -860,8 +860,8 @@ class DirectoryStore(Store): Store a single array:: >>> import zarr - >>> store = zarr.DirectoryStore('data/array.zarr') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> store = zarr.v2.DirectoryStore('data/array.zarr') + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] 
= 42 Each chunk of the array is stored as a separate file on the file system, @@ -873,8 +873,8 @@ class DirectoryStore(Store): Store a group:: - >>> store = zarr.DirectoryStore('data/group.zarr') - >>> root = zarr.group(store=store, overwrite=True) + >>> store = zarr.v2.DirectoryStore('data/group.zarr') + >>> root = zarr.v2.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42 @@ -1447,8 +1447,8 @@ class NestedDirectoryStore(DirectoryStore): Store a single array:: >>> import zarr - >>> store = zarr.NestedDirectoryStore('data/array.zarr') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> store = zarr.v2.NestedDirectoryStore('data/array.zarr') + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 Each chunk of the array is stored as a separate file on the file system, @@ -1464,8 +1464,8 @@ class NestedDirectoryStore(DirectoryStore): Store a group:: - >>> store = zarr.NestedDirectoryStore('data/group.zarr') - >>> root = zarr.group(store=store, overwrite=True) + >>> store = zarr.v2.NestedDirectoryStore('data/group.zarr') + >>> root = zarr.v2.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42 @@ -1536,15 +1536,15 @@ class ZipStore(Store): Store a single array:: >>> import zarr - >>> store = zarr.ZipStore('data/array.zip', mode='w') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store) + >>> store = zarr.v2.ZipStore('data/array.zip', mode='w') + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store) >>> z[...] = 42 >>> store.close() # don't forget to call this when you're done Store a group:: - >>> store = zarr.ZipStore('data/group.zip', mode='w') - >>> root = zarr.group(store=store) + >>> store = zarr.v2.ZipStore('data/group.zip', mode='w') + >>> root = zarr.v2.group(store=store) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42 @@ -1555,8 +1555,8 @@ class ZipStore(Store): class also supports the context manager protocol, which ensures the ``close()`` method is called on leaving the context, e.g.:: - >>> with zarr.ZipStore('data/array.zip', mode='w') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store) + >>> with zarr.v2.ZipStore('data/array.zip', mode='w') as store: + ... z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store) ... z[...] = 42 ... # no need to call store.close() @@ -1569,8 +1569,8 @@ class also supports the context manager protocol, which ensures the ``close()`` triggered if you attempt to write data to a Zarr array more than once, e.g.:: - >>> store = zarr.ZipStore('data/example.zip', mode='w') - >>> z = zarr.zeros(100, chunks=10, store=store) + >>> store = zarr.v2.ZipStore('data/example.zip', mode='w') + >>> z = zarr.v2.zeros(100, chunks=10, store=store) >>> # first write OK ... z[...] = 42 >>> # second write generates warnings @@ -1581,22 +1581,22 @@ class also supports the context manager protocol, which ensures the ``close()`` once to a Zarr array, but the write operations are not aligned with chunk boundaries, e.g.:: - >>> store = zarr.ZipStore('data/example.zip', mode='w') - >>> z = zarr.zeros(100, chunks=10, store=store) + >>> store = zarr.v2.ZipStore('data/example.zip', mode='w') + >>> z = zarr.v2.zeros(100, chunks=10, store=store) >>> z[5:15] = 42 >>> # write overlaps chunk previously written, generates warnings ... 
z[15:25] = 42 # doctest: +SKIP To avoid creating duplicate entries, only write data once, and align writes with chunk boundaries. This alignment is done automatically if you call - ``z[...] = ...`` or create an array from existing data via :func:`zarr.array`. + ``z[...] = ...`` or create an array from existing data via :func:`zarr.v2.array`. Alternatively, use a :class:`DirectoryStore` when writing the data, then manually Zip the directory and use the Zip file for subsequent reads. Take note that the files in the Zip file must be relative to the root of the Zarr archive. You may find it easier to create such a Zip file with ``7z``, e.g.:: 7z a -tzip archive.zarr.zip archive.zarr/. Safe to write in multiple threads but not in multiple processes.
@@ -1841,15 +1841,15 @@ class DBMStore(Store): Store a single array:: >>> import zarr - >>> store = zarr.DBMStore('data/array.db') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> store = zarr.v2.DBMStore('data/array.db') + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 >>> store.close() # don't forget to call this when you're done Store a group:: - >>> store = zarr.DBMStore('data/group.db') - >>> root = zarr.group(store=store, overwrite=True) + >>> store = zarr.v2.DBMStore('data/group.db') + >>> root = zarr.v2.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42
@@ -1860,8 +1860,8 @@ class DBMStore(Store): DBMStore class also supports the context manager protocol, which ensures the ``close()`` method is called on leaving the context, e.g.:: - >>> with zarr.DBMStore('data/array.db') as store: - ... z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> with zarr.v2.DBMStore('data/array.db') as store: + ... z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) ... z[...] = 42 ... # no need to call store.close()
@@ -1871,8 +1871,8 @@ class DBMStore(Store): Berkeley DB database can be used:: >>> import bsddb3 - >>> store = zarr.DBMStore('data/array.bdb', open=bsddb3.btopen) - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> store = zarr.v2.DBMStore('data/array.bdb', open=bsddb3.btopen) + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 >>> store.close()
@@ -2040,15 +2040,15 @@ class LMDBStore(Store): Store a single array:: >>> import zarr - >>> store = zarr.LMDBStore('data/array.mdb') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> store = zarr.v2.LMDBStore('data/array.mdb') + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 >>> store.close() # don't forget to call this when you're done Store a group:: - >>> store = zarr.LMDBStore('data/group.mdb') - >>> root = zarr.group(store=store, overwrite=True) + >>> store = zarr.v2.LMDBStore('data/group.mdb') + >>> root = zarr.v2.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42
@@ -2059,8 +2059,8 @@ class LMDBStore(Store): DBMStore class also supports the context manager protocol, which ensures the ``close()`` method is called on leaving the context, e.g.:: - >>> with zarr.LMDBStore('data/array.mdb') as store: - ... 
z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> with zarr.v2.LMDBStore('data/array.mdb') as store: + ... z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) ... z[...] = 42 ... # no need to call store.close() @@ -2216,8 +2216,8 @@ class LRUStoreCache(Store): >>> import zarr >>> s3 = s3fs.S3FileSystem(anon=True, client_kwargs=dict(region_name='eu-west-2')) >>> store = s3fs.S3Map(root='zarr-demo/store', s3=s3, check=False) - >>> cache = zarr.LRUStoreCache(store, max_size=2**28) - >>> root = zarr.group(store=cache) # doctest: +REMOTE_DATA + >>> cache = zarr.v2.LRUStoreCache(store, max_size=2**28) + >>> root = zarr.v2.group(store=cache) # doctest: +REMOTE_DATA >>> z = root['foo/bar/baz'] # doctest: +REMOTE_DATA >>> from timeit import timeit >>> # first data access is relatively slow, retrieved from store @@ -2410,15 +2410,15 @@ class SQLiteStore(Store): Store a single array:: >>> import zarr - >>> store = zarr.SQLiteStore('data/array.sqldb') - >>> z = zarr.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) + >>> store = zarr.v2.SQLiteStore('data/array.sqldb') + >>> z = zarr.v2.zeros((10, 10), chunks=(5, 5), store=store, overwrite=True) >>> z[...] = 42 >>> store.close() # don't forget to call this when you're done Store a group:: - >>> store = zarr.SQLiteStore('data/group.sqldb') - >>> root = zarr.group(store=store, overwrite=True) + >>> store = zarr.v2.SQLiteStore('data/group.sqldb') + >>> root = zarr.v2.group(store=store, overwrite=True) >>> foo = root.create_group('foo') >>> bar = foo.zeros('bar', shape=(10, 10), chunks=(5, 5)) >>> bar[...] = 42 @@ -2751,7 +2751,7 @@ class ConsolidatedMetadataStore(Store): The purpose of this class, is to be able to get all of the metadata for a given array in a single read operation from the underlying storage. - See :func:`zarr.convenience.consolidate_metadata` for how to create this + See :func:`zarr.v2.convenience.consolidate_metadata` for how to create this single metadata key. This class loads from the one key, and stores the data in a dict, so that @@ -2760,7 +2760,7 @@ class ConsolidatedMetadataStore(Store): This class is read-only, and attempts to change the array metadata will fail, but changing the data is possible. If the backend storage is changed directly, then the metadata stored here could become obsolete, and - :func:`zarr.convenience.consolidate_metadata` should be called again and the class + :func:`zarr.v2.convenience.consolidate_metadata` should be called again and the class re-invoked. The use case is for write once, read many times. .. 
versionadded:: 2.3 @@ -2777,7 +2777,7 @@ class ConsolidatedMetadataStore(Store): See Also -------- - zarr.convenience.consolidate_metadata, zarr.convenience.open_consolidated + zarr.v2.convenience.consolidate_metadata, zarr.v2.convenience.open_consolidated """ diff --git a/src/zarr/v2/sync.py b/src/zarr/v2/sync.py new file mode 100644 index 0000000000..49684a51ee --- /dev/null +++ b/src/zarr/v2/sync.py @@ -0,0 +1,48 @@ +import os +from collections import defaultdict +from threading import Lock + +import fasteners + + +class ThreadSynchronizer: + """Provides synchronization using thread locks.""" + + def __init__(self): + self.mutex = Lock() + self.locks = defaultdict(Lock) + + def __getitem__(self, item): + with self.mutex: + return self.locks[item] + + def __getstate__(self): + return True + + def __setstate__(self, *args): + # reinitialize from scratch + self.__init__() + + +class ProcessSynchronizer: + """Provides synchronization using file locks via the + `fasteners `_ + package. + + Parameters + ---------- + path : string + Path to a directory on a file system that is shared by all processes. + N.B., this should be a *different* path to where you store the array. + + """ + + def __init__(self, path): + self.path = path + + def __getitem__(self, item): + path = os.path.join(self.path, item) + lock = fasteners.InterProcessLock(path) + return lock + + # pickling and unpickling should be handled automatically diff --git a/src/zarr/util.py b/src/zarr/v2/util.py similarity index 100% rename from src/zarr/util.py rename to src/zarr/v2/util.py diff --git a/src/zarr/v3/__init__.py b/src/zarr/v3/__init__.py deleted file mode 100644 index 3441fa67be..0000000000 --- a/src/zarr/v3/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -from __future__ import annotations - -from typing import Union - -import zarr.v3.codecs # noqa: F401 -from zarr.v3.array import Array, AsyncArray # noqa: F401 -from zarr.v3.array_v2 import ArrayV2 -from zarr.v3.config import RuntimeConfiguration # noqa: F401 -from zarr.v3.group import AsyncGroup, Group # noqa: F401 -from zarr.v3.metadata import runtime_configuration # noqa: F401 -from zarr.v3.store import ( # noqa: F401 - StoreLike, - make_store_path, -) -from zarr.v3.sync import sync as _sync - - -async def open_auto_async( - store: StoreLike, - runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(), -) -> Union[AsyncArray, AsyncGroup]: - store_path = make_store_path(store) - try: - return await AsyncArray.open(store_path, runtime_configuration=runtime_configuration_) - except KeyError: - return await AsyncGroup.open(store_path, runtime_configuration=runtime_configuration_) - - -def open_auto( - store: StoreLike, - runtime_configuration_: RuntimeConfiguration = RuntimeConfiguration(), -) -> Union[Array, ArrayV2, Group]: - object = _sync( - open_auto_async(store, runtime_configuration_), - runtime_configuration_.asyncio_loop, - ) - if isinstance(object, AsyncArray): - return Array(object) - if isinstance(object, AsyncGroup): - return Group(object) - raise TypeError(f"Unexpected object type. 
Got {type(object)}.") diff --git a/src/zarr/v3/codecs/__init__.py b/src/zarr/v3/codecs/__init__.py deleted file mode 100644 index 474344ec25..0000000000 --- a/src/zarr/v3/codecs/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from __future__ import annotations - -from zarr.v3.codecs.blosc import BloscCodec, BloscCname, BloscShuffle # noqa: F401 -from zarr.v3.codecs.bytes import BytesCodec, Endian # noqa: F401 -from zarr.v3.codecs.crc32c_ import Crc32cCodec # noqa: F401 -from zarr.v3.codecs.gzip import GzipCodec # noqa: F401 -from zarr.v3.codecs.sharding import ShardingCodec, ShardingCodecIndexLocation # noqa: F401 -from zarr.v3.codecs.transpose import TransposeCodec # noqa: F401 -from zarr.v3.codecs.zstd import ZstdCodec # noqa: F401 diff --git a/src/zarr/v3/indexing.py b/src/zarr/v3/indexing.py deleted file mode 100644 index 15adad111d..0000000000 --- a/src/zarr/v3/indexing.py +++ /dev/null @@ -1,208 +0,0 @@ -from __future__ import annotations - -import itertools -import math -from typing import Iterator, List, NamedTuple, Optional, Tuple - -from zarr.v3.common import ChunkCoords, Selection, SliceSelection, product - - -def _ensure_tuple(v: Selection) -> SliceSelection: - if not isinstance(v, tuple): - v = (v,) - return v - - -def _err_too_many_indices(selection: SliceSelection, shape: ChunkCoords): - raise IndexError( - "too many indices for array; expected {}, got {}".format(len(shape), len(selection)) - ) - - -def _err_negative_step(): - raise IndexError("only slices with step >= 1 are supported") - - -def _check_selection_length(selection: SliceSelection, shape: ChunkCoords): - if len(selection) > len(shape): - _err_too_many_indices(selection, shape) - - -def _ensure_selection( - selection: Selection, - shape: ChunkCoords, -) -> SliceSelection: - selection = _ensure_tuple(selection) - - # fill out selection if not completely specified - if len(selection) < len(shape): - selection += (slice(None),) * (len(shape) - len(selection)) - - # check selection not too long - _check_selection_length(selection, shape) - - return selection - - -class _ChunkDimProjection(NamedTuple): - dim_chunk_ix: int - dim_chunk_sel: slice - dim_out_sel: Optional[slice] - - -def _ceildiv(a, b): - return math.ceil(a / b) - - -class _SliceDimIndexer: - dim_sel: slice - dim_len: int - dim_chunk_len: int - nitems: int - - start: int - stop: int - step: int - - def __init__(self, dim_sel: slice, dim_len: int, dim_chunk_len: int): - self.start, self.stop, self.step = dim_sel.indices(dim_len) - if self.step < 1: - _err_negative_step() - - self.dim_len = dim_len - self.dim_chunk_len = dim_chunk_len - self.nitems = max(0, _ceildiv((self.stop - self.start), self.step)) - self.nchunks = _ceildiv(self.dim_len, self.dim_chunk_len) - - def __iter__(self) -> Iterator[_ChunkDimProjection]: - # figure out the range of chunks we need to visit - dim_chunk_ix_from = self.start // self.dim_chunk_len - dim_chunk_ix_to = _ceildiv(self.stop, self.dim_chunk_len) - - # iterate over chunks in range - for dim_chunk_ix in range(dim_chunk_ix_from, dim_chunk_ix_to): - # compute offsets for chunk within overall array - dim_offset = dim_chunk_ix * self.dim_chunk_len - dim_limit = min(self.dim_len, (dim_chunk_ix + 1) * self.dim_chunk_len) - - # determine chunk length, accounting for trailing chunk - dim_chunk_len = dim_limit - dim_offset - - if self.start < dim_offset: - # selection starts before current chunk - dim_chunk_sel_start = 0 - remainder = (dim_offset - self.start) % self.step - if remainder: - dim_chunk_sel_start += self.step - remainder - 
# compute number of previous items, provides offset into output array - dim_out_offset = _ceildiv((dim_offset - self.start), self.step) - - else: - # selection starts within current chunk - dim_chunk_sel_start = self.start - dim_offset - dim_out_offset = 0 - - if self.stop > dim_limit: - # selection ends after current chunk - dim_chunk_sel_stop = dim_chunk_len - - else: - # selection ends within current chunk - dim_chunk_sel_stop = self.stop - dim_offset - - dim_chunk_sel = slice(dim_chunk_sel_start, dim_chunk_sel_stop, self.step) - dim_chunk_nitems = _ceildiv((dim_chunk_sel_stop - dim_chunk_sel_start), self.step) - dim_out_sel = slice(dim_out_offset, dim_out_offset + dim_chunk_nitems) - - yield _ChunkDimProjection(dim_chunk_ix, dim_chunk_sel, dim_out_sel) - - -class _ChunkProjection(NamedTuple): - chunk_coords: ChunkCoords - chunk_selection: SliceSelection - out_selection: SliceSelection - - -class BasicIndexer: - dim_indexers: List[_SliceDimIndexer] - shape: ChunkCoords - - def __init__( - self, - selection: Selection, - shape: Tuple[int, ...], - chunk_shape: Tuple[int, ...], - ): - # setup per-dimension indexers - self.dim_indexers = [ - _SliceDimIndexer(dim_sel, dim_len, dim_chunk_len) - for dim_sel, dim_len, dim_chunk_len in zip( - _ensure_selection(selection, shape), shape, chunk_shape - ) - ] - self.shape = tuple(s.nitems for s in self.dim_indexers) - - def __iter__(self) -> Iterator[_ChunkProjection]: - for dim_projections in itertools.product(*self.dim_indexers): - chunk_coords = tuple(p.dim_chunk_ix for p in dim_projections) - chunk_selection = tuple(p.dim_chunk_sel for p in dim_projections) - out_selection = tuple( - p.dim_out_sel for p in dim_projections if p.dim_out_sel is not None - ) - - yield _ChunkProjection(chunk_coords, chunk_selection, out_selection) - - -def morton_order_iter(chunk_shape: ChunkCoords) -> Iterator[ChunkCoords]: - def decode_morton(z: int, chunk_shape: ChunkCoords) -> ChunkCoords: - # Inspired by compressed morton code as implemented in Neuroglancer - # https://github.com/google/neuroglancer/blob/master/src/neuroglancer/datasource/precomputed/volume.md#compressed-morton-code - bits = tuple(math.ceil(math.log2(c)) for c in chunk_shape) - max_coords_bits = max(*bits) - input_bit = 0 - input_value = z - out = [0 for _ in range(len(chunk_shape))] - - for coord_bit in range(max_coords_bits): - for dim in range(len(chunk_shape)): - if coord_bit < bits[dim]: - bit = (input_value >> input_bit) & 1 - out[dim] |= bit << coord_bit - input_bit += 1 - return tuple(out) - - for i in range(product(chunk_shape)): - yield decode_morton(i, chunk_shape) - - -def c_order_iter(chunks_per_shard: ChunkCoords) -> Iterator[ChunkCoords]: - return itertools.product(*(range(x) for x in chunks_per_shard)) - - -def is_total_slice(item: Selection, shape: ChunkCoords): - """Determine whether `item` specifies a complete slice of array with the - given `shape`. 
diff --git a/src/zarr/v3/store/__init__.py b/src/zarr/v3/store/__init__.py
deleted file mode 100644
index 2268381d2a..0000000000
--- a/src/zarr/v3/store/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-# flake8: noqa
-from zarr.v3.store.core import StorePath, StoreLike, make_store_path
-from zarr.v3.store.remote import RemoteStore
-from zarr.v3.store.local import LocalStore
-from zarr.v3.store.memory import MemoryStore
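Reviewer note: same move for the store shim; ``StorePath``, ``MemoryStore``, ``LocalStore``, and friends now resolve from ``zarr.store`` (see the updated ``tests/v3/test_codecs.py`` imports at the bottom of this diff). A hedged sketch, assuming the constructors keep their current defaulted forms::

    from zarr.store import MemoryStore, StorePath

    store = MemoryStore()    # in-memory key/value mapping
    path = StorePath(store)  # addresses the root of that store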
- """ - if loop[0] is None: - with _get_lock(): - # repeat the check just in case the loop got filled between the - # previous two calls from another thread - if loop[0] is None: - new_loop = asyncio.new_event_loop() - loop[0] = new_loop - th = threading.Thread(target=new_loop.run_forever, name="zarrIO") - th.daemon = True - th.start() - iothread[0] = th - return loop[0] - - -P = ParamSpec("P") -T = TypeVar("T") - - -class SyncMixin: - _sync_configuration: SyncConfiguration - - def _sync(self, coroutine: Coroutine[Any, Any, T]) -> T: - # TODO: refactor this to to take *args and **kwargs and pass those to the method - # this should allow us to better type the sync wrapper - return sync(coroutine, loop=self._sync_configuration.asyncio_loop) - - def _sync_iter(self, coroutine: Coroutine[Any, Any, AsyncIterator[T]]) -> List[T]: - async def iter_to_list() -> List[T]: - # TODO: replace with generators so we don't materialize the entire iterator at once - async_iterator = await coroutine - return [item async for item in async_iterator] - - return self._sync(iter_to_list()) diff --git a/tests/__init__.py b/tests/v2/__init__.py similarity index 100% rename from tests/__init__.py rename to tests/v2/__init__.py diff --git a/tests/conftest.py b/tests/v2/conftest.py similarity index 60% rename from tests/conftest.py rename to tests/v2/conftest.py index aa73b8691e..a7a445c640 100644 --- a/tests/conftest.py +++ b/tests/v2/conftest.py @@ -6,3 +6,8 @@ @pytest.fixture(params=[str, pathlib.Path]) def path_type(request): return request.param + + +@pytest.fixture +def project_root(request): + return request.config.rootpath diff --git a/tests/data/store.zip b/tests/v2/data/store.zip similarity index 100% rename from tests/data/store.zip rename to tests/v2/data/store.zip diff --git a/tests/data/store/foo b/tests/v2/data/store/foo similarity index 100% rename from tests/data/store/foo rename to tests/v2/data/store/foo diff --git a/tests/test_attrs.py b/tests/v2/test_attrs.py similarity index 97% rename from tests/test_attrs.py rename to tests/v2/test_attrs.py index 2575163840..b477b8befe 100644 --- a/tests/test_attrs.py +++ b/tests/v2/test_attrs.py @@ -1,13 +1,11 @@ import json -import pathlib import pytest -import zarr -from zarr.attrs import Attributes -from zarr.storage import KVStore, DirectoryStore +from zarr.v2.attrs import Attributes +from zarr.v2.storage import KVStore, DirectoryStore from .util import CountingDict -from zarr.hierarchy import group +from zarr.v2.hierarchy import group def _init_store(): @@ -36,8 +34,7 @@ def test_storage(self): d = json.loads(str(store[attrs_key], "utf-8")) assert dict(foo="bar", baz=42) == d - def test_utf8_encoding(self): - project_root = pathlib.Path(zarr.__file__).resolve().parent.parent + def test_utf8_encoding(self, project_root): fixdir = project_root / "fixture" testdir = fixdir / "utf8attrs" if not testdir.exists(): # pragma: no cover diff --git a/tests/test_convenience.py b/tests/v2/test_convenience.py similarity index 99% rename from tests/test_convenience.py rename to tests/v2/test_convenience.py index d50533e847..f558a8800f 100644 --- a/tests/test_convenience.py +++ b/tests/v2/test_convenience.py @@ -8,8 +8,8 @@ from numcodecs import Adler32, Zlib from numpy.testing import assert_array_equal -import zarr -from zarr.convenience import ( +import zarr.v2 as zarr +from zarr.v2.convenience import ( consolidate_metadata, copy, copy_store, @@ -21,10 +21,10 @@ save_array, copy_all, ) -from zarr.core import Array -from zarr.errors import CopyError -from zarr.hierarchy 
diff --git a/tests/__init__.py b/tests/v2/__init__.py
similarity index 100%
rename from tests/__init__.py
rename to tests/v2/__init__.py
diff --git a/tests/conftest.py b/tests/v2/conftest.py
similarity index 60%
rename from tests/conftest.py
rename to tests/v2/conftest.py
index aa73b8691e..a7a445c640 100644
--- a/tests/conftest.py
+++ b/tests/v2/conftest.py
@@ -6,3 +6,8 @@
 @pytest.fixture(params=[str, pathlib.Path])
 def path_type(request):
     return request.param
+
+
+@pytest.fixture
+def project_root(request):
+    return request.config.rootpath
diff --git a/tests/data/store.zip b/tests/v2/data/store.zip
similarity index 100%
rename from tests/data/store.zip
rename to tests/v2/data/store.zip
diff --git a/tests/data/store/foo b/tests/v2/data/store/foo
similarity index 100%
rename from tests/data/store/foo
rename to tests/v2/data/store/foo
diff --git a/tests/test_attrs.py b/tests/v2/test_attrs.py
similarity index 97%
rename from tests/test_attrs.py
rename to tests/v2/test_attrs.py
index 2575163840..b477b8befe 100644
--- a/tests/test_attrs.py
+++ b/tests/v2/test_attrs.py
@@ -1,13 +1,11 @@
 import json
-import pathlib
 
 import pytest
 
-import zarr
-from zarr.attrs import Attributes
-from zarr.storage import KVStore, DirectoryStore
+from zarr.v2.attrs import Attributes
+from zarr.v2.storage import KVStore, DirectoryStore
 from .util import CountingDict
-from zarr.hierarchy import group
+from zarr.v2.hierarchy import group
 
 
 def _init_store():
@@ -36,8 +34,7 @@ def test_storage(self):
         d = json.loads(str(store[attrs_key], "utf-8"))
         assert dict(foo="bar", baz=42) == d
 
-    def test_utf8_encoding(self):
-        project_root = pathlib.Path(zarr.__file__).resolve().parent.parent
+    def test_utf8_encoding(self, project_root):
         fixdir = project_root / "fixture"
         testdir = fixdir / "utf8attrs"
         if not testdir.exists():  # pragma: no cover
diff --git a/tests/test_convenience.py b/tests/v2/test_convenience.py
similarity index 99%
rename from tests/test_convenience.py
rename to tests/v2/test_convenience.py
index d50533e847..f558a8800f 100644
--- a/tests/test_convenience.py
+++ b/tests/v2/test_convenience.py
@@ -8,8 +8,8 @@
 from numcodecs import Adler32, Zlib
 from numpy.testing import assert_array_equal
 
-import zarr
-from zarr.convenience import (
+import zarr.v2 as zarr
+from zarr.v2.convenience import (
     consolidate_metadata,
     copy,
     copy_store,
@@ -21,10 +21,10 @@
     save_array,
     copy_all,
 )
-from zarr.core import Array
-from zarr.errors import CopyError
-from zarr.hierarchy import Group, group
-from zarr.storage import (
+from zarr.v2.core import Array
+from zarr.v2.errors import CopyError
+from zarr.v2.hierarchy import Group, group
+from zarr.v2.storage import (
     ConsolidatedMetadataStore,
     FSStore,
     MemoryStore,
diff --git a/tests/test_core.py b/tests/v2/test_core.py
similarity index 99%
rename from tests/test_core.py
rename to tests/v2/test_core.py
index 6303371793..197461d129 100644
--- a/tests/test_core.py
+++ b/tests/v2/test_core.py
@@ -30,15 +30,15 @@
 from numcodecs.tests.common import greetings
 from numpy.testing import assert_array_almost_equal, assert_array_equal
 
-import zarr
-from zarr._storage.store import (
+import zarr.v2
+from zarr.v2._storage.store import (
     BaseStore,
 )
-from zarr.core import Array
-from zarr.meta import json_loads
-from zarr.n5 import N5Store, N5FSStore, n5_keywords
-from zarr.storage import (
+from zarr.v2.core import Array
+from zarr.v2.meta import json_loads
+from zarr.v2.n5 import N5Store, N5FSStore, n5_keywords
+from zarr.v2.storage import (
     ABSStore,
     DBMStore,
     DirectoryStore,
@@ -55,7 +55,7 @@
     normalize_store_arg,
 )
 
-from zarr.util import buffer_size
+from zarr.v2.util import buffer_size
 from .util import abs_container, skip_test_env_var, have_fsspec, mktemp
 
 # noinspection PyMethodMayBeStatic
@@ -721,7 +721,7 @@ def test_resize_2d(self):
 
         # checks that resizing preserves metadata
         if self.dimension_separator == "/":
-            z_ = zarr.open(z.store)
+            z_ = zarr.v2.open(z.store)
             if hasattr(z_, "dimension_separator"):
                 assert z_.dimension_separator == self.dimension_separator
             z_.store.close()
@@ -2495,7 +2495,7 @@ def test_issue_1279(tmpdir):
    """See """
 
    data = np.arange(25).reshape((5, 5))
-    ds = zarr.create(
+    ds = zarr.v2.create(
        shape=data.shape,
        chunks=(5, 5),
        dtype=data.dtype,
@@ -2506,7 +2506,7 @@
 
    ds[:] = data
 
-    ds_reopened = zarr.open_array(store=FSStore(url=str(tmpdir), mode="r"))
+    ds_reopened = zarr.v2.open_array(store=FSStore(url=str(tmpdir), mode="r"))
 
    written_data = ds_reopened[:]
    assert_array_equal(data, written_data)
diff --git a/tests/test_creation.py b/tests/v2/test_creation.py
similarity index 98%
rename from tests/test_creation.py
rename to tests/v2/test_creation.py
index 369d755700..08073a8ac3 100644
--- a/tests/test_creation.py
+++ b/tests/v2/test_creation.py
@@ -7,9 +7,9 @@
 import pytest
 from numpy.testing import assert_array_equal
 
-from zarr.codecs import Zlib
-from zarr.core import Array
-from zarr.creation import (
+from zarr.v2.codecs import Zlib
+from zarr.v2.core import Array
+from zarr.v2.creation import (
     array,
     create,
     empty,
@@ -23,10 +23,10 @@
     zeros,
     zeros_like,
 )
-from zarr.hierarchy import open_group
-from zarr.n5 import N5Store
-from zarr.storage import DirectoryStore, KVStore
-from zarr.sync import ThreadSynchronizer
+from zarr.v2.hierarchy import open_group
+from zarr.v2.n5 import N5Store
+from zarr.v2.storage import DirectoryStore, KVStore
+from zarr.v2.sync import ThreadSynchronizer
 from .util import mktemp, have_fsspec
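Reviewer note: the renamed v2 tests trade ``pathlib.Path(zarr.__file__).resolve().parent.parent`` for the ``project_root`` fixture added in ``conftest.py`` above, which stays correct now that the package lives under ``src/``. A sketch of a test consuming it (hypothetical test name; the fixture itself is exactly as added above)::

    def test_fixture_paths(project_root):
        # project_root is pytest's rootpath, i.e. the repository checkout, so
        # fixture data is addressed independently of the package's location.
        fixdir = project_root / "fixture" / "utf8attrs"
        assert fixdir.parts[-2:] == ("fixture", "utf8attrs")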
diff --git a/tests/test_dim_separator.py b/tests/v2/test_dim_separator.py
similarity index 87%
rename from tests/test_dim_separator.py
rename to tests/v2/test_dim_separator.py
index 4276d1829d..b0e9d0ecc8 100644
--- a/tests/test_dim_separator.py
+++ b/tests/v2/test_dim_separator.py
@@ -4,9 +4,9 @@
 from numpy.testing import assert_array_equal
 from functools import partial
 
-import zarr
-from zarr.core import Array
-from zarr.storage import DirectoryStore, NestedDirectoryStore, FSStore
+import zarr.v2
+from zarr.v2.core import Array
+from zarr.v2.storage import DirectoryStore, NestedDirectoryStore, FSStore
 from .util import have_fsspec
 
@@ -41,7 +41,7 @@ def dataset(tmpdir, request):
        kwargs = {}
 
        if which.startswith("static"):
-            project_root = pathlib.Path(zarr.__file__).resolve().parent.parent
+            project_root = pathlib.Path(zarr.v2.__file__).resolve().parent.parent
            suffix = which[len("static_") :]
            static = project_root / "fixture" / suffix
 
@@ -59,7 +59,7 @@
 
        # store the data - should be one-time operation
        s = generator(str(static))
-        a = zarr.open(store=s, mode="w", shape=(2, 2), dtype="
 CuPyCPUCompressor:
     if compressor:
-        compressor = getattr(zarr.codecs, compressor)()
+        compressor = getattr(zarr.v2.codecs, compressor)()
     return CuPyCPUCompressor(compressor)
diff --git a/tests/test_n5.py b/tests/v2/test_n5.py
similarity index 93%
rename from tests/test_n5.py
rename to tests/v2/test_n5.py
index 755d60b607..238e9b2c6e 100644
--- a/tests/test_n5.py
+++ b/tests/v2/test_n5.py
@@ -1,8 +1,8 @@
 import pytest
 
-from zarr.n5 import N5ChunkWrapper, N5FSStore
-from zarr.creation import create
-from zarr.storage import atexit_rmtree
+from zarr.v2.n5 import N5ChunkWrapper, N5FSStore
+from zarr.v2.creation import create
+from zarr.v2.storage import atexit_rmtree
 from numcodecs import GZip
 import numpy as np
 from typing import Tuple
diff --git a/tests/test_storage.py b/tests/v2/test_storage.py
similarity index 99%
rename from tests/test_storage.py
rename to tests/v2/test_storage.py
index 5d82b879ad..b6877aa713 100644
--- a/tests/test_storage.py
+++ b/tests/v2/test_storage.py
@@ -31,7 +31,7 @@
 # from zarr.meta import ZARR_FORMAT, decode_array_metadata
 # from zarr.n5 import N5Store, N5FSStore, N5_FORMAT, n5_attrs_key
 
-from zarr.storage import (
+from zarr.v2.storage import (
     # ABSStore,
     # ConsolidatedMetadataStore,
     # DBMStore,
@@ -68,8 +68,8 @@
 # from zarr.tests.util import CountingDict, have_fsspec, skip_test_env_var, abs_container, mktemp
 # from zarr.util import ConstantMap, json_dumps
 
-from zarr.v3.abc.store import Store
-from zarr.v3.store import MemoryStore as KVStore, LocalStore
+from zarr.abc.store import Store
+from zarr.store import MemoryStore as KVStore, LocalStore
 
 
 # @contextmanager
diff --git a/tests/test_storage_v3.py b/tests/v2/test_storage_v3.py
similarity index 100%
rename from tests/test_storage_v3.py
rename to tests/v2/test_storage_v3.py
diff --git a/tests/test_sync.py b/tests/v2/test_sync.py
similarity index 97%
rename from tests/test_sync.py
rename to tests/v2/test_sync.py
index 9d805ee2c1..8bf1304dc2 100644
--- a/tests/test_sync.py
+++ b/tests/v2/test_sync.py
@@ -9,11 +9,11 @@
 import numpy as np
 from numpy.testing import assert_array_equal
 
-from zarr.attrs import Attributes
-from zarr.core import Array
-from zarr.hierarchy import Group
-from zarr.storage import DirectoryStore, KVStore, atexit_rmtree, init_array, init_group
-from zarr.sync import ProcessSynchronizer, ThreadSynchronizer
+from zarr.v2.attrs import Attributes
+from zarr.v2.core import Array
+from zarr.v2.hierarchy import Group
+from zarr.v2.storage import DirectoryStore, KVStore, atexit_rmtree, init_array, init_group
+from zarr.v2.sync import ProcessSynchronizer, ThreadSynchronizer
 
 # zarr_version fixture must be imported although not used directly here
 from .test_attrs import TestAttributes  # noqa
diff --git a/tests/test_util.py b/tests/v2/test_util.py
similarity index 99%
rename from tests/test_util.py
rename to tests/v2/test_util.py
index 1f7efc9214..35c355693a 100644
--- a/tests/test_util.py
+++ b/tests/v2/test_util.py
@@ -4,8 +4,8 @@
 import numpy as np
 import pytest
 
-from zarr.core import Array
-from zarr.util import (
+from zarr.v2.core import Array
+from zarr.v2.util import (
     ConstantMap,
     all_equal,
     flatten,
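Reviewer note: two aliasing idioms keep these renamed test bodies nearly untouched: a module alias where a whole file exercises the v2 API, and a symbol alias where an old name is deliberately remapped to a new class, as in ``tests/v2/test_storage.py`` above. Both spellings, as a usage sketch rather than part of this diff::

    import zarr.v2 as zarr                         # module alias: zarr.zeros(...) etc. keep working
    from zarr.store import MemoryStore as KVStore  # symbol alias used by test_storage.py

    z = zarr.zeros((10, 10), chunks=(5, 5))        # v2 API call, test body unchanged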
diff --git a/tests/util.py b/tests/v2/util.py
similarity index 97%
rename from tests/util.py
rename to tests/v2/util.py
index 8e53bf3b63..12c5e379f6 100644
--- a/tests/util.py
+++ b/tests/v2/util.py
@@ -2,9 +2,9 @@
 import os
 import tempfile
 from typing import Any, Mapping, Sequence
-from zarr.context import Context
+from zarr.v2.context import Context
 
-from zarr.storage import Store
+from zarr.v2.storage import Store
 
 import pytest
diff --git a/tests/v3/test_codecs.py b/tests/v3/test_codecs.py
index 333c2094bf..ffd225668b 100644
--- a/tests/v3/test_codecs.py
+++ b/tests/v3/test_codecs.py
@@ -7,12 +7,12 @@
 import numpy as np
 import pytest
 
-import zarr
-from zarr.v3.abc.codec import Codec
-from zarr.v3.array import Array, AsyncArray
-from zarr.v3.common import Selection
-from zarr.v3.indexing import morton_order_iter
-from zarr.v3.codecs import (
+import zarr.v2
+from zarr.abc.codec import Codec
+from zarr.array import Array, AsyncArray
+from zarr.common import Selection
+from zarr.indexing import morton_order_iter
+from zarr.codecs import (
     ShardingCodec,
     ShardingCodecIndexLocation,
     BloscCodec,
@@ -21,10 +21,10 @@
     TransposeCodec,
     ZstdCodec,
 )
-from zarr.v3.metadata import runtime_configuration
+from zarr.metadata import runtime_configuration
 
-from zarr.v3.abc.store import Store
-from zarr.v3.store import MemoryStore, StorePath
+from zarr.abc.store import Store
+from zarr.store import MemoryStore, StorePath
 
 
 @dataclass(frozen=True)
@@ -286,7 +286,7 @@ async def test_order(
 
     if not with_sharding:
         # Compare with zarr-python
-        z = zarr.create(
+        z = zarr.v2.create(
             shape=data.shape,
             chunks=(32, 8),
             dtype="