From afdbb75a2898bff58f9bbc4613aed1e397b9169e Mon Sep 17 00:00:00 2001 From: David Stansby Date: Sat, 19 Oct 2024 16:44:11 +0100 Subject: [PATCH 01/12] Add some more numpydoc rules/associated fixes (#2399) * Add GL09 validation * Add SS02 rule * Add SS04 * Enable PR02 rule --- pyproject.toml | 13 +++- src/zarr/abc/codec.py | 20 +++--- src/zarr/abc/store.py | 2 +- src/zarr/api/asynchronous.py | 24 ++++---- src/zarr/core/array.py | 42 +++++-------- src/zarr/core/buffer/core.py | 2 +- src/zarr/core/buffer/cpu.py | 2 +- src/zarr/core/buffer/gpu.py | 2 +- src/zarr/core/chunk_grids.py | 10 +-- src/zarr/core/group.py | 114 ++++++++++++++++------------------- src/zarr/core/indexing.py | 12 ++-- src/zarr/core/metadata/v2.py | 5 +- src/zarr/core/metadata/v3.py | 4 +- src/zarr/storage/common.py | 20 +++--- src/zarr/storage/logging.py | 8 +-- src/zarr/storage/memory.py | 4 +- tests/test_api.py | 2 +- 17 files changed, 137 insertions(+), 149 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 574b09b076..80cb71ff83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -329,4 +329,15 @@ ignore = [ [tool.numpydoc_validation] # See https://numpydoc.readthedocs.io/en/latest/validation.html#built-in-validation-checks for list of checks -checks = ["GL06", "GL07", "GL10", "PR03", "PR05", "PR06"] +checks = [ + "GL06", + "GL07", + "GL09", + "GL10", + "SS02", + "SS04", + "PR02", + "PR03", + "PR05", + "PR06", +] diff --git a/src/zarr/abc/codec.py b/src/zarr/abc/codec.py index 3548874409..f27152e84c 100644 --- a/src/zarr/abc/codec.py +++ b/src/zarr/abc/codec.py @@ -85,7 +85,7 @@ def evolve_from_array_spec(self, array_spec: ArraySpec) -> Self: Parameters ---------- - chunk_spec : ArraySpec + array_spec : ArraySpec Returns ------- @@ -99,11 +99,11 @@ def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: Chun Parameters ---------- - shape: ChunkCoords + shape : ChunkCoords The array shape - dtype: np.dtype[Any] + dtype : np.dtype[Any] The array data type - 
chunk_grid: ChunkGrid + chunk_grid : ChunkGrid The array chunk grid """ ... @@ -292,11 +292,11 @@ def validate(self, *, shape: ChunkCoords, dtype: np.dtype[Any], chunk_grid: Chun Parameters ---------- - shape: ChunkCoords + shape : ChunkCoords The array shape - dtype: np.dtype[Any] + dtype : np.dtype[Any] The array data type - chunk_grid: ChunkGrid + chunk_grid : ChunkGrid The array chunk grid """ ... @@ -308,7 +308,7 @@ def compute_encoded_size(self, byte_length: int, array_spec: ArraySpec) -> int: Parameters ---------- - input_byte_length : int + byte_length : int array_spec : ArraySpec Returns @@ -327,7 +327,7 @@ async def decode( Parameters ---------- - chunks_and_specs : Iterable[tuple[Buffer | None, ArraySpec]] + chunk_bytes_and_specs : Iterable[tuple[Buffer | None, ArraySpec]] Ordered set of encoded chunks with their accompanying chunk spec. Returns @@ -346,7 +346,7 @@ async def encode( Parameters ---------- - chunks_and_specs : Iterable[tuple[NDBuffer | None, ArraySpec]] + chunk_arrays_and_specs : Iterable[tuple[NDBuffer | None, ArraySpec]] Ordered set of to-be-encoded chunks with their accompanying chunk spec. Returns diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 40c8129afe..3e233e8a1d 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -162,7 +162,7 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: Parameters ---------- - mode: AccessModeLiteral + mode : AccessModeLiteral The new mode to use. 
Returns diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index e500562c4c..2c423ff59b 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -68,7 +68,7 @@ def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoords | None]: - """helper function to get the shape and chunks from an array-like object""" + """Helper function to get the shape and chunks from an array-like object""" shape = None chunks = None @@ -86,7 +86,7 @@ def _get_shape_chunks(a: ArrayLike | Any) -> tuple[ChunkCoords | None, ChunkCoor def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]: - """set default values for shape and chunks if they are not present in the array-like object""" + """Set default values for shape and chunks if they are not present in the array-like object""" new = kwargs.copy() @@ -121,7 +121,7 @@ def _like_args(a: ArrayLike, kwargs: dict[str, Any]) -> dict[str, Any]: def _handle_zarr_version_or_format( *, zarr_version: ZarrFormat | None, zarr_format: ZarrFormat | None ) -> ZarrFormat | None: - """handle the deprecated zarr_version kwarg and return zarr_format""" + """Handle the deprecated zarr_version kwarg and return zarr_format""" if zarr_format is not None and zarr_version is not None and zarr_format != zarr_version: raise ValueError( f"zarr_format {zarr_format} does not match zarr_version {zarr_version}, please only set one" @@ -135,7 +135,7 @@ def _handle_zarr_version_or_format( def _default_zarr_version() -> ZarrFormat: - """return the default zarr_version""" + """Return the default zarr_version""" return cast(ZarrFormat, int(config.get("default_zarr_version", 3))) @@ -152,9 +152,9 @@ async def consolidate_metadata( Parameters ---------- - store: StoreLike + store : StoreLike The store-like object whose metadata you wish to consolidate. - path: str, optional + path : str, optional A path to a group in the store to consolidate at. Only children below that group will be consolidated. 
@@ -341,13 +341,13 @@ async def save( ---------- store : Store or str Store or path to directory in file system or name of zip file. - args : ndarray + *args : ndarray NumPy arrays with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. path : str or None, optional The path within the group where the arrays will be saved. - kwargs + **kwargs NumPy arrays with data to save. """ zarr_format = _handle_zarr_version_or_format(zarr_version=zarr_version, zarr_format=zarr_format) @@ -386,7 +386,7 @@ async def save_array( storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. - kwargs + **kwargs Passed through to :func:`create`, e.g., compressor. """ zarr_format = ( @@ -423,7 +423,7 @@ async def save_group( ---------- store : Store or str Store or path to directory in file system or name of zip file. - args : ndarray + *args : ndarray NumPy arrays with data to save. zarr_format : {2, 3, None}, optional The zarr format to use when saving. @@ -432,7 +432,7 @@ async def save_group( storage_options : dict If using an fsspec URL to create the store, these will be passed to the backend implementation. Ignored otherwise. - kwargs + **kwargs NumPy arrays with data to save. """ zarr_format = ( @@ -479,7 +479,7 @@ async def array( ---------- data : array_like The data to fill the array with. - kwargs + **kwargs Passed through to :func:`create`. Returns diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index da477056ee..0418d1dc52 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -184,8 +184,6 @@ class AsyncArray(Generic[T_ArrayMetadata]): The metadata of the array. store_path : StorePath The path to the Zarr store. - codec_pipeline : CodecPipeline, optional - The codec pipeline used for encoding and decoding chunks, by default None. order : {'C', 'F'}, optional The order of the array data in memory, by default None. 
@@ -857,9 +855,9 @@ def _iter_chunk_coords( Parameters ---------- - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. - selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields @@ -878,9 +876,9 @@ def _iter_chunk_keys( Parameters ---------- - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. - selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields @@ -901,9 +899,9 @@ def _iter_chunk_regions( Parameters ---------- - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. - selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields @@ -1151,17 +1149,7 @@ async def info(self) -> None: @dataclass(frozen=True) class Array: - """Instantiate an array from an initialized store. - - Parameters - ---------- - store : StoreLike - The array store that has already been initialized. - shape : ChunkCoords - The shape of the array. - dtype : npt.DTypeLike - The dtype of the array. - """ + """Instantiate an array from an initialized store.""" _async_array: AsyncArray[ArrayV3Metadata] | AsyncArray[ArrayV2Metadata] @@ -1419,9 +1407,9 @@ def _iter_chunk_coords( Parameters ---------- - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. 
- selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields @@ -1456,9 +1444,9 @@ def _iter_chunk_keys( Parameters ---------- - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. - selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields @@ -1478,9 +1466,9 @@ def _iter_chunk_regions( Parameters ---------- - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The origin of the selection relative to the array's chunk grid. - selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection in chunk grid coordinates. Yields @@ -2231,7 +2219,7 @@ def get_mask_selection( Parameters ---------- - selection : ndarray, bool + mask : ndarray, bool A Boolean array of the same shape as the array against which the selection is being made. out : NDBuffer, optional @@ -2314,7 +2302,7 @@ def set_mask_selection( Parameters ---------- - selection : ndarray, bool + mask : ndarray, bool A Boolean array of the same shape as the array against which the selection is being made. value : npt.ArrayLike diff --git a/src/zarr/core/buffer/core.py b/src/zarr/core/buffer/core.py index 1fbf58c618..9a07583c93 100644 --- a/src/zarr/core/buffer/core.py +++ b/src/zarr/core/buffer/core.py @@ -308,7 +308,7 @@ class NDBuffer: Parameters ---------- - ndarray_like + array : ndarray_like ndarray-like object that is convertible to a regular Numpy array. 
""" diff --git a/src/zarr/core/buffer/cpu.py b/src/zarr/core/buffer/cpu.py index 187e2d82dc..5019075496 100644 --- a/src/zarr/core/buffer/cpu.py +++ b/src/zarr/core/buffer/cpu.py @@ -138,7 +138,7 @@ class NDBuffer(core.NDBuffer): Parameters ---------- - ndarray_like + array ndarray-like object that is convertible to a regular Numpy array. """ diff --git a/src/zarr/core/buffer/gpu.py b/src/zarr/core/buffer/gpu.py index d5daba0e9a..6941c8897e 100644 --- a/src/zarr/core/buffer/gpu.py +++ b/src/zarr/core/buffer/gpu.py @@ -132,7 +132,7 @@ class NDBuffer(core.NDBuffer): Parameters ---------- - ndarray_like + array ndarray-like object that is convertible to a regular Numpy array. """ diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index 77734056b3..aace45d438 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -42,15 +42,15 @@ def _guess_chunks( Parameters ---------- - shape: ChunkCoords + shape : ChunkCoords The chunk shape. - typesize: int + typesize : int The size, in bytes, of each element of the chunk. - increment_bytes: int = 256 * 1024 + increment_bytes : int = 256 * 1024 The number of bytes used to increment or decrement the target chunk size in bytes. - min_bytes: int = 128 * 1024 + min_bytes : int = 128 * 1024 The soft lower bound on the final chunk size in bytes. - max_bytes: int = 64 * 1024 * 1024 + max_bytes : int = 64 * 1024 * 1024 The hard upper bound on the final chunk size in bytes. Returns diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index d797ed7370..ba68213574 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -425,9 +425,9 @@ async def open( Parameters ---------- - store: StoreLike - zarr_format: {2, 3}, optional - use_consolidated: bool or str, default None + store : StoreLike + zarr_format : {2, 3}, optional + use_consolidated : bool or str, default None Whether to use consolidated metadata. 
By default, consolidated metadata is used if it's present in the @@ -897,32 +897,32 @@ async def create_array( Parameters ---------- - name: str + name : str The name of the array. - shape: tuple[int, ...] + shape : tuple[int, ...] The shape of the array. - dtype: np.DtypeLike = float64 + dtype : np.DtypeLike = float64 The data type of the array. - chunk_shape: tuple[int, ...] | None = None + chunk_shape : tuple[int, ...] | None = None The shape of the chunks of the array. V3 only. - chunk_key_encoding: ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None + chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None A specification of how the chunk keys are represented in storage. - codecs: Iterable[Codec | dict[str, JSON]] | None = None + codecs : Iterable[Codec | dict[str, JSON]] | None = None An iterable of Codec or dict serializations thereof. The elements of this collection specify the transformation from array values to stored bytes. - dimension_names: Iterable[str] | None = None + dimension_names : Iterable[str] | None = None The names of the dimensions of the array. V3 only. - chunks: ChunkCoords | None = None + chunks : ChunkCoords | None = None The shape of the chunks of the array. V2 only. - dimension_separator: Literal[".", "/"] | None = None + dimension_separator : Literal[".", "/"] | None = None The delimiter used for the chunk keys. - order: Literal["C", "F"] | None = None + order : Literal["C", "F"] | None = None The memory order of the array. - filters: list[dict[str, JSON]] | None = None + filters : list[dict[str, JSON]] | None = None Filters for the array. - compressor: dict[str, JSON] | None = None + compressor : dict[str, JSON] | None = None The compressor for the array. 
- exists_ok: bool = False + exists_ok : bool = False If True, a pre-existing array or group at the path of this array will be overwritten. If False, the presence of a pre-existing array or group is an error. @@ -965,7 +965,7 @@ async def create_dataset( ---------- name : str Array name. - kwargs : dict + **kwargs : dict Additional arguments passed to :func:`zarr.AsyncGroup.create_array`. Returns @@ -1368,7 +1368,7 @@ def get(self, path: str, default: DefaultT | None = None) -> Array | Group | Def Parameters ---------- - key : str + path : str Group member name. default : object Default value to return if key is not found (default: None). @@ -1516,8 +1516,6 @@ def require_group(self, name: str, **kwargs: Any) -> Group: ---------- name : str Group name. - overwrite : bool, optional - Overwrite any existing group with given `name` if present. Returns ------- @@ -1567,36 +1565,36 @@ def create_array( Parameters ---------- - name: str + name : str The name of the array. - shape: tuple[int, ...] + shape : tuple[int, ...] The shape of the array. - dtype: np.DtypeLike = float64 + dtype : np.DtypeLike = float64 The data type of the array. - chunk_shape: tuple[int, ...] | None = None + chunk_shape : tuple[int, ...] | None = None The shape of the chunks of the array. V3 only. - chunk_key_encoding: ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None + chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None A specification of how the chunk keys are represented in storage. - codecs: Iterable[Codec | dict[str, JSON]] | None = None + codecs : Iterable[Codec | dict[str, JSON]] | None = None An iterable of Codec or dict serializations thereof. The elements of this collection specify the transformation from array values to stored bytes. 
- dimension_names: Iterable[str] | None = None + dimension_names : Iterable[str] | None = None The names of the dimensions of the array. V3 only. - chunks: ChunkCoords | None = None + chunks : ChunkCoords | None = None The shape of the chunks of the array. V2 only. - dimension_separator: Literal[".", "/"] | None = None + dimension_separator : Literal[".", "/"] | None = None The delimiter used for the chunk keys. - order: Literal["C", "F"] | None = None + order : Literal["C", "F"] | None = None The memory order of the array. - filters: list[dict[str, JSON]] | None = None + filters : list[dict[str, JSON]] | None = None Filters for the array. - compressor: dict[str, JSON] | None = None + compressor : dict[str, JSON] | None = None The compressor for the array. - exists_ok: bool = False + exists_ok : bool = False If True, a pre-existing array or group at the path of this array will be overwritten. If False, the presence of a pre-existing array or group is an error. - data: npt.ArrayLike | None = None + data : npt.ArrayLike | None = None Array data to initialize the array with. Returns @@ -1638,7 +1636,7 @@ def create_dataset(self, name: str, **kwargs: Any) -> Array: ---------- name : str Array name. - kwargs : dict + **kwargs : dict Additional arguments passed to :func:`zarr.Group.create_array` Returns @@ -1663,13 +1661,8 @@ def require_dataset(self, name: str, **kwargs: Any) -> Array: ---------- name : str Array name. - shape : int or tuple of ints - Array shape. - dtype : str or dtype, optional - NumPy dtype. - exact : bool, optional - If True, require `dtype` to match exactly. If false, require - `dtype` can be cast from array dtype. + **kwargs : + See :func:`zarr.Group.create_dataset`. Returns ------- @@ -1690,13 +1683,8 @@ def require_array(self, name: str, **kwargs: Any) -> Array: ---------- name : str Array name. - shape : int or tuple of ints - Array shape. - dtype : str or dtype, optional - NumPy dtype. 
- exact : bool, optional - If True, require `dtype` to match exactly. If false, require - `dtype` can be cast from array dtype. + **kwargs : + See :func:`zarr.Group.create_array`. Returns ------- @@ -1772,36 +1760,36 @@ def array( Parameters ---------- - name: str + name : str The name of the array. - shape: tuple[int, ...] + shape : tuple[int, ...] The shape of the array. - dtype: np.DtypeLike = float64 + dtype : np.DtypeLike = float64 The data type of the array. - chunk_shape: tuple[int, ...] | None = None + chunk_shape : tuple[int, ...] | None = None The shape of the chunks of the array. V3 only. - chunk_key_encoding: ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None + chunk_key_encoding : ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] | tuple[Literal["v2"], Literal[".", "/"]] | None = None A specification of how the chunk keys are represented in storage. - codecs: Iterable[Codec | dict[str, JSON]] | None = None + codecs : Iterable[Codec | dict[str, JSON]] | None = None An iterable of Codec or dict serializations thereof. The elements of this collection specify the transformation from array values to stored bytes. - dimension_names: Iterable[str] | None = None + dimension_names : Iterable[str] | None = None The names of the dimensions of the array. V3 only. - chunks: ChunkCoords | None = None + chunks : ChunkCoords | None = None The shape of the chunks of the array. V2 only. - dimension_separator: Literal[".", "/"] | None = None + dimension_separator : Literal[".", "/"] | None = None The delimiter used for the chunk keys. - order: Literal["C", "F"] | None = None + order : Literal["C", "F"] | None = None The memory order of the array. - filters: list[dict[str, JSON]] | None = None + filters : list[dict[str, JSON]] | None = None Filters for the array. - compressor: dict[str, JSON] | None = None + compressor : dict[str, JSON] | None = None The compressor for the array. 
- exists_ok: bool = False + exists_ok : bool = False If True, a pre-existing array or group at the path of this array will be overwritten. If False, the presence of a pre-existing array or group is an error. - data: npt.ArrayLike | None = None + data : npt.ArrayLike | None = None Array data to initialize the array with. Returns diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index d2e29b3b55..bffe5270d6 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -113,13 +113,13 @@ def _iter_grid( Parameters ---------- - grid_shape: Sequence[int] + grid_shape : Sequence[int] The size of the domain to iterate over. - origin: Sequence[int] | None, default=None + origin : Sequence[int] | None, default=None The first coordinate of the domain to return. - selection_shape: Sequence[int] | None, default=None + selection_shape : Sequence[int] | None, default=None The shape of the selection. - order: Literal["lexicographic"], default="lexicographic" + order : Literal["lexicographic"], default="lexicographic" The linear indexing order to use. Returns @@ -310,7 +310,7 @@ def normalize_integer_selection(dim_sel: int, dim_len: int) -> int: class ChunkDimProjection(NamedTuple): """A mapping from chunk to output array for a single dimension. - Parameters + Attributes ---------- dim_chunk_ix Index of chunk. @@ -482,7 +482,7 @@ class ChunkProjection(NamedTuple): chunk array for loading into an output array. Can also be used to extract items from a value array for setting/updating in a chunk array. - Parameters + Attributes ---------- chunk_coords Indices of chunk. diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index c5f34d2776..2e18336050 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -266,12 +266,13 @@ def parse_fill_value(fill_value: object, dtype: np.dtype[Any]) -> Any: Parameters ---------- - fill_value: Any + fill_value : Any A potential fill value. 
- dtype: np.dtype[Any] + dtype : np.dtype[Any] A numpy dtype. Returns + ------- An instance of `dtype`, or `None`, or any python object (in the case of an object dtype) """ diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 8aedd2b7b6..e9d2f92d8a 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -423,9 +423,9 @@ def parse_fill_value( Parameters ---------- - fill_value: Any + fill_value : Any A potential fill value. - dtype: str + dtype : str A valid Zarr V3 DataType. Returns diff --git a/src/zarr/storage/common.py b/src/zarr/storage/common.py index b640a7729b..9ed8c274d9 100644 --- a/src/zarr/storage/common.py +++ b/src/zarr/storage/common.py @@ -124,7 +124,7 @@ async def exists(self) -> bool: return await self.store.exists(self.path) def __truediv__(self, other: str) -> StorePath: - """combine this store path with another path""" + """Combine this store path with another path""" return self.__class__(self.store, _dereference_path(self.path, other)) def __str__(self) -> str: @@ -191,7 +191,7 @@ async def make_store_path( ---------- store_like : StoreLike | None The object to convert to a `StorePath` object. - path: str | None, optional + path : str | None, optional The path to use when creating the `StorePath` object. If None, the default path is the empty string. mode : AccessModeLiteral | None, optional @@ -286,9 +286,9 @@ async def ensure_no_existing_node(store_path: StorePath, zarr_format: ZarrFormat Parameters ---------- - store_path: StorePath + store_path : StorePath The storage location to check. - zarr_format: ZarrFormat + zarr_format : ZarrFormat The Zarr format to check. Raises @@ -318,7 +318,7 @@ async def _contains_node_v3(store_path: StorePath) -> Literal["array", "group", Parameters ---------- - store_path: StorePath + store_path : StorePath The location in storage to check. 
Returns @@ -352,7 +352,7 @@ async def _contains_node_v2(store_path: StorePath) -> Literal["array", "group", Parameters ---------- - store_path: StorePath + store_path : StorePath The location in storage to check. Returns @@ -379,9 +379,9 @@ async def contains_array(store_path: StorePath, zarr_format: ZarrFormat) -> bool Parameters ---------- - store_path: StorePath + store_path : StorePath The StorePath to check for an existing group. - zarr_format: + zarr_format : The zarr format to check for. Returns @@ -415,9 +415,9 @@ async def contains_group(store_path: StorePath, zarr_format: ZarrFormat) -> bool Parameters ---------- - store_path: StorePath + store_path : StorePath The StorePath to check for an existing group. - zarr_format: + zarr_format : The zarr format to check for. Returns diff --git a/src/zarr/storage/logging.py b/src/zarr/storage/logging.py index a29661729f..66fd1687e8 100644 --- a/src/zarr/storage/logging.py +++ b/src/zarr/storage/logging.py @@ -22,16 +22,16 @@ class LoggingStore(Store): Parameters ---------- - store: Store + store : Store Store to wrap - log_level: str + log_level : str Log level - log_handler: logging.Handler + log_handler : logging.Handler Log handler Attributes ---------- - counter: dict + counter : dict Counter of number of times each method has been called """ diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index 673c2a75d5..a5e5e28ef8 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -187,7 +187,7 @@ class GpuMemoryStore(MemoryStore): Parameters ---------- - store_dict: MutableMapping, optional + store_dict : MutableMapping, optional A mutable mapping with string keys and :class:`zarr.core.buffer.gpu.Buffer` values. """ @@ -218,7 +218,7 @@ def from_dict(cls, store_dict: MutableMapping[str, Buffer]) -> Self: Parameters ---------- - store_dict: mapping + store_dict : mapping A mapping of strings keys to arbitrary Buffers. The buffer data will be moved into a :class:`gpu.Buffer`. 
diff --git a/tests/test_api.py b/tests/test_api.py index 4952254f65..9b7b4f8b9a 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -116,7 +116,7 @@ async def test_open_group(memory_store: MemoryStore) -> None: async def test_open_group_unspecified_version( tmpdir: pathlib.Path, zarr_format: ZarrFormat ) -> None: - """regression test for https://github.com/zarr-developers/zarr-python/issues/2175""" + """Regression test for https://github.com/zarr-developers/zarr-python/issues/2175""" # create a group with specified zarr format (could be 2, 3, or None) _ = await zarr.api.asynchronous.open_group( From 9dd9ac640f215dc1f9176979940b9f419e51e25a Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Sat, 19 Oct 2024 17:55:14 +0200 Subject: [PATCH 02/12] Enforce ruff/refurb rules (FURB) (#2373) * Apply ruff/refurb rule FURB110 FURB110 Replace ternary `if` expression with `or` operator * Apply ruff/refurb rule FURB118 FURB118 Use `operator.itemgetter(0)` instead of defining a lambda * Apply ruff/refurb rule FURB140 FURB140 Use `itertools.starmap` instead of the generator * Apply ruff/refurb rule FURB188 FURB188 Prefer `removesuffix` over conditionally replacing with slice. 
* Apply ruff/refurb rules (FURB) --------- Co-authored-by: Joe Hamman --- pyproject.toml | 1 + src/zarr/abc/store.py | 3 ++- src/zarr/core/array.py | 5 +++-- src/zarr/core/chunk_grids.py | 2 +- src/zarr/core/common.py | 3 ++- src/zarr/core/indexing.py | 4 ++-- src/zarr/storage/memory.py | 3 +-- src/zarr/storage/zip.py | 3 +-- tests/test_group.py | 19 ++++++++++++------- 9 files changed, 25 insertions(+), 18 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 80cb71ff83..84ec8b9a5d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -214,6 +214,7 @@ extend-select = [ "B", # flake8-bugbear "C4", # flake8-comprehensions "FLY", # flynt + "FURB", # refurb "G", # flake8-logging-format "I", # isort "ISC", # flake8-implicit-str-concat diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index 3e233e8a1d..a995a6bf38 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from asyncio import gather +from itertools import starmap from typing import TYPE_CHECKING, NamedTuple, Protocol, runtime_checkable if TYPE_CHECKING: @@ -282,7 +283,7 @@ async def _set_many(self, values: Iterable[tuple[str, Buffer]]) -> None: """ Insert multiple (key, value) pairs into storage. """ - await gather(*(self.set(key, value) for key, value in values)) + await gather(*starmap(self.set, values)) return @property diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 0418d1dc52..6e3430c41a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -3,6 +3,7 @@ import json from asyncio import gather from dataclasses import dataclass, field, replace +from itertools import starmap from logging import getLogger from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload @@ -816,7 +817,7 @@ def cdata_shape(self) -> ChunkCoords: Tuple[int] The shape of the chunk grid for this array. 
""" - return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) + return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=False))) @property def nchunks(self) -> int: @@ -1385,7 +1386,7 @@ def cdata_shape(self) -> ChunkCoords: """ The shape of the chunk grid for this array. """ - return tuple(ceildiv(s, c) for s, c in zip(self.shape, self.chunks, strict=False)) + return tuple(starmap(ceildiv, zip(self.shape, self.chunks, strict=False))) @property def nchunks(self) -> int: diff --git a/src/zarr/core/chunk_grids.py b/src/zarr/core/chunk_grids.py index aace45d438..ed7f8a1f45 100644 --- a/src/zarr/core/chunk_grids.py +++ b/src/zarr/core/chunk_grids.py @@ -188,6 +188,6 @@ def all_chunk_coords(self, array_shape: ChunkCoords) -> Iterator[ChunkCoords]: def get_nchunks(self, array_shape: ChunkCoords) -> int: return reduce( operator.mul, - (ceildiv(s, c) for s, c in zip(array_shape, self.chunk_shape, strict=True)), + itertools.starmap(ceildiv, zip(array_shape, self.chunk_shape, strict=True)), 1, ) diff --git a/src/zarr/core/common.py b/src/zarr/core/common.py index 0bc6245cb5..f3f49b0d5d 100644 --- a/src/zarr/core/common.py +++ b/src/zarr/core/common.py @@ -5,6 +5,7 @@ import operator from collections.abc import Iterable, Mapping from enum import Enum +from itertools import starmap from typing import ( TYPE_CHECKING, Any, @@ -52,7 +53,7 @@ async def concurrent_map( items: Iterable[T], func: Callable[..., Awaitable[V]], limit: int | None = None ) -> list[V]: if limit is None: - return await asyncio.gather(*[func(*item) for item in items]) + return await asyncio.gather(*list(starmap(func, items))) else: sem = asyncio.Semaphore(limit) diff --git a/src/zarr/core/indexing.py b/src/zarr/core/indexing.py index bffe5270d6..f1d5fd16d1 100644 --- a/src/zarr/core/indexing.py +++ b/src/zarr/core/indexing.py @@ -1129,7 +1129,7 @@ def __init__( chunks_multi_index_broadcast = np.broadcast_arrays(*chunks_multi_index) # remember shape of selection, because 
we will flatten indices for processing - sel_shape = selection_broadcast[0].shape if selection_broadcast[0].shape else (1,) + sel_shape = selection_broadcast[0].shape or (1,) # flatten selection selection_broadcast = tuple(dim_sel.reshape(-1) for dim_sel in selection_broadcast) @@ -1150,7 +1150,7 @@ def __init__( else: sel_sort = None - shape = selection_broadcast[0].shape if selection_broadcast[0].shape else (1,) + shape = selection_broadcast[0].shape or (1,) # precompute number of selected items for each chunk chunk_nitems = np.bincount(chunks_raveled_indices, minlength=nchunks) diff --git a/src/zarr/storage/memory.py b/src/zarr/storage/memory.py index a5e5e28ef8..f942d57b95 100644 --- a/src/zarr/storage/memory.py +++ b/src/zarr/storage/memory.py @@ -156,8 +156,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - if prefix.endswith("/"): - prefix = prefix[:-1] + prefix = prefix.removesuffix("/") if prefix == "": keys_unique = {k.split("/")[0] for k in self._store_dict} diff --git a/src/zarr/storage/zip.py b/src/zarr/storage/zip.py index c9cb579586..204a381bdb 100644 --- a/src/zarr/storage/zip.py +++ b/src/zarr/storage/zip.py @@ -245,8 +245,7 @@ async def list_prefix(self, prefix: str) -> AsyncGenerator[str, None]: async def list_dir(self, prefix: str) -> AsyncGenerator[str, None]: # docstring inherited - if prefix.endswith("/"): - prefix = prefix[:-1] + prefix = prefix.removesuffix("/") keys = self._zf.namelist() seen = set() diff --git a/tests/test_group.py b/tests/test_group.py index 2530f64ff4..f4063f6ef9 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -1,6 +1,7 @@ from __future__ import annotations import contextlib +import operator import pickle import warnings from typing import TYPE_CHECKING, Any, Literal @@ -533,14 +534,14 @@ def test_group_child_iterators(store: Store, zarr_format: ZarrFormat, consolidat ConsolidatedMetadata(metadata={}), 
) - result = sorted(group.groups(), key=lambda x: x[0]) + result = sorted(group.groups(), key=operator.itemgetter(0)) assert result == expected_groups - assert sorted(group.groups(), key=lambda x: x[0]) == expected_groups + assert sorted(group.groups(), key=operator.itemgetter(0)) == expected_groups assert sorted(group.group_keys()) == expected_group_keys assert sorted(group.group_values(), key=lambda x: x.name) == expected_group_values - assert sorted(group.arrays(), key=lambda x: x[0]) == expected_arrays + assert sorted(group.arrays(), key=operator.itemgetter(0)) == expected_arrays assert sorted(group.array_keys()) == expected_array_keys assert sorted(group.array_values(), key=lambda x: x.name) == expected_array_values @@ -1000,7 +1001,7 @@ async def test_group_members_async(store: Store, consolidated_metadata: bool) -> g2 = await g1.create_group("g2") # immediate children - children = sorted([x async for x in group.members()], key=lambda x: x[0]) + children = sorted([x async for x in group.members()], key=operator.itemgetter(0)) assert children == [ ("a0", a0), ("g0", g0), @@ -1010,7 +1011,7 @@ async def test_group_members_async(store: Store, consolidated_metadata: bool) -> assert nmembers == 2 # partial - children = sorted([x async for x in group.members(max_depth=1)], key=lambda x: x[0]) + children = sorted([x async for x in group.members(max_depth=1)], key=operator.itemgetter(0)) expected = [ ("a0", a0), ("g0", g0), @@ -1022,7 +1023,9 @@ async def test_group_members_async(store: Store, consolidated_metadata: bool) -> assert nmembers == 4 # all children - all_children = sorted([x async for x in group.members(max_depth=None)], key=lambda x: x[0]) + all_children = sorted( + [x async for x in group.members(max_depth=None)], key=operator.itemgetter(0) + ) expected = [ ("a0", a0), ("g0", g0), @@ -1053,7 +1056,9 @@ async def test_group_members_async(store: Store, consolidated_metadata: bool) -> "consolidated_metadata", None, ) - all_children = sorted([x async for x 
in group.members(max_depth=None)], key=lambda x: x[0]) + all_children = sorted( + [x async for x in group.members(max_depth=None)], key=operator.itemgetter(0) + ) assert len(all_children) == 4 nmembers = await group.nmembers(max_depth=None) assert nmembers == 4 From 649915f1e1b457ba45fccdac7ec862d335d25136 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sat, 19 Oct 2024 15:54:26 -0700 Subject: [PATCH 03/12] chore(ci): removed unused codeql action (#2414) --- .github/workflows/codeql-analysis.yml | 72 --------------------------- 1 file changed, 72 deletions(-) delete mode 100644 .github/workflows/codeql-analysis.yml diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml deleted file mode 100644 index bb3d433629..0000000000 --- a/.github/workflows/codeql-analysis.yml +++ /dev/null @@ -1,72 +0,0 @@ -# For most projects, this workflow file will not need changing; you simply need -# to commit it to your repository. -# -# You may wish to alter this file to override the set of languages analyzed, -# or to provide custom queries or build logic. -# -# ******** NOTE ******** -# We have attempted to detect the languages in your repository. Please check -# the `language` matrix defined below to confirm you have the correct set of -# supported CodeQL languages. -# -name: "CodeQL" - -on: - push: - branches: [ "main" ] - pull_request: - # The branches below must be a subset of the branches above - branches: [ "main" ] - schedule: - - cron: '29 0 * * 1' - -jobs: - analyze: - name: Analyze - runs-on: ubuntu-latest - permissions: - actions: read - contents: read - security-events: write - - strategy: - fail-fast: false - matrix: - language: [ 'python' ] - # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] - # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support - - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - # Initializes the CodeQL tools for scanning. 
- - name: Initialize CodeQL - uses: github/codeql-action/init@v3 - with: - languages: ${{ matrix.language }} - # If you wish to specify custom queries, you can do so here or in a config file. - # By default, queries listed here will override any specified in a config file. - # Prefix the list here with "+" to use these queries and those in the config file. - - # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs - # queries: security-extended,security-and-quality - - - # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). - # If this step fails, then you should remove it and run the build manually (see below) - - name: Autobuild - uses: github/codeql-action/autobuild@v3 - - # ℹ️ Command-line programs to run using the OS shell. - # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun - - # If the Autobuild fails above, remove it and uncomment the following three lines. - # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 
- - # - run: | - # echo "Run, Build Application using script" - # ./location_of_script_within_repo/buildscript.sh - - - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v3 From 329612e6cc809091fe6458b6c45d8c6add96dd8b Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Sun, 20 Oct 2024 08:27:23 -0700 Subject: [PATCH 04/12] fix(group): deprecate positional args in Group.{zeros,ones,etc.} (#2416) * fix(group): deprecate positional args in Group.{zeros,ones,etc.} * fixup --- src/zarr/core/group.py | 11 +++++++++++ tests/test_group.py | 20 ++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/src/zarr/core/group.py b/src/zarr/core/group.py index ba68213574..6e54b7ec9b 100644 --- a/src/zarr/core/group.py +++ b/src/zarr/core/group.py @@ -14,6 +14,7 @@ from typing_extensions import deprecated import zarr.api.asynchronous as async_api +from zarr._compat import _deprecate_positional_args from zarr.abc.metadata import Metadata from zarr.abc.store import Store, set_or_delete from zarr.core.array import Array, AsyncArray, _build_parents @@ -1531,6 +1532,7 @@ def create(self, *args: Any, **kwargs: Any) -> Array: # Backwards compatibility for 2.x return self.create_array(*args, **kwargs) + @_deprecate_positional_args def create_array( self, name: str, @@ -1692,15 +1694,19 @@ def require_array(self, name: str, **kwargs: Any) -> Array: """ return Array(self._sync(self._async_group.require_array(name, **kwargs))) + @_deprecate_positional_args def empty(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.empty(name=name, shape=shape, **kwargs))) + @_deprecate_positional_args def zeros(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.zeros(name=name, shape=shape, **kwargs))) + @_deprecate_positional_args def ones(self, *, name: str, shape: ChunkCoords, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.ones(name=name, 
shape=shape, **kwargs))) + @_deprecate_positional_args def full( self, *, name: str, shape: ChunkCoords, fill_value: Any | None, **kwargs: Any ) -> Array: @@ -1710,15 +1716,19 @@ def full( ) ) + @_deprecate_positional_args def empty_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.empty_like(name=name, data=data, **kwargs))) + @_deprecate_positional_args def zeros_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.zeros_like(name=name, data=data, **kwargs))) + @_deprecate_positional_args def ones_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.ones_like(name=name, data=data, **kwargs))) + @_deprecate_positional_args def full_like(self, *, name: str, data: async_api.ArrayLike, **kwargs: Any) -> Array: return Array(self._sync(self._async_group.full_like(name=name, data=data, **kwargs))) @@ -1726,6 +1736,7 @@ def move(self, source: str, dest: str) -> None: return self._sync(self._async_group.move(source, dest)) @deprecated("Use Group.create_array instead.") + @_deprecate_positional_args def array( self, name: str, diff --git a/tests/test_group.py b/tests/test_group.py index f4063f6ef9..21e4ef4e50 100644 --- a/tests/test_group.py +++ b/tests/test_group.py @@ -1334,3 +1334,23 @@ def test_update_attrs() -> None: ) root.attrs["foo"] = "bar" assert root.attrs["foo"] == "bar" + + +@pytest.mark.parametrize("method", ["empty", "zeros", "ones", "full"]) +def test_group_deprecated_positional_args(method: str) -> None: + if method == "full": + kwargs = {"fill_value": 0} + else: + kwargs = {} + + root = zarr.group() + with pytest.warns(FutureWarning, match=r"Pass name=.* as keyword args."): + arr = getattr(root, method)("foo", shape=1, **kwargs) + assert arr.shape == (1,) + + method += "_like" + data = np.ones(1) + + with pytest.warns(FutureWarning, match=r"Pass name=.*, 
data=.* as keyword args."): + arr = getattr(root, method)("foo_like", data, **kwargs) + assert arr.shape == data.shape From 37fde7defadf13b6894487987e99e098f027dcf6 Mon Sep 17 00:00:00 2001 From: Dimitri Papadopoulos Orfanos <3234522+DimitriPapadopoulos@users.noreply.github.com> Date: Mon, 21 Oct 2024 19:09:13 +0200 Subject: [PATCH 05/12] Get rid of pep8speaks - replaced by ruff (#2421) --- .pep8speaks.yml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .pep8speaks.yml diff --git a/.pep8speaks.yml b/.pep8speaks.yml deleted file mode 100644 index a000ded163..0000000000 --- a/.pep8speaks.yml +++ /dev/null @@ -1,4 +0,0 @@ -pycodestyle: - max-line-length: 100 - exclude: - - docs From 1e8c0a80e600282c8cd21ed02aa60d98ced5ae9f Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 21 Oct 2024 12:42:56 -0700 Subject: [PATCH 06/12] test(ci): add test environment for upstream dependencies (#2418) --- .github/workflows/test.yml | 27 +++++++++++++++++++++++++++ pyproject.toml | 29 +++++++++++++++++++++++++++++ src/zarr/codecs/zstd.py | 7 ++++--- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 3bd6226922..e09975e444 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -43,3 +43,30 @@ jobs: - name: Run Tests run: | hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run + + upstream: + name: py=${{ matrix.python-version }}-upstream + + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.13'] + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: 'pip' + - name: Install Hatch + run: | + python -m pip install --upgrade pip + pip install hatch + - name: Set Up Hatch Env + run: | + hatch env create upstream + hatch env run -e upstream list-env + - name: Run Tests + run: | + hatch env run --env 
upstream run diff --git a/pyproject.toml b/pyproject.toml index 84ec8b9a5d..e3d8a310b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -183,6 +183,35 @@ features = ['docs'] build = "cd docs && make html" serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" +[tool.hatch.envs.upstream] +dependencies = [ + 'numpy', # from scientific-python-nightly-wheels + 'numcodecs @ git+https://github.com/zarr-developers/numcodecs', + 'fsspec @ git+https://github.com/fsspec/filesystem_spec', + 's3fs @ git+https://github.com/fsspec/s3fs', + 'universal_pathlib @ git+https://github.com/fsspec/universal_pathlib', + 'crc32c @ git+https://github.com/ICRAR/crc32c', + 'typing_extensions @ git+https://github.com/python/typing_extensions', + 'donfig @ git+https://github.com/pytroll/donfig', + # test deps + 'hypothesis', + 'pytest', + 'pytest-cov', + 'pytest-asyncio', + 'moto[s3]', +] + +[tool.hatch.envs.upstream.env-vars] +PIP_INDEX_URL = "https://pypi.anaconda.org/scientific-python-nightly-wheels/simple/" +PIP_EXTRA_INDEX_URL = "https://pypi.org/simple/" +PIP_PRE = "1" + +[tool.hatch.envs.upstream.scripts] +run = "pytest --verbose" +run-mypy = "mypy src" +run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*" +list-env = "pip list" + [tool.ruff] line-length = 100 force-exclude = true diff --git a/src/zarr/codecs/zstd.py b/src/zarr/codecs/zstd.py index 949f762b20..b4a4a13c29 100644 --- a/src/zarr/codecs/zstd.py +++ b/src/zarr/codecs/zstd.py @@ -3,10 +3,11 @@ import asyncio from dataclasses import dataclass from functools import cached_property -from importlib.metadata import version from typing import TYPE_CHECKING +import numcodecs from numcodecs.zstd import Zstd +from packaging.version import Version from zarr.abc.codec import BytesBytesCodec from zarr.core.buffer.cpu import as_numpy_array_wrapper @@ -43,8 +44,8 @@ class ZstdCodec(BytesBytesCodec): def __init__(self, *, level: int = 0, checksum: bool = False) -> None: # 
numcodecs 0.13.0 introduces the checksum attribute for the zstd codec - _numcodecs_version = tuple(map(int, version("numcodecs").split("."))) - if _numcodecs_version < (0, 13, 0): # pragma: no cover + _numcodecs_version = Version(numcodecs.__version__) + if _numcodecs_version < Version("0.13.0"): raise RuntimeError( "numcodecs version >= 0.13.0 is required to use the zstd codec. " f"Version {_numcodecs_version} is currently installed." From 4a18c5a597f5849f01ae4da24f690449100f0408 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 21 Oct 2024 12:43:23 -0700 Subject: [PATCH 07/12] fix(array): thread order parameter through to Array.__init__ (#2417) --- src/zarr/api/asynchronous.py | 10 +++------- src/zarr/core/array.py | 35 +++++++++++++++++------------------ src/zarr/core/array_spec.py | 8 ++++---- src/zarr/core/metadata/v2.py | 8 ++++---- src/zarr/core/metadata/v3.py | 3 ++- tests/test_api.py | 18 +++++++++++++++++- tests/test_array.py | 21 ++++++++++++++++++++- 7 files changed, 67 insertions(+), 36 deletions(-) diff --git a/src/zarr/api/asynchronous.py b/src/zarr/api/asynchronous.py index 2c423ff59b..680433565e 100644 --- a/src/zarr/api/asynchronous.py +++ b/src/zarr/api/asynchronous.py @@ -712,7 +712,7 @@ async def create( dtype: npt.DTypeLike | None = None, compressor: dict[str, JSON] | None = None, # TODO: default and type change fill_value: Any | None = 0, # TODO: need type - order: MemoryOrder | None = None, # TODO: default change + order: MemoryOrder | None = None, store: str | StoreLike | None = None, synchronizer: Any | None = None, overwrite: bool = False, @@ -761,6 +761,7 @@ async def create( Default value to use for uninitialized portions of the array. order : {'C', 'F'}, optional Memory layout to be used within each chunk. + Default is set in Zarr's config (`array.order`). store : Store or str Store or path to directory in file system or name of zip file. 
synchronizer : object, optional @@ -834,12 +835,6 @@ async def create( else: chunk_shape = shape - if order is not None: - warnings.warn( - "order is deprecated, use config `array.order` instead", - DeprecationWarning, - stacklevel=2, - ) if synchronizer is not None: warnings.warn("synchronizer is not yet implemented", RuntimeWarning, stacklevel=2) if chunk_store is not None: @@ -889,6 +884,7 @@ async def create( codecs=codecs, dimension_names=dimension_names, attributes=attributes, + order=order, **kwargs, ) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index 6e3430c41a..bdafa33f67 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -33,6 +33,7 @@ ZARRAY_JSON, ZATTRS_JSON, ChunkCoords, + MemoryOrder, ShapeLike, ZarrFormat, concurrent_map, @@ -203,14 +204,14 @@ class AsyncArray(Generic[T_ArrayMetadata]): metadata: T_ArrayMetadata store_path: StorePath codec_pipeline: CodecPipeline = field(init=False) - order: Literal["C", "F"] + order: MemoryOrder @overload def __init__( self: AsyncArray[ArrayV2Metadata], metadata: ArrayV2Metadata | ArrayV2MetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: ... @overload @@ -218,14 +219,14 @@ def __init__( self: AsyncArray[ArrayV3Metadata], metadata: ArrayV3Metadata | ArrayV3MetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: ... 
def __init__( self, metadata: ArrayMetadata | ArrayMetadataDict, store_path: StorePath, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, ) -> None: if isinstance(metadata, dict): zarr_format = metadata["zarr_format"] @@ -261,7 +262,7 @@ async def create( attributes: dict[str, JSON] | None = None, chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -350,7 +351,7 @@ async def create( # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -382,7 +383,7 @@ async def create( # v2 only chunks: ShapeLike | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -422,7 +423,6 @@ async def create( V2 only. V3 arrays cannot have a dimension separator. order : Literal["C", "F"], optional The order of the array (default is None). - V2 only. V3 arrays should not have 'order' parameter. filters : list[dict[str, JSON]], optional The filters used to compress the data (default is None). V2 only. V3 arrays should not have 'filters' parameter. @@ -471,10 +471,6 @@ async def create( raise ValueError( "dimension_separator cannot be used for arrays with version 3. Use chunk_key_encoding instead." ) - if order is not None: - raise ValueError( - "order cannot be used for arrays with version 3. Use a transpose codec instead." - ) if filters is not None: raise ValueError( "filters cannot be used for arrays with version 3. 
Use array-to-array codecs instead." @@ -494,6 +490,7 @@ async def create( dimension_names=dimension_names, attributes=attributes, exists_ok=exists_ok, + order=order, ) elif zarr_format == 2: if dtype is str or dtype == "str": @@ -545,6 +542,7 @@ async def _create_v3( dtype: npt.DTypeLike, chunk_shape: ChunkCoords, fill_value: Any | None = None, + order: MemoryOrder | None = None, chunk_key_encoding: ( ChunkKeyEncoding | tuple[Literal["default"], Literal[".", "/"]] @@ -588,7 +586,7 @@ async def _create_v3( attributes=attributes or {}, ) - array = cls(metadata=metadata, store_path=store_path) + array = cls(metadata=metadata, store_path=store_path, order=order) await array._save_metadata(metadata, ensure_parents=True) return array @@ -602,7 +600,7 @@ async def _create_v2( chunks: ChunkCoords, dimension_separator: Literal[".", "/"] | None = None, fill_value: None | float = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, attributes: dict[str, JSON] | None = None, @@ -610,8 +608,9 @@ async def _create_v2( ) -> AsyncArray[ArrayV2Metadata]: if not exists_ok: await ensure_no_existing_node(store_path, zarr_format=2) + if order is None: - order = "C" + order = parse_indexing_order(config.get("array.order")) if dimension_separator is None: dimension_separator = "." 
@@ -627,7 +626,7 @@ async def _create_v2( filters=filters, attributes=attributes, ) - array = cls(metadata=metadata, store_path=store_path) + array = cls(metadata=metadata, store_path=store_path, order=order) await array._save_metadata(metadata, ensure_parents=True) return array @@ -1179,7 +1178,7 @@ def create( # v2 only chunks: ChunkCoords | None = None, dimension_separator: Literal[".", "/"] | None = None, - order: Literal["C", "F"] | None = None, + order: MemoryOrder | None = None, filters: list[dict[str, JSON]] | None = None, compressor: dict[str, JSON] | None = None, # runtime @@ -1370,7 +1369,7 @@ def store_path(self) -> StorePath: return self._async_array.store_path @property - def order(self) -> Literal["C", "F"]: + def order(self) -> MemoryOrder: return self._async_array.order @property diff --git a/src/zarr/core/array_spec.py b/src/zarr/core/array_spec.py index e84a81cb05..c4d9c363fa 100644 --- a/src/zarr/core/array_spec.py +++ b/src/zarr/core/array_spec.py @@ -1,11 +1,11 @@ from __future__ import annotations from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Literal +from typing import TYPE_CHECKING, Any import numpy as np -from zarr.core.common import parse_fill_value, parse_order, parse_shapelike +from zarr.core.common import MemoryOrder, parse_fill_value, parse_order, parse_shapelike if TYPE_CHECKING: from zarr.core.buffer import BufferPrototype @@ -17,7 +17,7 @@ class ArraySpec: shape: ChunkCoords dtype: np.dtype[Any] fill_value: Any - order: Literal["C", "F"] + order: MemoryOrder prototype: BufferPrototype def __init__( @@ -25,7 +25,7 @@ def __init__( shape: ChunkCoords, dtype: np.dtype[Any], fill_value: Any, - order: Literal["C", "F"], + order: MemoryOrder, prototype: BufferPrototype, ) -> None: shape_parsed = parse_shapelike(shape) diff --git a/src/zarr/core/metadata/v2.py b/src/zarr/core/metadata/v2.py index 2e18336050..f18f2e4e8d 100644 --- a/src/zarr/core/metadata/v2.py +++ b/src/zarr/core/metadata/v2.py @@ -25,7 +25,7 
@@ from zarr.core.array_spec import ArraySpec from zarr.core.chunk_grids import RegularChunkGrid from zarr.core.chunk_key_encodings import parse_separator -from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, parse_shapelike +from zarr.core.common import ZARRAY_JSON, ZATTRS_JSON, MemoryOrder, parse_shapelike from zarr.core.config import config, parse_indexing_order from zarr.core.metadata.common import parse_attributes @@ -45,7 +45,7 @@ class ArrayV2Metadata(Metadata): chunks: tuple[int, ...] dtype: np.dtype[Any] fill_value: None | int | float | str | bytes = 0 - order: Literal["C", "F"] = "C" + order: MemoryOrder = "C" filters: tuple[numcodecs.abc.Codec, ...] | None = None dimension_separator: Literal[".", "/"] = "." compressor: numcodecs.abc.Codec | None = None @@ -59,7 +59,7 @@ def __init__( dtype: npt.DTypeLike, chunks: ChunkCoords, fill_value: Any, - order: Literal["C", "F"], + order: MemoryOrder, dimension_separator: Literal[".", "/"] = ".", compressor: numcodecs.abc.Codec | dict[str, JSON] | None = None, filters: Iterable[numcodecs.abc.Codec | dict[str, JSON]] | None = None, @@ -185,7 +185,7 @@ def to_dict(self) -> dict[str, JSON]: return zarray_dict def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + self, _chunk_coords: ChunkCoords, order: MemoryOrder, prototype: BufferPrototype ) -> ArraySpec: return ArraySpec( shape=self.chunks, diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index e9d2f92d8a..6b6f28dd96 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -31,6 +31,7 @@ JSON, ZARR_JSON, ChunkCoords, + MemoryOrder, parse_named_configuration, parse_shapelike, ) @@ -289,7 +290,7 @@ def ndim(self) -> int: return len(self.shape) def get_chunk_spec( - self, _chunk_coords: ChunkCoords, order: Literal["C", "F"], prototype: BufferPrototype + self, _chunk_coords: ChunkCoords, order: MemoryOrder, prototype: BufferPrototype ) -> ArraySpec: assert 
isinstance( self.chunk_grid, RegularChunkGrid diff --git a/tests/test_api.py b/tests/test_api.py index 9b7b4f8b9a..5b62e3a2fa 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -21,7 +21,7 @@ save_array, save_group, ) -from zarr.core.common import ZarrFormat +from zarr.core.common import MemoryOrder, ZarrFormat from zarr.errors import MetadataValidationError from zarr.storage._utils import normalize_path from zarr.storage.memory import MemoryStore @@ -206,6 +206,22 @@ def test_open_with_mode_w_minus(tmp_path: pathlib.Path) -> None: zarr.open(store=tmp_path, mode="w-") +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_array_order(order: MemoryOrder | None, zarr_format: ZarrFormat) -> None: + arr = zarr.ones(shape=(2, 2), order=order, zarr_format=zarr_format) + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError + + # def test_lazy_loader(): # foo = np.arange(100) # bar = np.arange(100, 0, -1) diff --git a/tests/test_array.py b/tests/test_array.py index 829a04d304..f182cb1a14 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -10,7 +10,7 @@ from zarr.codecs import BytesCodec, VLenBytesCodec from zarr.core.array import chunks_initialized from zarr.core.buffer.cpu import NDBuffer -from zarr.core.common import JSON, ZarrFormat +from zarr.core.common import JSON, MemoryOrder, ZarrFormat from zarr.core.group import AsyncGroup from zarr.core.indexing import ceildiv from zarr.core.sync import sync @@ -417,3 +417,22 @@ def test_update_attrs(zarr_format: int) -> None: arr2 = zarr.open_array(store=store, zarr_format=zarr_format) assert arr2.attrs["foo"] == "bar" + + +@pytest.mark.parametrize("order", ["C", "F", None]) +@pytest.mark.parametrize("zarr_format", [2, 3]) +@pytest.mark.parametrize("store", 
["memory"], indirect=True) +def test_array_create_order( + order: MemoryOrder | None, zarr_format: int, store: MemoryStore +) -> None: + arr = Array.create(store=store, shape=(2, 2), order=order, zarr_format=zarr_format, dtype="i4") + expected = order or zarr.config.get("array.order") + assert arr.order == expected + + vals = np.asarray(arr) + if expected == "C": + assert vals.flags.c_contiguous + elif expected == "F": + assert vals.flags.f_contiguous + else: + raise AssertionError From a9d6d74c4a4d4efcdb64015919d0a5685a7e0239 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Mon, 21 Oct 2024 13:26:22 -0700 Subject: [PATCH 08/12] test(ci): Add CI for minimum supported dependency versions (#2423) * test(ci): add test environment for upstream dependencies * try PIP_INDEX_URL again * test(ci): add test environment for oldest supported dependency versions * use a matrix again --- .github/workflows/test.yml | 19 ++++++++++------- pyproject.toml | 43 ++++++++++++++++++++++++++++++++------ src/zarr/storage/remote.py | 10 ++++++--- 3 files changed, 56 insertions(+), 16 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e09975e444..d32f6f793c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,14 +44,19 @@ jobs: run: | hatch env run --env test.py${{ matrix.python-version }}-${{ matrix.numpy-version }}-${{ matrix.dependency-set }} run - upstream: - name: py=${{ matrix.python-version }}-upstream + test-upstream-and-min-deps: + name: py=${{ matrix.python-version }}-${{ matrix.dependency-set }} runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.13'] - + python-version: ['3.11', "3.13"] + dependency-set: ["upstream", "min_deps"] + exclude: + - python-version: "3.13" + dependency-set: min_deps + - python-version: "3.11" + dependency-set: upstream steps: - uses: actions/checkout@v4 - name: Set up Python @@ -65,8 +70,8 @@ jobs: pip install hatch - name: Set Up Hatch Env run: | - hatch env create upstream - 
hatch env run -e upstream list-env + hatch env create ${{ matrix.dependency-set }} + hatch env run -e ${{ matrix.dependency-set }} list-env - name: Run Tests run: | - hatch env run --env upstream run + hatch env run --env ${{ matrix.dependency-set }} run diff --git a/pyproject.toml b/pyproject.toml index e3d8a310b1..ef2ccd9469 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,12 +28,13 @@ requires-python = ">=3.11" dependencies = [ 'asciitree', 'numpy>=1.25', - 'numcodecs>=0.10.2', - 'fsspec>2024', - 'crc32c', - 'typing_extensions', - 'donfig', + 'numcodecs>=0.13', + 'fsspec>=2022.10.0', + 'crc32c>=2.3', + 'typing_extensions>=4.6', + 'donfig>=0.8', ] + dynamic = [ "version", ] @@ -98,7 +99,7 @@ extra = [ ] optional = [ 'lmdb', - 'universal-pathlib', + 'universal-pathlib>=0.0.22', ] [project.urls] @@ -184,6 +185,7 @@ build = "cd docs && make html" serve = "sphinx-autobuild docs docs/_build --host 0.0.0.0" [tool.hatch.envs.upstream] +python = "3.13" dependencies = [ 'numpy', # from scientific-python-nightly-wheels 'numcodecs @ git+https://github.com/zarr-developers/numcodecs', @@ -212,6 +214,35 @@ run-mypy = "mypy src" run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*" list-env = "pip list" +[tool.hatch.envs.min_deps] +description = """Test environment for minimum supported dependencies + +See Spec 0000 for details and drop schedule: https://scientific-python.org/specs/spec-0000/ +""" +python = "3.11" +dependencies = [ + 'numpy==1.25.*', + 'numcodecs==0.13.*', # 0.13 needed for? 
(should be 0.11) + 'fsspec==2022.10.0', + 's3fs==2022.10.0', + 'universal_pathlib==0.0.22', + 'crc32c==2.3.*', + 'typing_extensions==4.6.*', # 4.5 needed for @deprecated, 4.6 for Buffer + 'donfig==0.8.*', + # test deps + 'hypothesis', + 'pytest', + 'pytest-cov', + 'pytest-asyncio', + 'moto[s3]', +] + +[tool.hatch.envs.min_deps.scripts] +run = "pytest --verbose" +run-hypothesis = "pytest --hypothesis-profile ci tests/test_properties.py tests/test_store/test_stateful*" +list-env = "pip list" + + [tool.ruff] line-length = 100 force-exclude = true diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 0a0ec7f7cc..812b1e24f9 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -2,8 +2,6 @@ from typing import TYPE_CHECKING, Any, Self -import fsspec - from zarr.abc.store import ByteRangeRequest, Store from zarr.storage.common import _dereference_path @@ -130,7 +128,13 @@ def from_url( ------- RemoteStore """ - fs, path = fsspec.url_to_fs(url, **storage_options) + try: + from fsspec import url_to_fs + except ImportError: + # before fsspec==2024.3.1 + from fsspec.core import url_to_fs + + fs, path = url_to_fs(url, **storage_options) return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions) async def clear(self) -> None: From 5807cba192773630dfd172715558423d5a32bb01 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Tue, 22 Oct 2024 11:17:11 -0700 Subject: [PATCH 09/12] fix(remotestore): raise error if path includes scheme (#2348) * fix(remotestore): raise error if path includes scheme * fixup * fixup * strip scheme in from_url in case fsspec fails to * style: pre-commit fixes * disable cache in listing ops * update docs * use listings cache again * no refresh * style: pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- docs/guide/storage.rst | 2 +- src/zarr/abc/store.py | 2 +- src/zarr/storage/remote.py | 42 +++++++++++++++++++++++++++++++-- 
tests/test_store/test_core.py | 5 +--- tests/test_store/test_remote.py | 35 ++++++++++++++++++++++----- 5 files changed, 72 insertions(+), 14 deletions(-) diff --git a/docs/guide/storage.rst b/docs/guide/storage.rst index dfda553c43..0019f993a2 100644 --- a/docs/guide/storage.rst +++ b/docs/guide/storage.rst @@ -72,7 +72,7 @@ that implements the `AbstractFileSystem` API, .. code-block:: python >>> import zarr - >>> store = zarr.storage.RemoteStore("gs://foo/bar", mode="r") + >>> store = zarr.storage.RemoteStore.from_url("gs://foo/bar", mode="r") >>> zarr.open(store=store) shape=(10, 20) dtype=float32> diff --git a/src/zarr/abc/store.py b/src/zarr/abc/store.py index a995a6bf38..045da7e84a 100644 --- a/src/zarr/abc/store.py +++ b/src/zarr/abc/store.py @@ -168,7 +168,7 @@ def with_mode(self, mode: AccessModeLiteral) -> Self: Returns ------- - store: + store A new store of the same type with the new mode. Examples diff --git a/src/zarr/storage/remote.py b/src/zarr/storage/remote.py index 812b1e24f9..1f7d5f7a12 100644 --- a/src/zarr/storage/remote.py +++ b/src/zarr/storage/remote.py @@ -1,5 +1,6 @@ from __future__ import annotations +import warnings from typing import TYPE_CHECKING, Any, Self from zarr.abc.store import ByteRangeRequest, Store @@ -32,7 +33,8 @@ class RemoteStore(Store): mode : AccessModeLiteral The access mode to use. path : str - The root path of the store. + The root path of the store. This should be a relative path and must not include the + filesystem scheme. allowed_exceptions : tuple[type[Exception], ...] When fetching data, these cases will be deemed to correspond to missing keys. @@ -44,6 +46,23 @@ class RemoteStore(Store): supports_deletes supports_partial_writes supports_listing + + Raises + ------ + TypeError + If the Filesystem does not support async operations. + ValueError + If the path argument includes a scheme. + + Warns + ----- + UserWarning + If the file system (fs) was not created with `asynchronous=True`. 
+ + See Also + -------- + RemoteStore.from_upath + RemoteStore.from_url """ # based on FSSpec @@ -69,6 +88,15 @@ def __init__( if not self.fs.async_impl: raise TypeError("Filesystem needs to support async operations.") + if not self.fs.asynchronous: + warnings.warn( + f"fs ({fs}) was not created with `asynchronous=True`, this may lead to surprising behavior", + stacklevel=2, + ) + if "://" in path and not path.startswith("http"): + # `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯) + scheme, _ = path.split("://", maxsplit=1) + raise ValueError(f"path argument to RemoteStore must not include scheme ({scheme}://)") @classmethod def from_upath( @@ -134,7 +162,17 @@ def from_url( # before fsspec==2024.3.1 from fsspec.core import url_to_fs - fs, path = url_to_fs(url, **storage_options) + opts = storage_options or {} + opts = {"asynchronous": True, **opts} + + fs, path = url_to_fs(url, **opts) + + # fsspec is not consistent about removing the scheme from the path, so check and strip it here + # https://github.com/fsspec/filesystem_spec/issues/1722 + if "://" in path and not path.startswith("http"): + # `not path.startswith("http")` is a special case for the http filesystem (¯\_(ツ)_/¯) + path = fs._strip_protocol(path) + return cls(fs=fs, path=path, mode=mode, allowed_exceptions=allowed_exceptions) async def clear(self) -> None: diff --git a/tests/test_store/test_core.py b/tests/test_store/test_core.py index 771dc3c43e..38292e23c9 100644 --- a/tests/test_store/test_core.py +++ b/tests/test_store/test_core.py @@ -71,10 +71,7 @@ async def test_make_store_path_invalid() -> None: async def test_make_store_path_fsspec(monkeypatch) -> None: - import fsspec.implementations.memory - - monkeypatch.setattr(fsspec.implementations.memory.MemoryFileSystem, "async_impl", True) - store_path = await make_store_path("memory://") + store_path = await make_store_path("http://foo.com/bar") assert isinstance(store_path.store, RemoteStore) diff --git 
a/tests/test_store/test_remote.py b/tests/test_store/test_remote.py index c8e9a162b0..2ad1fd787b 100644 --- a/tests/test_store/test_remote.py +++ b/tests/test_store/test_remote.py @@ -86,10 +86,12 @@ def s3(s3_base: None) -> Generator[s3fs.S3FileSystem, None, None]: async def test_basic() -> None: store = RemoteStore.from_url( - f"s3://{test_bucket_name}", + f"s3://{test_bucket_name}/foo/spam/", mode="w", storage_options={"endpoint_url": endpoint_url, "anon": False}, ) + assert store.fs.asynchronous + assert store.path == f"{test_bucket_name}/foo/spam" assert await _collect_aiterator(store.list()) == () assert not await store.exists("foo") data = b"hello" @@ -109,7 +111,7 @@ class TestRemoteStoreS3(StoreTests[RemoteStore, cpu.Buffer]): @pytest.fixture def store_kwargs(self, request) -> dict[str, str | bool]: fs, path = fsspec.url_to_fs( - f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False + f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=True ) return {"fs": fs, "path": path, "mode": "r+"} @@ -143,9 +145,7 @@ def test_store_supports_partial_writes(self, store: RemoteStore) -> None: def test_store_supports_listing(self, store: RemoteStore) -> None: assert store.supports_listing - async def test_remote_store_from_uri( - self, store: RemoteStore, store_kwargs: dict[str, str | bool] - ): + async def test_remote_store_from_uri(self, store: RemoteStore): storage_options = { "endpoint_url": endpoint_url, "anon": False, @@ -183,9 +183,32 @@ async def test_remote_store_from_uri( assert dict(group.attrs) == {"key": "value-3"} def test_from_upath(self) -> None: - path = UPath(f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False) + path = UPath( + f"s3://{test_bucket_name}/foo/bar/", + endpoint_url=endpoint_url, + anon=False, + asynchronous=True, + ) result = RemoteStore.from_upath(path) assert result.fs.endpoint_url == endpoint_url + assert result.fs.asynchronous + assert result.path == f"{test_bucket_name}/foo/bar" 
+ + def test_init_raises_if_path_has_scheme(self, store_kwargs) -> None: + # regression test for https://github.com/zarr-developers/zarr-python/issues/2342 + store_kwargs["path"] = "s3://" + store_kwargs["path"] + with pytest.raises( + ValueError, match="path argument to RemoteStore must not include scheme .*" + ): + self.store_cls(**store_kwargs) + + def test_init_warns_if_fs_asynchronous_is_false(self) -> None: + fs, path = fsspec.url_to_fs( + f"s3://{test_bucket_name}", endpoint_url=endpoint_url, anon=False, asynchronous=False + ) + store_kwargs = {"fs": fs, "path": path, "mode": "r+"} + with pytest.warns(UserWarning, match=r".* was not created with `asynchronous=True`.*"): + self.store_cls(**store_kwargs) async def test_empty_nonexistent_path(self, store_kwargs) -> None: # regression test for https://github.com/zarr-developers/zarr-python/pull/2343 From 8a33df7fb0568c92a40c57874e07b68e371a4a59 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:19:23 -0700 Subject: [PATCH 10/12] chore: update pre-commit hooks (#2427) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.6.9 → v0.7.0](https://github.com/astral-sh/ruff-pre-commit/compare/v0.6.9...v0.7.0) - [github.com/pre-commit/mirrors-mypy: v1.11.2 → v1.12.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.11.2...v1.12.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 4667b20de1..55488c2372 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ default_language_version: python: python3 repos: - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.9 + rev: v0.7.0 hooks: - id: ruff args: ["--fix", 
"--show-fixes"] @@ -22,7 +22,7 @@ repos: hooks: - id: check-yaml - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.11.2 + rev: v1.12.1 hooks: - id: mypy files: src|tests From 6ce05265472771e922e69012105d2210e3405aa9 Mon Sep 17 00:00:00 2001 From: Joe Hamman Date: Wed, 23 Oct 2024 08:37:55 -0700 Subject: [PATCH 11/12] [v3] Array.append (#2413) * feature(array): implement Array.append changes the Array.resize to be an inplace operation * better error message * no more warn * style: pre-commit fixes --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- src/zarr/core/array.py | 130 ++++++++++++++++++--- tests/test_array.py | 188 +++++++++++++++++++++++++++++++ tests/test_codecs/test_codecs.py | 3 +- 3 files changed, 302 insertions(+), 19 deletions(-) diff --git a/src/zarr/core/array.py b/src/zarr/core/array.py index bdafa33f67..8c4d797e9a 100644 --- a/src/zarr/core/array.py +++ b/src/zarr/core/array.py @@ -2,7 +2,7 @@ import json from asyncio import gather -from dataclasses import dataclass, field, replace +from dataclasses import dataclass, field from itertools import starmap from logging import getLogger from typing import TYPE_CHECKING, Any, Generic, Literal, cast, overload @@ -1104,15 +1104,15 @@ async def setitem( ) return await self._set_selection(indexer, value, prototype=prototype) - async def resize(self, new_shape: ChunkCoords, delete_outside_chunks: bool = True) -> Self: + async def resize(self, new_shape: ShapeLike, delete_outside_chunks: bool = True) -> None: + new_shape = parse_shapelike(new_shape) assert len(new_shape) == len(self.metadata.shape) new_metadata = self.metadata.update_shape(new_shape) - # Remove all chunks outside of the new shape - old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape)) - new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape)) - if delete_outside_chunks: + # Remove all chunks outside of the new shape + 
old_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(self.metadata.shape)) + new_chunk_coords = set(self.metadata.chunk_grid.all_chunk_coords(new_shape)) async def _delete_key(key: str) -> None: await (self.store_path / key).delete() @@ -1128,7 +1128,63 @@ async def _delete_key(key: str) -> None: # Write new metadata await self._save_metadata(new_metadata) - return replace(self, metadata=new_metadata) + + # Update metadata (in place) + object.__setattr__(self, "metadata", new_metadata) + + async def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords: + """Append `data` to `axis`. + + Parameters + ---------- + data : array-like + Data to be appended. + axis : int + Axis along which to append. + + Returns + ------- + new_shape : tuple + + Notes + ----- + The size of all dimensions other than `axis` must match between this + array and `data`. + """ + # ensure data is array-like + if not hasattr(data, "shape"): + data = np.asanyarray(data) + + self_shape_preserved = tuple(s for i, s in enumerate(self.shape) if i != axis) + data_shape_preserved = tuple(s for i, s in enumerate(data.shape) if i != axis) + if self_shape_preserved != data_shape_preserved: + raise ValueError( + f"shape of data to append is not compatible with the array. " + f"The shape of the data is ({data_shape_preserved})" + f"and the shape of the array is ({self_shape_preserved})." + "All dimensions must match except for the dimension being " + "appended." 
+ ) + # remember old shape + old_shape = self.shape + + # determine new shape + new_shape = tuple( + self.shape[i] if i != axis else self.shape[i] + data.shape[i] + for i in range(len(self.shape)) + ) + + # resize + await self.resize(new_shape) + + # store data + append_selection = tuple( + slice(None) if i != axis else slice(old_shape[i], new_shape[i]) + for i in range(len(self.shape)) + ) + await self.setitem(append_selection, data) + + return new_shape async def update_attributes(self, new_attributes: dict[str, JSON]) -> Self: # metadata.attributes is "frozen" so we simply clear and update the dict @@ -1147,7 +1203,8 @@ async def info(self) -> None: raise NotImplementedError -@dataclass(frozen=True) +# TODO: Array can be a frozen data class again once property setters (e.g. shape) are removed +@dataclass(frozen=False) class Array: """Instantiate an array from an initialized store.""" @@ -1297,6 +1354,11 @@ def shape(self) -> ChunkCoords: """ return self._async_array.shape + @shape.setter + def shape(self, value: ChunkCoords) -> None: + """Sets the shape of the array by calling resize.""" + self.resize(value) + @property def chunks(self) -> ChunkCoords: """Returns a tuple of integers describing the length of each dimension of a chunk of the array. @@ -2754,18 +2816,18 @@ def blocks(self) -> BlockIndex: :func:`set_block_selection` for documentation and examples.""" return BlockIndex(self) - def resize(self, new_shape: ChunkCoords) -> Array: + def resize(self, new_shape: ShapeLike) -> None: """ Change the shape of the array by growing or shrinking one or more dimensions. - This method does not modify the original Array object. Instead, it returns a new Array - with the specified shape. + Parameters + ---------- + new_shape : tuple + New shape of the array. Notes ----- - When resizing an array, the data are not rearranged in any way. - If one or more dimensions are shrunk, any chunks falling outside the new array shape will be deleted from the underlying store. 
However, it is noteworthy that the chunks partially falling inside the new array @@ -2778,7 +2840,6 @@ def resize(self, new_shape: ChunkCoords) -> Array: >>> import zarr >>> z = zarr.zeros(shape=(10000, 10000), >>> chunk_shape=(1000, 1000), - >>> store=StorePath(MemoryStore(mode="w")), >>> dtype="i4",) >>> z.shape (10000, 10000) @@ -2791,10 +2852,43 @@ def resize(self, new_shape: ChunkCoords) -> Array: >>> z2.shape (50, 50) """ - resized = sync(self._async_array.resize(new_shape)) - # TODO: remove this cast when type inference improves - _resized = cast(AsyncArray[ArrayV2Metadata] | AsyncArray[ArrayV3Metadata], resized) - return type(self)(_resized) + sync(self._async_array.resize(new_shape)) + + def append(self, data: npt.ArrayLike, axis: int = 0) -> ChunkCoords: + """Append `data` to `axis`. + + Parameters + ---------- + data : array-like + Data to be appended. + axis : int + Axis along which to append. + + Returns + ------- + new_shape : tuple + + Notes + ----- + The size of all dimensions other than `axis` must match between this + array and `data`. 
+ + Examples + -------- + >>> import numpy as np + >>> import zarr + >>> a = np.arange(10000000, dtype='i4').reshape(10000, 1000) + >>> z = zarr.array(a, chunks=(1000, 100)) + >>> z.shape + (10000, 1000) + >>> z.append(a) + (20000, 1000) + >>> z.append(np.vstack([a, a]), axis=1) + (20000, 2000) + >>> z.shape + (20000, 2000) + """ + return sync(self._async_array.append(data, axis=axis)) def update_attributes(self, new_attributes: dict[str, JSON]) -> Array: # TODO: remove this cast when type inference improves diff --git a/tests/test_array.py b/tests/test_array.py index f182cb1a14..ae8e7f99c2 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -419,6 +419,194 @@ def test_update_attrs(zarr_format: int) -> None: assert arr2.attrs["foo"] == "bar" +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_resize_1d(store: MemoryStore, zarr_format: int) -> None: + z = zarr.create( + shape=105, chunks=10, dtype="i4", fill_value=0, store=store, zarr_format=zarr_format + ) + a = np.arange(105, dtype="i4") + z[:] = a + assert (105,) == z.shape + assert (105,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + z.resize(205) + assert (205,) == z.shape + assert (205,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:105]) + np.testing.assert_array_equal(np.zeros(100, dtype="i4"), z[105:]) + + z.resize(55) + assert (55,) == z.shape + assert (55,) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a[:55], z[:]) + + # via shape setter + new_shape = (105,) + z.shape = new_shape + assert new_shape == z.shape + assert new_shape == z[:].shape + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) 
+@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_resize_2d(store: MemoryStore, zarr_format: int) -> None: + z = zarr.create( + shape=(105, 105), + chunks=(10, 10), + dtype="i4", + fill_value=0, + store=store, + zarr_format=zarr_format, + ) + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z[:] = a + assert (105, 105) == z.shape + assert (105, 105) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + z.resize((205, 205)) + assert (205, 205) == z.shape + assert (205, 205) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:105, :105]) + np.testing.assert_array_equal(np.zeros((100, 205), dtype="i4"), z[105:, :]) + np.testing.assert_array_equal(np.zeros((205, 100), dtype="i4"), z[:, 105:]) + + z.resize((55, 55)) + assert (55, 55) == z.shape + assert (55, 55) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:55, :55], z[:]) + + z.resize((55, 1)) + assert (55, 1) == z.shape + assert (55, 1) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:55, :1], z[:]) + + z.resize((1, 55)) + assert (1, 55) == z.shape + assert (1, 55) == z[:].shape + assert np.dtype("i4") == z.dtype + assert np.dtype("i4") == z[:].dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a[:1, :10], z[:, :10]) + np.testing.assert_array_equal(np.zeros((1, 55 - 10), dtype="i4"), z[:, 10:55]) + + # via shape setter + new_shape = (105, 105) + z.shape = new_shape + assert new_shape == z.shape + assert new_shape == z[:].shape + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_1d(store: 
MemoryStore, zarr_format: int) -> None: + a = np.arange(105) + z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + b = np.arange(105, 205) + e = np.append(a, b) + assert z.shape == (105,) + z.append(b) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(e, z[:]) + + # check append handles array-like + c = [1, 2, 3] + f = np.append(e, c) + z.append(c) + assert f.shape == z.shape + assert f.dtype == z.dtype + assert (10,) == z.chunks + np.testing.assert_array_equal(f, z[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_2d(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z = zarr.create( + shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10, 10) == z.chunks + actual = z[:] + np.testing.assert_array_equal(a, actual) + + b = np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) + e = np.append(a, b, axis=0) + z.append(b) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10, 10) == z.chunks + actual = z[:] + np.testing.assert_array_equal(e, actual) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_2d_axis(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(105 * 105, dtype="i4").reshape((105, 105)) + z = zarr.create( + shape=a.shape, chunks=(10, 10), dtype=a.dtype, store=store, zarr_format=zarr_format + ) + z[:] = a + assert a.shape == z.shape + assert a.dtype == z.dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(a, z[:]) + + b = 
np.arange(105 * 105, 2 * 105 * 105, dtype="i4").reshape((105, 105)) + e = np.append(a, b, axis=1) + z.append(b, axis=1) + assert e.shape == z.shape + assert e.dtype == z.dtype + assert (10, 10) == z.chunks + np.testing.assert_array_equal(e, z[:]) + + +@pytest.mark.parametrize("store", ["memory"], indirect=True) +@pytest.mark.parametrize("zarr_format", [2, 3]) +def test_append_bad_shape(store: MemoryStore, zarr_format: int) -> None: + a = np.arange(100) + z = zarr.create(shape=a.shape, chunks=10, dtype=a.dtype, store=store, zarr_format=zarr_format) + z[:] = a + b = a.reshape(10, 10) + with pytest.raises(ValueError): + z.append(b) + + @pytest.mark.parametrize("order", ["C", "F", None]) @pytest.mark.parametrize("zarr_format", [2, 3]) @pytest.mark.parametrize("store", ["memory"], indirect=True) diff --git a/tests/test_codecs/test_codecs.py b/tests/test_codecs/test_codecs.py index 7a5fb979a1..0f2f892915 100644 --- a/tests/test_codecs/test_codecs.py +++ b/tests/test_codecs/test_codecs.py @@ -371,8 +371,9 @@ async def test_resize(store: Store) -> None: assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is not None - a = await a.resize((10, 12)) + await a.resize((10, 12)) assert a.metadata.shape == (10, 12) + assert a.shape == (10, 12) assert await store.get(f"{path}/0.0", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/0.1", prototype=default_buffer_prototype()) is not None assert await store.get(f"{path}/1.0", prototype=default_buffer_prototype()) is None From bc588a760a804f783c4242d4435863a43a5f3f9f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 23 Oct 2024 13:30:49 -0600 Subject: [PATCH 12/12] Fix JSON encoding of complex fill values (#2432) * Fix JSON encoding of complex fill values We were not replacing NaNs and Infs with the string versions. 
* Fix decoding of complex fill values * try excluding `math.inf` * Check complex numbers explicitly * Update src/zarr/core/metadata/v3.py --- src/zarr/core/metadata/v3.py | 26 ++++++++++++++++++++++---- tests/test_array.py | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/src/zarr/core/metadata/v3.py b/src/zarr/core/metadata/v3.py index 6b6f28dd96..7a38e9fd70 100644 --- a/src/zarr/core/metadata/v3.py +++ b/src/zarr/core/metadata/v3.py @@ -43,6 +43,13 @@ DEFAULT_DTYPE = "float64" +# Keep in sync with _replace_special_floats +SPECIAL_FLOATS_ENCODED = { + "Infinity": np.inf, + "-Infinity": -np.inf, + "NaN": np.nan, +} + def parse_zarr_format(data: object) -> Literal[3]: if data == 3: @@ -149,7 +156,7 @@ def default(self, o: object) -> Any: if isinstance(out, complex): # python complex types are not JSON serializable, so we use the # serialization defined in the zarr v3 spec - return [out.real, out.imag] + return _replace_special_floats([out.real, out.imag]) elif np.isnan(out): return "NaN" elif np.isinf(out): @@ -447,8 +454,11 @@ def parse_fill_value( if isinstance(fill_value, Sequence) and not isinstance(fill_value, str): if data_type in (DataType.complex64, DataType.complex128): if len(fill_value) == 2: + decoded_fill_value = tuple( + SPECIAL_FLOATS_ENCODED.get(value, value) for value in fill_value + ) # complex datatypes serialize to JSON arrays with two elements - return np_dtype.type(complex(*fill_value)) + return np_dtype.type(complex(*decoded_fill_value)) else: msg = ( f"Got an invalid fill value for complex data type {data_type.value}." 
@@ -475,12 +485,20 @@ def parse_fill_value( pass elif fill_value in ["Infinity", "-Infinity"] and not np.isfinite(casted_value): pass - elif np_dtype.kind in "cf": + elif np_dtype.kind == "f": # float comparison is not exact, especially when dtype None: + store = MemoryStore({}, mode="w") + Array.create(store=store, shape=(1,), dtype=np.complex64, fill_value=fill_value) + content = await store.get("zarr.json", prototype=default_buffer_prototype()) + assert content is not None + actual = json.loads(content.to_bytes()) + assert actual["fill_value"] == expected