diff --git a/src/zarr/v3/array.py b/src/zarr/v3/array.py index 4b7a3bb0ad..37730eb86e 100644 --- a/src/zarr/v3/array.py +++ b/src/zarr/v3/array.py @@ -136,8 +136,6 @@ def from_dict( async_array = cls( metadata=metadata, store_path=store_path, runtime_configuration=runtime_configuration ) - # todo: remove this, pushing the logic down to the array metadata creation - async_array._validate_metadata() return async_array @classmethod @@ -223,20 +221,8 @@ async def getitem(self, selection: Selection): return out[()] async def _save_metadata(self) -> None: - self._validate_metadata() - await (self.store_path / ZARR_JSON).set(self.metadata.to_bytes()) - def _validate_metadata(self) -> None: - assert len(self.metadata.shape) == len( - self.metadata.chunk_grid.chunk_shape - ), "`chunk_shape` and `shape` need to have the same number of dimensions." - assert self.metadata.dimension_names is None or len(self.metadata.shape) == len( - self.metadata.dimension_names - ), "`dimension_names` and `shape` need to have the same number of dimensions." - assert self.metadata.fill_value is not None, "`fill_value` is required." - self.codecs.validate(self.metadata) - async def _read_chunk( self, chunk_coords: ChunkCoords, @@ -477,7 +463,6 @@ def open( AsyncArray.open(store, runtime_configuration=runtime_configuration), runtime_configuration.asyncio_loop, ) - async_array._validate_metadata() return cls(async_array) @classmethod diff --git a/src/zarr/v3/chunk_grids.py b/src/zarr/v3/chunk_grids.py index 470574cd2b..ba7d9f08fe 100644 --- a/src/zarr/v3/chunk_grids.py +++ b/src/zarr/v3/chunk_grids.py @@ -17,7 +17,7 @@ def from_dict(cls, data: Dict[str, JSON]) -> Self: return data if data["name"] == "regular": return RegularChunkGrid.from_dict(data) - raise ValueError(f"Unknown chunk grid, got {data['name']}") + raise ValueError(f"Unknown chunk grid. 
Got {data['name']}.") @dataclass(frozen=True) diff --git a/src/zarr/v3/chunk_key_encodings.py b/src/zarr/v3/chunk_key_encodings.py index a0dc159519..f98bcda3b0 100644 --- a/src/zarr/v3/chunk_key_encodings.py +++ b/src/zarr/v3/chunk_key_encodings.py @@ -35,7 +35,7 @@ def from_dict(cls, data: Dict[str, JSON]) -> Self: return DefaultChunkKeyEncoding(**data["configuration"]) if data["name"] == "v2": return V2ChunkKeyEncoding(**data["configuration"]) - raise ValueError(f"Unknown chunk key encoding, got {data['name']}") + raise ValueError(f"Unknown chunk key encoding. Got {data['name']}.") def to_dict(self) -> Dict[str, JSON]: return {"name": self.name, "configuration": {"separator": self.separator}} diff --git a/src/zarr/v3/codecs/blosc.py b/src/zarr/v3/codecs/blosc.py index caaeb18e8b..512ad368b3 100644 --- a/src/zarr/v3/codecs/blosc.py +++ b/src/zarr/v3/codecs/blosc.py @@ -51,28 +51,26 @@ class BloscCname(Enum): def parse_typesize(data: JSON) -> int: if isinstance(data, int): - if data >= 0: + if data > 0: return data else: - msg = f"Value must be greater than or equal to 0. Got {data}, which is less than 0." - raise ValueError(msg) - msg = f"Value must be an int. Got {type(data)} instead." - raise TypeError(msg) + raise ValueError( + f"Value must be greater than 0. Got {data}, which is less than or equal to 0." + ) + raise TypeError(f"Value must be an int. Got {type(data)} instead.") # todo: real validation def parse_clevel(data: JSON) -> int: if isinstance(data, int): return data - msg = f"Value should be an int, got {type(data)} instead" - raise TypeError(msg) + raise TypeError(f"Value should be an int. Got {type(data)} instead.") def parse_blocksize(data: JSON) -> int: if isinstance(data, int): return data - msg = f"Value should be an int, got {type(data)} instead" - raise TypeError(msg) + raise TypeError(f"Value should be an int. 
Got {type(data)} instead.") @dataclass(frozen=True) @@ -88,13 +86,13 @@ class BloscCodec(BytesBytesCodec): def __init__( self, *, - typesize, + typesize=None, cname=BloscCname.zstd, clevel=5, shuffle=BloscShuffle.noshuffle, blocksize=0, ) -> None: - typesize_parsed = parse_typesize(typesize) + typesize_parsed = parse_typesize(typesize) if typesize is not None else None cname_parsed = parse_enum(cname, BloscCname) clevel_parsed = parse_clevel(clevel) shuffle_parsed = parse_enum(shuffle, BloscShuffle) @@ -112,6 +110,8 @@ def from_dict(cls, data: Dict[str, JSON]) -> Self: return cls(**data["configuration"]) def to_dict(self) -> Dict[str, JSON]: + if self.typesize is None: + raise ValueError("`typesize` needs to be set for serialization.") return { "name": "blosc", "configuration": { @@ -125,7 +125,7 @@ def to_dict(self) -> Dict[str, JSON]: def evolve(self, array_spec: ArraySpec) -> Self: new_codec = self - if new_codec.typesize == 0: + if new_codec.typesize is None: new_codec = replace(new_codec, typesize=array_spec.dtype.itemsize) return new_codec diff --git a/src/zarr/v3/codecs/crc32c_.py b/src/zarr/v3/codecs/crc32c_.py index a5b9eec3bc..b62e96c0c9 100644 --- a/src/zarr/v3/codecs/crc32c_.py +++ b/src/zarr/v3/codecs/crc32c_.py @@ -38,7 +38,13 @@ async def decode( crc32_bytes = chunk_bytes[-4:] inner_bytes = chunk_bytes[:-4] - assert np.uint32(crc32c(inner_bytes)).tobytes() == bytes(crc32_bytes) + computed_checksum = np.uint32(crc32c(inner_bytes)).tobytes() + stored_checksum = bytes(crc32_bytes) + if computed_checksum != stored_checksum: + raise ValueError( + "Stored and computed checksum do not match. " + + f"Stored: {stored_checksum}. Computed: {computed_checksum}." 
+ ) return inner_bytes async def encode( diff --git a/src/zarr/v3/codecs/pipeline.py b/src/zarr/v3/codecs/pipeline.py index 50c69879c8..56530af463 100644 --- a/src/zarr/v3/codecs/pipeline.py +++ b/src/zarr/v3/codecs/pipeline.py @@ -50,38 +50,33 @@ def evolve(self, array_spec: ArraySpec) -> Self: def from_list(cls, codecs: List[Codec]) -> CodecPipeline: from zarr.v3.codecs.sharding import ShardingCodec - assert any( - isinstance(codec, ArrayBytesCodec) for codec in codecs - ), "Exactly one array-to-bytes codec is required." + if not any(isinstance(codec, ArrayBytesCodec) for codec in codecs): + raise ValueError("Exactly one array-to-bytes codec is required.") prev_codec: Optional[Codec] = None for codec in codecs: if prev_codec is not None: - assert not isinstance(codec, ArrayBytesCodec) or not isinstance( - prev_codec, ArrayBytesCodec - ), ( - f"ArrayBytesCodec '{type(codec)}' cannot follow after " - + f"ArrayBytesCodec '{type(prev_codec)}' because exactly " - + "1 ArrayBytesCodec is allowed." - ) - assert not isinstance(codec, ArrayBytesCodec) or not isinstance( - prev_codec, BytesBytesCodec - ), ( - f"ArrayBytesCodec '{type(codec)}' cannot follow after " - + f"BytesBytesCodec '{type(prev_codec)}'." - ) - assert not isinstance(codec, ArrayArrayCodec) or not isinstance( - prev_codec, ArrayBytesCodec - ), ( - f"ArrayArrayCodec '{type(codec)}' cannot follow after " - + f"ArrayBytesCodec '{type(prev_codec)}'." - ) - assert not isinstance(codec, ArrayArrayCodec) or not isinstance( - prev_codec, BytesBytesCodec - ), ( - f"ArrayArrayCodec '{type(codec)}' cannot follow after " - + f"BytesBytesCodec '{type(prev_codec)}'." - ) + if isinstance(codec, ArrayBytesCodec) and isinstance(prev_codec, ArrayBytesCodec): + raise ValueError( + f"ArrayBytesCodec '{type(codec)}' cannot follow after " + + f"ArrayBytesCodec '{type(prev_codec)}' because exactly " + + "1 ArrayBytesCodec is allowed." 
+ ) + if isinstance(codec, ArrayBytesCodec) and isinstance(prev_codec, BytesBytesCodec): + raise ValueError( + f"ArrayBytesCodec '{type(codec)}' cannot follow after " + + f"BytesBytesCodec '{type(prev_codec)}'." + ) + if isinstance(codec, ArrayArrayCodec) and isinstance(prev_codec, ArrayBytesCodec): + raise ValueError( + f"ArrayArrayCodec '{type(codec)}' cannot follow after " + + f"ArrayBytesCodec '{type(prev_codec)}'." + ) + if isinstance(codec, ArrayArrayCodec) and isinstance(prev_codec, BytesBytesCodec): + raise ValueError( + f"ArrayArrayCodec '{type(codec)}' cannot follow after " + + f"BytesBytesCodec '{type(prev_codec)}'." + ) prev_codec = codec if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(codecs) > 1: diff --git a/src/zarr/v3/codecs/sharding.py b/src/zarr/v3/codecs/sharding.py index fb60dc4ab8..6acebee25f 100644 --- a/src/zarr/v3/codecs/sharding.py +++ b/src/zarr/v3/codecs/sharding.py @@ -279,23 +279,24 @@ def to_dict(self) -> Dict[str, JSON]: } def validate(self, array_metadata: ArrayMetadata) -> None: - assert len(self.chunk_shape) == array_metadata.ndim, ( - "The shard's `chunk_shape` and array's `shape` need to have the " - + "same number of dimensions." - ) - assert isinstance( - array_metadata.chunk_grid, RegularChunkGrid - ), "Sharding is only compatible with regular chunk grids." - assert all( + if len(self.chunk_shape) != array_metadata.ndim: + raise ValueError( + "The shard's `chunk_shape` and array's `shape` need to have the " + + "same number of dimensions." + ) + if not isinstance(array_metadata.chunk_grid, RegularChunkGrid): + raise ValueError("Sharding is only compatible with regular chunk grids.") + if not all( s % c == 0 for s, c in zip( array_metadata.chunk_grid.chunk_shape, self.chunk_shape, ) - ), ( - "The array's `chunk_shape` needs to be divisible by the " - + "shard's inner `chunk_shape`." - ) + ): + raise ValueError( + "The array's `chunk_shape` needs to be divisible by the " + + "shard's inner `chunk_shape`." 
+ ) async def decode( self, diff --git a/src/zarr/v3/codecs/transpose.py b/src/zarr/v3/codecs/transpose.py index e00d70ec6f..ef4f456579 100644 --- a/src/zarr/v3/codecs/transpose.py +++ b/src/zarr/v3/codecs/transpose.py @@ -51,16 +51,16 @@ def evolve(self, array_spec: ArraySpec) -> Self: if len(self.order) != array_spec.ndim: raise ValueError( "The `order` tuple needs have as many entries as " - + f"there are dimensions in the array. Got: {self.order}" + + f"there are dimensions in the array. Got {self.order}." ) if len(self.order) != len(set(self.order)): raise ValueError( - "There must not be duplicates in the `order` tuple. " + f"Got: {self.order}" + f"There must not be duplicates in the `order` tuple. Got {self.order}." ) if not all(0 <= x < array_spec.ndim for x in self.order): raise ValueError( "All entries in the `order` tuple must be between 0 and " - + f"the number of dimensions in the array. Got: {self.order}" + + f"the number of dimensions in the array. Got {self.order}." ) order = tuple(self.order) diff --git a/src/zarr/v3/codecs/zstd.py b/src/zarr/v3/codecs/zstd.py index 47fac495fb..feb43f7b6e 100644 --- a/src/zarr/v3/codecs/zstd.py +++ b/src/zarr/v3/codecs/zstd.py @@ -19,18 +19,15 @@ def parse_zstd_level(data: JSON) -> int: if isinstance(data, int): if data >= 23: - msg = f"Value must be less than or equal to 22. Got {data} instead." - raise ValueError(msg) + raise ValueError(f"Value must be less than or equal to 22. Got {data} instead.") return data - msg = f"Got value with type {type(data)}, but expected an int" - raise TypeError(msg) + raise TypeError(f"Got value with type {type(data)}, but expected an int.") def parse_checksum(data: JSON) -> bool: if isinstance(data, bool): return data - msg = f"Expected bool, got {type(data)}" - raise TypeError(msg) + raise TypeError(f"Expected bool. 
Got {type(data)}.") @dataclass(frozen=True) diff --git a/src/zarr/v3/common.py b/src/zarr/v3/common.py index a3c218ee9b..167a2f6d14 100644 --- a/src/zarr/v3/common.py +++ b/src/zarr/v3/common.py @@ -81,8 +81,7 @@ def parse_enum(data: JSON, cls: Type[E]) -> E: return data if data in enum_names(cls): return cls(data) - msg = f"Value must be one of {repr(list(enum_names(cls)))}, got {data} instead." - raise ValueError(msg) + raise ValueError(f"Value must be one of {repr(list(enum_names(cls)))}. Got {data} instead.") class NamedConfig(Protocol): @@ -125,7 +124,7 @@ def ndim(self) -> int: def parse_name(data: JSON, expected: str) -> str: if data == expected: return data - raise ValueError(f"Expected '{expected}' chunk, got {data} instead.") + raise ValueError(f"Expected '{expected}' chunk. Got {data} instead.") def parse_shapelike(data: Any) -> Tuple[int, ...]: diff --git a/src/zarr/v3/group.py b/src/zarr/v3/group.py index 8b89dfb41d..acd5ca0d62 100644 --- a/src/zarr/v3/group.py +++ b/src/zarr/v3/group.py @@ -20,7 +20,7 @@ def parse_zarr_format(data: Any) -> Literal[2, 3]: if data in (2, 3): return data - msg = msg = f"Invalid zarr_format: got {data}, expected one of (2,3)" + msg = msg = f"Invalid zarr_format. Expected 2 or 3. Got {data}." raise ValueError(msg) @@ -30,7 +30,7 @@ def parse_attributes(data: Any) -> Dict[str, Any]: return {} elif isinstance(data, dict) and all(map(lambda v: isinstance(v, str), data.keys())): return data - msg = f"Expected dict with string keys, got {type(data)} instead." + msg = f"Expected dict with string keys. Got {type(data)} instead." 
raise TypeError(msg) diff --git a/src/zarr/v3/metadata.py b/src/zarr/v3/metadata.py index 3b6c1346bb..86a185121c 100644 --- a/src/zarr/v3/metadata.py +++ b/src/zarr/v3/metadata.py @@ -159,6 +159,21 @@ def __init__( object.__setattr__(self, "fill_value", fill_value_parsed) object.__setattr__(self, "attributes", attributes_parsed) + self._validate_metadata() + + def _validate_metadata(self) -> None: + if len(self.shape) != len(self.chunk_grid.chunk_shape): + raise ValueError( + "`chunk_shape` and `shape` need to have the same number of dimensions." + ) + if self.dimension_names is not None and len(self.shape) != len(self.dimension_names): + raise ValueError( + "`dimension_names` and `shape` need to have the same number of dimensions." + ) + if self.fill_value is None: + raise ValueError("`fill_value` is required.") + self.codecs.validate(self) + @property def dtype(self) -> np.dtype: return self.data_type @@ -288,23 +303,20 @@ def parse_attributes(data: Any) -> Any: def parse_zarr_format_v3(data: Any) -> Literal[3]: if data == 3: return data - msg = f"Invalid value for `zarr_format`, got {data}, expected 3" - raise ValueError(msg) + raise ValueError(f"Invalid value for `zarr_format`. Expected 3. Got {data}.") # todo: move to its own module and drop _v2 suffix def parse_zarr_format_v2(data: Any) -> Literal[2]: if data == 2: return data - msg = f"Invalid value for `zarr_format`, got {data}, expected 2" - raise ValueError(msg) + raise ValueError(f"Invalid value for `zarr_format`. Expected 2. Got {data}.") def parse_node_type_array(data: Any) -> Literal["array"]: if data == "array": return data - msg = f"Invalid value for `node_type`, got {data}, expected 'array'" - raise ValueError(msg) + raise ValueError(f"Invalid value for `node_type`. Expected 'array'. 
Got {data}.") # todo: real validation @@ -317,24 +329,6 @@ def parse_compressor(data: Any) -> Codec: return data -def parse_v3_metadata(data: ArrayMetadata) -> ArrayMetadata: - if (l_chunks := len(data.chunk_grid.chunk_shape)) != (l_shape := len(data.shape)): - msg = ( - f"The `shape` and `chunk_grid.chunk_shape` attributes " - "must have the same length. " - f"`chunk_grid.chunk_shape` has length {l_chunks}, " - f"but `shape` has length {l_shape}" - ) - raise ValueError(msg) - if data.dimension_names is not None and (l_dimnames := len(data.dimension_names) != l_shape): - msg = ( - f"The `shape` and `dimension_names` attribute must have the same length. " - f"`dimension_names` has length {l_dimnames}" - ) - raise ValueError(msg) - return data - - def parse_v2_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata: if (l_chunks := len(data.chunks)) != (l_shape := len(data.shape)): msg = ( diff --git a/src/zarr/v3/store/memory.py b/src/zarr/v3/store/memory.py index 1370375851..afacfa4321 100644 --- a/src/zarr/v3/store/memory.py +++ b/src/zarr/v3/store/memory.py @@ -49,7 +49,7 @@ async def set( ) -> None: assert isinstance(key, str) if not isinstance(value, (bytes, bytearray, memoryview)): - raise TypeError(f"expected BytesLike, got {type(value)}") + raise TypeError(f"Expected BytesLike. 
Got {type(value)}.") if byte_range is not None: buf = bytearray(self._store_dict[key]) diff --git a/tests/test_codecs_v3.py b/tests/test_codecs_v3.py index 6b32924847..c966e0c816 100644 --- a/tests/test_codecs_v3.py +++ b/tests/test_codecs_v3.py @@ -845,7 +845,7 @@ async def test_endian_write( def test_invalid_metadata(store: Store): - with pytest.raises(AssertionError): + with pytest.raises(ValueError): Array.create( store / "invalid_chunk_shape", shape=(16, 16, 16), @@ -854,7 +854,7 @@ def test_invalid_metadata(store: Store): fill_value=0, ) - with pytest.raises(AssertionError): + with pytest.raises(ValueError): Array.create( store / "invalid_endian", shape=(16, 16), @@ -880,7 +880,7 @@ def test_invalid_metadata(store: Store): ], ) - with pytest.raises(AssertionError): + with pytest.raises(ValueError): Array.create( store / "invalid_missing_bytes_codec", shape=(16, 16), @@ -892,7 +892,7 @@ def test_invalid_metadata(store: Store): ], ) - with pytest.raises(AssertionError): + with pytest.raises(ValueError): Array.create( store / "invalid_inner_chunk_shape", shape=(16, 16), @@ -903,7 +903,7 @@ def test_invalid_metadata(store: Store): ShardingCodec(chunk_shape=(8,)), ], ) - with pytest.raises(AssertionError): + with pytest.raises(ValueError): Array.create( store / "invalid_inner_chunk_shape", shape=(16, 16),