Skip to content

Commit

Permalink
cleanup error messages
Browse files Browse the repository at this point in the history
  • Loading branch information
normanrz committed Feb 9, 2024
1 parent abea690 commit 45adb5e
Show file tree
Hide file tree
Showing 14 changed files with 91 additions and 114 deletions.
15 changes: 0 additions & 15 deletions src/zarr/v3/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,8 +136,6 @@ def from_dict(
async_array = cls(
metadata=metadata, store_path=store_path, runtime_configuration=runtime_configuration
)
# todo: remove this, pushing the logic down to the array metadata creation
async_array._validate_metadata()
return async_array

@classmethod
Expand Down Expand Up @@ -223,20 +221,8 @@ async def getitem(self, selection: Selection):
return out[()]

# Validate the array metadata, then persist it: the serialized bytes from
# `self.metadata.to_bytes()` are written to the `ZARR_JSON` key under
# `self.store_path`.
async def _save_metadata(self) -> None:
# Fail before anything is written if the metadata is inconsistent.
self._validate_metadata()

await (self.store_path / ZARR_JSON).set(self.metadata.to_bytes())

# Check cross-field consistency of `self.metadata` and then delegate to the
# codec pipeline's own `validate`.
# NOTE(review): these checks use `assert`, so they raise AssertionError and
# are skipped entirely when Python runs with -O.
def _validate_metadata(self) -> None:
# `shape` and the chunk grid's `chunk_shape` must have the same rank.
assert len(self.metadata.shape) == len(
self.metadata.chunk_grid.chunk_shape
), "`chunk_shape` and `shape` need to have the same number of dimensions."
# `dimension_names` is optional; when present it needs one entry per dimension.
assert self.metadata.dimension_names is None or len(self.metadata.shape) == len(
self.metadata.dimension_names
), "`dimension_names` and `shape` need to have the same number of dimensions."
assert self.metadata.fill_value is not None, "`fill_value` is required."
# Codec-specific checks receive the full metadata object.
self.codecs.validate(self.metadata)

async def _read_chunk(
self,
chunk_coords: ChunkCoords,
Expand Down Expand Up @@ -477,7 +463,6 @@ def open(
AsyncArray.open(store, runtime_configuration=runtime_configuration),
runtime_configuration.asyncio_loop,
)
async_array._validate_metadata()
return cls(async_array)

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/v3/chunk_grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def from_dict(cls, data: Dict[str, JSON]) -> Self:
return data
if data["name"] == "regular":
return RegularChunkGrid.from_dict(data)
raise ValueError(f"Unknown chunk grid, got {data['name']}")
raise ValueError(f"Unknown chunk grid. Got {data['name']}.")


@dataclass(frozen=True)
Expand Down
2 changes: 1 addition & 1 deletion src/zarr/v3/chunk_key_encodings.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def from_dict(cls, data: Dict[str, JSON]) -> Self:
return DefaultChunkKeyEncoding(**data["configuration"])
if data["name"] == "v2":
return V2ChunkKeyEncoding(**data["configuration"])
raise ValueError(f"Unknown chunk key encoding, got {data['name']}")
raise ValueError(f"Unknown chunk key encoding. Got {data['name']}.")

def to_dict(self) -> Dict[str, JSON]:
return {"name": self.name, "configuration": {"separator": self.separator}}
Expand Down
24 changes: 12 additions & 12 deletions src/zarr/v3/codecs/blosc.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,28 +51,26 @@ class BloscCname(Enum):

def parse_typesize(data: JSON) -> int:
    """Validate a Blosc ``typesize`` and return it unchanged.

    Args:
        data: Candidate value, typically read from a codec configuration.

    Returns:
        ``data`` itself when it is an int strictly greater than 0.

    Raises:
        TypeError: If ``data`` is not an int.
        ValueError: If ``data`` is an int that is less than or equal to 0.
    """
    if not isinstance(data, int):
        raise TypeError(f"Value must be an int. Got {type(data)} instead.")
    if data <= 0:
        raise ValueError(
            f"Value must be greater than 0. Got {data}, which is less than or equal to 0."
        )
    return data


# todo: real validation
def parse_clevel(data: JSON) -> int:
    """Return ``data`` unchanged if it is an int.

    Only the type is checked; no range is enforced on the compression level.

    Raises:
        TypeError: If ``data`` is not an int.
    """
    if isinstance(data, int):
        return data
    raise TypeError(f"Value should be an int. Got {type(data)} instead.")


def parse_blocksize(data: JSON) -> int:
    """Return ``data`` unchanged if it is an int.

    Only the type is checked; no range is enforced on the block size.

    Raises:
        TypeError: If ``data`` is not an int.
    """
    if isinstance(data, int):
        return data
    raise TypeError(f"Value should be an int. Got {type(data)} instead.")


@dataclass(frozen=True)
Expand All @@ -88,13 +86,13 @@ class BloscCodec(BytesBytesCodec):
def __init__(
self,
*,
typesize,
typesize=None,
cname=BloscCname.zstd,
clevel=5,
shuffle=BloscShuffle.noshuffle,
blocksize=0,
) -> None:
typesize_parsed = parse_typesize(typesize)
typesize_parsed = parse_typesize(typesize) if typesize is not None else None
cname_parsed = parse_enum(cname, BloscCname)
clevel_parsed = parse_clevel(clevel)
shuffle_parsed = parse_enum(shuffle, BloscShuffle)
Expand All @@ -112,6 +110,8 @@ def from_dict(cls, data: Dict[str, JSON]) -> Self:
return cls(**data["configuration"])

def to_dict(self) -> Dict[str, JSON]:
if self.typesize is None:
raise ValueError("`typesize` needs to be set for serialization.")
return {
"name": "blosc",
"configuration": {
Expand All @@ -125,7 +125,7 @@ def to_dict(self) -> Dict[str, JSON]:

def evolve(self, array_spec: ArraySpec) -> Self:
new_codec = self
if new_codec.typesize == 0:
if new_codec.typesize is None:
new_codec = replace(new_codec, typesize=array_spec.dtype.itemsize)

return new_codec
Expand Down
8 changes: 7 additions & 1 deletion src/zarr/v3/codecs/crc32c_.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,13 @@ async def decode(
crc32_bytes = chunk_bytes[-4:]
inner_bytes = chunk_bytes[:-4]

assert np.uint32(crc32c(inner_bytes)).tobytes() == bytes(crc32_bytes)
computed_checksum = np.uint32(crc32c(inner_bytes)).tobytes()
stored_checksum = bytes(crc32_bytes)
if computed_checksum != stored_checksum:
raise ValueError(
"Stored and computed checksum do not match. "
+ f"Stored: {stored_checksum}. Computed: {computed_checksum}."
)
return inner_bytes

async def encode(
Expand Down
51 changes: 23 additions & 28 deletions src/zarr/v3/codecs/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,38 +50,33 @@ def evolve(self, array_spec: ArraySpec) -> Self:
def from_list(cls, codecs: List[Codec]) -> CodecPipeline:
from zarr.v3.codecs.sharding import ShardingCodec

assert any(
isinstance(codec, ArrayBytesCodec) for codec in codecs
), "Exactly one array-to-bytes codec is required."
if not any(isinstance(codec, ArrayBytesCodec) for codec in codecs):
raise ValueError("Exactly one array-to-bytes codec is required.")

prev_codec: Optional[Codec] = None
for codec in codecs:
if prev_codec is not None:
assert not isinstance(codec, ArrayBytesCodec) or not isinstance(
prev_codec, ArrayBytesCodec
), (
f"ArrayBytesCodec '{type(codec)}' cannot follow after "
+ f"ArrayBytesCodec '{type(prev_codec)}' because exactly "
+ "1 ArrayBytesCodec is allowed."
)
assert not isinstance(codec, ArrayBytesCodec) or not isinstance(
prev_codec, BytesBytesCodec
), (
f"ArrayBytesCodec '{type(codec)}' cannot follow after "
+ f"BytesBytesCodec '{type(prev_codec)}'."
)
assert not isinstance(codec, ArrayArrayCodec) or not isinstance(
prev_codec, ArrayBytesCodec
), (
f"ArrayArrayCodec '{type(codec)}' cannot follow after "
+ f"ArrayBytesCodec '{type(prev_codec)}'."
)
assert not isinstance(codec, ArrayArrayCodec) or not isinstance(
prev_codec, BytesBytesCodec
), (
f"ArrayArrayCodec '{type(codec)}' cannot follow after "
+ f"BytesBytesCodec '{type(prev_codec)}'."
)
if isinstance(codec, ArrayBytesCodec) and isinstance(prev_codec, ArrayBytesCodec):
raise ValueError(
f"ArrayBytesCodec '{type(codec)}' cannot follow after "
+ f"ArrayBytesCodec '{type(prev_codec)}' because exactly "
+ "1 ArrayBytesCodec is allowed."
)
if isinstance(codec, ArrayBytesCodec) and isinstance(prev_codec, BytesBytesCodec):
raise ValueError(
f"ArrayBytesCodec '{type(codec)}' cannot follow after "
+ f"BytesBytesCodec '{type(prev_codec)}'."
)
if isinstance(codec, ArrayArrayCodec) and isinstance(prev_codec, ArrayBytesCodec):
raise ValueError(
f"ArrayArrayCodec '{type(codec)}' cannot follow after "
+ f"ArrayBytesCodec '{type(prev_codec)}'."
)
if isinstance(codec, ArrayArrayCodec) and isinstance(prev_codec, BytesBytesCodec):
raise ValueError(
f"ArrayArrayCodec '{type(codec)}' cannot follow after "
+ f"BytesBytesCodec '{type(prev_codec)}'."
)
prev_codec = codec

if any(isinstance(codec, ShardingCodec) for codec in codecs) and len(codecs) > 1:
Expand Down
25 changes: 13 additions & 12 deletions src/zarr/v3/codecs/sharding.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,23 +279,24 @@ def to_dict(self) -> Dict[str, JSON]:
}

def validate(self, array_metadata: ArrayMetadata) -> None:
assert len(self.chunk_shape) == array_metadata.ndim, (
"The shard's `chunk_shape` and array's `shape` need to have the "
+ "same number of dimensions."
)
assert isinstance(
array_metadata.chunk_grid, RegularChunkGrid
), "Sharding is only compatible with regular chunk grids."
assert all(
if len(self.chunk_shape) != array_metadata.ndim:
raise ValueError(
"The shard's `chunk_shape` and array's `shape` need to have the "
+ "same number of dimensions."
)
if not isinstance(array_metadata.chunk_grid, RegularChunkGrid):
raise ValueError("Sharding is only compatible with regular chunk grids.")
if not all(
s % c == 0
for s, c in zip(
array_metadata.chunk_grid.chunk_shape,
self.chunk_shape,
)
), (
"The array's `chunk_shape` needs to be divisible by the "
+ "shard's inner `chunk_shape`."
)
):
raise ValueError(
"The array's `chunk_shape` needs to be divisible by the "
+ "shard's inner `chunk_shape`."
)

async def decode(
self,
Expand Down
6 changes: 3 additions & 3 deletions src/zarr/v3/codecs/transpose.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,16 +51,16 @@ def evolve(self, array_spec: ArraySpec) -> Self:
if len(self.order) != array_spec.ndim:
raise ValueError(
"The `order` tuple needs have as many entries as "
+ f"there are dimensions in the array. Got: {self.order}"
+ f"there are dimensions in the array. Got {self.order}."
)
if len(self.order) != len(set(self.order)):
raise ValueError(
"There must not be duplicates in the `order` tuple. " + f"Got: {self.order}"
f"There must not be duplicates in the `order` tuple. Got {self.order}."
)
if not all(0 <= x < array_spec.ndim for x in self.order):
raise ValueError(
"All entries in the `order` tuple must be between 0 and "
+ f"the number of dimensions in the array. Got: {self.order}"
+ f"the number of dimensions in the array. Got {self.order}."
)
order = tuple(self.order)

Expand Down
9 changes: 3 additions & 6 deletions src/zarr/v3/codecs/zstd.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,15 @@
def parse_zstd_level(data: JSON) -> int:
    """Validate a Zstandard compression level and return it unchanged.

    Args:
        data: Candidate level, typically read from a codec configuration.

    Returns:
        ``data`` itself when it is an int no greater than 22.

    Raises:
        TypeError: If ``data`` is not an int.
        ValueError: If ``data`` is an int of 23 or more.
    """
    if not isinstance(data, int):
        raise TypeError(f"Got value with type {type(data)}, but expected an int.")
    if data >= 23:
        raise ValueError(f"Value must be less than or equal to 22. Got {data} instead.")
    return data


def parse_checksum(data: JSON) -> bool:
    """Return ``data`` unchanged if it is a bool.

    Raises:
        TypeError: If ``data`` is not a bool (ints such as 0/1 are rejected).
    """
    if isinstance(data, bool):
        return data
    raise TypeError(f"Expected bool. Got {type(data)}.")


@dataclass(frozen=True)
Expand Down
5 changes: 2 additions & 3 deletions src/zarr/v3/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,7 @@ def parse_enum(data: JSON, cls: Type[E]) -> E:
return data
if data in enum_names(cls):
return cls(data)
msg = f"Value must be one of {repr(list(enum_names(cls)))}, got {data} instead."
raise ValueError(msg)
raise ValueError(f"Value must be one of {repr(list(enum_names(cls)))}. Got {data} instead.")


class NamedConfig(Protocol):
Expand Down Expand Up @@ -125,7 +124,7 @@ def ndim(self) -> int:
def parse_name(data: JSON, expected: str) -> str:
    """Check that ``data`` equals the ``expected`` name and return it.

    Args:
        data: The name found in the parsed document.
        expected: The name this parser accepts.

    Raises:
        ValueError: If ``data`` does not compare equal to ``expected``.
    """
    if data == expected:
        return data
    raise ValueError(f"Expected '{expected}' chunk. Got {data} instead.")


def parse_shapelike(data: Any) -> Tuple[int, ...]:
Expand Down
4 changes: 2 additions & 2 deletions src/zarr/v3/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
def parse_zarr_format(data: Any) -> Literal[2, 3]:
    """Return ``data`` unchanged if it is a supported Zarr format (2 or 3).

    Raises:
        ValueError: If ``data`` is neither 2 nor 3.
    """
    if data in (2, 3):
        return data
    raise ValueError(f"Invalid zarr_format. Expected 2 or 3. Got {data}.")


Expand All @@ -30,7 +30,7 @@ def parse_attributes(data: Any) -> Dict[str, Any]:
return {}
elif isinstance(data, dict) and all(map(lambda v: isinstance(v, str), data.keys())):
return data
msg = f"Expected dict with string keys, got {type(data)} instead."
msg = f"Expected dict with string keys. Got {type(data)} instead."
raise TypeError(msg)


Expand Down
42 changes: 18 additions & 24 deletions src/zarr/v3/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,21 @@ def __init__(
object.__setattr__(self, "fill_value", fill_value_parsed)
object.__setattr__(self, "attributes", attributes_parsed)

self._validate_metadata()

# Check cross-field consistency of this metadata object, raising ValueError on
# the first violated rule, and then delegate to the codec pipeline's `validate`.
def _validate_metadata(self) -> None:
# `shape` and the chunk grid's `chunk_shape` must have the same rank.
if len(self.shape) != len(self.chunk_grid.chunk_shape):
raise ValueError(
"`chunk_shape` and `shape` need to have the same number of dimensions."
)
# `dimension_names` is optional; when present it needs one entry per dimension.
if self.dimension_names is not None and len(self.shape) != len(self.dimension_names):
raise ValueError(
"`dimension_names` and `shape` need to have the same number of dimensions."
)
if self.fill_value is None:
raise ValueError("`fill_value` is required.")
# Codec-specific checks receive this metadata object itself.
self.codecs.validate(self)

@property
def dtype(self) -> np.dtype:
return self.data_type
Expand Down Expand Up @@ -288,23 +303,20 @@ def parse_attributes(data: Any) -> Any:
def parse_zarr_format_v3(data: Any) -> Literal[3]:
    """Return ``data`` unchanged if it equals 3.

    Raises:
        ValueError: If ``data`` is not 3.
    """
    if data == 3:
        return data
    raise ValueError(f"Invalid value for `zarr_format`. Expected 3. Got {data}.")


# todo: move to its own module and drop _v2 suffix
def parse_zarr_format_v2(data: Any) -> Literal[2]:
    """Return ``data`` unchanged if it equals 2.

    Raises:
        ValueError: If ``data`` is not 2.
    """
    if data == 2:
        return data
    # The expected value reported here must be 2, matching the check above.
    raise ValueError(f"Invalid value for `zarr_format`. Expected 2. Got {data}.")


def parse_node_type_array(data: Any) -> Literal["array"]:
    """Return ``data`` unchanged if it equals ``"array"``.

    Raises:
        ValueError: If ``data`` is anything other than ``"array"``.
    """
    if data == "array":
        return data
    raise ValueError(f"Invalid value for `node_type`. Expected 'array'. Got {data}.")


# todo: real validation
Expand All @@ -317,24 +329,6 @@ def parse_compressor(data: Any) -> Codec:
return data


def parse_v3_metadata(data: ArrayMetadata) -> ArrayMetadata:
    """Check that the rank of ``data.shape`` matches its chunk grid and names.

    Args:
        data: Metadata object exposing ``shape``, ``chunk_grid.chunk_shape``
            and ``dimension_names``.

    Returns:
        ``data`` itself when the checks pass.

    Raises:
        ValueError: If ``chunk_grid.chunk_shape`` or a non-``None``
            ``dimension_names`` differs in length from ``shape``.
    """
    l_chunks = len(data.chunk_grid.chunk_shape)
    l_shape = len(data.shape)
    if l_chunks != l_shape:
        raise ValueError(
            "The `shape` and `chunk_grid.chunk_shape` attributes "
            "must have the same length. "
            f"`chunk_grid.chunk_shape` has length {l_chunks}, "
            f"but `shape` has length {l_shape}"
        )
    if data.dimension_names is not None:
        # Take the length first and compare afterwards, so the message reports
        # the actual length rather than the boolean result of the comparison.
        l_dimnames = len(data.dimension_names)
        if l_dimnames != l_shape:
            raise ValueError(
                "The `shape` and `dimension_names` attribute must have the same length. "
                f"`dimension_names` has length {l_dimnames}"
            )
    return data


def parse_v2_metadata(data: ArrayV2Metadata) -> ArrayV2Metadata:
if (l_chunks := len(data.chunks)) != (l_shape := len(data.shape)):
msg = (
Expand Down
Loading

0 comments on commit 45adb5e

Please sign in to comment.