diff --git a/CHANGELOG.md b/CHANGELOG.md index 39a84d5e..4bed6036 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,10 +8,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Add `array:codec::{InvalidBytesLengthError,InvalidArrayShapeError,InvalidNumberOfElementsError,SubsetOutOfBoundsError}` - Add `ArraySubset::inbounds_shape()` (matches the old `ArraySubset::inbounds` behaviour) ### Changed - **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape +- **Breaking**: `CodecError` enum changes: + - Change `CodecError::UnexpectedChunkDecodedSize` to an `InvalidBytesLengthError` + - Add `CodecError::{InvalidArrayShape,InvalidNumberOfElements,SubsetOutOfBounds}` ## [0.19.1] - 2025-01-19 diff --git a/zarrs/src/array.rs b/zarrs/src/array.rs index e810b4dd..4d595709 100644 --- a/zarrs/src/array.rs +++ b/zarrs/src/array.rs @@ -910,10 +910,7 @@ pub fn elements_to_ndarray( ) -> Result, ArrayError> { let length = elements.len(); ndarray::ArrayD::::from_shape_vec(iter_u64_to_usize(shape.iter()), elements).map_err(|_| { - ArrayError::CodecError(codec::CodecError::UnexpectedChunkDecodedSize( - length * size_of::(), - shape.iter().product::() * size_of::() as u64, - )) + ArrayError::CodecError(codec::InvalidArrayShapeError::new(shape.to_vec(), length).into()) }) } diff --git a/zarrs/src/array/array_bytes.rs b/zarrs/src/array/array_bytes.rs index 53c4e7b5..eabe18de 100644 --- a/zarrs/src/array/array_bytes.rs +++ b/zarrs/src/array/array_bytes.rs @@ -10,7 +10,10 @@ use crate::{ metadata::v3::array::data_type::DataTypeSize, }; -use super::{codec::CodecError, ravel_indices, ArraySize, DataType, FillValue}; +use super::{ + codec::{CodecError, InvalidBytesLengthError}, + ravel_indices, ArraySize, DataType, FillValue, +}; /// Array element bytes. /// @@ -217,14 +220,11 @@ impl<'a> ArrayBytes<'a> { } /// Validate fixed length array bytes for a given array size. -fn validate_bytes_flen(bytes: &RawBytes, array_size: u64) -> Result<(), CodecError> { - if bytes.len() as u64 == array_size { +fn validate_bytes_flen(bytes: &RawBytes, array_size: usize) -> Result<(), InvalidBytesLengthError> { + if bytes.len() == array_size { Ok(()) } else { - Err(CodecError::UnexpectedChunkDecodedSize( - bytes.len(), - array_size, - )) + Err(InvalidBytesLengthError::new(bytes.len(), array_size)) } } @@ -259,9 +259,10 @@ fn validate_bytes( data_type_size: DataTypeSize, ) -> Result<(), CodecError> { match (bytes, data_type_size) { - (ArrayBytes::Fixed(bytes), DataTypeSize::Fixed(data_type_size)) => { - validate_bytes_flen(bytes, num_elements * data_type_size as u64) - } + (ArrayBytes::Fixed(bytes), DataTypeSize::Fixed(data_type_size)) => Ok(validate_bytes_flen( + bytes, + usize::try_from(num_elements * data_type_size as u64).unwrap(), + )?), (ArrayBytes::Variable(bytes, offsets), DataTypeSize::Variable) => { validate_bytes_vlen(bytes, offsets, num_elements) } diff --git a/zarrs/src/array/codec.rs b/zarrs/src/array/codec.rs index e911ec70..1eff5539 100644 --- a/zarrs/src/array/codec.rs +++ b/zarrs/src/array/codec.rs @@ -16,6 +16,7 @@ pub mod bytes_to_bytes; pub mod metadata_options; pub mod options; +use derive_more::derive::Display; pub use metadata_options::CodecMetadataOptions; pub use options::{CodecOptions, CodecOptionsBuilder}; @@ -79,6 +80,7 @@ pub use array_to_array_partial_encoder_default::ArrayToArrayPartialEncoderDefaul mod bytes_partial_encoder_default; pub use bytes_partial_encoder_default::BytesPartialEncoderDefault; +use zarrs_metadata::ArrayShape; use crate::storage::{StoreKeyOffsetValue, WritableStorage}; use crate::{ @@ -964,6 +966,76 @@ impl AsyncBytesPartialDecoderTraits for std::io::Cursor> { } } +/// An error indicating the length of bytes does not match the expected length. +#[derive(Debug, Error, Display)] +#[display("Invalid bytes len {len}, expected {expected_len}")] +pub struct InvalidBytesLengthError { + len: usize, + expected_len: usize, +} + +impl InvalidBytesLengthError { + /// Create a new [`InvalidBytesLengthError`]. + #[must_use] + pub fn new(len: usize, expected_len: usize) -> Self { + Self { len, expected_len } + } +} + +/// An error indicating the shape is not compatible with the expected number of elements. +#[derive(Debug, Error, Display)] +#[display("Invalid shape {shape:?} for number of elements {expected_num_elements}")] +pub struct InvalidArrayShapeError { + shape: ArrayShape, + expected_num_elements: usize, +} + +impl InvalidArrayShapeError { + /// Create a new [`InvalidArrayShapeError`]. + #[must_use] + pub fn new(shape: ArrayShape, expected_num_elements: usize) -> Self { + Self { + shape, + expected_num_elements, + } + } +} + +/// An error indicating the length of elements does not match the expected length. +#[derive(Debug, Error, Display)] +#[display("Invalid number of elements {num}, expected {expected}")] +pub struct InvalidNumberOfElementsError { + num: u64, + expected: u64, +} + +impl InvalidNumberOfElementsError { + /// Create a new [`InvalidNumberOfElementsError`]. + #[must_use] + pub fn new(num: u64, expected: u64) -> Self { + Self { num, expected } + } +} + +/// An array subset is out of bounds. +#[derive(Debug, Error, Display)] +#[display("Subset {subset} is out of bounds of {must_be_within}")] +pub struct SubsetOutOfBoundsError { + subset: ArraySubset, + must_be_within: ArraySubset, +} + +impl SubsetOutOfBoundsError { + /// Create a new [`InvalidNumberOfElementsError`]. + #[must_use] + pub fn new(subset: ArraySubset, must_be_within: ArraySubset) -> Self { + Self { + subset, + must_be_within, + } + } +} + /// A codec error. #[derive(Debug, Error)] pub enum CodecError { @@ -980,8 +1052,8 @@ pub enum CodecError { #[error("the array subset {_0} has the wrong dimensionality, expected {_1}")] InvalidArraySubsetDimensionalityError(ArraySubset, usize), /// The decoded size of a chunk did not match what was expected. - #[error("the size of a decoded chunk is {_0}, expected {_1}")] - UnexpectedChunkDecodedSize(usize, u64), + #[error("the size of a decoded chunk is {}, expected {}", _0.len, _0.expected_len)] + UnexpectedChunkDecodedSize(#[from] InvalidBytesLengthError), /// An embedded checksum does not match the decoded value. #[error("the checksum is invalid")] InvalidChecksum, @@ -1006,6 +1078,15 @@ pub enum CodecError { /// Expected variable length bytes. #[error("Expected variable length array bytes")] ExpectedVariableLengthBytes, + /// Invalid array shape. + #[error(transparent)] + InvalidArrayShape(#[from] InvalidArrayShapeError), + /// Invalid number of elements. + #[error(transparent)] + InvalidNumberOfElements(#[from] InvalidNumberOfElementsError), + /// Subset out of bounds. + #[error(transparent)] + SubsetOutOfBounds(#[from] SubsetOutOfBoundsError), } impl From<&str> for CodecError { diff --git a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs index ce379a00..a199d326 100644 --- a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs +++ b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs @@ -8,7 +8,7 @@ use crate::{ ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault, ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits, BytesPartialEncoderTraits, CodecError, CodecMetadataOptions, CodecOptions, CodecTraits, - RecommendedConcurrency, + InvalidBytesLengthError, RecommendedConcurrency, }, ArrayBytes, BytesRepresentation, ChunkRepresentation, DataTypeSize, RawBytes, }, @@ -76,12 +76,11 @@ impl BytesCodec { )); } DataTypeSize::Fixed(data_type_size) => { - let array_size = decoded_representation.num_elements() * data_type_size as u64; - if value.len() as u64 != array_size { - return Err(CodecError::UnexpectedChunkDecodedSize( - value.len(), - array_size, - )); + let array_size = + usize::try_from(decoded_representation.num_elements() * data_type_size as u64) + .unwrap(); + if value.len() != array_size { + return Err(InvalidBytesLengthError::new(value.len(), array_size).into()); } else if data_type_size > 1 && self.endian.is_none() { return Err(CodecError::Other(format!( "tried to encode an array with element size {data_type_size} with endianness None" diff --git a/zarrs/src/array/codec/array_to_bytes/vlen.rs b/zarrs/src/array/codec/array_to_bytes/vlen.rs index 5873fe7a..b246c08e 100644 --- a/zarrs/src/array/codec/array_to_bytes/vlen.rs +++ b/zarrs/src/array/codec/array_to_bytes/vlen.rs @@ -13,7 +13,7 @@ pub use crate::metadata::v3::array::codec::vlen::{ }; use crate::{ array::{ - codec::{ArrayToBytesCodecTraits, CodecError, CodecOptions}, + codec::{ArrayToBytesCodecTraits, CodecError, CodecOptions, InvalidBytesLengthError}, convert_from_bytes_slice, ChunkRepresentation, CodecChain, DataType, Endianness, FillValue, RawBytes, }, @@ -62,10 +62,7 @@ fn get_vlen_bytes_and_offsets( ) -> Result<(Vec, Vec), CodecError> { // Get the index length and data start if bytes.len() < size_of::() { - return Err(CodecError::UnexpectedChunkDecodedSize( - bytes.len(), - size_of::() as u64, - )); + return Err(InvalidBytesLengthError::new(bytes.len(), size_of::()).into()); } let index_len = u64::from_le_bytes(bytes[0..size_of::()].try_into().unwrap()); let index_len = usize::try_from(index_len) diff --git a/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs b/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs index 856d7566..e22a0d28 100644 --- a/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs +++ b/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs @@ -11,7 +11,10 @@ use std::sync::Arc; pub(crate) const IDENTIFIER: &str = "vlen_v2"; // pub use vlen_v2::IDENTIFIER; -use crate::array::{codec::CodecError, RawBytes}; +use crate::array::{ + codec::{CodecError, InvalidBytesLengthError}, + RawBytes, +}; pub(crate) use vlen_v2_codec::VlenV2Codec; @@ -67,10 +70,7 @@ fn get_interleaved_bytes_and_offsets( // Validate the bytes is long enough to contain header and element lengths let header_length = size_of::() * (1 + num_elements); if bytes.len() < header_length { - return Err(CodecError::UnexpectedChunkDecodedSize( - bytes.len(), - header_length as u64, - )); + return Err(InvalidBytesLengthError::new(bytes.len(), header_length).into()); } // Validate the number of elements from the header diff --git a/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs b/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs index ac74783e..13ade3f2 100644 --- a/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs +++ b/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs @@ -28,7 +28,7 @@ pub use gdeflate_codec::GDeflateCodec; use crate::{ array::{ - codec::{Codec, CodecError, CodecPlugin}, + codec::{Codec, CodecError, CodecPlugin, InvalidBytesLengthError}, RawBytes, }, metadata::v3::{array::codec::gdeflate, MetadataV3}, @@ -61,10 +61,11 @@ const GDEFLATE_STATIC_HEADER_LENGTH: usize = 2 * size_of::(); fn gdeflate_decode(encoded_value: &RawBytes<'_>) -> Result, CodecError> { if encoded_value.len() < GDEFLATE_STATIC_HEADER_LENGTH { - return Err(CodecError::UnexpectedChunkDecodedSize( + return Err(InvalidBytesLengthError::new( encoded_value.len(), - GDEFLATE_STATIC_HEADER_LENGTH as u64, - )); + GDEFLATE_STATIC_HEADER_LENGTH, + ) + .into()); } // Decode the static header @@ -77,10 +78,11 @@ fn gdeflate_decode(encoded_value: &RawBytes<'_>) -> Result, CodecError> // Check length of dynamic header let dynamic_header_length = num_pages * size_of::(); if encoded_value.len() < GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length { - return Err(CodecError::UnexpectedChunkDecodedSize( + return Err(InvalidBytesLengthError::new( encoded_value.len(), - (GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length) as u64, - )); + GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length, + ) + .into()); } // Decode the pages