From 1f4aebd38635c41df390a19c4ba1c41d5a955ae7 Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sun, 19 Jan 2025 15:13:16 +1100 Subject: [PATCH 1/2] refactor!: change `ArraySubset::inbounds` to take another subset rather than a shape --- CHANGELOG.md | 6 +++++ zarrs/src/array/array_async_readable.rs | 4 +-- zarrs/src/array/array_sync_readable.rs | 4 +-- .../chunk_cache/array_chunk_cache_ext_sync.rs | 2 +- zarrs/src/array/codec.rs | 6 ++--- zarrs/src/array_subset.rs | 27 +++++++++++++++---- 6 files changed, 36 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2db1ecfd..39a84d5e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- Add `ArraySubset::inbounds_shape()` (matches the old `ArraySubset::inbounds` behaviour) + +### Changed +- **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape + ## [0.19.1] - 2025-01-19 ### Added diff --git a/zarrs/src/array/array_async_readable.rs b/zarrs/src/array/array_async_readable.rs index 79ca0f72..8139b356 100644 --- a/zarrs/src/array/array_async_readable.rs +++ b/zarrs/src/array/array_async_readable.rs @@ -737,7 +737,7 @@ impl Array { options: &CodecOptions, ) -> Result, ArrayError> { let chunk_representation = self.chunk_array_representation(chunk_indices)?; - if !chunk_subset.inbounds(&chunk_representation.shape_u64()) { + if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) { return Err(ArrayError::InvalidArraySubset( chunk_subset.clone(), self.shape().to_vec(), @@ -783,7 +783,7 @@ impl Array { options: &CodecOptions, ) -> Result<(), ArrayError> { let chunk_representation = self.chunk_array_representation(chunk_indices)?; - if !chunk_subset.inbounds(&chunk_representation.shape_u64()) { + if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) { return Err(ArrayError::InvalidArraySubset( chunk_subset.clone(), self.shape().to_vec(), diff --git a/zarrs/src/array/array_sync_readable.rs b/zarrs/src/array/array_sync_readable.rs index 200e0ba6..a2b8d4b2 100644 --- a/zarrs/src/array/array_sync_readable.rs +++ b/zarrs/src/array/array_sync_readable.rs @@ -794,7 +794,7 @@ impl Array { options: &CodecOptions, ) -> Result, ArrayError> { let chunk_representation = self.chunk_array_representation(chunk_indices)?; - if !chunk_subset.inbounds(&chunk_representation.shape_u64()) { + if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) { return Err(ArrayError::InvalidArraySubset( chunk_subset.clone(), self.shape().to_vec(), @@ -837,7 +837,7 @@ impl Array { options: &CodecOptions, ) -> Result<(), ArrayError> { let chunk_representation = self.chunk_array_representation(chunk_indices)?; - if !chunk_subset.inbounds(&chunk_representation.shape_u64()) { + if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) { return Err(ArrayError::InvalidArraySubset( chunk_subset.clone(), self.shape().to_vec(), diff --git a/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs b/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs index db3f3fee..03cd7036 100644 --- a/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs +++ b/zarrs/src/array/chunk_cache/array_chunk_cache_ext_sync.rs @@ -229,7 +229,7 @@ impl ArrayChunkCacheExt Result, ArrayError> { let chunk_representation = self.chunk_array_representation(chunk_indices)?; - if !chunk_subset.inbounds(&chunk_representation.shape_u64()) { + if !chunk_subset.inbounds_shape(&chunk_representation.shape_u64()) { return Err(ArrayError::InvalidArraySubset( chunk_subset.clone(), self.shape().to_vec(), diff --git a/zarrs/src/array/codec.rs b/zarrs/src/array/codec.rs index 1850b43c..31e7aa43 100644 --- a/zarrs/src/array/codec.rs +++ b/zarrs/src/array/codec.rs @@ -375,7 +375,7 @@ pub trait ArrayPartialDecoderTraits: Any + Send + Sync { output_subset: &ArraySubset, options: &CodecOptions, ) -> Result<(), CodecError> { - debug_assert!(output_subset.inbounds(output_shape)); + debug_assert!(output_subset.inbounds_shape(output_shape)); debug_assert_eq!(array_subset.num_elements(), output_subset.num_elements()); let decoded_value = self .partial_decode(&[array_subset.clone()], options)? @@ -460,7 +460,7 @@ pub trait AsyncArrayPartialDecoderTraits: Any + Send + Sync { output_subset: &ArraySubset, options: &CodecOptions, ) -> Result<(), CodecError> { - debug_assert!(output_subset.inbounds(output_shape)); + debug_assert!(output_subset.inbounds_shape(output_shape)); debug_assert_eq!(array_subset.shape(), output_subset.shape()); let decoded_value = self .partial_decode(&[array_subset.clone()], options) @@ -728,7 +728,7 @@ pub trait ArrayToBytesCodecTraits: ArrayCodecTraits + core::fmt::Debug { output_subset: &ArraySubset, options: &CodecOptions, ) -> Result<(), CodecError> { - debug_assert!(output_subset.inbounds(output_shape)); + debug_assert!(output_subset.inbounds_shape(output_shape)); debug_assert_eq!( decoded_representation.num_elements(), output_subset.num_elements() diff --git a/zarrs/src/array_subset.rs b/zarrs/src/array_subset.rs index ac7b8cde..79730915 100644 --- a/zarrs/src/array_subset.rs +++ b/zarrs/src/array_subset.rs @@ -564,9 +564,26 @@ impl ArraySubset { } } - /// Returns true if the array subset is within the bounds of `array_shape`. + /// Returns true if this array subset is within the bounds of `subset`. #[must_use] - pub fn inbounds(&self, array_shape: &[u64]) -> bool { + pub fn inbounds(&self, subset: &ArraySubset) -> bool { + if self.dimensionality() != subset.dimensionality() { + return false; + } + + for (self_start, self_shape, other_start, other_shape) in + izip!(self.start(), self.shape(), subset.start(), subset.shape()) + { + if self_start < other_start || self_start + self_shape > other_start + other_shape { + return false; + } + } + true + } + + /// Returns true if the array subset is within the bounds of an `ArraySubset` with zero origin and a shape of `array_shape`. + #[must_use] + pub fn inbounds_shape(&self, array_shape: &[u64]) -> bool { if self.dimensionality() != array_shape.len() { return false; } @@ -646,9 +663,9 @@ mod tests { ArraySubset::new_with_ranges(&[0..4, 1..5]) ); assert!(array_subset0.relative_to(&[1, 1, 1]).is_err()); - assert!(array_subset0.inbounds(&[10, 10])); - assert!(!array_subset0.inbounds(&[2, 2])); - assert!(!array_subset0.inbounds(&[10, 10, 10])); + assert!(array_subset0.inbounds_shape(&[10, 10])); + assert!(!array_subset0.inbounds_shape(&[2, 2])); + assert!(!array_subset0.inbounds_shape(&[10, 10, 10])); assert_eq!(array_subset0.to_ranges(), vec![1..5, 2..6]); let array_subset2 = ArraySubset::new_with_ranges(&[3..6, 4..7, 0..1]); From e240dbc1eca3104f2e0a8d58af6ac18653ead15c Mon Sep 17 00:00:00 2001 From: Lachlan Deakin Date: Sun, 19 Jan 2025 15:20:04 +1100 Subject: [PATCH 2/2] refactor!: add new types of `CodecError`s --- CHANGELOG.md | 4 + zarrs/src/array.rs | 5 +- zarrs/src/array/array_bytes.rs | 21 ++--- zarrs/src/array/codec.rs | 85 ++++++++++++++++++- .../codec/array_to_bytes/bytes/bytes_codec.rs | 13 ++- zarrs/src/array/codec/array_to_bytes/vlen.rs | 7 +- .../src/array/codec/array_to_bytes/vlen_v2.rs | 10 +-- .../array/codec/bytes_to_bytes/gdeflate.rs | 16 ++-- 8 files changed, 121 insertions(+), 40 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 39a84d5e..e515451f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] ### Added +- Add `array:codec::{InvalidBytesLengthError,InvalidArrayShapeError,InvalidNumberOfElementsError,SubsetOutOfBoundsError}` - Add `ArraySubset::inbounds_shape()` (matches the old `ArraySubset::inbounds` behaviour) ### Changed +- **Breaking**: `CodecError` enum changes: + - Change `CodecError::UnexpectedChunkDecodedSize` to an `InvalidBytesLengthError` + - Add `CodecError::{InvalidArrayShape,InvalidNumberOfElements,SubsetOutOfBounds}` - **Breaking**: change `ArraySubset::inbounds` to take another subset rather than a shape ## [0.19.1] - 2025-01-19 diff --git a/zarrs/src/array.rs b/zarrs/src/array.rs index c85aff98..c29a9d4c 100644 --- a/zarrs/src/array.rs +++ b/zarrs/src/array.rs @@ -910,10 +910,7 @@ pub fn elements_to_ndarray( ) -> Result, ArrayError> { let length = elements.len(); ndarray::ArrayD::::from_shape_vec(iter_u64_to_usize(shape.iter()), elements).map_err(|_| { - ArrayError::CodecError(codec::CodecError::UnexpectedChunkDecodedSize( - length * size_of::(), - shape.iter().product::() * size_of::() as u64, - )) + ArrayError::CodecError(codec::InvalidArrayShapeError::new(shape.to_vec(), length).into()) }) } diff --git a/zarrs/src/array/array_bytes.rs b/zarrs/src/array/array_bytes.rs index 53c4e7b5..eabe18de 100644 --- a/zarrs/src/array/array_bytes.rs +++ b/zarrs/src/array/array_bytes.rs @@ -10,7 +10,10 @@ use crate::{ metadata::v3::array::data_type::DataTypeSize, }; -use super::{codec::CodecError, ravel_indices, ArraySize, DataType, FillValue}; +use super::{ + codec::{CodecError, InvalidBytesLengthError}, + ravel_indices, ArraySize, DataType, FillValue, +}; /// Array element bytes. /// @@ -217,14 +220,11 @@ impl<'a> ArrayBytes<'a> { } /// Validate fixed length array bytes for a given array size. -fn validate_bytes_flen(bytes: &RawBytes, array_size: u64) -> Result<(), CodecError> { - if bytes.len() as u64 == array_size { +fn validate_bytes_flen(bytes: &RawBytes, array_size: usize) -> Result<(), InvalidBytesLengthError> { + if bytes.len() == array_size { Ok(()) } else { - Err(CodecError::UnexpectedChunkDecodedSize( - bytes.len(), - array_size, - )) + Err(InvalidBytesLengthError::new(bytes.len(), array_size)) } } @@ -259,9 +259,10 @@ fn validate_bytes( data_type_size: DataTypeSize, ) -> Result<(), CodecError> { match (bytes, data_type_size) { - (ArrayBytes::Fixed(bytes), DataTypeSize::Fixed(data_type_size)) => { - validate_bytes_flen(bytes, num_elements * data_type_size as u64) - } + (ArrayBytes::Fixed(bytes), DataTypeSize::Fixed(data_type_size)) => Ok(validate_bytes_flen( + bytes, + usize::try_from(num_elements * data_type_size as u64).unwrap(), + )?), (ArrayBytes::Variable(bytes, offsets), DataTypeSize::Variable) => { validate_bytes_vlen(bytes, offsets, num_elements) } diff --git a/zarrs/src/array/codec.rs b/zarrs/src/array/codec.rs index 31e7aa43..7bcc5e0f 100644 --- a/zarrs/src/array/codec.rs +++ b/zarrs/src/array/codec.rs @@ -15,6 +15,7 @@ pub mod array_to_bytes; pub mod bytes_to_bytes; pub mod options; +use derive_more::derive::Display; pub use options::{CodecOptions, CodecOptionsBuilder}; // Array to array @@ -77,6 +78,7 @@ pub use array_to_array_partial_encoder_default::ArrayToArrayPartialEncoderDefaul mod bytes_partial_encoder_default; pub use bytes_partial_encoder_default::BytesPartialEncoderDefault; +use zarrs_metadata::ArrayShape; use crate::storage::{StoreKeyOffsetValue, WritableStorage}; use crate::{ @@ -962,6 +964,76 @@ impl AsyncBytesPartialDecoderTraits for std::io::Cursor> { } } +/// An error indicating the length of bytes does not match the expected length. +#[derive(Debug, Error, Display)] +#[display("Invalid bytes len {len}, expected {expected_len}")] +pub struct InvalidBytesLengthError { + len: usize, + expected_len: usize, +} + +impl InvalidBytesLengthError { + /// Create a new [`InvalidBytesLengthError`]. + #[must_use] + pub fn new(len: usize, expected_len: usize) -> Self { + Self { len, expected_len } + } +} + +/// An error indicating the shape is not compatible with the expected number of elements. +#[derive(Debug, Error, Display)] +#[display("Invalid shape {shape:?} for number of elements {expected_num_elements}")] +pub struct InvalidArrayShapeError { + shape: ArrayShape, + expected_num_elements: usize, +} + +impl InvalidArrayShapeError { + /// Create a new [`InvalidArrayShapeError`]. + #[must_use] + pub fn new(shape: ArrayShape, expected_num_elements: usize) -> Self { + Self { + shape, + expected_num_elements, + } + } +} + +/// An error indicating the length of elements does not match the expected length. +#[derive(Debug, Error, Display)] +#[display("Invalid number of elements {num}, expected {expected}")] +pub struct InvalidNumberOfElementsError { + num: u64, + expected: u64, +} + +impl InvalidNumberOfElementsError { + /// Create a new [`InvalidNumberOfElementsError`]. + #[must_use] + pub fn new(num: u64, expected: u64) -> Self { + Self { num, expected } + } +} + +/// An array subset is out of bounds. +#[derive(Debug, Error, Display)] +#[display("Subset {subset} is out of bounds of {must_be_within}")] +pub struct SubsetOutOfBoundsError { + subset: ArraySubset, + must_be_within: ArraySubset, +} + +impl SubsetOutOfBoundsError { + /// Create a new [`InvalidNumberOfElementsError`]. + #[must_use] + pub fn new(subset: ArraySubset, must_be_within: ArraySubset) -> Self { + Self { + subset, + must_be_within, + } + } +} + /// A codec error. #[derive(Debug, Error)] pub enum CodecError { @@ -978,8 +1050,8 @@ pub enum CodecError { #[error("the array subset {_0} has the wrong dimensionality, expected {_1}")] InvalidArraySubsetDimensionalityError(ArraySubset, usize), /// The decoded size of a chunk did not match what was expected. - #[error("the size of a decoded chunk is {_0}, expected {_1}")] - UnexpectedChunkDecodedSize(usize, u64), + #[error("the size of a decoded chunk is {}, expected {}", _0.len, _0.expected_len)] + UnexpectedChunkDecodedSize(#[from] InvalidBytesLengthError), /// An embedded checksum does not match the decoded value. #[error("the checksum is invalid")] InvalidChecksum, @@ -1004,6 +1076,15 @@ pub enum CodecError { /// Expected variable length bytes. #[error("Expected variable length array bytes")] ExpectedVariableLengthBytes, + /// Invalid array shape. + #[error(transparent)] + InvalidArrayShape(#[from] InvalidArrayShapeError), + /// Invalid number of elements. + #[error(transparent)] + InvalidNumberOfElements(#[from] InvalidNumberOfElementsError), + /// Subset out of bounds. + #[error(transparent)] + SubsetOutOfBounds(#[from] SubsetOutOfBoundsError), } impl From<&str> for CodecError { diff --git a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs index f6cff4a3..aee68567 100644 --- a/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs +++ b/zarrs/src/array/codec/array_to_bytes/bytes/bytes_codec.rs @@ -8,7 +8,7 @@ use crate::{ ArrayCodecTraits, ArrayPartialDecoderTraits, ArrayPartialEncoderDefault, ArrayPartialEncoderTraits, ArrayToBytesCodecTraits, BytesPartialDecoderTraits, BytesPartialEncoderTraits, CodecError, CodecOptions, CodecTraits, - RecommendedConcurrency, + InvalidBytesLengthError, RecommendedConcurrency, }, ArrayBytes, ArrayMetadataOptions, BytesRepresentation, ChunkRepresentation, DataTypeSize, RawBytes, @@ -77,12 +77,11 @@ impl BytesCodec { )); } DataTypeSize::Fixed(data_type_size) => { - let array_size = decoded_representation.num_elements() * data_type_size as u64; - if value.len() as u64 != array_size { - return Err(CodecError::UnexpectedChunkDecodedSize( - value.len(), - array_size, - )); + let array_size = + usize::try_from(decoded_representation.num_elements() * data_type_size as u64) + .unwrap(); + if value.len() != array_size { + return Err(InvalidBytesLengthError::new(value.len(), array_size).into()); } else if data_type_size > 1 && self.endian.is_none() { return Err(CodecError::Other(format!( "tried to encode an array with element size {data_type_size} with endianness None" diff --git a/zarrs/src/array/codec/array_to_bytes/vlen.rs b/zarrs/src/array/codec/array_to_bytes/vlen.rs index 5873fe7a..b246c08e 100644 --- a/zarrs/src/array/codec/array_to_bytes/vlen.rs +++ b/zarrs/src/array/codec/array_to_bytes/vlen.rs @@ -13,7 +13,7 @@ pub use crate::metadata::v3::array::codec::vlen::{ }; use crate::{ array::{ - codec::{ArrayToBytesCodecTraits, CodecError, CodecOptions}, + codec::{ArrayToBytesCodecTraits, CodecError, CodecOptions, InvalidBytesLengthError}, convert_from_bytes_slice, ChunkRepresentation, CodecChain, DataType, Endianness, FillValue, RawBytes, }, @@ -62,10 +62,7 @@ fn get_vlen_bytes_and_offsets( ) -> Result<(Vec, Vec), CodecError> { // Get the index length and data start if bytes.len() < size_of::() { - return Err(CodecError::UnexpectedChunkDecodedSize( - bytes.len(), - size_of::() as u64, - )); + return Err(InvalidBytesLengthError::new(bytes.len(), size_of::()).into()); } let index_len = u64::from_le_bytes(bytes[0..size_of::()].try_into().unwrap()); let index_len = usize::try_from(index_len) diff --git a/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs b/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs index 856d7566..e22a0d28 100644 --- a/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs +++ b/zarrs/src/array/codec/array_to_bytes/vlen_v2.rs @@ -11,7 +11,10 @@ use std::sync::Arc; pub(crate) const IDENTIFIER: &str = "vlen_v2"; // pub use vlen_v2::IDENTIFIER; -use crate::array::{codec::CodecError, RawBytes}; +use crate::array::{ + codec::{CodecError, InvalidBytesLengthError}, + RawBytes, +}; pub(crate) use vlen_v2_codec::VlenV2Codec; @@ -67,10 +70,7 @@ fn get_interleaved_bytes_and_offsets( // Validate the bytes is long enough to contain header and element lengths let header_length = size_of::() * (1 + num_elements); if bytes.len() < header_length { - return Err(CodecError::UnexpectedChunkDecodedSize( - bytes.len(), - header_length as u64, - )); + return Err(InvalidBytesLengthError::new(bytes.len(), header_length).into()); } // Validate the number of elements from the header diff --git a/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs b/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs index ac74783e..13ade3f2 100644 --- a/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs +++ b/zarrs/src/array/codec/bytes_to_bytes/gdeflate.rs @@ -28,7 +28,7 @@ pub use gdeflate_codec::GDeflateCodec; use crate::{ array::{ - codec::{Codec, CodecError, CodecPlugin}, + codec::{Codec, CodecError, CodecPlugin, InvalidBytesLengthError}, RawBytes, }, metadata::v3::{array::codec::gdeflate, MetadataV3}, @@ -61,10 +61,11 @@ const GDEFLATE_STATIC_HEADER_LENGTH: usize = 2 * size_of::(); fn gdeflate_decode(encoded_value: &RawBytes<'_>) -> Result, CodecError> { if encoded_value.len() < GDEFLATE_STATIC_HEADER_LENGTH { - return Err(CodecError::UnexpectedChunkDecodedSize( + return Err(InvalidBytesLengthError::new( encoded_value.len(), - GDEFLATE_STATIC_HEADER_LENGTH as u64, - )); + GDEFLATE_STATIC_HEADER_LENGTH, + ) + .into()); } // Decode the static header @@ -77,10 +78,11 @@ fn gdeflate_decode(encoded_value: &RawBytes<'_>) -> Result, CodecError> // Check length of dynamic header let dynamic_header_length = num_pages * size_of::(); if encoded_value.len() < GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length { - return Err(CodecError::UnexpectedChunkDecodedSize( + return Err(InvalidBytesLengthError::new( encoded_value.len(), - (GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length) as u64, - )); + GDEFLATE_STATIC_HEADER_LENGTH + dynamic_header_length, + ) + .into()); } // Decode the pages