diff --git a/doc/correctness_issues.md b/doc/correctness_issues.md index 87e12a8c..889d1882 100644 --- a/doc/correctness_issues.md +++ b/doc/correctness_issues.md @@ -1,4 +1,8 @@ ## Correctness Issues with Past Versions +- Prior to `zarrs_metadata` [v0.3.5](https://github.com/LDeakin/zarrs/releases/tag/zarrs_metadata-v0.3.5) (`zarrs` <= 0.19), it was possible for a user to create non-conformant Zarr V2 metadata with `filters: []` + - Empty filters now always correctly serialise to `null` + - `zarrs` will indefinitely support reading Zarr V2 data with `filters: []` + - `zarr-python` shared this bug (see https://github.com/zarr-developers/zarr-python/issues/2842) - Prior to zarrs [v0.11.5](https://github.com/LDeakin/zarrs/releases/tag/v0.11.5), arrays that used the `crc32c` codec have invalid chunk checksums - These arrays will fail to be read by Zarr implementations if they validate checksums - These arrays can be read by zarrs if the [validate checksums](crate::config::Config#validate-checksums) global configuration option is disabled or the relevant codec option is set explicitly diff --git a/zarrs_metadata/CHANGELOG.md b/zarrs_metadata/CHANGELOG.md index 26c9a3c6..2f7ca7c5 100644 --- a/zarrs_metadata/CHANGELOG.md +++ b/zarrs_metadata/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed +- Ensure that Zarr V2 array metadata with empty `filters` is serialised as `null` instead of `[]` + ## [0.3.4] - 2025-02-13 ### Added diff --git a/zarrs_metadata/src/v2/array.rs b/zarrs_metadata/src/v2/array.rs index c679e5fb..5d5daacb 100644 --- a/zarrs_metadata/src/v2/array.rs +++ b/zarrs_metadata/src/v2/array.rs @@ -1,5 +1,5 @@ use derive_more::{derive::From, Display}; -use serde::{Deserialize, Serialize}; +use serde::{Deserialize, Serialize, Serializer}; use thiserror::Error; use crate::{ @@ -77,7 +77,7 @@ pub struct ArrayMetadataV2 { /// Either “C” or “F”, defining the layout of bytes 
within each chunk of the array. pub order: ArrayMetadataV2Order, /// A list of JSON objects providing codec configurations, or null if no filters are to be applied. - #[serde(default)] + #[serde(default, serialize_with = "serialize_v2_filters")] pub filters: Option>, /// If present, either the string "." or "/" defining the separator placed between the dimensions of a chunk. #[serde(default = "chunk_key_separator_default_zarr_v2")] @@ -92,6 +92,21 @@ pub struct ArrayMetadataV2 { pub additional_fields: AdditionalFields, } +#[allow(clippy::ref_option)] +fn serialize_v2_filters( + filters: &Option>, + serializer: S, +) -> Result +where + S: Serializer, +{ + let filters = filters.as_ref().filter(|v| !v.is_empty()); + match filters { + Some(filters) => serializer.collect_seq(filters), + None => serializer.serialize_none(), + } +} + impl ArrayMetadataV2 { /// Create Zarr V2 array metadata. /// @@ -108,6 +123,7 @@ impl ArrayMetadataV2 { compressor: Option, filters: Option>, ) -> Self { + let filters = filters.filter(|v| !v.is_empty()); Self { zarr_format: monostate::MustBe!(2u64), shape, @@ -303,3 +319,30 @@ pub enum ArrayMetadataV2Order { /// Column-major order. The first dimension varies fastest. F, } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn empty_filters() { + let array = ArrayMetadataV2 { + zarr_format: monostate::MustBe!(2u64), + shape: vec![10000, 10000], + chunks: vec![1000, 1000].try_into().unwrap(), + dtype: DataTypeMetadataV2::Simple("