Skip to content

Commit

Permalink
handle fill_value datatype for string-array (#140)
Browse files Browse the repository at this point in the history
* handle fill_value datatype for string-array

* fix 0 -> ''

* fix: minimise "string"/0 fill value workaround

* chore: add V2 string fill value tests

---------

Co-authored-by: Zhuoqing Fang <fangzq@KLG9H04WF3.local>
Co-authored-by: Lachlan Deakin <ljdgit@gmail.com>
  • Loading branch information
3 people authored Feb 3, 2025
1 parent 9070e12 commit 20089c5
Show file tree
Hide file tree
Showing 11 changed files with 115 additions and 0 deletions.
42 changes: 42 additions & 0 deletions zarrs/tests/data/v2_str0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env -S uv run
# /// script
# requires-python = ">=3.12"
# dependencies = [
# "zarr==3.0.1",
# ]
# ///

import zarr

def _write_v2_string_array(store_path, fill_value):
    """Create a small Zarr V2 string array and populate its first three elements.

    The array has five elements split into chunks of two, is encoded with the
    ``vlen-utf8`` filter and no compressor, and overwrites anything already
    present at ``store_path``. After creation, elements 0..3 are set to
    ``"a"``, ``"bb"`` and ``""``; the remaining two are left unassigned.
    The array's ``info`` is printed before it is returned.
    """
    arr = zarr.create_array(
        store_path,
        dtype=str,
        shape=(5,),
        chunks=(2,),
        filters=zarr.codecs.vlen_utf8.VLenUTF8(),
        compressors=[None],
        fill_value=fill_value,
        zarr_format=2,
        overwrite=True,
    )
    arr[:3] = ["a", "bb", ""]
    print(arr.info)
    return arr


# Case 1: integer 0 as the fill value of a string array.
path_out = "tests/data/zarr_python_compat/str_v2_fv_0.zarr"
array = _write_v2_string_array(path_out, fill_value=0)
# The round-trip check below is intentionally disabled: the original author
# marked it as a FAILURE for this fill value.
# assert (array[:] == ["a", "bb", "", "", ""]).all() # FAILURE

# Case 2: null (None) fill value; here the unassigned elements must read back
# as empty strings.
path_out = "tests/data/zarr_python_compat/str_v2_fv_null.zarr"
array = _write_v2_string_array(path_out, fill_value=None)
print(array[:])
assert (array[:] == ["a", "bb", "", "", ""]).all()
19 changes: 19 additions & 0 deletions zarrs/tests/data/zarr_python_compat/str_v2_fv_0.zarr/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"shape": [
5
],
"chunks": [
2
],
"fill_value": 0,
"order": "C",
"filters": [
{
"id": "vlen-utf8"
}
],
"dimension_separator": ".",
"compressor": null,
"zarr_format": 2,
"dtype": "|O"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Binary file not shown.
Binary file not shown.
19 changes: 19 additions & 0 deletions zarrs/tests/data/zarr_python_compat/str_v2_fv_null.zarr/.zarray
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"shape": [
5
],
"chunks": [
2
],
"fill_value": null,
"order": "C",
"filters": [
{
"id": "vlen-utf8"
}
],
"dimension_separator": ".",
"compressor": null,
"zarr_format": 2,
"dtype": "|O"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{}
Binary file not shown.
Binary file not shown.
28 changes: 28 additions & 0 deletions zarrs/tests/zarr_python_compat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,31 @@ fn zarr_python_compat_fletcher32_v2() -> Result<(), Box<dyn Error>> {

Ok(())
}

/// Opens the V2 string array stored with `"fill_value": 0` and checks that the
/// whole array reads back as `["a", "bb", "", "", ""]`, i.e. the elements that
/// were never assigned come back as empty strings.
#[test]
fn zarr_python_v2_compat_str_fv_0() -> Result<(), Box<dyn Error>> {
    let expected = ["a", "bb", "", "", ""];

    let store = Arc::new(FilesystemStore::new(
        "tests/data/zarr_python_compat/str_v2_fv_0.zarr",
    )?);
    let array = zarrs::array::Array::open(Arc::clone(&store), "/")?;

    // Retrieve every element of the array in one request.
    let elements = array.retrieve_array_subset_elements::<String>(&array.subset_all())?;
    assert_eq!(elements, &expected);

    Ok(())
}

/// Opens the V2 string array stored with `"fill_value": null` and checks that
/// the whole array reads back as `["a", "bb", "", "", ""]`, i.e. the elements
/// that were never assigned come back as empty strings.
#[test]
fn zarr_python_v2_compat_str_fv_null() -> Result<(), Box<dyn Error>> {
    let expected = ["a", "bb", "", "", ""];

    let store = Arc::new(FilesystemStore::new(
        "tests/data/zarr_python_compat/str_v2_fv_null.zarr",
    )?);
    let array = zarrs::array::Array::open(Arc::clone(&store), "/")?;

    // Retrieve every element of the array in one request.
    let elements = array.retrieve_array_subset_elements::<String>(&array.subset_all())?;
    assert_eq!(elements, &expected);

    Ok(())
}
5 changes: 5 additions & 0 deletions zarrs_metadata/src/v2_to_v3.rs
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,11 @@ pub fn array_metadata_v2_to_v3(
));
}
}
} else if data_type.name() == "string" {
// Add a special case for `zarr-python` string data with a 0 fill value -> empty string
if let Some(0) = fill_value.try_as_uint::<u64>() {
fill_value = FillValueMetadataV3::String(String::new());
}
}

let mut codecs: Vec<MetadataV3> = vec![];
Expand Down

0 comments on commit 20089c5

Please sign in to comment.