Skip to content

Commit

Permalink
Improve correctness of metadata parsing.
Browse files Browse the repository at this point in the history
- Particularly improve how integers are handled if they exceed the
internal max representation value.
- Also ensure that any changes made to an array's metadata do not
  accidentally modify uintended fields.
  • Loading branch information
zoj613 committed Jan 12, 2025
1 parent 5d79457 commit 9cfb08c
Show file tree
Hide file tree
Showing 20 changed files with 871 additions and 1,187 deletions.
18 changes: 9 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -69,17 +69,17 @@ FilesystemStore.Group.create store group_node;;
let array_node = Node.Array.(group_node / "name");;
(* creates an array with char data type and fill value '?' *)
FilesystemStore.Array.create
~codecs:[`Transpose [|2; 0; 1|]; `Bytes BE; `Gzip L2]
~shape:[|100; 100; 50|]
~chunks:[|10; 15; 20|]
~codecs:[`Transpose [2; 0; 1]; `Bytes BE; `Gzip L2]
~shape:[100; 100; 50]
~chunks:[10; 15; 20]
Ndarray.Char
'?'
array_node
store;;
```
### read/write from/to an array
```ocaml
let slice = [|R [|0; 20|]; I 10; R [||]|];;
let slice = [R (0, 20); I 10; F];;
let x = FilesystemStore.Array.read store array_node slice Ndarray.Char;;
(* Do some computation on the array slice *)
let x' = Zarr.Ndarray.map (fun _ -> Random.int 256 |> Char.chr) x;;
Expand All @@ -90,17 +90,17 @@ assert (Ndarray.equal x' y);;
### create an array with sharding
```ocaml
let config =
{chunk_shape = [|5; 3; 5|]
;codecs = [`Transpose [|2; 0; 1|]; `Bytes LE; `Zstd (0, true)]
{chunk_shape = [5; 3; 5]
;codecs = [`Transpose [2; 0; 1]; `Bytes LE; `Zstd (0, true)]
;index_codecs = [`Bytes BE; `Crc32c]
;index_location = Start};;
let shard_node = Node.Array.(group_node / "another");;
FilesystemStore.Array.create
~codecs:[`ShardingIndexed config]
~shape:[|100; 100; 50|]
~chunks:[|10; 15; 20|]
~shape:[100; 100; 50]
~chunks:[10; 15; 20]
Ndarray.Complex32
Complex.zero
shard_node
Expand All @@ -114,7 +114,7 @@ List.map Node.Array.to_path a;;
List.map Node.Group.to_path g;;
(*- : string list = ["/"; "/some"; "/some/group"] *)
FilesystemStore.Array.reshape store array_node [|25; 32; 10|];;
FilesystemStore.Array.reshape store array_node [25; 32; 10];;
let meta = FilesystemStore.Group.metadata store group_node;;
Metadata.Group.show meta;; (* pretty prints the contents of the metadata *)
Expand Down
49 changes: 14 additions & 35 deletions zarr-eio/test/test_eio.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ open Zarr_eio.Storage

let string_of_list = [%show: string list]
let print_node_pair = [%show: Node.Array.t list * Node.Group.t list]
let print_int_array = [%show : int array]

module type EIO_STORE = Zarr.Storage.S with type 'a io := 'a

Expand Down Expand Up @@ -40,19 +39,17 @@ let test_storage
assert_equal ~printer:string_of_bool false exists;

let cfg =
{chunk_shape = [|2; 5; 5|]
{chunk_shape = [2; 5; 5]
;index_location = End
;index_codecs = [`Bytes BE]
;codecs = [`Bytes LE]} in
let anode = Node.Array.(gnode / "arrnode") in
let slice = [|R [|0; 20|]; I 10; R [|0; 29|]|] in
let exp = Ndarray.init Complex32 [|21; 1; 30|] (Fun.const Complex.one) in
let slice = [R (0, 20); I 10; R (0, 29)] in
let exp = Ndarray.init Complex32 [21; 1; 30] (Fun.const Complex.one) in

List.iter
(fun codecs ->
Array.create
~codecs ~shape:[|100; 100; 50|] ~chunks:[|10; 15; 20|]
Complex32 Complex.one anode store;
Array.create ~codecs ~shape:[100; 100; 50] ~chunks:[10; 15; 20] Complex32 Complex.one anode store;
Array.write store anode slice exp;
let got = Array.read store anode slice Complex32 in
assert_equal exp got;
Expand All @@ -72,44 +69,29 @@ let test_storage
let child = Node.Group.of_path "/some/child/group" in
Group.create store child;
let arrays, groups = Group.children store gnode in
assert_equal
~printer:string_of_list ["/arrnode"] (List.map Node.Array.to_path arrays);
assert_equal
~printer:string_of_list ["/some"] (List.map Node.Group.to_path groups);

assert_equal ~printer:string_of_list ["/arrnode"] (List.map Node.Array.to_path arrays);
assert_equal ~printer:string_of_list ["/some"] (List.map Node.Group.to_path groups);
let c = Group.children store @@ Node.Group.(root / "fakegroup") in
assert_equal ([], []) c;

let ac, gc = hierarchy store in
let got =
List.fast_sort String.compare @@
List.map Node.Array.show ac @ List.map Node.Group.show gc in
assert_equal
~printer:string_of_list
["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got;

let got = List.fast_sort String.compare @@ List.map Node.Array.show ac @ List.map Node.Group.show gc in
assert_equal ~printer:string_of_list ["/"; "/arrnode"; "/some"; "/some/child"; "/some/child/group"] got;
(* tests for renaming nodes *)
let some = Node.Group.of_path "/some/child" in
Array.rename store anode "ARRAYNODE";
Group.rename store some "CHILD";
let ac, gc = hierarchy store in
let got =
List.fast_sort String.compare @@
List.map Node.Array.show ac @ List.map Node.Group.show gc in
assert_equal
~printer:string_of_list
["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got;
let got = List.fast_sort String.compare (List.map Node.Array.show ac @ List.map Node.Group.show gc) in
assert_equal ~printer:string_of_list ["/"; "/ARRAYNODE"; "/some"; "/some/CHILD"; "/some/CHILD/group"] got;
(* restore old array node name. *)
Array.rename store (Node.Array.of_path "/ARRAYNODE") "arrnode";

let nshape = [|25; 32; 10|] in
let nshape = [25; 32; 10] in
Array.reshape store anode nshape;
let meta = Array.metadata store anode in
assert_equal ~printer:print_int_array nshape @@ Metadata.Array.shape meta;
assert_equal ~printer:[%show : int list] nshape @@ Metadata.Array.shape meta;
assert_raises
(Zarr.Storage.Key_not_found "fakegroup/zarr.json")
(fun () -> Array.metadata store Node.Array.(gnode / "fakegroup"));

Array.delete store anode;
clear store;
let got = hierarchy store in
Expand All @@ -120,22 +102,19 @@ let _ =
"test eio-based stores" >::
(fun _ ->
Eio_main.run @@ fun env ->
let rand_num = string_of_int @@ Random.int 10_000 in
let rand_num = string_of_int (Random.int 10_000) in
let tmp_dir = Filename.(concat (get_temp_dir_name ()) (rand_num ^ ".zarr")) in
let s = FilesystemStore.create ~env tmp_dir in

assert_raises
(Sys_error (Format.sprintf "%s: File exists" tmp_dir))
(fun () -> FilesystemStore.create ~env tmp_dir);

(* ensure it works with an extra "/" appended to directory name. *)
ignore @@ FilesystemStore.open_store ~env (tmp_dir ^ "/");

let fakedir = "non-existant-zarr-store112345.zarr" in
assert_raises
(Sys_error (Printf.sprintf "%s: No such file or directory" fakedir))
(fun () -> FilesystemStore.open_store ~env fakedir);

let fn = Filename.temp_file "nonexistantfile" ".zarr" in
assert_raises
(Zarr.Storage.Not_a_filesystem_store fn)
Expand All @@ -146,6 +125,6 @@ let _ =
ZipStore.with_open `Read_write zpath (fun z -> test_storage (module ZipStore) z);
(* test just opening the now exisitant archive created by the previous test. *)
ZipStore.with_open `Read_only zpath (fun _ -> ());
test_storage (module MemoryStore) @@ MemoryStore.create ();
test_storage (module MemoryStore) (MemoryStore.create ());
test_storage (module FilesystemStore) s)
])
13 changes: 6 additions & 7 deletions zarr-lwt/test/test_lwt.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ open Zarr_lwt.Storage

let string_of_list = [%show: string list]
let print_node_pair = [%show: Node.Array.t list * Node.Group.t list]
let print_int_array = [%show : int array]

module type LWT_STORE = Zarr.Storage.S with type 'a io := 'a Lwt.t

Expand Down Expand Up @@ -41,18 +40,18 @@ let test_storage
assert_equal ~printer:string_of_bool false exists;

let cfg =
{chunk_shape = [|2; 5; 5|]
{chunk_shape = [2; 5; 5]
;index_location = End
;index_codecs = [`Bytes BE]
;codecs = [`Bytes LE]} in
let anode = Node.Array.(gnode / "arrnode") in
let slice = [|R [|0; 20|]; I 10; R [|0; 29|]|] in
let exp = Ndarray.init Ndarray.Complex32 [|21; 1; 30|] (Fun.const Complex.one) in
let slice = [R (0, 20); I 10; R (0, 29)] in
let exp = Ndarray.init Ndarray.Complex32 [21; 1; 30] (Fun.const Complex.one) in

Lwt_list.iter_s
(fun codecs ->
Array.create
~codecs ~shape:[|100; 100; 50|] ~chunks:[|10; 15; 20|]
~codecs ~shape:[100; 100; 50] ~chunks:[10; 15; 20]
Ndarray.Complex32 Complex.one anode store >>= fun () ->
Array.write store anode slice exp >>= fun () ->
Array.read store anode slice Complex32 >>= fun got ->
Expand Down Expand Up @@ -103,10 +102,10 @@ let test_storage
(* restore old array node name. *)
Array.rename store (Node.Array.of_path "/ARRAYNODE") "arrnode" >>= fun () ->

let nshape = [|25; 32; 10|] in
let nshape = [25; 32; 10] in
Array.reshape store anode nshape >>= fun () ->
Array.metadata store anode >>= fun meta ->
assert_equal ~printer:print_int_array nshape @@ Metadata.Array.shape meta;
assert_equal ~printer:[%show : int list] nshape @@ Metadata.Array.shape meta;

Array.delete store anode >>= fun () ->
clear store >>= fun () ->
Expand Down
38 changes: 16 additions & 22 deletions zarr-sync/test/test_sync.ml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ open Zarr_sync.Storage

let string_of_list = [%show: string list]
let print_node_pair = [%show: Node.Array.t list * Node.Group.t list]
let print_int_array = [%show : int array]

module type SYNC_STORE = Zarr.Storage.S with type 'a io := 'a

Expand Down Expand Up @@ -40,25 +39,23 @@ let test_storage
assert_equal ~printer:string_of_bool false exists;

let cfg =
{chunk_shape = [|2; 5; 5|]
{chunk_shape = [2; 5; 5]
;index_location = End
;index_codecs = [`Bytes LE; `Crc32c]
;codecs = [`Transpose [|2; 0; 1|]; `Bytes BE; `Zstd (0, false)]} in
;codecs = [`Transpose [2; 0; 1]; `Bytes BE; `Zstd (0, false)]} in
let cfg2 =
{chunk_shape = [|2; 5; 5|]
{chunk_shape = [2; 5; 5]
;index_location = Start
;index_codecs = [`Bytes BE]
;codecs = [`Bytes LE]} in
let anode = Node.Array.(gnode / "arrnode") in
let slice = [|R [|0; 20|]; I 10; R [|0; 29|]|] in
let bigger_slice = [|R [|0; 21|]; L [|9; 10|] ; R [|0; 30|]|] in
let slice = [R (0, 20); I 10; R (0, 29)] in
let bigger_slice = [R (0, 21); L [9; 10] ; R (0, 30)] in

List.iter
(fun codecs ->
Array.create
~codecs ~shape:[|100; 100; 50|] ~chunks:[|10; 15; 20|]
Complex32 Complex.one anode store;
let exp = Ndarray.init Complex32 [|21; 1; 30|] (Fun.const Complex.one) in
Array.create ~codecs ~shape:[100; 100; 50] ~chunks:[10; 15; 20] Complex32 Complex.one anode store;
let exp = Ndarray.init Complex32 [21; 1; 30] (Fun.const Complex.one) in
let got = Array.read store anode slice Complex32 in
assert_equal exp got;
Ndarray.fill exp Complex.{re=2.0; im=0.};
Expand All @@ -68,20 +65,17 @@ let test_storage
let _ = Array.read store anode bigger_slice Complex32 in
assert_equal exp got;
(* test writing a bigger slice to store *)
Array.write store anode bigger_slice @@ Ndarray.init Complex32 [|22; 2; 31|] (Fun.const Complex.{re=0.; im=3.0});
Array.write store anode bigger_slice @@ Ndarray.init Complex32 [22; 2; 31] (Fun.const Complex.{re=0.; im=3.0});
let got = Array.read store anode slice Complex32 in
Ndarray.fill exp Complex.{re=0.; im=3.0};
assert_equal exp got;
Array.delete store anode)
[[`ShardingIndexed cfg]; [`ShardingIndexed cfg2]];

(* repeat tests for non-sharding codec chain *)
Array.create
~sep:`Dot ~codecs:[`Bytes BE]
~shape:[|100; 100; 50|] ~chunks:[|10; 15; 20|]
Ndarray.Int Int.max_int anode store;
Array.create ~sep:`Dot ~codecs:[`Bytes BE] ~shape:[100; 100; 50] ~chunks:[10; 15; 20] Ndarray.Int Int.max_int anode store;
(* test path where there is no chunk key present in store *)
let exp = Ndarray.init Int [|21; 1; 30|] (Fun.const Int.max_int) in
let exp = Ndarray.init Int [21; 1; 30] (Fun.const Int.max_int) in
Array.write store anode slice exp;
let got = Array.read store anode slice Int in
assert_equal exp got;
Expand All @@ -93,7 +87,7 @@ let test_storage
assert_raises
(Zarr.Storage.Invalid_data_type)
(fun () -> Array.read store anode slice Ndarray.Char);
let badslice = [|R [|0; 20|]; I 10; R [||]; R [||] |] in
let badslice = [R (0, 20); I 10; F; F] in
assert_raises
(Zarr.Storage.Invalid_array_slice)
(fun () -> Array.read store anode badslice Ndarray.Int);
Expand All @@ -102,8 +96,8 @@ let test_storage
(fun () -> Array.write store anode badslice exp);
assert_raises
(Zarr.Storage.Invalid_array_slice)
(fun () -> Array.write store anode [|R [|0; 20|]; R [||]; R [||]|] exp);
let badarray = Ndarray.init Float64 [|21; 1; 30|] (Fun.const 0.) in
(fun () -> Array.write store anode [R (0, 20); F; F] exp);
let badarray = Ndarray.init Float64 [21; 1; 30] (Fun.const 0.) in
assert_raises
(Zarr.Storage.Invalid_data_type)
(fun () -> Array.write store anode slice badarray);
Expand Down Expand Up @@ -147,13 +141,13 @@ let test_storage

(* restore old array node name. *)
Array.rename store (Node.Array.of_path "/ARRAYNODE") "arrnode";
let nshape = [|25; 32; 10|] in
let nshape = [25; 32; 10] in
Array.reshape store anode nshape;
let meta = Array.metadata store anode in
assert_equal ~printer:print_int_array nshape @@ Metadata.Array.shape meta;
assert_equal ~printer:[%show : int list] nshape @@ Metadata.Array.shape meta;
assert_raises
(Zarr.Storage.Invalid_resize_shape)
(fun () -> Array.reshape store anode [|25; 10|]);
(fun () -> Array.reshape store anode [25; 10]);
assert_raises
(Zarr.Storage.Key_not_found "fakegroup/zarr.json")
(fun () -> Array.metadata store Node.Array.(gnode / "fakegroup"));
Expand Down
Loading

0 comments on commit 9cfb08c

Please sign in to comment.