Skip to content

Commit

Permalink
Add shards to array strategy (#2822)
Browse files Browse the repository at this point in the history
* Add shards to array strategy

* Prioritize v3 over v2 in property tests

---------

Co-authored-by: Davis Bennett <davis.v.bennett@gmail.com>
  • Loading branch information
dcherian and d-v-b authored Feb 14, 2025
1 parent 24ef221 commit 3c25dac
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 17 deletions.
1 change: 1 addition & 0 deletions changes/2822.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add arbitrary `shards` to the Hypothesis strategy for generating arrays.
55 changes: 38 additions & 17 deletions src/zarr/testing/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def clear_store(x: Store) -> Store:
# So we map a clear to reset the store.
stores = st.builds(MemoryStore, st.just({})).map(clear_store)
compressors = st.sampled_from([None, "default"])
zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([2, 3])
zarr_formats: st.SearchStrategy[ZarrFormat] = st.sampled_from([3, 2])
array_shapes = npst.array_shapes(max_dims=4, min_side=0)


Expand Down Expand Up @@ -166,6 +166,32 @@ def numpy_arrays(
return draw(npst.arrays(dtype=dtype, shape=shapes))


@st.composite  # type: ignore[misc]
def chunk_shapes(draw: st.DrawFn, *, shape: tuple[int, ...]) -> tuple[int, ...]:
    """Draw a chunk shape for an array of the given ``shape``.

    For every axis an integer divisor is drawn (between 1 and the axis length,
    or exactly 0 for a zero-length axis) and the chunk length along that axis
    is the axis length floor-divided by that divisor. A zero-length axis
    yields a chunk length of 0.

    Parameters
    ----------
    draw
        The Hypothesis draw function supplied by ``st.composite``.
    shape
        Shape of the array to be chunked.

    Returns
    -------
    tuple[int, ...]
        One chunk length per axis of ``shape``.
    """
    # st.integers() shrinks towards smaller values, so drawing the divisor
    # (rather than the chunk length itself) makes failing examples shrink
    # towards arrays that are split into fewer chunks.
    divisor_strategies = []
    for extent in shape:
        lowest = 1 if extent != 0 else 0
        divisor_strategies.append(st.integers(min_value=lowest, max_value=extent))

    # A single draw of the whole tuple, one divisor per axis.
    divisors = draw(st.tuples(*divisor_strategies))

    chunk_lengths = []
    for extent, divisor in zip(shape, divisors, strict=True):
        if divisor > 0:
            chunk_lengths.append(extent // divisor)
        else:
            # Only reachable for zero-length axes; avoids dividing by zero.
            chunk_lengths.append(0)
    return tuple(chunk_lengths)


@st.composite  # type: ignore[misc]
def shard_shapes(
    draw: st.DrawFn, *, shape: tuple[int, ...], chunk_shape: tuple[int, ...]
) -> tuple[int, ...]:
    """Draw a shard shape compatible with ``shape`` and ``chunk_shape``.

    Along every axis the shard length is the chunk length multiplied by an
    integer drawn between 1 and ``shape // chunk_shape`` for that axis, so a
    shard always spans a whole number of chunks.

    Parameters
    ----------
    draw
        The Hypothesis draw function supplied by ``st.composite``.
    shape
        Shape of the array being sharded.
    chunk_shape
        Chunk length per axis; no entry may be zero.

    Returns
    -------
    tuple[int, ...]
        One shard length per axis.
    """
    # A zero chunk length would make the floor division below fail.
    assert not any(c == 0 for c in chunk_shape)

    # How many whole chunks fit along each axis: the upper bound for the multiplier.
    chunks_per_axis = [
        extent // chunk for extent, chunk in zip(shape, chunk_shape, strict=True)
    ]

    # NOTE(review): the stated intent is to shrink towards arrays with a smaller
    # number of shards, but st.integers() shrinks towards smaller values, i.e.
    # towards a multiplier of 1 (shard == chunk) -- confirm this is what is wanted.
    multipliers = []
    for upper in chunks_per_axis:
        multipliers.append(draw(st.integers(min_value=1, max_value=upper)))

    return tuple(
        factor * chunk for factor, chunk in zip(multipliers, chunk_shape, strict=True)
    )


@st.composite # type: ignore[misc]
def np_array_and_chunks(
draw: st.DrawFn, *, arrays: st.SearchStrategy[np.ndarray] = numpy_arrays
Expand All @@ -175,19 +201,7 @@ def np_array_and_chunks(
Returns: a tuple of the array and a suitable random chunking for it.
"""
array = draw(arrays)
# We want this strategy to shrink towards arrays with smaller number of chunks
# 1. st.integers() shrinks towards smaller values. So we use that to generate number of chunks
numchunks = draw(
st.tuples(
*[st.integers(min_value=0 if size == 0 else 1, max_value=size) for size in array.shape]
)
)
# 2. and now generate the chunks tuple
chunks = tuple(
size // nchunks if nchunks > 0 else 0
for size, nchunks in zip(array.shape, numchunks, strict=True)
)
return (array, chunks)
return (array, draw(chunk_shapes(shape=array.shape)))


@st.composite # type: ignore[misc]
Expand All @@ -210,7 +224,12 @@ def arrays(
zarr_format = draw(zarr_formats)
if arrays is None:
arrays = numpy_arrays(shapes=shapes, zarr_formats=st.just(zarr_format))
nparray, chunks = draw(np_array_and_chunks(arrays=arrays))
nparray = draw(arrays)
chunk_shape = draw(chunk_shapes(shape=nparray.shape))
if zarr_format == 3 and all(c > 0 for c in chunk_shape):
shard_shape = draw(st.none() | shard_shapes(shape=nparray.shape, chunk_shape=chunk_shape))
else:
shard_shape = None
# test that None works too.
fill_value = draw(st.one_of([st.none(), npst.from_dtype(nparray.dtype)]))
# compressor = draw(compressors)
Expand All @@ -223,7 +242,8 @@ def arrays(
a = root.create_array(
array_path,
shape=nparray.shape,
chunks=chunks,
chunks=chunk_shape,
shards=shard_shape,
dtype=nparray.dtype,
attributes=attributes,
# compressor=compressor, # FIXME
Expand All @@ -236,7 +256,8 @@ def arrays(
assert a.name is not None
assert isinstance(root[array_path], Array)
assert nparray.shape == a.shape
assert chunks == a.chunks
assert chunk_shape == a.chunks
assert shard_shape == a.shards
assert array_path == a.path, (path, name, array_path, a.name, a.path)
assert a.basename == name, (a.basename, name)
assert dict(a.attrs) == expected_attrs
Expand Down

0 comments on commit 3c25dac

Please sign in to comment.