From aa5653ff4f81017b3de81279e15ae98229653508 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 23 Jan 2024 21:20:32 -0700 Subject: [PATCH 01/69] Stateful tests with Dataset --- xarray/tests/test_state_machine.py | 68 ++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) create mode 100644 xarray/tests/test_state_machine.py diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py new file mode 100644 index 00000000000..3dd974e00ac --- /dev/null +++ b/xarray/tests/test_state_machine.py @@ -0,0 +1,68 @@ +import hypothesis.extra.numpy as npst +import hypothesis.strategies as st +import numpy as np +from hypothesis import note, settings +from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule + +import xarray.testing.strategies as xrst +from xarray import Dataset +from xarray.testing import _assert_internal_invariants + + +def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.integer_dtypes(sizes=(32, 64)) + | npst.unsigned_integer_dtypes(sizes=(32, 64)) + | npst.floating_dtypes(sizes=(32, 64)) + ) + + +class DatasetStateMachine(RuleBasedStateMachine): + def __init__(self): + super().__init__() + self.dataset = Dataset() + + @rule( + var=xrst.variables( + dims=xrst.dimension_names(min_dims=1, max_dims=1), + dtype=pandas_index_dtypes(), + ) + ) + def add_dim_coord(self, var): + (name,) = var.dims + # dim coord + self.dataset[name] = var + # non-dim coord of same size + self.dataset[name + "_"] = var + note(f"> vars: {tuple(self.dataset._variables)}") + + @rule(newname=xrst.names()) + @precondition(lambda self: len(self.dataset.dims) >= 1) + def rename_vars(self, newname): + # TODO: randomize this + oldname = tuple(self.dataset.dims)[0] + self.dataset = self.dataset.rename_vars({oldname: newname}) + note(f"> renaming {oldname} to {newname}") + + @rule() + @precondition(lambda self: len(self.dataset._variables) >= 2) + def swap_dims(self): + ds = self.dataset + # TODO: randomize? + dim = tuple(ds.dims)[0] + + to = dim + "_" if "_" not in dim else dim[:-1] + assert to in ds._variables + self.dataset = ds.swap_dims({dim: to}) + note(f"> swapping {dim} to {to}") + + @invariant() + def assert_invariants(self): + # ndims = len(self.dataset.dims) + + note(f"> ===\n\n {self.dataset!r} \n===\n\n") + _assert_internal_invariants(self.dataset, check_default_indexes=True) + + +DatasetStateMachine.TestCase.settings = settings(max_examples=1000) +DatasetTest = DatasetStateMachine.TestCase From f1199a14097de00ec49c6d050715599c75cb4caa Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:12:57 -0700 Subject: [PATCH 02/69] Disable check_default_indexes when needed --- xarray/tests/test_state_machine.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 3dd974e00ac..27088b1ab8d 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -21,6 +21,7 @@ class DatasetStateMachine(RuleBasedStateMachine): def __init__(self): super().__init__() self.dataset = Dataset() + self.check_default_indexes = True @rule( var=xrst.variables( @@ -40,7 +41,11 @@ def add_dim_coord(self, var): @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): # TODO: randomize this + # benbovy: "skip the default indexes invariant test when the name of an + # existing dimension coordinate is passed as input kwarg or dict key + # to .rename_vars()." 
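
For reference (an editorial sketch, not part of this patch series): the DatasetStateMachine added in [PATCH 01/69] follows Hypothesis' rule/precondition/invariant pattern. The same pattern on a plain Python list, with invented names, looks like:

from hypothesis import strategies as st
from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule

class SortedListMachine(RuleBasedStateMachine):
    # Illustrative only; mirrors the structure of DatasetStateMachine above.
    def __init__(self):
        super().__init__()
        self.items: list[int] = []

    @rule(x=st.integers())
    def insert(self, x):
        # rules mutate the tracked state
        self.items.append(x)
        self.items.sort()

    @rule()
    @precondition(lambda self: self.items)
    def pop_smallest(self):
        # preconditions stop Hypothesis from running a rule on invalid state
        self.items.pop(0)

    @invariant()
    def is_sorted(self):
        # invariants are checked at the start and after every rule
        assert self.items == sorted(self.items)

TestSortedList = SortedListMachine.TestCase
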
oldname = tuple(self.dataset.dims)[0] + self.check_default_indexes = False self.dataset = self.dataset.rename_vars({oldname: newname}) note(f"> renaming {oldname} to {newname}") @@ -61,7 +66,7 @@ def assert_invariants(self): # ndims = len(self.dataset.dims) note(f"> ===\n\n {self.dataset!r} \n===\n\n") - _assert_internal_invariants(self.dataset, check_default_indexes=True) + _assert_internal_invariants(self.dataset, self.check_default_indexes) DatasetStateMachine.TestCase.settings = settings(max_examples=1000) From 3fdb188bba0db870dfe9eabc0369ba8d7a86b80f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:13:16 -0700 Subject: [PATCH 03/69] Add Zarr roundtrip --- xarray/tests/test_state_machine.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 27088b1ab8d..dc1e0822a81 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -4,9 +4,12 @@ from hypothesis import note, settings from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule +import xarray as xr import xarray.testing.strategies as xrst from xarray import Dataset -from xarray.testing import _assert_internal_invariants +from xarray.testing import _assert_internal_invariants, assert_identical +from xarray.tests import has_zarr +from xarray.tests.test_backends import ON_WINDOWS, create_tmp_file def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: @@ -61,6 +64,16 @@ def swap_dims(self): self.dataset = ds.swap_dims({dim: to}) note(f"> swapping {dim} to {to}") + @rule() + def roundtrip_zarr(self): + if not has_zarr: + return + expected = self.dataset + with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as path: + self.dataset.to_zarr(path + ".zarr") + with xr.open_dataset(path + ".zarr", engine="zarr") as ds: + assert_identical(expected, ds) + @invariant() def assert_invariants(self): # ndims = len(self.dataset.dims) From 08203232fd9b3e4cdb133492c3c6510b2f2837cd Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:15:46 -0700 Subject: [PATCH 04/69] Randomize dimension choice --- xarray/tests/test_state_machine.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index dc1e0822a81..850fb7c8ce7 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -1,3 +1,5 @@ +import random + import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np @@ -11,6 +13,8 @@ from xarray.tests import has_zarr from xarray.tests.test_backends import ON_WINDOWS, create_tmp_file +random.seed(123456) + def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: return ( @@ -43,11 +47,10 @@ def add_dim_coord(self, var): @rule(newname=xrst.names()) @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): - # TODO: randomize this # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." 
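
The roundtrip_zarr rule in [PATCH 03/69] leans on helpers from xarray's own test suite (create_tmp_file, ON_WINDOWS). A rough standalone equivalent of the write/read/compare step, assuming the zarr package is installed and using a throwaway dataset, would be:

import os
import tempfile

import numpy as np
import xarray as xr
from xarray.testing import assert_identical

# illustrative data, not from the patch series
ds = xr.Dataset({"a": ("x", np.arange(4))}, coords={"x": [10, 20, 30, 40]})

with tempfile.TemporaryDirectory() as tmpdir:
    store = os.path.join(tmpdir, "roundtrip.zarr")
    ds.to_zarr(store)
    with xr.open_dataset(store, engine="zarr") as loaded:
        # serialization should not change the dataset
        assert_identical(ds, loaded)
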
- oldname = tuple(self.dataset.dims)[0] + oldname = random.choice(tuple(self.dataset.dims)) self.check_default_indexes = False self.dataset = self.dataset.rename_vars({oldname: newname}) note(f"> renaming {oldname} to {newname}") @@ -56,8 +59,7 @@ def rename_vars(self, newname): @precondition(lambda self: len(self.dataset._variables) >= 2) def swap_dims(self): ds = self.dataset - # TODO: randomize? - dim = tuple(ds.dims)[0] + dim = random.choice(tuple(ds.dims)) to = dim + "_" if "_" not in dim else dim[:-1] assert to in ds._variables @@ -76,8 +78,6 @@ def roundtrip_zarr(self): @invariant() def assert_invariants(self): - # ndims = len(self.dataset.dims) - note(f"> ===\n\n {self.dataset!r} \n===\n\n") _assert_internal_invariants(self.dataset, self.check_default_indexes) From 06bdbf81203665c5ad9b8e41f5810428110debc1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:24:36 -0700 Subject: [PATCH 05/69] Fix a bug --- xarray/tests/test_state_machine.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 850fb7c8ce7..aabc00d932f 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -40,7 +40,7 @@ def add_dim_coord(self, var): (name,) = var.dims # dim coord self.dataset[name] = var - # non-dim coord of same size + # non-dim coord of same size; this allows renaming self.dataset[name + "_"] = var note(f"> vars: {tuple(self.dataset._variables)}") @@ -50,6 +50,8 @@ def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." + if newname in self.dataset._variables: + newname += "_" oldname = random.choice(tuple(self.dataset.dims)) self.check_default_indexes = False self.dataset = self.dataset.rename_vars({oldname: newname}) From 2710a4eedcb5df2f25ea3bd7dfe262f4533bc83b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:28:28 -0700 Subject: [PATCH 06/69] Add reset_index --- xarray/tests/test_state_machine.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index aabc00d932f..678eda721b0 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -44,6 +44,11 @@ def add_dim_coord(self, var): self.dataset[name + "_"] = var note(f"> vars: {tuple(self.dataset._variables)}") + @precondition(lambda self: len(self.dataset.dims) >= 1) + def reset_index(self): + dim = random.choice(tuple(self.dataset.dims)) + self.dataset = self.dataset.reset_index(dim) + @rule(newname=xrst.names()) @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): From 85ab18606ba40aa04cd95ecc4dfaeb10257a66c6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:32:19 -0700 Subject: [PATCH 07/69] Add stack, unstack --- xarray/tests/test_state_machine.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 678eda721b0..30113feae80 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -49,14 +49,29 @@ def reset_index(self): dim = random.choice(tuple(self.dataset.dims)) self.dataset = self.dataset.reset_index(dim) + @rule(newname=xrst.names()) + @precondition(lambda self: len(self.dataset.dims) >= 1) + def stack(self, newname): + # 
benbovy: "skip the default indexes invariant test when the name of an + # existing dimension coordinate is passed as input kwarg or dict key + # to .rename_vars()." + while newname in self.dataset._variables: + newname += "_foo" + oldnames = random.choices(tuple(self.dataset.dims), k=2) + self.dataset = self.dataset.stack({newname: oldnames}) + + @rule() + def unstack(self): + self.dataset = self.dataset.unstack() + @rule(newname=xrst.names()) @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." - if newname in self.dataset._variables: - newname += "_" + while newname in self.dataset._variables: + newname += "_foo" oldname = random.choice(tuple(self.dataset.dims)) self.check_default_indexes = False self.dataset = self.dataset.rename_vars({oldname: newname}) From 443916cd40d0debeb6f4b259b7b07a3549c8ca61 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:38:46 -0700 Subject: [PATCH 08/69] [revert] Disable Zarr till we control attrs strategy --- xarray/tests/test_state_machine.py | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 30113feae80..55f7ceb4327 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -6,12 +6,9 @@ from hypothesis import note, settings from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule -import xarray as xr import xarray.testing.strategies as xrst from xarray import Dataset -from xarray.testing import _assert_internal_invariants, assert_identical -from xarray.tests import has_zarr -from xarray.tests.test_backends import ON_WINDOWS, create_tmp_file +from xarray.testing import _assert_internal_invariants random.seed(123456) @@ -88,15 +85,16 @@ def swap_dims(self): self.dataset = ds.swap_dims({dim: to}) note(f"> swapping {dim} to {to}") - @rule() - def roundtrip_zarr(self): - if not has_zarr: - return - expected = self.dataset - with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as path: - self.dataset.to_zarr(path + ".zarr") - with xr.open_dataset(path + ".zarr", engine="zarr") as ds: - assert_identical(expected, ds) + # TODO: enable when we have serializable attrs only + # @rule() + # def roundtrip_zarr(self): + # if not has_zarr: + # return + # expected = self.dataset + # with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as path: + # self.dataset.to_zarr(path + ".zarr") + # with xr.open_dataset(path + ".zarr", engine="zarr") as ds: + # assert_identical(expected, ds) @invariant() def assert_invariants(self): From 5c00585b533be2fb681f0930a97b8185fe515a7a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:38:18 -0700 Subject: [PATCH 09/69] Try making unique names --- xarray/tests/test_state_machine.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 55f7ceb4327..9f866be04f2 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -21,6 +21,18 @@ def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: ) +# https://stackoverflow.com/a/73810689 +# TODO: Consider building this into `dimension_names` and `names` strategies. 
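
[PATCH 07/69] above adds stack and unstack rules. Stacking collapses the chosen dimensions into a single dimension backed by a pandas MultiIndex, and unstacking reverses it as long as that index is unique. A toy roundtrip (illustrative data, not from the series):

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"v": (("x", "y"), np.arange(6).reshape(2, 3))},
    coords={"x": [0, 1], "y": [10, 20, 30]},
)

stacked = ds.stack(z=("x", "y"))
assert stacked.indexes["z"].is_unique  # unique because "x" and "y" were unique

roundtripped = stacked.unstack("z")
assert (roundtripped["v"].values == ds["v"].values).all()
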
+@st.composite +def unique(draw: st.DrawFn, strategy): + seen = draw(st.shared(st.builds(set), key="key-for-unique-elems")) + return draw( + strategy.map(lambda x: tuple(x)) + .filter(lambda x: x not in seen) + .map(lambda x: seen.add(x) or x) + ) + + class DatasetStateMachine(RuleBasedStateMachine): def __init__(self): super().__init__() @@ -29,7 +41,7 @@ def __init__(self): @rule( var=xrst.variables( - dims=xrst.dimension_names(min_dims=1, max_dims=1), + dims=unique(xrst.dimension_names(min_dims=1, max_dims=1)), dtype=pandas_index_dtypes(), ) ) @@ -39,14 +51,13 @@ def add_dim_coord(self, var): self.dataset[name] = var # non-dim coord of same size; this allows renaming self.dataset[name + "_"] = var - note(f"> vars: {tuple(self.dataset._variables)}") @precondition(lambda self: len(self.dataset.dims) >= 1) def reset_index(self): dim = random.choice(tuple(self.dataset.dims)) self.dataset = self.dataset.reset_index(dim) - @rule(newname=xrst.names()) + @rule(newname=unique(xrst.names())) @precondition(lambda self: len(self.dataset.dims) >= 1) def stack(self, newname): # benbovy: "skip the default indexes invariant test when the name of an @@ -61,7 +72,7 @@ def stack(self, newname): def unstack(self): self.dataset = self.dataset.unstack() - @rule(newname=xrst.names()) + @rule(newname=unique(xrst.names())) @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an From 04b6b9247ae42025e99a3ba3a07939363fec2386 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:51:51 -0700 Subject: [PATCH 10/69] Share names strategy to ensure uniques? --- xarray/testing/strategies.py | 7 ++++++- xarray/tests/test_state_machine.py | 27 +++++++-------------------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index c5a7afdf54e..896b44704c5 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -87,6 +87,7 @@ def names() -> st.SearchStrategy[str]: def dimension_names( *, + name_strategy=None, min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[list[Hashable]]: @@ -97,14 +98,18 @@ def dimension_names( Parameters ---------- + name_strategy + Strategy for making names. Useful if we need to share this. min_dims Minimum number of dimensions in generated list. max_dims Maximum number of dimensions in generated list. """ + elements = names() if name_strategy is None else name_strategy + return st.lists( - elements=names(), + elements=elements, min_size=min_dims, max_size=max_dims, unique=True, diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 9f866be04f2..43a79c27dac 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -12,6 +12,9 @@ random.seed(123456) +# Call once to enqure we get unique names on each draw? +NAMES = xrst.names() + def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: return ( @@ -21,18 +24,6 @@ def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: ) -# https://stackoverflow.com/a/73810689 -# TODO: Consider building this into `dimension_names` and `names` strategies. 
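
The unique composite introduced in [PATCH 09/69] is the shared-set trick from the linked Stack Overflow answer: every use of the strategy within one generated example draws the same seen set, so filtered values cannot repeat inside that example. A sketch of the same technique applied to plain integers (names invented, not part of the series):

import hypothesis.strategies as st
from hypothesis import given

@st.composite
def unique_ints(draw):
    # one shared "seen" set per generated example
    seen = draw(st.shared(st.builds(set), key="unique-ints-seen"))
    return draw(
        st.integers(0, 1000)
        .filter(lambda x: x not in seen)
        .map(lambda x: seen.add(x) or x)
    )

@given(st.lists(unique_ints(), min_size=5, max_size=5))
def test_draws_are_unique_within_an_example(values):
    assert len(values) == len(set(values))
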
-@st.composite -def unique(draw: st.DrawFn, strategy): - seen = draw(st.shared(st.builds(set), key="key-for-unique-elems")) - return draw( - strategy.map(lambda x: tuple(x)) - .filter(lambda x: x not in seen) - .map(lambda x: seen.add(x) or x) - ) - - class DatasetStateMachine(RuleBasedStateMachine): def __init__(self): super().__init__() @@ -41,7 +32,7 @@ def __init__(self): @rule( var=xrst.variables( - dims=unique(xrst.dimension_names(min_dims=1, max_dims=1)), + dims=xrst.dimension_names(name_strategy=NAMES, min_dims=1, max_dims=1), dtype=pandas_index_dtypes(), ) ) @@ -57,14 +48,12 @@ def reset_index(self): dim = random.choice(tuple(self.dataset.dims)) self.dataset = self.dataset.reset_index(dim) - @rule(newname=unique(xrst.names())) - @precondition(lambda self: len(self.dataset.dims) >= 1) + @rule(newname=NAMES) + @precondition(lambda self: len(self.dataset.dims) >= 2) def stack(self, newname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." - while newname in self.dataset._variables: - newname += "_foo" oldnames = random.choices(tuple(self.dataset.dims), k=2) self.dataset = self.dataset.stack({newname: oldnames}) @@ -72,14 +61,12 @@ def stack(self, newname): def unstack(self): self.dataset = self.dataset.unstack() - @rule(newname=unique(xrst.names())) + @rule(newname=NAMES) @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." - while newname in self.dataset._variables: - newname += "_foo" oldname = random.choice(tuple(self.dataset.dims)) self.check_default_indexes = False self.dataset = self.dataset.rename_vars({oldname: newname}) From a4a4c430bdac0f22e876d95ca21ef4ff02b2e850 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 10:52:28 -0700 Subject: [PATCH 11/69] cleanup --- xarray/tests/test_state_machine.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 43a79c27dac..1b0a72c81d1 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -51,9 +51,6 @@ def reset_index(self): @rule(newname=NAMES) @precondition(lambda self: len(self.dataset.dims) >= 2) def stack(self, newname): - # benbovy: "skip the default indexes invariant test when the name of an - # existing dimension coordinate is passed as input kwarg or dict key - # to .rename_vars()." 
oldnames = random.choices(tuple(self.dataset.dims), k=2) self.dataset = self.dataset.stack({newname: oldnames}) From 83fa17b71e94625230df0f2f2bceffa94cfe6399 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 11:01:09 -0700 Subject: [PATCH 12/69] Try sharing strategies better --- xarray/tests/test_state_machine.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 1b0a72c81d1..b3193d201a5 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -10,10 +10,21 @@ from xarray import Dataset from xarray.testing import _assert_internal_invariants + +@st.composite +def unique(draw, strategy): + # https://stackoverflow.com/questions/73737073/create-hypothesis-strategy-that-returns-unique-values + seen = draw(st.shared(st.builds(set), key="key-for-unique-elems")) + return draw( + strategy.filter(lambda x: x not in seen).map(lambda x: seen.add(x) or x) + ) + + random.seed(123456) -# Call once to enqure we get unique names on each draw? -NAMES = xrst.names() +# Share to ensure we get unique names on each draw? +UNIQUE_NAME = unique(strategy=xrst.names()) +DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: @@ -30,12 +41,7 @@ def __init__(self): self.dataset = Dataset() self.check_default_indexes = True - @rule( - var=xrst.variables( - dims=xrst.dimension_names(name_strategy=NAMES, min_dims=1, max_dims=1), - dtype=pandas_index_dtypes(), - ) - ) + @rule(var=xrst.variables(dims=DIM_NAME, dtype=pandas_index_dtypes())) def add_dim_coord(self, var): (name,) = var.dims # dim coord @@ -48,7 +54,7 @@ def reset_index(self): dim = random.choice(tuple(self.dataset.dims)) self.dataset = self.dataset.reset_index(dim) - @rule(newname=NAMES) + @rule(newname=UNIQUE_NAME) @precondition(lambda self: len(self.dataset.dims) >= 2) def stack(self, newname): oldnames = random.choices(tuple(self.dataset.dims), k=2) @@ -58,7 +64,7 @@ def stack(self, newname): def unstack(self): self.dataset = self.dataset.unstack() - @rule(newname=NAMES) + @rule(newname=UNIQUE_NAME) @precondition(lambda self: len(self.dataset.dims) >= 1) def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an From 491b9b1c5e5302b0e08e26066f18fc4900fb27ac Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 11:20:57 -0700 Subject: [PATCH 13/69] Fix endianness --- xarray/tests/test_state_machine.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index b3193d201a5..47a192c2377 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -29,9 +29,9 @@ def unique(draw, strategy): def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: return ( - npst.integer_dtypes(sizes=(32, 64)) - | npst.unsigned_integer_dtypes(sizes=(32, 64)) - | npst.floating_dtypes(sizes=(32, 64)) + npst.integer_dtypes(endianness="<", sizes=(32, 64)) + | npst.unsigned_integer_dtypes(endianness="<", sizes=(32, 64)) + | npst.floating_dtypes(endianness="<", sizes=(32, 64)) ) From c648cfd2004b380805fa32719efa3bf19efbd962 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 26 Jan 2024 11:25:29 -0700 Subject: [PATCH 14/69] Better swap_dims --- xarray/tests/test_state_machine.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 
deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 47a192c2377..2e717447a8c 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -76,13 +76,19 @@ def rename_vars(self, newname): note(f"> renaming {oldname} to {newname}") @rule() - @precondition(lambda self: len(self.dataset._variables) >= 2) + @precondition( + lambda self: ( + len(self.dataset._variables) >= 2 + and (set(self.dataset.dims) & set(self.dataset._variables)) + ) + ) def swap_dims(self): ds = self.dataset - dim = random.choice(tuple(ds.dims)) - - to = dim + "_" if "_" not in dim else dim[:-1] - assert to in ds._variables + # need a dimension coordinate for swapping + dim = random.choice(tuple(set(ds.dims) & set(ds._variables))) + to = random.choice( + [name for name, var in ds._variables.items() if var.size == ds.sizes[dim]] + ) self.dataset = ds.swap_dims({dim: to}) note(f"> swapping {dim} to {to}") From 06763c26e68afe488a222373e69f9c22a48261bb Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 9 Mar 2024 12:17:32 -0800 Subject: [PATCH 15/69] More improvements --- xarray/tests/test_state_machine.py | 47 +++++++++++++++++++++--------- 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 2e717447a8c..0bf59719261 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -8,9 +8,24 @@ import xarray.testing.strategies as xrst from xarray import Dataset +from xarray.indexes import PandasMultiIndex from xarray.testing import _assert_internal_invariants +def get_not_multiindex_dims(ds: Dataset) -> set: + dims = ds.dims + mindexes = [ + name + for name, index in ds.xindexes.items() + if isinstance(index, PandasMultiIndex) + ] + return set(dims) - set(mindexes) + + +def get_dimension_coordinates(ds: Dataset) -> set: + return set(ds.dims) & set(ds._variables) + + @st.composite def unique(draw, strategy): # https://stackoverflow.com/questions/73737073/create-hypothesis-strategy-that-returns-unique-values @@ -52,12 +67,18 @@ def add_dim_coord(self, var): @precondition(lambda self: len(self.dataset.dims) >= 1) def reset_index(self): dim = random.choice(tuple(self.dataset.dims)) + note(f"> resetting {dim}") self.dataset = self.dataset.reset_index(dim) @rule(newname=UNIQUE_NAME) - @precondition(lambda self: len(self.dataset.dims) >= 2) + @precondition(lambda self: len(get_not_multiindex_dims(self.dataset)) >= 2) def stack(self, newname): - oldnames = random.choices(tuple(self.dataset.dims), k=2) + choices = list(get_not_multiindex_dims(self.dataset)) + # cannot stack repeated dims ('0', '0'), so random.choices isn't the best way to choose it + # Instead shuffle and pick the first two. + random.shuffle(choices) + oldnames = choices[:2] + note(f"> stacking {oldnames} as {newname}") self.dataset = self.dataset.stack({newname: oldnames}) @rule() @@ -65,32 +86,30 @@ def unstack(self): self.dataset = self.dataset.unstack() @rule(newname=UNIQUE_NAME) - @precondition(lambda self: len(self.dataset.dims) >= 1) + @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." 
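
[PATCH 14/69] tightens the swap_dims rule so a dimension coordinate is only swapped for another variable along that same dimension, which is what Dataset.swap_dims needs. A concrete toy case (not from the series):

import numpy as np
import xarray as xr

ds = xr.Dataset(
    {"data": ("x", np.arange(3))},
    coords={"x": [10, 20, 30], "y": ("x", ["a", "b", "c"])},
)

# "y" has dims ("x",), so it is a valid swap target
swapped = ds.swap_dims({"x": "y"})
assert set(swapped.dims) == {"y"}
assert "x" in swapped.coords  # the old dimension coordinate survives as a non-dim coordinate
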
- oldname = random.choice(tuple(self.dataset.dims)) + + oldname = random.choice(tuple(get_dimension_coordinates(self.dataset))) self.check_default_indexes = False - self.dataset = self.dataset.rename_vars({oldname: newname}) note(f"> renaming {oldname} to {newname}") + self.dataset = self.dataset.rename_vars({oldname: newname}) @rule() - @precondition( - lambda self: ( - len(self.dataset._variables) >= 2 - and (set(self.dataset.dims) & set(self.dataset._variables)) - ) - ) + @precondition(lambda self: len(self.dataset._variables) >= 2) + @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) def swap_dims(self): ds = self.dataset # need a dimension coordinate for swapping - dim = random.choice(tuple(set(ds.dims) & set(ds._variables))) + dim = random.choice(tuple(get_dimension_coordinates(ds))) + # Can only swap to a variable with the same dim to = random.choice( - [name for name, var in ds._variables.items() if var.size == ds.sizes[dim]] + [name for name, var in ds._variables.items() if var.dims == (dim,)] ) - self.dataset = ds.swap_dims({dim: to}) note(f"> swapping {dim} to {to}") + self.dataset = ds.swap_dims({dim: to}) # TODO: enable when we have serializable attrs only # @rule() From c07688c13ebbc9868a5737bbc2919dccba37a343 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 13 Mar 2024 11:39:27 -0600 Subject: [PATCH 16/69] WIP --- xarray/testing/strategies.py | 16 ++++++++++++++++ xarray/tests/test_state_machine.py | 4 +++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 896b44704c5..18cb25a26dd 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -197,6 +197,22 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: ) +def serializable_attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: + """ + Generates arbitrary valid attributes dictionaries for xarray objects. + + These are intended to be serialized, and so, are less general than the + `attrs` function above. + + Requires the hypothesis package to be installed. 
+ + See Also + -------- + :ref:`testing.hypothesis`_ + """ + return st.dictionaries(_attr_keys, _attr_values) + + @st.composite def variables( draw: st.DrawFn, diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 0bf59719261..328c26ff66f 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -42,6 +42,7 @@ def unique(draw, strategy): DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) +# TODO: add datetime64[ns] def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: return ( npst.integer_dtypes(endianness="<", sizes=(32, 64)) @@ -83,6 +84,7 @@ def stack(self, newname): @rule() def unstack(self): + # TODO: Drop duplicates self.dataset = self.dataset.unstack() @rule(newname=UNIQUE_NAME) @@ -128,5 +130,5 @@ def assert_invariants(self): _assert_internal_invariants(self.dataset, self.check_default_indexes) -DatasetStateMachine.TestCase.settings = settings(max_examples=1000) +DatasetStateMachine.TestCase.settings = settings(max_examples=1000, deadline=None) DatasetTest = DatasetStateMachine.TestCase From e30a89f4cfc045292a242f2e354f922b7c41afd7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 12:58:03 -0600 Subject: [PATCH 17/69] Drop duplicates before unstacking --- xarray/tests/test_state_machine.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 328c26ff66f..8a68c2adf0b 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -22,6 +22,15 @@ def get_not_multiindex_dims(ds: Dataset) -> set: return set(dims) - set(mindexes) +def get_multiindex_dims(ds: Dataset) -> list: + mindexes = [ + name + for name, index in ds.xindexes.items() + if isinstance(index, PandasMultiIndex) + ] + return mindexes + + def get_dimension_coordinates(ds: Dataset) -> set: return set(ds.dims) & set(ds._variables) @@ -84,8 +93,12 @@ def stack(self, newname): @rule() def unstack(self): - # TODO: Drop duplicates - self.dataset = self.dataset.unstack() + choices = get_multiindex_dims(self.dataset) + if choices: + dim = random.choice(choices) + self.dataset = self.dataset.drop_duplicates(dim).unstack(dim) + else: + self.dataset = self.dataset.unstack() @rule(newname=UNIQUE_NAME) @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) @@ -93,7 +106,6 @@ def rename_vars(self, newname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." 
- oldname = random.choice(tuple(get_dimension_coordinates(self.dataset))) self.check_default_indexes = False note(f"> renaming {oldname} to {newname}") From 316eb439ab7d2161303c47dd39c98f0681a834ce Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 12:59:21 -0600 Subject: [PATCH 18/69] Add reset_index --- xarray/tests/test_state_machine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 8a68c2adf0b..7e38e1b15bb 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -74,6 +74,7 @@ def add_dim_coord(self, var): # non-dim coord of same size; this allows renaming self.dataset[name + "_"] = var + @rule() @precondition(lambda self: len(self.dataset.dims) >= 1) def reset_index(self): dim = random.choice(tuple(self.dataset.dims)) From 88e20104f427b81dc371690e7fb532f46752c527 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 13:05:36 -0600 Subject: [PATCH 19/69] Better duplicate assumption --- xarray/tests/test_state_machine.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/xarray/tests/test_state_machine.py b/xarray/tests/test_state_machine.py index 7e38e1b15bb..719fe3106ac 100644 --- a/xarray/tests/test_state_machine.py +++ b/xarray/tests/test_state_machine.py @@ -3,7 +3,7 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st import numpy as np -from hypothesis import note, settings +from hypothesis import assume, note, settings from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule import xarray.testing.strategies as xrst @@ -97,7 +97,8 @@ def unstack(self): choices = get_multiindex_dims(self.dataset) if choices: dim = random.choice(choices) - self.dataset = self.dataset.drop_duplicates(dim).unstack(dim) + assume(self.dataset.xindexes[dim].index.is_unique) + self.dataset = self.dataset.unstack(dim) else: self.dataset = self.dataset.unstack() From 6c23b498707334390c97aa8e8cbf0ba310a54a42 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 13:08:22 -0600 Subject: [PATCH 20/69] Move --- .../test_index_manipulation.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename xarray/tests/test_state_machine.py => properties/test_index_manipulation.py (100%) diff --git a/xarray/tests/test_state_machine.py b/properties/test_index_manipulation.py similarity index 100% rename from xarray/tests/test_state_machine.py rename to properties/test_index_manipulation.py From 2c671e3961df167b5ed0370eb46b5bc210f527c4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 13:20:12 -0600 Subject: [PATCH 21/69] Fix reset_index --- properties/test_index_manipulation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 719fe3106ac..08a9726d7de 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -77,7 +77,8 @@ def add_dim_coord(self, var): @rule() @precondition(lambda self: len(self.dataset.dims) >= 1) def reset_index(self): - dim = random.choice(tuple(self.dataset.dims)) + dim = random.choice(tuple(set(self.dataset.dims) & set(self.dataset.xindexes))) + self.check_default_indexes = False note(f"> resetting {dim}") self.dataset = self.dataset.reset_index(dim) From ae5f4d5cf7cadc818925622cde9d7de918951b61 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 13:24:52 -0600 Subject: [PATCH 22/69] Skip if 
hypothesis not installed --- properties/test_index_manipulation.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 08a9726d7de..dca6c27b966 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,15 +1,20 @@ import random +import numpy as np +import pytest + +from xarray import Dataset +from xarray.indexes import PandasMultiIndex +from xarray.testing import _assert_internal_invariants + +pytest.importorskip("hypothesis") + import hypothesis.extra.numpy as npst import hypothesis.strategies as st -import numpy as np from hypothesis import assume, note, settings from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule import xarray.testing.strategies as xrst -from xarray import Dataset -from xarray.indexes import PandasMultiIndex -from xarray.testing import _assert_internal_invariants def get_not_multiindex_dims(ds: Dataset) -> set: From 3cfed5ea4bd62d6531b9959d8608f2bb5fc769de Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 13:38:10 -0600 Subject: [PATCH 23/69] Better precondition around reset_index --- properties/test_index_manipulation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index dca6c27b966..a913a82b0d2 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -80,7 +80,9 @@ def add_dim_coord(self, var): self.dataset[name + "_"] = var @rule() - @precondition(lambda self: len(self.dataset.dims) >= 1) + @precondition( + lambda self: len(set(self.dataset.dims) & set(self.dataset.xindexes)) >= 1 + ) def reset_index(self): dim = random.choice(tuple(set(self.dataset.dims) & set(self.dataset.xindexes))) self.check_default_indexes = False From 209a2ffdcb450a82cb79038e8d34611d0fb2385d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 15 Mar 2024 19:56:18 -0600 Subject: [PATCH 24/69] Note --- properties/test_index_manipulation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index a913a82b0d2..2590d8f4974 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -132,6 +132,9 @@ def swap_dims(self): to = random.choice( [name for name, var in ds._variables.items() if var.dims == (dim,)] ) + # TODO: swapping a dimension to itself + # TODO: swapping from Index to a MultiIndex level + # TODO: swapping from MultiIndex to a level of the same MultiIndex note(f"> swapping {dim} to {to}") self.dataset = ds.swap_dims({dim: to}) From 0be7c759709192c5c7c334aed14d7acb800c855c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 09:34:09 -0600 Subject: [PATCH 25/69] Try a bundle --- properties/test_index_manipulation.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 2590d8f4974..c3122316a57 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -12,7 +12,14 @@ import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import assume, note, settings -from hypothesis.stateful import RuleBasedStateMachine, invariant, precondition, rule +from hypothesis.stateful import ( + Bundle, + RuleBasedStateMachine, + consumes, + 
invariant, + precondition, + rule, +) import xarray.testing.strategies as xrst @@ -66,18 +73,21 @@ def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: class DatasetStateMachine(RuleBasedStateMachine): + dims = Bundle("dims") + def __init__(self): super().__init__() self.dataset = Dataset() self.check_default_indexes = True - @rule(var=xrst.variables(dims=DIM_NAME, dtype=pandas_index_dtypes())) + @rule(var=xrst.variables(dims=DIM_NAME, dtype=pandas_index_dtypes()), target=dims) def add_dim_coord(self, var): (name,) = var.dims # dim coord self.dataset[name] = var # non-dim coord of same size; this allows renaming self.dataset[name + "_"] = var + return name @rule() @precondition( @@ -110,21 +120,20 @@ def unstack(self): else: self.dataset = self.dataset.unstack() - @rule(newname=UNIQUE_NAME) + @rule(newname=UNIQUE_NAME, oldname=consumes(dims)) @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) - def rename_vars(self, newname): + def rename_vars(self, newname, oldname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." - oldname = random.choice(tuple(get_dimension_coordinates(self.dataset))) self.check_default_indexes = False note(f"> renaming {oldname} to {newname}") self.dataset = self.dataset.rename_vars({oldname: newname}) - @rule() + @rule(dim=consumes(dims)) @precondition(lambda self: len(self.dataset._variables) >= 2) @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) - def swap_dims(self): + def swap_dims(self, dim): ds = self.dataset # need a dimension coordinate for swapping dim = random.choice(tuple(get_dimension_coordinates(ds))) From 6dcbd48dab8fa25b29cc4f573e0f2a1edf1c1524 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 09:44:10 -0600 Subject: [PATCH 26/69] Use unique_subset_of --- properties/test_index_manipulation.py | 48 +++++++++++++++++---------- 1 file changed, 30 insertions(+), 18 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index c3122316a57..417f95c7b46 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,4 +1,5 @@ import random +from collections.abc import Hashable import numpy as np import pytest @@ -16,6 +17,7 @@ Bundle, RuleBasedStateMachine, consumes, + given, invariant, precondition, rule, @@ -24,17 +26,17 @@ import xarray.testing.strategies as xrst -def get_not_multiindex_dims(ds: Dataset) -> set: +def get_not_multiindex_dims(ds: Dataset) -> tuple[Hashable]: dims = ds.dims mindexes = [ name for name, index in ds.xindexes.items() if isinstance(index, PandasMultiIndex) ] - return set(dims) - set(mindexes) + return tuple(set(dims) - set(mindexes)) -def get_multiindex_dims(ds: Dataset) -> list: +def get_multiindex_dims(ds: Dataset) -> list[Hashable]: mindexes = [ name for name, index in ds.xindexes.items() @@ -43,8 +45,8 @@ def get_multiindex_dims(ds: Dataset) -> list: return mindexes -def get_dimension_coordinates(ds: Dataset) -> set: - return set(ds.dims) & set(ds._variables) +def get_dimension_coordinates(ds: Dataset) -> tuple[Hashable]: + return tuple(set(ds.dims) & set(ds._variables)) @st.composite @@ -90,32 +92,39 @@ def add_dim_coord(self, var): return name @rule() + @given(st.data()) @precondition( lambda self: len(set(self.dataset.dims) & set(self.dataset.xindexes)) >= 1 ) - def reset_index(self): - dim = random.choice(tuple(set(self.dataset.dims) & 
set(self.dataset.xindexes))) + def reset_index(self, data): + dim = data.draw( + xrst.unique_subset_of( + tuple(set(self.dataset.dims) & set(self.dataset.xindexes)), + min_size=1, + max_size=1, + ) + ) self.check_default_indexes = False note(f"> resetting {dim}") self.dataset = self.dataset.reset_index(dim) @rule(newname=UNIQUE_NAME) + @given(data=st.data()) @precondition(lambda self: len(get_not_multiindex_dims(self.dataset)) >= 2) - def stack(self, newname): + def stack(self, data, newname): choices = list(get_not_multiindex_dims(self.dataset)) - # cannot stack repeated dims ('0', '0'), so random.choices isn't the best way to choose it - # Instead shuffle and pick the first two. - random.shuffle(choices) - oldnames = choices[:2] + oldnames = data.draw(xrst.unique_subset_of(choices, min_size=2, max_size=2)) note(f"> stacking {oldnames} as {newname}") self.dataset = self.dataset.stack({newname: oldnames}) @rule() - def unstack(self): + @given(data=st.data()) + def unstack(self, data): choices = get_multiindex_dims(self.dataset) if choices: - dim = random.choice(choices) + dim = data.draw(xrst.unique_subset_of(choices, min_size=1, max_size=1)) assume(self.dataset.xindexes[dim].index.is_unique) + note(f"> unstacking {dim}") self.dataset = self.dataset.unstack(dim) else: self.dataset = self.dataset.unstack() @@ -131,15 +140,18 @@ def rename_vars(self, newname, oldname): self.dataset = self.dataset.rename_vars({oldname: newname}) @rule(dim=consumes(dims)) + @given(data=st.data()) @precondition(lambda self: len(self.dataset._variables) >= 2) @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) - def swap_dims(self, dim): + def swap_dims(self, data, dim): ds = self.dataset # need a dimension coordinate for swapping - dim = random.choice(tuple(get_dimension_coordinates(ds))) + dim = data.draw(xrst.unique_subset_of(get_dimension_coordinates(ds))) # Can only swap to a variable with the same dim - to = random.choice( - [name for name, var in ds._variables.items() if var.dims == (dim,)] + to = data.draw( + xrst.unique_subset_of( + [name for name, var in ds._variables.items() if var.dims == (dim,)] + ) ) # TODO: swapping a dimension to itself # TODO: swapping from Index to a MultiIndex level From 0fabd6bc81c39c32dbda3868e0f37c97d0816e21 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 11:34:58 -0600 Subject: [PATCH 27/69] Use Bundles more --- properties/test_index_manipulation.py | 76 ++++++++++++--------------- 1 file changed, 34 insertions(+), 42 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 417f95c7b46..08ca392f59c 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -17,8 +17,8 @@ Bundle, RuleBasedStateMachine, consumes, - given, invariant, + multiple, precondition, rule, ) @@ -75,14 +75,18 @@ def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: class DatasetStateMachine(RuleBasedStateMachine): - dims = Bundle("dims") + indexed_dims = Bundle("indexed_dims") + multi_indexed_dims = Bundle("multi_indexed_dims") def __init__(self): super().__init__() self.dataset = Dataset() self.check_default_indexes = True - @rule(var=xrst.variables(dims=DIM_NAME, dtype=pandas_index_dtypes()), target=dims) + @rule( + var=xrst.variables(dims=DIM_NAME, dtype=pandas_index_dtypes()), + target=indexed_dims, + ) def add_dim_coord(self, var): (name,) = var.dims # dim coord @@ -91,46 +95,37 @@ def add_dim_coord(self, var): self.dataset[name + "_"] = var return name - 
@rule() - @given(st.data()) - @precondition( - lambda self: len(set(self.dataset.dims) & set(self.dataset.xindexes)) >= 1 - ) - def reset_index(self, data): - dim = data.draw( - xrst.unique_subset_of( - tuple(set(self.dataset.dims) & set(self.dataset.xindexes)), - min_size=1, - max_size=1, - ) - ) + @rule(dim=st.one_of(consumes(indexed_dims), consumes(multi_indexed_dims))) + def reset_index(self, dim): self.check_default_indexes = False note(f"> resetting {dim}") self.dataset = self.dataset.reset_index(dim) - @rule(newname=UNIQUE_NAME) - @given(data=st.data()) - @precondition(lambda self: len(get_not_multiindex_dims(self.dataset)) >= 2) - def stack(self, data, newname): - choices = list(get_not_multiindex_dims(self.dataset)) - oldnames = data.draw(xrst.unique_subset_of(choices, min_size=2, max_size=2)) + @rule( + newname=UNIQUE_NAME, + oldnames=st.lists(consumes(indexed_dims), min_size=1), + target=multi_indexed_dims, + ) + def stack(self, newname, oldnames): note(f"> stacking {oldnames} as {newname}") self.dataset = self.dataset.stack({newname: oldnames}) - - @rule() - @given(data=st.data()) - def unstack(self, data): - choices = get_multiindex_dims(self.dataset) - if choices: - dim = data.draw(xrst.unique_subset_of(choices, min_size=1, max_size=1)) - assume(self.dataset.xindexes[dim].index.is_unique) - note(f"> unstacking {dim}") - self.dataset = self.dataset.unstack(dim) + return newname + + # TODO: add st.none() to dim + @rule(dim=consumes(multi_indexed_dims), target=indexed_dims) + def unstack(self, dim): + if dim is not None: + pd_index = self.dataset.xindexes[dim].index + assume(pd_index.is_unique) + note(f"> unstacking {dim}") + self.dataset = self.dataset.unstack(dim) + if dim is not None: + return multiple(pd_index.names) else: - self.dataset = self.dataset.unstack() + # TODO Fix this when adding st.none() + return multiple() - @rule(newname=UNIQUE_NAME, oldname=consumes(dims)) - @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) + @rule(newname=UNIQUE_NAME, oldname=consumes(indexed_dims)) def rename_vars(self, newname, oldname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key @@ -139,17 +134,13 @@ def rename_vars(self, newname, oldname): note(f"> renaming {oldname} to {newname}") self.dataset = self.dataset.rename_vars({oldname: newname}) - @rule(dim=consumes(dims)) - @given(data=st.data()) + @rule(data=st.data(), dim=consumes(indexed_dims), target=indexed_dims) @precondition(lambda self: len(self.dataset._variables) >= 2) - @precondition(lambda self: bool(get_dimension_coordinates(self.dataset))) def swap_dims(self, data, dim): ds = self.dataset - # need a dimension coordinate for swapping - dim = data.draw(xrst.unique_subset_of(get_dimension_coordinates(ds))) # Can only swap to a variable with the same dim to = data.draw( - xrst.unique_subset_of( + st.sampled_from( [name for name, var in ds._variables.items() if var.dims == (dim,)] ) ) @@ -158,6 +149,7 @@ def swap_dims(self, data, dim): # TODO: swapping from MultiIndex to a level of the same MultiIndex note(f"> swapping {dim} to {to}") self.dataset = ds.swap_dims({dim: to}) + return to # TODO: enable when we have serializable attrs only # @rule() @@ -172,7 +164,7 @@ def swap_dims(self, data, dim): @invariant() def assert_invariants(self): - note(f"> ===\n\n {self.dataset!r} \n===\n\n") + # note(f"> ===\n\n {self.dataset!r} \n===\n\n") _assert_internal_invariants(self.dataset, self.check_default_indexes) From 
e65a7a9723ee5bd6053685173ef93df99531d906 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 13:00:18 -0600 Subject: [PATCH 28/69] Add index_variables strategy --- properties/test_index_manipulation.py | 16 +-------------- xarray/testing/strategies.py | 28 +++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 15 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 08ca392f59c..94974093781 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,7 +1,6 @@ import random from collections.abc import Hashable -import numpy as np import pytest from xarray import Dataset @@ -10,7 +9,6 @@ pytest.importorskip("hypothesis") -import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import assume, note, settings from hypothesis.stateful import ( @@ -65,15 +63,6 @@ def unique(draw, strategy): DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) -# TODO: add datetime64[ns] -def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: - return ( - npst.integer_dtypes(endianness="<", sizes=(32, 64)) - | npst.unsigned_integer_dtypes(endianness="<", sizes=(32, 64)) - | npst.floating_dtypes(endianness="<", sizes=(32, 64)) - ) - - class DatasetStateMachine(RuleBasedStateMachine): indexed_dims = Bundle("indexed_dims") multi_indexed_dims = Bundle("multi_indexed_dims") @@ -83,10 +72,7 @@ def __init__(self): self.dataset = Dataset() self.check_default_indexes = True - @rule( - var=xrst.variables(dims=DIM_NAME, dtype=pandas_index_dtypes()), - target=indexed_dims, - ) + @rule(var=xrst.index_variables(dims=DIM_NAME), target=indexed_dims) def add_dim_coord(self, var): (name,) = var.dims # dim coord diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 18cb25a26dd..3e8ea13c20e 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -9,6 +9,7 @@ ) from e import hypothesis.extra.numpy as npst +import hypothesis.extra.pandas as pdst import numpy as np from hypothesis.errors import InvalidArgument @@ -26,6 +27,7 @@ "dimension_sizes", "attrs", "variables", + "index_variables", "unique_subset_of", ] @@ -62,6 +64,15 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: ) +# TODO: add datetime64[ns] +def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: + return ( + npst.integer_dtypes(endianness="<", sizes=(32, 64)) + | npst.unsigned_integer_dtypes(endianness="<", sizes=(32, 64)) + | npst.floating_dtypes(endianness="<", sizes=(32, 64)) + ) + + # TODO Generalize to all valid unicode characters once formatting bugs in xarray's reprs are fixed + docs can handle it. 
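
[PATCH 28/69] adds an index_variables strategy built on hypothesis.extra.pandas. The core idea, drawing a pandas Index and wrapping it in an xarray Variable, can be sketched independently (the dtype, sizes and dimension name below are arbitrary choices, not the strategy's defaults):

import hypothesis.extra.pandas as pdst
import hypothesis.strategies as st
import xarray as xr
from hypothesis import given

@given(data=st.data())
def test_variable_wraps_a_pandas_index(data):
    idx = data.draw(pdst.indexes(dtype="int64", min_size=1, max_size=5))
    var = xr.Variable(dims=["time"], data=idx)
    assert var.sizes["time"] == len(idx)
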
_readable_characters = st.characters( categories=["L", "N"], max_codepoint=0x017F @@ -382,6 +393,23 @@ def variables( return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) +@st.composite +def index_variables( + draw: st.DrawFn, + *, + dims: Union[ + st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], + None, + ] = None, + dtype: st.SearchStrategy[np.dtype] = pandas_index_dtypes(), + attrs: st.SearchStrategy[Mapping] = attrs(), +) -> xr.Variable: + + index = draw(pdst.indexes(min_size=1, dtype=draw(dtype))) + _dims = draw(dimension_names(min_dims=1, max_dims=1)) + return xr.Variable(dims=_dims, data=index, attrs=draw(attrs)) + + @overload def unique_subset_of( objs: Sequence[Hashable], From 98259378ab6749e48bab5a8c654ee6eab623256f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 13:00:44 -0600 Subject: [PATCH 29/69] Small improvement --- properties/test_index_manipulation.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 94974093781..6468c442ef3 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -124,12 +124,13 @@ def rename_vars(self, newname, oldname): @precondition(lambda self: len(self.dataset._variables) >= 2) def swap_dims(self, data, dim): ds = self.dataset + choices = [name for name, var in ds._variables.items() if var.dims == (dim,)] + # TODO: is there a better way to skip if choices == [] + # note(choices) + # if not choices: + # return dim # Can only swap to a variable with the same dim - to = data.draw( - st.sampled_from( - [name for name, var in ds._variables.items() if var.dims == (dim,)] - ) - ) + to = data.draw(st.sampled_from(choices)) # TODO: swapping a dimension to itself # TODO: swapping from Index to a MultiIndex level # TODO: swapping from MultiIndex to a level of the same MultiIndex From d933be3d83e7bee5cb67e4527f22d8871806b314 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 18 Mar 2024 13:17:09 -0600 Subject: [PATCH 30/69] fix --- properties/test_index_manipulation.py | 2 +- xarray/testing/strategies.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 6468c442ef3..8636b0cf2c1 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -89,7 +89,7 @@ def reset_index(self, dim): @rule( newname=UNIQUE_NAME, - oldnames=st.lists(consumes(indexed_dims), min_size=1), + oldnames=st.lists(consumes(indexed_dims), min_size=1, unique=True), target=multi_indexed_dims, ) def stack(self, newname, oldnames): diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 3e8ea13c20e..a1fb049a46f 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -406,7 +406,9 @@ def index_variables( ) -> xr.Variable: index = draw(pdst.indexes(min_size=1, dtype=draw(dtype))) - _dims = draw(dimension_names(min_dims=1, max_dims=1)) + if dims is None: + dims = dimension_names(min_dims=1, max_dims=1) + _dims = draw(dims) return xr.Variable(dims=_dims, data=index, attrs=draw(attrs)) From 50f60308fdbb5e04cc3e5263bb5283e25c82159d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Mar 2024 09:05:13 -0600 Subject: [PATCH 31/69] Use st.shared --- properties/test_index_manipulation.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git 
a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 8636b0cf2c1..beda8c55bde 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -58,9 +58,9 @@ def unique(draw, strategy): random.seed(123456) -# Share to ensure we get unique names on each draw? -UNIQUE_NAME = unique(strategy=xrst.names()) -DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) +# This strategy will be shared to ensure we get unique names on each draw. +# So we don't rename to an already present dimension name, for example. +unique_names = unique(strategy=xrst.names()) class DatasetStateMachine(RuleBasedStateMachine): @@ -72,7 +72,14 @@ def __init__(self): self.dataset = Dataset() self.check_default_indexes = True - @rule(var=xrst.index_variables(dims=DIM_NAME), target=indexed_dims) + @rule( + var=xrst.index_variables( + dims=xrst.dimension_names( + name_strategy=st.shared(unique_names), min_dims=1, max_dims=1 + ) + ), + target=indexed_dims, + ) def add_dim_coord(self, var): (name,) = var.dims # dim coord @@ -88,7 +95,7 @@ def reset_index(self, dim): self.dataset = self.dataset.reset_index(dim) @rule( - newname=UNIQUE_NAME, + newname=st.shared(unique_names), oldnames=st.lists(consumes(indexed_dims), min_size=1, unique=True), target=multi_indexed_dims, ) @@ -111,7 +118,7 @@ def unstack(self, dim): # TODO Fix this when adding st.none() return multiple() - @rule(newname=UNIQUE_NAME, oldname=consumes(indexed_dims)) + @rule(newname=st.shared(unique_names), oldname=consumes(indexed_dims)) def rename_vars(self, newname, oldname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key From 4905b1caeef4484cf519fd3665886b7b2ff815c6 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Mar 2024 09:10:20 -0600 Subject: [PATCH 32/69] Revert "Use st.shared" This reverts commit 50f60308fdbb5e04cc3e5263bb5283e25c82159d. --- properties/test_index_manipulation.py | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index beda8c55bde..8636b0cf2c1 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -58,9 +58,9 @@ def unique(draw, strategy): random.seed(123456) -# This strategy will be shared to ensure we get unique names on each draw. -# So we don't rename to an already present dimension name, for example. -unique_names = unique(strategy=xrst.names()) +# Share to ensure we get unique names on each draw? 
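
[PATCH 31/69] and [PATCH 32/69] try st.shared for name generation and then revert it. The defining property of st.shared is that, within a single generated example, every use of the shared strategy (same instance or same key) returns one and the same drawn value, as this sketch (not part of the series) shows:

import hypothesis.strategies as st
from hypothesis import given

@given(
    a=st.shared(st.integers(), key="one-draw"),
    b=st.shared(st.integers(), key="one-draw"),
)
def test_shared_returns_the_same_value_everywhere(a, b):
    assert a == b
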
+UNIQUE_NAME = unique(strategy=xrst.names()) +DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) class DatasetStateMachine(RuleBasedStateMachine): @@ -72,14 +72,7 @@ def __init__(self): self.dataset = Dataset() self.check_default_indexes = True - @rule( - var=xrst.index_variables( - dims=xrst.dimension_names( - name_strategy=st.shared(unique_names), min_dims=1, max_dims=1 - ) - ), - target=indexed_dims, - ) + @rule(var=xrst.index_variables(dims=DIM_NAME), target=indexed_dims) def add_dim_coord(self, var): (name,) = var.dims # dim coord @@ -95,7 +88,7 @@ def reset_index(self, dim): self.dataset = self.dataset.reset_index(dim) @rule( - newname=st.shared(unique_names), + newname=UNIQUE_NAME, oldnames=st.lists(consumes(indexed_dims), min_size=1, unique=True), target=multi_indexed_dims, ) @@ -118,7 +111,7 @@ def unstack(self, dim): # TODO Fix this when adding st.none() return multiple() - @rule(newname=st.shared(unique_names), oldname=consumes(indexed_dims)) + @rule(newname=UNIQUE_NAME, oldname=consumes(indexed_dims)) def rename_vars(self, newname, oldname): # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key From 4391833bb4ab7b085a584372ca263961c718b3a8 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Tue, 19 Mar 2024 09:15:23 -0600 Subject: [PATCH 33/69] fix unstacking --- properties/test_index_manipulation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 8636b0cf2c1..dd82965ae2a 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -106,7 +106,7 @@ def unstack(self, dim): note(f"> unstacking {dim}") self.dataset = self.dataset.unstack(dim) if dim is not None: - return multiple(pd_index.names) + return multiple(*pd_index.names) else: # TODO Fix this when adding st.none() return multiple() From dcc2e488619639194f5bc36a4cf49efb7569726d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 20 Mar 2024 19:28:50 -0600 Subject: [PATCH 34/69] cleanup --- properties/test_index_manipulation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index dd82965ae2a..b1f5872f5df 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,4 +1,3 @@ -import random from collections.abc import Hashable import pytest @@ -56,8 +55,6 @@ def unique(draw, strategy): ) -random.seed(123456) - # Share to ensure we get unique names on each draw? 
UNIQUE_NAME = unique(strategy=xrst.names()) DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) From 01e0670161270765845ae5b8593ca7780240879f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 25 Mar 2024 09:30:56 -0600 Subject: [PATCH 35/69] WIP --- properties/test_index_manipulation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index b1f5872f5df..58c11d192ab 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -59,7 +59,10 @@ def unique(draw, strategy): UNIQUE_NAME = unique(strategy=xrst.names()) DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) +from hypothesis import seed + +@seed(222637475654255579925165578590114755457) class DatasetStateMachine(RuleBasedStateMachine): indexed_dims = Bundle("indexed_dims") multi_indexed_dims = Bundle("multi_indexed_dims") From 360b926b739133750d1145b4e34ca8878b731e49 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 16:03:21 -0600 Subject: [PATCH 36/69] Remove bundles --- properties/test_index_manipulation.py | 153 ++++++++++++++------------ 1 file changed, 85 insertions(+), 68 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 58c11d192ab..aef575dd6bc 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,21 +1,17 @@ -from collections.abc import Hashable +import itertools import pytest from xarray import Dataset -from xarray.indexes import PandasMultiIndex from xarray.testing import _assert_internal_invariants pytest.importorskip("hypothesis") import hypothesis.strategies as st -from hypothesis import assume, note, settings +from hypothesis import note, settings from hypothesis.stateful import ( - Bundle, RuleBasedStateMachine, - consumes, invariant, - multiple, precondition, rule, ) @@ -23,29 +19,6 @@ import xarray.testing.strategies as xrst -def get_not_multiindex_dims(ds: Dataset) -> tuple[Hashable]: - dims = ds.dims - mindexes = [ - name - for name, index in ds.xindexes.items() - if isinstance(index, PandasMultiIndex) - ] - return tuple(set(dims) - set(mindexes)) - - -def get_multiindex_dims(ds: Dataset) -> list[Hashable]: - mindexes = [ - name - for name, index in ds.xindexes.items() - if isinstance(index, PandasMultiIndex) - ] - return mindexes - - -def get_dimension_coordinates(ds: Dataset) -> tuple[Hashable]: - return tuple(set(ds.dims) & set(ds._variables)) - - @st.composite def unique(draw, strategy): # https://stackoverflow.com/questions/73737073/create-hypothesis-strategy-that-returns-unique-values @@ -59,60 +32,87 @@ def unique(draw, strategy): UNIQUE_NAME = unique(strategy=xrst.names()) DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) -from hypothesis import seed - -@seed(222637475654255579925165578590114755457) class DatasetStateMachine(RuleBasedStateMachine): - indexed_dims = Bundle("indexed_dims") - multi_indexed_dims = Bundle("multi_indexed_dims") + # Can't use bundles because we'd need pre-conditions on consumes(bundle) + # indexed_dims = Bundle("indexed_dims") + # multi_indexed_dims = Bundle("multi_indexed_dims") def __init__(self): super().__init__() self.dataset = Dataset() self.check_default_indexes = True - @rule(var=xrst.index_variables(dims=DIM_NAME), target=indexed_dims) + # We track these separately as lists so we can guarantee order of iteration over them. 
+ # Order of iteration over Dataset.dims is not guaranteed + self.indexed_dims = [] + self.multi_indexed_dims = [] + + @rule(var=xrst.index_variables(dims=DIM_NAME)) def add_dim_coord(self, var): (name,) = var.dims # dim coord self.dataset[name] = var # non-dim coord of same size; this allows renaming self.dataset[name + "_"] = var - return name - @rule(dim=st.one_of(consumes(indexed_dims), consumes(multi_indexed_dims))) - def reset_index(self, dim): + self.indexed_dims.append(name) + + @property + def has_dims(self) -> bool: + return bool(self.indexed_dims + self.multi_indexed_dims) + + @rule(data=st.data()) + @precondition(lambda self: self.has_dims) + def reset_index(self, data): + dim = data.draw(st.sampled_from(self.indexed_dims + self.multi_indexed_dims)) self.check_default_indexes = False note(f"> resetting {dim}") self.dataset = self.dataset.reset_index(dim) - @rule( - newname=UNIQUE_NAME, - oldnames=st.lists(consumes(indexed_dims), min_size=1, unique=True), - target=multi_indexed_dims, - ) - def stack(self, newname, oldnames): + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + @rule(newname=UNIQUE_NAME, data=st.data()) + @precondition(lambda self: bool(self.indexed_dims)) + def stack(self, newname, data): + oldnames = data.draw( + st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) + ) note(f"> stacking {oldnames} as {newname}") self.dataset = self.dataset.stack({newname: oldnames}) - return newname - # TODO: add st.none() to dim - @rule(dim=consumes(multi_indexed_dims), target=indexed_dims) - def unstack(self, dim): + self.multi_indexed_dims += [newname] + for dim in oldnames: + del self.indexed_dims[self.indexed_dims.index(dim)] + + @rule(data=st.data()) + @precondition(lambda self: bool(self.multi_indexed_dims)) + def unstack(self, data): + # TODO: add None + dim = data.draw(st.sampled_from(self.multi_indexed_dims)) + note(f"> unstacking {dim}") if dim is not None: pd_index = self.dataset.xindexes[dim].index - assume(pd_index.is_unique) - note(f"> unstacking {dim}") self.dataset = self.dataset.unstack(dim) + + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + if dim is not None: - return multiple(*pd_index.names) + pd_index = self.dataset.xindexes[dim].index + self.indexed_dims.extend(pd_index.names) else: - # TODO Fix this when adding st.none() - return multiple() - - @rule(newname=UNIQUE_NAME, oldname=consumes(indexed_dims)) - def rename_vars(self, newname, oldname): + # TODO: fix this + pass + + @rule(newname=UNIQUE_NAME, data=st.data()) + @precondition(lambda self: self.has_dims) + def rename_vars(self, newname, data): + oldname = data.draw( + st.sampled_from(self.indexed_dims + self.multi_indexed_dims) + ) # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." 
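# The approach above can be reduced to a minimal, standalone sketch: keep the valid
# choices in plain lists on the machine and draw from them at rule-execution time with
# `data.draw(st.sampled_from(...))`, instead of Bundles + consumes(). Illustrative
# only (not part of this patch); assumes just `hypothesis`, and every name is made up.
import hypothesis.strategies as st
from hypothesis.stateful import RuleBasedStateMachine, precondition, rule


class TrackedNames(RuleBasedStateMachine):
    def __init__(self):
        super().__init__()
        self.names = []  # stands in for indexed_dims / multi_indexed_dims

    @rule(name=st.uuids().map(str))
    def add(self, name):
        self.names.append(name)

    @rule(data=st.data())
    @precondition(lambda self: bool(self.names))
    def remove(self, data):
        # the choice is drawn from the *current* state, so no Bundle bookkeeping
        # is needed and a precondition can guard the whole rule
        name = data.draw(st.sampled_from(self.names))
        self.names.remove(name)


TrackedNamesTest = TrackedNames.TestCase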
@@ -120,23 +120,40 @@ def rename_vars(self, newname, oldname): note(f"> renaming {oldname} to {newname}") self.dataset = self.dataset.rename_vars({oldname: newname}) - @rule(data=st.data(), dim=consumes(indexed_dims), target=indexed_dims) - @precondition(lambda self: len(self.dataset._variables) >= 2) - def swap_dims(self, data, dim): + dim = oldname + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + + @property + def swappable_dims(self): + options = [] + for dim in self.indexed_dims: + choices = [ + name + for name, var in self.dataset._variables.items() + if var.dims == (dim,) + ] + options.extend( + (a, b) for a, b in itertools.zip_longest((dim,), choices, fillvalue=dim) + ) + note(f"found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}") + return options + + @rule(data=st.data()) + @precondition(lambda self: bool(self.swappable_dims)) + def swap_dims(self, data): ds = self.dataset - choices = [name for name, var in ds._variables.items() if var.dims == (dim,)] - # TODO: is there a better way to skip if choices == [] - # note(choices) - # if not choices: - # return dim - # Can only swap to a variable with the same dim - to = data.draw(st.sampled_from(choices)) + dim, to = data.draw(st.sampled_from(self.swappable_dims)) # TODO: swapping a dimension to itself # TODO: swapping from Index to a MultiIndex level # TODO: swapping from MultiIndex to a level of the same MultiIndex note(f"> swapping {dim} to {to}") self.dataset = ds.swap_dims({dim: to}) - return to + + del self.indexed_dims[self.indexed_dims.index(dim)] + self.indexed_dims += [to] # TODO: enable when we have serializable attrs only # @rule() @@ -155,5 +172,5 @@ def assert_invariants(self): _assert_internal_invariants(self.dataset, self.check_default_indexes) -DatasetStateMachine.TestCase.settings = settings(max_examples=1000, deadline=None) +DatasetStateMachine.TestCase.settings = settings(max_examples=200, deadline=None) DatasetTest = DatasetStateMachine.TestCase From de1d1e4c5b476acfe44bcc75e5986640d9c613fd Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 16:21:08 -0600 Subject: [PATCH 37/69] Fixes --- properties/test_index_manipulation.py | 27 ++++++++++++++------------- xarray/testing/strategies.py | 20 -------------------- 2 files changed, 14 insertions(+), 33 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index aef575dd6bc..ac1c32bb9d0 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -28,7 +28,9 @@ def unique(draw, strategy): ) -# Share to ensure we get unique names on each draw? +# Share to ensure we get unique names on each draw, +# so we don't try to add two variables with the same name +# or stack to a dimension with a name that already exists in the Dataset. 
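# The `unique` helper used for UNIQUE_NAME is defined earlier in this file; the usual
# recipe from the Stack Overflow answer linked there looks roughly like the sketch
# below (illustrative, not necessarily the exact implementation used here): a set
# shared across all draws within one test case remembers what has been handed out.
import hypothesis.strategies as st


@st.composite
def unique(draw, strategy):
    seen = draw(st.shared(st.builds(set), key="key-for-unique-elems"))
    return draw(
        strategy.filter(lambda x: x not in seen).map(lambda x: seen.add(x) or x)
    )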
UNIQUE_NAME = unique(strategy=xrst.names()) DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) @@ -101,7 +103,6 @@ def unstack(self, data): del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] if dim is not None: - pd_index = self.dataset.xindexes[dim].index self.indexed_dims.extend(pd_index.names) else: # TODO: fix this @@ -110,17 +111,14 @@ def unstack(self, data): @rule(newname=UNIQUE_NAME, data=st.data()) @precondition(lambda self: self.has_dims) def rename_vars(self, newname, data): - oldname = data.draw( - st.sampled_from(self.indexed_dims + self.multi_indexed_dims) - ) + dim = data.draw(st.sampled_from(self.indexed_dims + self.multi_indexed_dims)) # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." self.check_default_indexes = False - note(f"> renaming {oldname} to {newname}") - self.dataset = self.dataset.rename_vars({oldname: newname}) + note(f"> renaming {dim} to {newname}") + self.dataset = self.dataset.rename_vars({dim: newname}) - dim = oldname if dim in self.indexed_dims: del self.indexed_dims[self.indexed_dims.index(dim)] elif dim in self.multi_indexed_dims: @@ -134,22 +132,25 @@ def swappable_dims(self): name for name, var in self.dataset._variables.items() if var.dims == (dim,) + # TODO: allow swapping a dimension to itself + and name != dim ] options.extend( (a, b) for a, b in itertools.zip_longest((dim,), choices, fillvalue=dim) ) - note(f"found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}") return options @rule(data=st.data()) @precondition(lambda self: bool(self.swappable_dims)) def swap_dims(self, data): ds = self.dataset - dim, to = data.draw(st.sampled_from(self.swappable_dims)) - # TODO: swapping a dimension to itself + options = self.swappable_dims + dim, to = data.draw(st.sampled_from(options)) # TODO: swapping from Index to a MultiIndex level # TODO: swapping from MultiIndex to a level of the same MultiIndex - note(f"> swapping {dim} to {to}") + note( + f"> swapping {dim} to {to}, found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}" + ) self.dataset = ds.swap_dims({dim: to}) del self.indexed_dims[self.indexed_dims.index(dim)] @@ -168,7 +169,7 @@ def swap_dims(self, data): @invariant() def assert_invariants(self): - # note(f"> ===\n\n {self.dataset!r} \n===\n\n") + note(f"> ===\n\n {self.dataset!r} \n===\n\n") _assert_internal_invariants(self.dataset, self.check_default_indexes) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index a1fb049a46f..487edb95f86 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -197,26 +197,6 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: Requires the hypothesis package to be installed. - See Also - -------- - :ref:`testing.hypothesis`_ - """ - return st.recursive( - st.dictionaries(_attr_keys, _attr_values), - lambda children: st.dictionaries(_attr_keys, children), - max_leaves=3, - ) - - -def serializable_attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: - """ - Generates arbitrary valid attributes dictionaries for xarray objects. - - These are intended to be serialized, and so, are less general than the - `attrs` function above. - - Requires the hypothesis package to be installed. 
- See Also -------- :ref:`testing.hypothesis`_ From ef51ad4e5ef387c6e4cf1c82089e59daa9d32057 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 16:40:52 -0600 Subject: [PATCH 38/69] Add hypothesis cache to CI --- .github/workflows/ci.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a37ff876e20..d69e919318c 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -127,6 +127,13 @@ jobs: run: | python -c "import xarray" + - name: Restore cached hypothesis directory + uses: actions/cache@v4 + with: + path: .hypothesis/ + key: cache-hypothesis + enableCrossOsArchive: true + - name: Run tests run: python -m pytest -n 4 --timeout 180 From 829f80c697aa906b75501005dbfd1a72259fa2a5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 16:49:03 -0600 Subject: [PATCH 39/69] Prevent index variables with NaNs, infs --- xarray/testing/strategies.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 487edb95f86..7b32f3aed99 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -384,8 +384,10 @@ def index_variables( dtype: st.SearchStrategy[np.dtype] = pandas_index_dtypes(), attrs: st.SearchStrategy[Mapping] = attrs(), ) -> xr.Variable: - - index = draw(pdst.indexes(min_size=1, dtype=draw(dtype))) + elements = npst.from_dtype( + dtype=draw(dtype), allow_nan=False, allow_infinity=False, allow_subnormal=False + ) + index = draw(pdst.indexes(elements=elements, min_size=1)) if dims is None: dims = dimension_names(min_dims=1, max_dims=1) _dims = draw(dims) From 6a38e271eed61ecb943f60d3aed0875d0437de7b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 16:41:20 -0600 Subject: [PATCH 40/69] [revert] --- .github/workflows/ci.yaml | 41 +++++++++++++++++---------------------- pyproject.toml | 2 +- 2 files changed, 19 insertions(+), 24 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index d69e919318c..a5e99c71922 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,25 +42,25 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest", "macos-latest", "windows-latest"] + os: ["ubuntu-latest"] # Bookend python versions - python-version: ["3.9", "3.11", "3.12"] + python-version: ["3.12"] env: [""] - include: - # Minimum python version: - - env: "bare-minimum" - python-version: "3.9" - os: ubuntu-latest - - env: "min-all-deps" - python-version: "3.9" - os: ubuntu-latest - # Latest python version: - - env: "all-but-dask" - python-version: "3.10" - os: ubuntu-latest - - env: "flaky" - python-version: "3.10" - os: ubuntu-latest + # include: + # # Minimum python version: + # - env: "bare-minimum" + # python-version: "3.9" + # os: ubuntu-latest + # - env: "min-all-deps" + # python-version: "3.9" + # os: ubuntu-latest + # # Latest python version: + # - env: "all-but-dask" + # python-version: "3.10" + # os: ubuntu-latest + # - env: "flaky" + # python-version: "3.10" + # os: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -135,12 +135,7 @@ jobs: enableCrossOsArchive: true - name: Run tests - run: python -m pytest -n 4 - --timeout 180 - --cov=xarray - --cov-report=xml - --junitxml=pytest.xml - $PYTEST_EXTRA_FLAGS + run: python -m pytest - name: Upload test results if: always() diff --git a/pyproject.toml b/pyproject.toml index d2a5c6b8748..995537525d7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ 
-298,7 +298,7 @@ markers = [ ] minversion = "7" python_files = "test_*.py" -testpaths = ["xarray/tests", "properties"] +testpaths = ["properties"] [tool.aliases] test = "pytest" From ffb3d8313206ed5aed3c1ac0cd91fb72b8f89bf5 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 17:46:48 -0600 Subject: [PATCH 41/69] Always save hypothesis cache --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index a5e99c71922..771c27de5ca 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -133,6 +133,7 @@ jobs: path: .hypothesis/ key: cache-hypothesis enableCrossOsArchive: true + save-always: true - name: Run tests run: python -m pytest From b33239023485b83d3d42e4a70d6e984b3a8b75b9 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 21:02:46 -0600 Subject: [PATCH 42/69] Expand dtypes --- xarray/testing/strategies.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 7b32f3aed99..d6ebcc7cf32 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -61,15 +61,20 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: | npst.unsigned_integer_dtypes() | npst.floating_dtypes() | npst.complex_number_dtypes() + | npst.datetime64_dtypes() + | npst.timedelta64_dtypes() + | npst.unicode_string_dtypes() ) -# TODO: add datetime64[ns] def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: return ( npst.integer_dtypes(endianness="<", sizes=(32, 64)) | npst.unsigned_integer_dtypes(endianness="<", sizes=(32, 64)) | npst.floating_dtypes(endianness="<", sizes=(32, 64)) + | npst.datetime64_dtypes(endianness="<") + | npst.timedelta64_dtypes(endianness="<") + | npst.unicode_string_dtypes(endianness="<") ) From 4a958dcf0fb7acab3d5123b3bd2d4138fb522f98 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Fri, 29 Mar 2024 21:14:51 -0600 Subject: [PATCH 43/69] Add invariant check for #8646 --- xarray/testing/assertions.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 6418eb79b8b..0a89da3f70e 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -268,6 +268,10 @@ def _assert_indexes_invariants_checks( } assert indexes.keys() <= index_vars, (set(indexes), index_vars) + for k, v in possible_coord_variables.items(): + if isinstance(v, IndexVariable): + assert k == v.name, (k, v.name) + # check pandas index wrappers vs. 
coordinate data adapters for k, index in indexes.items(): if isinstance(index, PandasIndex): From 2d5ea84e306469a7ad103ad649b1e2a3ff837792 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:02:57 -0600 Subject: [PATCH 44/69] Add drop_dims --- properties/test_index_manipulation.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index ac1c32bb9d0..2b7510ec609 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -124,6 +124,23 @@ def rename_vars(self, newname, data): elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + @precondition(lambda self: self.has_dims) + @rule(data=st.data()) + def drop_dims(self, data): + dims = data.draw( + st.lists( + st.sampled_from(self.indexed_dims + self.multi_indexed_dims), min_size=1 + ) + ) + note(f"> dropping {dims}") + self.dataset = self.dataset.drop_dims(dims) + + for dim in dims: + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + @property def swappable_dims(self): options = [] From 35b8656920b150fd47f3880cc6bd4927bf781f33 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:11:28 -0600 Subject: [PATCH 45/69] Add create_index to stack --- properties/test_index_manipulation.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 2b7510ec609..abb3fd6ea32 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -77,16 +77,21 @@ def reset_index(self, data): elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] - @rule(newname=UNIQUE_NAME, data=st.data()) + @rule(newname=UNIQUE_NAME, data=st.data(), create_index=st.booleans()) @precondition(lambda self: bool(self.indexed_dims)) - def stack(self, newname, data): + def stack(self, newname, data, create_index): oldnames = data.draw( st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) ) note(f"> stacking {oldnames} as {newname}") - self.dataset = self.dataset.stack({newname: oldnames}) + self.dataset = self.dataset.stack( + {newname: oldnames}, create_index=create_index + ) + + if create_index: + self.multi_indexed_dims += [newname] - self.multi_indexed_dims += [newname] + # if create_index is False, then we just drop these for dim in oldnames: del self.indexed_dims[self.indexed_dims.index(dim)] From a4974c2e50d476ae6fe04efd344fa94dc86dbbc4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:17:18 -0600 Subject: [PATCH 46/69] Generalize a bit --- properties/test_index_manipulation.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index abb3fd6ea32..0b3aae7a7c5 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -61,11 +61,11 @@ def add_dim_coord(self, var): self.indexed_dims.append(name) @property - def has_dims(self) -> bool: + def has_indexed_dims(self) -> bool: return bool(self.indexed_dims + self.multi_indexed_dims) @rule(data=st.data()) - @precondition(lambda self: self.has_dims) + @precondition(lambda self: self.has_indexed_dims) def reset_index(self, 
data): dim = data.draw(st.sampled_from(self.indexed_dims + self.multi_indexed_dims)) self.check_default_indexes = False @@ -114,9 +114,9 @@ def unstack(self, data): pass @rule(newname=UNIQUE_NAME, data=st.data()) - @precondition(lambda self: self.has_dims) + @precondition(lambda self: bool(self.dataset.variables)) def rename_vars(self, newname, data): - dim = data.draw(st.sampled_from(self.indexed_dims + self.multi_indexed_dims)) + dim = data.draw(st.sampled_from(sorted(self.dataset.variables))) # benbovy: "skip the default indexes invariant test when the name of an # existing dimension coordinate is passed as input kwarg or dict key # to .rename_vars()." @@ -129,13 +129,11 @@ def rename_vars(self, newname, data): elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] - @precondition(lambda self: self.has_dims) + @precondition(lambda self: bool(self.dataset.dims)) @rule(data=st.data()) def drop_dims(self, data): dims = data.draw( - st.lists( - st.sampled_from(self.indexed_dims + self.multi_indexed_dims), min_size=1 - ) + st.lists(st.sampled_from(sorted(tuple(self.dataset.dims))), min_size=1) ) note(f"> dropping {dims}") self.dataset = self.dataset.drop_dims(dims) From 7e98388128ba9c9a0e56cbe1cd6751469be6437a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:20:21 -0600 Subject: [PATCH 47/69] limit number of indexes to stack --- properties/test_index_manipulation.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 0b3aae7a7c5..b1227cbeaf4 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -81,7 +81,12 @@ def reset_index(self, data): @precondition(lambda self: bool(self.indexed_dims)) def stack(self, newname, data, create_index): oldnames = data.draw( - st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) + st.lists( + st.sampled_from(self.indexed_dims), + min_size=1, + max_size=3 if create_index else None, + unique=True, + ) ) note(f"> stacking {oldnames} as {newname}") self.dataset = self.dataset.stack( From 0762a9f065e97e5ca05a1e1f0e756f5bd3b051db Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:24:33 -0600 Subject: [PATCH 48/69] Fix endianness? 
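With endianness="=", only native-byte-order dtypes are generated, so the test data
matches the byte order NumPy and pandas use internally. A rough standalone
illustration of what that restriction means (assumes only hypothesis and numpy are
installed; this snippet is illustrative and not part of the diff below):

    import hypothesis.extra.numpy as npst
    from hypothesis import given

    @given(dtype=npst.integer_dtypes(endianness="=", sizes=(32, 64)))
    def test_only_native_order(dtype):
        # a native-order dtype is unchanged by an explicit request for native order
        assert dtype == dtype.newbyteorder("=")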
--- xarray/testing/strategies.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index d6ebcc7cf32..5d90cfdb465 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -69,12 +69,12 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: return ( - npst.integer_dtypes(endianness="<", sizes=(32, 64)) - | npst.unsigned_integer_dtypes(endianness="<", sizes=(32, 64)) - | npst.floating_dtypes(endianness="<", sizes=(32, 64)) - | npst.datetime64_dtypes(endianness="<") - | npst.timedelta64_dtypes(endianness="<") - | npst.unicode_string_dtypes(endianness="<") + npst.integer_dtypes(endianness="=", sizes=(32, 64)) + | npst.unsigned_integer_dtypes(endianness="=", sizes=(32, 64)) + | npst.floating_dtypes(endianness="=", sizes=(32, 64)) + | npst.datetime64_dtypes(endianness="=") + | npst.timedelta64_dtypes(endianness="=") + | npst.unicode_string_dtypes(endianness="=") ) From ef7cfdd9dbb2845f4372f0bb1c7ed672c30d0c0a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:27:25 -0600 Subject: [PATCH 49/69] uniquify drop_dims --- properties/test_index_manipulation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index b1227cbeaf4..cae01dfda89 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -138,7 +138,11 @@ def rename_vars(self, newname, data): @rule(data=st.data()) def drop_dims(self, data): dims = data.draw( - st.lists(st.sampled_from(sorted(tuple(self.dataset.dims))), min_size=1) + st.lists( + st.sampled_from(sorted(tuple(self.dataset.dims))), + min_size=1, + unique=True, + ) ) note(f"> dropping {dims}") self.dataset = self.dataset.drop_dims(dims) From 18b0d6f074c3c0bd296c540b7b8cea49ca3edeb1 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:47:18 -0600 Subject: [PATCH 50/69] Avoid NaTs in index vars https://github.com/HypothesisWorks/hypothesis/issues/3943 --- properties/test_index_manipulation.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index cae01dfda89..318b13d778d 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -1,5 +1,6 @@ import itertools +import numpy as np import pytest from xarray import Dataset @@ -8,7 +9,7 @@ pytest.importorskip("hypothesis") import hypothesis.strategies as st -from hypothesis import note, settings +from hypothesis import assume, note, settings from hypothesis.stateful import ( RuleBasedStateMachine, invariant, @@ -52,6 +53,9 @@ def __init__(self): @rule(var=xrst.index_variables(dims=DIM_NAME)) def add_dim_coord(self, var): + # https://github.com/HypothesisWorks/hypothesis/issues/3943 + assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) + (name,) = var.dims # dim coord self.dataset[name] = var From de53f930e9310a1fcdf13fcf87a110160e1d3e0c Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:47:33 -0600 Subject: [PATCH 51/69] Guard swap_dims --- properties/test_index_manipulation.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 318b13d778d..547d7557edf 100644 --- a/properties/test_index_manipulation.py 
+++ b/properties/test_index_manipulation.py @@ -159,13 +159,14 @@ def drop_dims(self, data): @property def swappable_dims(self): + ds = self.dataset options = [] for dim in self.indexed_dims: choices = [ name - for name, var in self.dataset._variables.items() + for name, var in ds._variables.items() if var.dims == (dim,) - # TODO: allow swapping a dimension to itself + # TODO: Avoid swapping a dimension to itself and name != dim ] options.extend( @@ -174,13 +175,15 @@ def swappable_dims(self): return options @rule(data=st.data()) + # TODO: Avoid swapping from Index to a MultiIndex level + # TODO: Avoid swapping from MultiIndex to a level of the same MultiIndex + # TODO: Avoid swapping when a MultiIndex is present + @precondition(lambda self: not bool(self.multi_indexed_dims)) @precondition(lambda self: bool(self.swappable_dims)) def swap_dims(self, data): ds = self.dataset options = self.swappable_dims dim, to = data.draw(st.sampled_from(options)) - # TODO: swapping from Index to a MultiIndex level - # TODO: swapping from MultiIndex to a level of the same MultiIndex note( f"> swapping {dim} to {to}, found swappable dims: {options}, all_dims: {tuple(self.dataset.dims)}" ) @@ -202,9 +205,9 @@ def swap_dims(self, data): @invariant() def assert_invariants(self): - note(f"> ===\n\n {self.dataset!r} \n===\n\n") + # note(f"> ===\n\n {self.dataset!r} \n===\n\n") _assert_internal_invariants(self.dataset, self.check_default_indexes) -DatasetStateMachine.TestCase.settings = settings(max_examples=200, deadline=None) +DatasetStateMachine.TestCase.settings = settings(max_examples=300, deadline=None) DatasetTest = DatasetStateMachine.TestCase From 6f0a016a843107f99053ad237d89b58669a7bb01 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:47:53 -0600 Subject: [PATCH 52/69] Revert "Add invariant check for #8646" This reverts commit 4a958dcf0fb7acab3d5123b3bd2d4138fb522f98. --- xarray/testing/assertions.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/xarray/testing/assertions.py b/xarray/testing/assertions.py index 0a89da3f70e..6418eb79b8b 100644 --- a/xarray/testing/assertions.py +++ b/xarray/testing/assertions.py @@ -268,10 +268,6 @@ def _assert_indexes_invariants_checks( } assert indexes.keys() <= index_vars, (set(indexes), index_vars) - for k, v in possible_coord_variables.items(): - if isinstance(v, IndexVariable): - assert k == v.name, (k, v.name) - # check pandas index wrappers vs. 
coordinate data adapters for k, index in indexes.items(): if isinstance(index, PandasIndex): From dd2e151b340b8b5e2fe975f8339d259a932f71a4 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:45:29 -0600 Subject: [PATCH 53/69] Add drop_indexes --- properties/test_index_manipulation.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 547d7557edf..e8d7f7bcb01 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -157,6 +157,23 @@ def drop_dims(self, data): elif dim in self.multi_indexed_dims: del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + @precondition(lambda self: bool(self.indexed_dims)) + @rule(data=st.data()) + def drop_indexes(self, data): + self.check_default_indexes = False + + dims = data.draw( + st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) + ) + note(f"> dropping {dims}") + self.dataset = self.dataset.drop_indexes(dims) + + for dim in dims: + if dim in self.indexed_dims: + del self.indexed_dims[self.indexed_dims.index(dim)] + elif dim in self.multi_indexed_dims: + del self.multi_indexed_dims[self.multi_indexed_dims.index(dim)] + @property def swappable_dims(self): ds = self.dataset From a5a873b04ffc972f5c15829d17a440a8c0b0ab2b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 21:49:59 -0600 Subject: [PATCH 54/69] Add assign_coords --- properties/test_index_manipulation.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index e8d7f7bcb01..1127d63a0aa 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -64,6 +64,16 @@ def add_dim_coord(self, var): self.indexed_dims.append(name) + @rule(var=xrst.index_variables(dims=DIM_NAME)) + def assign_coords(self, var): + # https://github.com/HypothesisWorks/hypothesis/issues/3943 + assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) + + (name,) = var.dims + self.dataset = self.dataset.assign_coords({name: var}) + + self.indexed_dims.append(name) + @property def has_indexed_dims(self) -> bool: return bool(self.indexed_dims + self.multi_indexed_dims) From 87db79a9d58a8302e83cee91021c010668497a3a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 22:15:09 -0600 Subject: [PATCH 55/69] Fix max_period for pandas timedelta --- xarray/testing/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 5d90cfdb465..1cc216c4ad0 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -73,7 +73,7 @@ def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: | npst.unsigned_integer_dtypes(endianness="=", sizes=(32, 64)) | npst.floating_dtypes(endianness="=", sizes=(32, 64)) | npst.datetime64_dtypes(endianness="=") - | npst.timedelta64_dtypes(endianness="=") + | npst.timedelta64_dtypes(endianness="=", max_period="D") | npst.unicode_string_dtypes(endianness="=") ) From 4d35d56e684c84eafe8f5677fb5bfe10a69c5fc2 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 22:15:29 -0600 Subject: [PATCH 56/69] Add xfailed test --- properties/test_index_manipulation.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 1127d63a0aa..cfcdcedbc49 100644 --- 
a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -238,3 +238,12 @@ def assert_invariants(self): DatasetStateMachine.TestCase.settings = settings(max_examples=300, deadline=None) DatasetTest = DatasetStateMachine.TestCase + + +@pytest.mark.skip(reason="failure detected by hypothesis") +def test_unstack_object(): + import xarray as xr + + ds = xr.Dataset() + ds["0"] = np.array(["", "\x000"], dtype=object) + ds.stack({"1": ["0"]}).unstack() From d8e00d33f4ea28bd62dcd488ce32388b0c3a9b54 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 22:25:27 -0600 Subject: [PATCH 57/69] Add notes --- properties/test_index_manipulation.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index cfcdcedbc49..9248a9689ad 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -57,6 +57,7 @@ def add_dim_coord(self, var): assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) (name,) = var.dims + note(f"setting {name}") # dim coord self.dataset[name] = var # non-dim coord of same size; this allows renaming @@ -70,6 +71,7 @@ def assign_coords(self, var): assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) (name,) = var.dims + note(f"assign_coords: {name}") self.dataset = self.dataset.assign_coords({name: var}) self.indexed_dims.append(name) @@ -158,7 +160,7 @@ def drop_dims(self, data): unique=True, ) ) - note(f"> dropping {dims}") + note(f"> drop_dims: {dims}") self.dataset = self.dataset.drop_dims(dims) for dim in dims: @@ -175,7 +177,7 @@ def drop_indexes(self, data): dims = data.draw( st.lists(st.sampled_from(self.indexed_dims), min_size=1, unique=True) ) - note(f"> dropping {dims}") + note(f"> drop_indexes: {dims}") self.dataset = self.dataset.drop_indexes(dims) for dim in dims: From 06808a1ce87dbfa4dea2e11dae59d1eef5c7655d Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sat, 30 Mar 2024 22:25:37 -0600 Subject: [PATCH 58/69] Skip timedelta indexes --- properties/test_index_manipulation.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 9248a9689ad..84adbba2abe 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -51,7 +51,13 @@ def __init__(self): self.indexed_dims = [] self.multi_indexed_dims = [] - @rule(var=xrst.index_variables(dims=DIM_NAME)) + # TODO: stacking with a timedelta64 index and unstacking converts it to object + @rule( + var=xrst.index_variables( + dims=DIM_NAME, + dtype=xrst.pandas_index_dtypes().filter(lambda x: x.kind != "m"), + ) + ) def add_dim_coord(self, var): # https://github.com/HypothesisWorks/hypothesis/issues/3943 assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) From cf9e86a92d8e31144b9c6c7fe9d1dc054a4d42ea Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 31 Mar 2024 19:58:50 -0600 Subject: [PATCH 59/69] to_zarr --- properties/test_index_manipulation.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 84adbba2abe..d901b7aab3d 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -230,13 +230,16 @@ def swap_dims(self, data): # TODO: enable when we have serializable attrs only # @rule() # def 
roundtrip_zarr(self): - # if not has_zarr: - # return + # note("> roundtrip to zarr") + # from xarray.tests.test_backends import create_tmp_file, ON_WINDOWS + # import xarray as xr + # # if not has_zarr: + # # return # expected = self.dataset # with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as path: # self.dataset.to_zarr(path + ".zarr") # with xr.open_dataset(path + ".zarr", engine="zarr") as ds: - # assert_identical(expected, ds) + # xr.testing.assert_identical(expected, ds) @invariant() def assert_invariants(self): From e5333fa1c13ea20c2489441aa648516952a10759 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Sun, 31 Mar 2024 20:22:57 -0600 Subject: [PATCH 60/69] small tweaks --- properties/test_index_manipulation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index d901b7aab3d..3870e230136 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -63,7 +63,7 @@ def add_dim_coord(self, var): assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) (name,) = var.dims - note(f"setting {name}") + note(f"adding dimension coordinate {name}") # dim coord self.dataset[name] = var # non-dim coord of same size; this allows renaming @@ -210,6 +210,7 @@ def swappable_dims(self): return options @rule(data=st.data()) + # TODO: swap_dims is basically all broken if a multiindex is present # TODO: Avoid swapping from Index to a MultiIndex level # TODO: Avoid swapping from MultiIndex to a level of the same MultiIndex # TODO: Avoid swapping when a MultiIndex is present From b85f5e83e5d327b7f46a80d8e5519d3577f96d0e Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 17:39:25 -0600 Subject: [PATCH 61/69] Remove NaT assume --- properties/test_index_manipulation.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 3870e230136..5a9fd54c041 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -9,7 +9,7 @@ pytest.importorskip("hypothesis") import hypothesis.strategies as st -from hypothesis import assume, note, settings +from hypothesis import note, settings from hypothesis.stateful import ( RuleBasedStateMachine, invariant, @@ -59,9 +59,6 @@ def __init__(self): ) ) def add_dim_coord(self, var): - # https://github.com/HypothesisWorks/hypothesis/issues/3943 - assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) - (name,) = var.dims note(f"adding dimension coordinate {name}") # dim coord @@ -73,9 +70,6 @@ def add_dim_coord(self, var): @rule(var=xrst.index_variables(dims=DIM_NAME)) def assign_coords(self, var): - # https://github.com/HypothesisWorks/hypothesis/issues/3943 - assume(np.all(~np.isnat(var.data)) if var.dtype.kind in ["mM"] else True) - (name,) = var.dims note(f"assign_coords: {name}") self.dataset = self.dataset.assign_coords({name: var}) From 15e00ff14f8160fd34a0dde4f1e1006b48b3976b Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 17:59:01 -0600 Subject: [PATCH 62/69] Revert "[revert]" This reverts commit 6a38e271eed61ecb943f60d3aed0875d0437de7b. 
--- .github/workflows/ci.yaml | 41 ++++++++++++++++++++++----------------- pyproject.toml | 2 +- 2 files changed, 24 insertions(+), 19 deletions(-) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 771c27de5ca..459660e2bfa 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -42,25 +42,25 @@ jobs: strategy: fail-fast: false matrix: - os: ["ubuntu-latest"] + os: ["ubuntu-latest", "macos-latest", "windows-latest"] # Bookend python versions - python-version: ["3.12"] + python-version: ["3.9", "3.11", "3.12"] env: [""] - # include: - # # Minimum python version: - # - env: "bare-minimum" - # python-version: "3.9" - # os: ubuntu-latest - # - env: "min-all-deps" - # python-version: "3.9" - # os: ubuntu-latest - # # Latest python version: - # - env: "all-but-dask" - # python-version: "3.10" - # os: ubuntu-latest - # - env: "flaky" - # python-version: "3.10" - # os: ubuntu-latest + include: + # Minimum python version: + - env: "bare-minimum" + python-version: "3.9" + os: ubuntu-latest + - env: "min-all-deps" + python-version: "3.9" + os: ubuntu-latest + # Latest python version: + - env: "all-but-dask" + python-version: "3.10" + os: ubuntu-latest + - env: "flaky" + python-version: "3.10" + os: ubuntu-latest steps: - uses: actions/checkout@v4 with: @@ -136,7 +136,12 @@ jobs: save-always: true - name: Run tests - run: python -m pytest + run: python -m pytest -n 4 + --timeout 180 + --cov=xarray + --cov-report=xml + --junitxml=pytest.xml + $PYTEST_EXTRA_FLAGS - name: Upload test results if: always() diff --git a/pyproject.toml b/pyproject.toml index 995537525d7..d2a5c6b8748 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -298,7 +298,7 @@ markers = [ ] minversion = "7" python_files = "test_*.py" -testpaths = ["properties"] +testpaths = ["xarray/tests", "properties"] [tool.aliases] test = "pytest" From 082d9f686da8b3ca44265f2da79f89a36b2ab573 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 17:52:38 -0600 Subject: [PATCH 63/69] Add hypothesis workflow --- .github/workflows/ci-additional.yaml | 75 ++++++++++++++++++++++++++- properties/conftest.py | 21 ++++++++ properties/test_index_manipulation.py | 17 +++--- pyproject.toml | 1 + 4 files changed, 106 insertions(+), 8 deletions(-) diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 9aa3b17746f..41e9a36cbb7 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -35,14 +35,13 @@ jobs: runs-on: "ubuntu-latest" needs: detect-ci-trigger if: needs.detect-ci-trigger.outputs.triggered == 'false' + defaults: run: shell: bash -l {0} - env: CONDA_ENV_FILE: ci/requirements/environment.yml PYTHON_VERSION: "3.11" - steps: - uses: actions/checkout@v4 with: @@ -82,6 +81,78 @@ jobs: # [MHS, 01/25/2024] Skip datatree_ documentation remove after #8572 python -m pytest --doctest-modules xarray --ignore xarray/tests --ignore xarray/datatree_ -Werror + hypothesis: + name: Slow Hypothesis Tests + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: | + always() + && ( + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + || needs.detect-ci-trigger.outputs.triggered == 'true' + || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis') + ) + defaults: + run: + shell: bash -l {0} + + env: + CONDA_ENV_FILE: ci/requirements/environment.yml + PYTHON_VERSION: "3.12" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. 
+ + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + create-args: >- + python=${{env.PYTHON_VERSION}} + conda + cache-environment: true + cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + + - name: Install xarray + run: | + python -m pip install --no-deps -e . + - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Restore cached hypothesis directory + uses: actions/cache@v4 + with: + path: .hypothesis/ + key: cache-hypothesis + enableCrossOsArchive: true + save-always: true + - name: Run slow Hypothesis tests + if: success() + id: status + run: | + python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \ + --report-log output-${{ matrix.python-version }}-log.jsonl + - name: Generate and publish the report + if: | + failure() + && steps.status.outcome == 'failure' + && github.event_name == 'schedule' + && github.repository_owner == 'pydata' + uses: xarray-contrib/issue-from-pytest-log@v1 + with: + log-path: output-${{ matrix.python-version }}-log.jsonl + issue-title: "Nightly Hypothesis tests failed" + issue-label: "topic-hypothesis" + mypy: name: Mypy runs-on: "ubuntu-latest" diff --git a/properties/conftest.py b/properties/conftest.py index 0a66d92ebc6..30e638161a1 100644 --- a/properties/conftest.py +++ b/properties/conftest.py @@ -1,3 +1,24 @@ +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--run-slow-hypothesis", + action="store_true", + default=False, + help="run slow hypothesis tests", + ) + + +def pytest_collection_modifyitems(config, items): + if config.getoption("--run-slow-hypothesis"): + return + skip_slow_hyp = pytest.mark.skip(reason="need --run-slow-hypothesis option to run") + for item in items: + if "slow_hypothesis" in item.keywords: + item.add_marker(skip_slow_hyp) + + try: from hypothesis import settings except ImportError: diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 5a9fd54c041..2e20c2b490f 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -7,6 +7,7 @@ from xarray.testing import _assert_internal_invariants pytest.importorskip("hypothesis") +pytestmark = pytest.mark.slow_hypothesis import hypothesis.strategies as st from hypothesis import note, settings @@ -52,12 +53,7 @@ def __init__(self): self.multi_indexed_dims = [] # TODO: stacking with a timedelta64 index and unstacking converts it to object - @rule( - var=xrst.index_variables( - dims=DIM_NAME, - dtype=xrst.pandas_index_dtypes().filter(lambda x: x.kind != "m"), - ) - ) + @rule(var=xrst.index_variables(dims=DIM_NAME, dtype=xrst.pandas_index_dtypes())) def add_dim_coord(self, var): (name,) = var.dims note(f"adding dimension coordinate {name}") @@ -253,3 +249,12 @@ def test_unstack_object(): ds = xr.Dataset() ds["0"] = np.array(["", "\x000"], dtype=object) ds.stack({"1": ["0"]}).unstack() + + +@pytest.mark.skip(reason="failure detected by hypothesis") +def test_unstack_timedelta_index(): + import xarray as xr + + ds = xr.Dataset() + ds["0"] = np.array([0, 1, 2, 3], dtype="timedelta64[ns]") + ds.stack({"1": ["0"]}).unstack() diff --git a/pyproject.toml b/pyproject.toml index d2a5c6b8748..532dc40e859 100644 --- a/pyproject.toml +++ 
b/pyproject.toml @@ -295,6 +295,7 @@ markers = [ "flaky: flaky tests", "network: tests requiring a network connection", "slow: slow tests", + "slow_hypothesis: slow hypothesis tests", ] minversion = "7" python_files = "test_*.py" From b9433b6d6211e6bb66fddce714d76ed29a0399e7 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 18:06:57 -0600 Subject: [PATCH 64/69] Swtich out --- .github/workflows/ci-additional.yaml | 72 ------------------- .github/workflows/hypothesis.yaml | 100 +++++++++++++++++++++++++++ 2 files changed, 100 insertions(+), 72 deletions(-) create mode 100644 .github/workflows/hypothesis.yaml diff --git a/.github/workflows/ci-additional.yaml b/.github/workflows/ci-additional.yaml index 41e9a36cbb7..2be128b72b0 100644 --- a/.github/workflows/ci-additional.yaml +++ b/.github/workflows/ci-additional.yaml @@ -81,78 +81,6 @@ jobs: # [MHS, 01/25/2024] Skip datatree_ documentation remove after #8572 python -m pytest --doctest-modules xarray --ignore xarray/tests --ignore xarray/datatree_ -Werror - hypothesis: - name: Slow Hypothesis Tests - runs-on: "ubuntu-latest" - needs: detect-ci-trigger - if: | - always() - && ( - (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') - || needs.detect-ci-trigger.outputs.triggered == 'true' - || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis') - ) - defaults: - run: - shell: bash -l {0} - - env: - CONDA_ENV_FILE: ci/requirements/environment.yml - PYTHON_VERSION: "3.12" - - steps: - - uses: actions/checkout@v4 - with: - fetch-depth: 0 # Fetch all history for all branches and tags. - - - name: set environment variables - run: | - echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV - - - name: Setup micromamba - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: ${{env.CONDA_ENV_FILE}} - environment-name: xarray-tests - create-args: >- - python=${{env.PYTHON_VERSION}} - conda - cache-environment: true - cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" - - - name: Install xarray - run: | - python -m pip install --no-deps -e . 
- - name: Version info - run: | - conda info -a - conda list - python xarray/util/print_versions.py - - name: Restore cached hypothesis directory - uses: actions/cache@v4 - with: - path: .hypothesis/ - key: cache-hypothesis - enableCrossOsArchive: true - save-always: true - - name: Run slow Hypothesis tests - if: success() - id: status - run: | - python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \ - --report-log output-${{ matrix.python-version }}-log.jsonl - - name: Generate and publish the report - if: | - failure() - && steps.status.outcome == 'failure' - && github.event_name == 'schedule' - && github.repository_owner == 'pydata' - uses: xarray-contrib/issue-from-pytest-log@v1 - with: - log-path: output-${{ matrix.python-version }}-log.jsonl - issue-title: "Nightly Hypothesis tests failed" - issue-label: "topic-hypothesis" - mypy: name: Mypy runs-on: "ubuntu-latest" diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml new file mode 100644 index 00000000000..d53b6cca185 --- /dev/null +++ b/.github/workflows/hypothesis.yaml @@ -0,0 +1,100 @@ +name: Slow Hypothesis CI +on: + push: + branches: + - "main" + pull_request: + branches: + - "main" + types: [opened, reopened, synchronize, labeled] + workflow_dispatch: # allows you to trigger manually + +jobs: + detect-ci-trigger: + name: detect ci trigger + runs-on: ubuntu-latest + if: | + github.repository == 'pydata/xarray' + && (github.event_name == 'push' || github.event_name == 'pull_request') + outputs: + triggered: ${{ steps.detect-trigger.outputs.trigger-found }} + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 2 + - uses: xarray-contrib/ci-trigger@v1 + id: detect-trigger + with: + keyword: "[skip-ci]" + + hypothesis: + name: Slow Hypothesis Tests + runs-on: "ubuntu-latest" + needs: detect-ci-trigger + if: | + always() + && ( + (github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') + || needs.detect-ci-trigger.outputs.triggered == 'true' + || contains( github.event.pull_request.labels.*.name, 'run-slow-hypothesis') + ) + defaults: + run: + shell: bash -l {0} + + env: + CONDA_ENV_FILE: ci/requirements/environment.yml + PYTHON_VERSION: "3.12" + + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 # Fetch all history for all branches and tags. + + - name: set environment variables + run: | + echo "TODAY=$(date +'%Y-%m-%d')" >> $GITHUB_ENV + + - name: Setup micromamba + uses: mamba-org/setup-micromamba@v1 + with: + environment-file: ${{env.CONDA_ENV_FILE}} + environment-name: xarray-tests + create-args: >- + python=${{env.PYTHON_VERSION}} + conda + cache-environment: true + cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" + + - name: Install xarray + run: | + python -m pip install --no-deps -e . 
+ - name: Version info + run: | + conda info -a + conda list + python xarray/util/print_versions.py + - name: Restore cached hypothesis directory + uses: actions/cache@v4 + with: + path: .hypothesis/ + key: cache-hypothesis + enableCrossOsArchive: true + save-always: true + - name: Run slow Hypothesis tests + if: success() + id: status + run: | + python -m pytest --hypothesis-show-statistics --run-slow-hypothesis properties/*.py \ + --report-log output-${{ matrix.python-version }}-log.jsonl + - name: Generate and publish the report + if: | + failure() + && steps.status.outcome == 'failure' + && github.event_name == 'schedule' + && github.repository_owner == 'pydata' + uses: xarray-contrib/issue-from-pytest-log@v1 + with: + log-path: output-${{ matrix.python-version }}-log.jsonl + issue-title: "Nightly Hypothesis tests failed" + issue-label: "topic-hypothesis" From a216531c2f43ed8103ac9e73b3618f129e839b8a Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 18:15:09 -0600 Subject: [PATCH 65/69] fix --- .github/workflows/hypothesis.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/hypothesis.yaml b/.github/workflows/hypothesis.yaml index d53b6cca185..7e9459c6598 100644 --- a/.github/workflows/hypothesis.yaml +++ b/.github/workflows/hypothesis.yaml @@ -58,11 +58,11 @@ jobs: - name: Setup micromamba uses: mamba-org/setup-micromamba@v1 with: - environment-file: ${{env.CONDA_ENV_FILE}} + environment-file: ci/requirements/environment.yml environment-name: xarray-tests create-args: >- python=${{env.PYTHON_VERSION}} - conda + pytest-reportlog cache-environment: true cache-environment-key: "${{runner.os}}-${{runner.arch}}-py${{env.PYTHON_VERSION}}-${{env.TODAY}}-${{hashFiles(env.CONDA_ENV_FILE)}}" From d2af875cc7037267eb33261d93374597893a55f3 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 20:51:52 -0600 Subject: [PATCH 66/69] Use st.builds --- properties/test_index_manipulation.py | 17 ++++++++++-- xarray/testing/strategies.py | 40 ++++++++------------------- 2 files changed, 27 insertions(+), 30 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 2e20c2b490f..2bcd4163ca4 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -3,12 +3,14 @@ import numpy as np import pytest +import xarray as xr from xarray import Dataset from xarray.testing import _assert_internal_invariants pytest.importorskip("hypothesis") pytestmark = pytest.mark.slow_hypothesis +import hypothesis.extra.numpy as npst import hypothesis.strategies as st from hypothesis import note, settings from hypothesis.stateful import ( @@ -35,6 +37,17 @@ def unique(draw, strategy): # or stack to a dimension with a name that already exists in the Dataset. 
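# For a feel of how a st.builds(...)-based Variable strategy (like `index_variables`
# added just below) behaves outside the state machine, here is a small standalone
# @given test. Illustrative only, not part of this patch; it assumes xarray,
# hypothesis and numpy are installed, and `integer_index_like` is a made-up name.
import hypothesis.extra.numpy as npst
import hypothesis.strategies as st
from hypothesis import given

import xarray as xr
import xarray.testing.strategies as xrst

integer_index_like = st.builds(
    xr.Variable,
    dims=xrst.dimension_names(min_dims=1, max_dims=1),
    data=npst.arrays(
        dtype=npst.integer_dtypes(endianness="=", sizes=(64,)),
        shape=npst.array_shapes(min_dims=1, max_dims=1),
        elements=dict(allow_nan=False, allow_infinity=False),
        unique=True,
    ),
)


@given(var=integer_index_like)
def test_becomes_a_dimension_coordinate(var):
    (dim,) = var.dims
    ds = xr.Dataset().assign_coords({dim: var})
    # a 1-D coordinate named after its dimension gets a default (pandas-backed) index
    assert dim in ds.xindexes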
UNIQUE_NAME = unique(strategy=xrst.names()) DIM_NAME = xrst.dimension_names(name_strategy=UNIQUE_NAME, min_dims=1, max_dims=1) +index_variables = st.builds( + xr.Variable, + data=npst.arrays( + dtype=xrst.pandas_index_dtypes(), + shape=npst.array_shapes(min_dims=1, max_dims=1), + elements=dict(allow_nan=False, allow_infinity=False, allow_subnormal=False), + unique=True, + ), + dims=DIM_NAME, + attrs=xrst.attrs(), +) class DatasetStateMachine(RuleBasedStateMachine): @@ -53,7 +66,7 @@ def __init__(self): self.multi_indexed_dims = [] # TODO: stacking with a timedelta64 index and unstacking converts it to object - @rule(var=xrst.index_variables(dims=DIM_NAME, dtype=xrst.pandas_index_dtypes())) + @rule(var=index_variables) def add_dim_coord(self, var): (name,) = var.dims note(f"adding dimension coordinate {name}") @@ -64,7 +77,7 @@ def add_dim_coord(self, var): self.indexed_dims.append(name) - @rule(var=xrst.index_variables(dims=DIM_NAME)) + @rule(var=index_variables) def assign_coords(self, var): (name,) = var.dims note(f"assign_coords: {name}") diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 1cc216c4ad0..c97ed8b5dfa 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -9,7 +9,6 @@ ) from e import hypothesis.extra.numpy as npst -import hypothesis.extra.pandas as pdst import numpy as np from hypothesis.errors import InvalidArgument @@ -22,12 +21,12 @@ __all__ = [ "supported_dtypes", + "pandas_index_dtypes", "names", "dimension_names", "dimension_sizes", "attrs", "variables", - "index_variables", "unique_subset_of", ] @@ -61,19 +60,25 @@ def supported_dtypes() -> st.SearchStrategy[np.dtype]: | npst.unsigned_integer_dtypes() | npst.floating_dtypes() | npst.complex_number_dtypes() - | npst.datetime64_dtypes() - | npst.timedelta64_dtypes() - | npst.unicode_string_dtypes() + # | npst.datetime64_dtypes() + # | npst.timedelta64_dtypes() + # | npst.unicode_string_dtypes() ) def pandas_index_dtypes() -> st.SearchStrategy[np.dtype]: + """ + Dtypes supported by pandas indexes. + Restrict datetime64 and timedelta64 to ns frequency till Xarray relaxes that. 
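+
+    Integer, unsigned integer and float dtypes are limited to native-endian
+    32- and 64-bit sizes; datetime64 and timedelta64 are currently pinned to
+    nanosecond resolution.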
+ """ return ( npst.integer_dtypes(endianness="=", sizes=(32, 64)) | npst.unsigned_integer_dtypes(endianness="=", sizes=(32, 64)) | npst.floating_dtypes(endianness="=", sizes=(32, 64)) - | npst.datetime64_dtypes(endianness="=") - | npst.timedelta64_dtypes(endianness="=", max_period="D") + # TODO: unset max_period + | npst.datetime64_dtypes(endianness="=", max_period="ns") + # TODO: set max_period="D" + | npst.timedelta64_dtypes(endianness="=", max_period="ns") | npst.unicode_string_dtypes(endianness="=") ) @@ -378,27 +383,6 @@ def variables( return xr.Variable(dims=dim_names, data=_data, attrs=draw(attrs)) -@st.composite -def index_variables( - draw: st.DrawFn, - *, - dims: Union[ - st.SearchStrategy[Union[Sequence[Hashable], Mapping[Hashable, int]]], - None, - ] = None, - dtype: st.SearchStrategy[np.dtype] = pandas_index_dtypes(), - attrs: st.SearchStrategy[Mapping] = attrs(), -) -> xr.Variable: - elements = npst.from_dtype( - dtype=draw(dtype), allow_nan=False, allow_infinity=False, allow_subnormal=False - ) - index = draw(pdst.indexes(elements=elements, min_size=1)) - if dims is None: - dims = dimension_names(min_dims=1, max_dims=1) - _dims = draw(dims) - return xr.Variable(dims=_dims, data=index, attrs=draw(attrs)) - - @overload def unique_subset_of( objs: Sequence[Hashable], From 5aae9716b7fd194432f378fcbd0ea0fb772a533f Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 20:58:13 -0600 Subject: [PATCH 67/69] cleanup --- properties/test_index_manipulation.py | 14 -------------- xarray/testing/strategies.py | 6 +++++- 2 files changed, 5 insertions(+), 15 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index 2bcd4163ca4..c255a037a98 100644 --- a/properties/test_index_manipulation.py +++ b/properties/test_index_manipulation.py @@ -231,20 +231,6 @@ def swap_dims(self, data): del self.indexed_dims[self.indexed_dims.index(dim)] self.indexed_dims += [to] - # TODO: enable when we have serializable attrs only - # @rule() - # def roundtrip_zarr(self): - # note("> roundtrip to zarr") - # from xarray.tests.test_backends import create_tmp_file, ON_WINDOWS - # import xarray as xr - # # if not has_zarr: - # # return - # expected = self.dataset - # with create_tmp_file(allow_cleanup_failure=ON_WINDOWS) as path: - # self.dataset.to_zarr(path + ".zarr") - # with xr.open_dataset(path + ".zarr", engine="zarr") as ds: - # xr.testing.assert_identical(expected, ds) - @invariant() def assert_invariants(self): # note(f"> ===\n\n {self.dataset!r} \n===\n\n") diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index c97ed8b5dfa..6ad4ee08fdc 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -211,7 +211,11 @@ def attrs() -> st.SearchStrategy[Mapping[Hashable, Any]]: -------- :ref:`testing.hypothesis`_ """ - return st.dictionaries(_attr_keys, _attr_values) + return st.recursive( + st.dictionaries(_attr_keys, _attr_values), + lambda children: st.dictionaries(_attr_keys, children), + max_leaves=3, + ) @st.composite From 7d8b6fffa336b32197bfec1490ddc774f4a5f8bf Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Mon, 1 Apr 2024 21:08:33 -0600 Subject: [PATCH 68/69] Add initialize --- properties/test_index_manipulation.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/properties/test_index_manipulation.py b/properties/test_index_manipulation.py index c255a037a98..77b7fcbcd99 100644 --- a/properties/test_index_manipulation.py +++ 
b/properties/test_index_manipulation.py @@ -15,6 +15,7 @@ from hypothesis import note, settings from hypothesis.stateful import ( RuleBasedStateMachine, + initialize, invariant, precondition, rule, @@ -50,6 +51,14 @@ def unique(draw, strategy): ) +def add_dim_coord_and_data_var(ds, var): + (name,) = var.dims + # dim coord + ds[name] = var + # non-dim coord of same size; this allows renaming + ds[name + "_"] = var + + class DatasetStateMachine(RuleBasedStateMachine): # Can't use bundles because we'd need pre-conditions on consumes(bundle) # indexed_dims = Bundle("indexed_dims") @@ -65,15 +74,20 @@ def __init__(self): self.indexed_dims = [] self.multi_indexed_dims = [] + @initialize(var=index_variables) + def init_ds(self, var): + """Initialize the Dataset so that at least one rule will always fire.""" + (name,) = var.dims + add_dim_coord_and_data_var(self.dataset, var) + + self.indexed_dims.append(name) + # TODO: stacking with a timedelta64 index and unstacking converts it to object @rule(var=index_variables) def add_dim_coord(self, var): (name,) = var.dims note(f"adding dimension coordinate {name}") - # dim coord - self.dataset[name] = var - # non-dim coord of same size; this allows renaming - self.dataset[name + "_"] = var + add_dim_coord_and_data_var(self.dataset, var) self.indexed_dims.append(name) From 926bf54aaea070237e8f1df14e272fb7fd4871bb Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 3 Apr 2024 15:05:44 -0600 Subject: [PATCH 69/69] review feedback --- xarray/testing/strategies.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/testing/strategies.py b/xarray/testing/strategies.py index 6ad4ee08fdc..d2503dfd535 100644 --- a/xarray/testing/strategies.py +++ b/xarray/testing/strategies.py @@ -108,7 +108,7 @@ def names() -> st.SearchStrategy[str]: def dimension_names( *, - name_strategy=None, + name_strategy=names(), min_dims: int = 0, max_dims: int = 3, ) -> st.SearchStrategy[list[Hashable]]: @@ -127,10 +127,8 @@ def dimension_names( Maximum number of dimensions in generated list. """ - elements = names() if name_strategy is None else name_strategy - return st.lists( - elements=elements, + elements=name_strategy, min_size=min_dims, max_size=max_dims, unique=True,
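
The `@initialize` step added above guarantees the state machine never starts from an empty Dataset, so rules guarded by preconditions always have something to act on. A minimal, self-contained sketch of that pattern (the machine, its strategies, and all names below are illustrative, not part of xarray):

import hypothesis.strategies as st
from hypothesis.stateful import RuleBasedStateMachine, initialize, precondition, rule


class ListMachine(RuleBasedStateMachine):
    def __init__(self):
        super().__init__()
        self.items = []

    @initialize(x=st.integers())
    def seed(self, x):
        # Runs exactly once, before any rule, so the precondition on pop_item
        # can be satisfied from the very first step.
        self.items.append(x)

    @rule(x=st.integers())
    def add_item(self, x):
        self.items.append(x)

    @rule()
    @precondition(lambda self: len(self.items) >= 1)
    def pop_item(self):
        self.items.pop()


ListMachineTest = ListMachine.TestCase

In the Dataset machine this is what lets rules such as rename_vars and swap_dims keep their ">= 1" preconditions while ensuring, per the patch's docstring, that at least one rule can always fire.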