diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 1b0f2f18efb..5c957dcb882 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -28,10 +28,13 @@ New Features By `Mathias Hauser `_. - Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. - - Xarray now defers to flox's `heuristics `_ to set default `method` for groupby problems. This only applies to ``flox>=0.9``. By `Deepak Cherian `_. +- All ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) now use ``numbagg`` + for the calculation of nanquantiles (i.e., ``skipna=True``) if it is installed. + This is currently limited to the linear interpolation method (``method='linear'``). + (:issue:`7377`, :pull:`8684`) By `Marco Wolsza `_. Breaking changes ~~~~~~~~~~~~~~~~ diff --git a/xarray/core/nputils.py b/xarray/core/nputils.py index 0151d715f6f..84642d09f18 100644 --- a/xarray/core/nputils.py +++ b/xarray/core/nputils.py @@ -195,6 +195,14 @@ def f(values, axis=None, **kwargs): and values.dtype.kind in "uifc" # and values.dtype.isnative and (dtype is None or np.dtype(dtype) == values.dtype) + # numbagg.nanquantile only available after 0.8.0 and with linear method + and ( + name != "nanquantile" + or ( + pycompat.mod_version("numbagg") >= Version("0.8.0") + and kwargs.get("method", "linear") == "linear" + ) + ) ): import numbagg @@ -206,6 +214,9 @@ def f(values, axis=None, **kwargs): # to ddof=1 above. 
if pycompat.mod_version("numbagg") < Version("0.7.0"): kwargs.pop("ddof", None) + if name == "nanquantile": + kwargs["quantiles"] = kwargs.pop("q") + kwargs.pop("method", None) return nba_func(values, axis=axis, **kwargs) if ( _BOTTLENECK_AVAILABLE @@ -285,3 +296,4 @@ def least_squares(lhs, rhs, rcond=None, skipna=False): nancumprod = _create_method("nancumprod") nanargmin = _create_method("nanargmin") nanargmax = _create_method("nanargmax") +nanquantile = _create_method("nanquantile") diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a39981bb8fc..6b07fcd44a4 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1992,7 +1992,7 @@ def quantile( method = interpolation if skipna or (skipna is None and self.dtype.kind in "cfO"): - _quantile_func = np.nanquantile + _quantile_func = nputils.nanquantile else: _quantile_func = np.quantile diff --git a/xarray/tests/conftest.py b/xarray/tests/conftest.py index 9c10bd6d18a..7c30759e499 100644 --- a/xarray/tests/conftest.py +++ b/xarray/tests/conftest.py @@ -14,9 +14,11 @@ def backend(request): return request.param -@pytest.fixture(params=["numbagg", "bottleneck"]) +@pytest.fixture(params=["numbagg", "bottleneck", None]) def compute_backend(request): - if request.param == "bottleneck": + if request.param is None: + options = dict(use_bottleneck=False, use_numbagg=False) + elif request.param == "bottleneck": options = dict(use_bottleneck=True, use_numbagg=False) elif request.param == "numbagg": options = dict(use_bottleneck=False, use_numbagg=True) diff --git a/xarray/tests/test_dataarray.py b/xarray/tests/test_dataarray.py index 4b279745d16..38d57c393c2 100644 --- a/xarray/tests/test_dataarray.py +++ b/xarray/tests/test_dataarray.py @@ -2888,12 +2888,13 @@ def test_reduce_out(self) -> None: with pytest.raises(TypeError): orig.mean(out=np.ones(orig.shape)) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize("skipna", [True, False, None]) 
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) @pytest.mark.parametrize( "axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]]) ) - def test_quantile(self, q, axis, dim, skipna) -> None: + def test_quantile(self, q, axis, dim, skipna, compute_backend) -> None: va = self.va.copy(deep=True) va[0, 0] = np.nan diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index cc6d583cdf6..d370d523757 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -5612,9 +5612,10 @@ def test_reduce_keepdims(self) -> None: ) assert_identical(expected, actual) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize("skipna", [True, False, None]) @pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]]) - def test_quantile(self, q, skipna) -> None: + def test_quantile(self, q, skipna, compute_backend) -> None: ds = create_test_data(seed=123) ds.var1.data[0, 0] = np.nan @@ -5635,8 +5636,9 @@ def test_quantile(self, q, skipna) -> None: assert "dim3" in ds_quantile.dims assert all(d not in ds_quantile.dims for d in dim) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize("skipna", [True, False]) - def test_quantile_skipna(self, skipna) -> None: + def test_quantile_skipna(self, skipna, compute_backend) -> None: q = 0.1 dim = "time" ds = Dataset({"a": ([dim], np.arange(0, 11))}) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 0bd454866c3..f2a036f02b7 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2014,6 +2014,7 @@ def test_squeeze(self, dim, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "func", ( @@ -2035,7 +2036,7 @@ def test_squeeze(self, dim, dtype): ), ids=repr, ) - def test_computation(self, func, dtype): + def 
test_computation(self, func, dtype, compute_backend): base_unit = unit_registry.m array = np.linspace(0, 5, 5 * 10).reshape(5, 10).astype(dtype) * base_unit variable = xr.Variable(("x", "y"), array) @@ -3767,6 +3768,7 @@ def test_differentiate_integrate(self, func, variant, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "variant", ( @@ -3787,7 +3789,7 @@ def test_differentiate_integrate(self, func, variant, dtype): ), ids=repr, ) - def test_computation(self, func, variant, dtype): + def test_computation(self, func, variant, dtype, compute_backend): unit = unit_registry.m variants = { @@ -3893,6 +3895,7 @@ def test_resample(self, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "variant", ( @@ -3913,7 +3916,7 @@ def test_resample(self, dtype): ), ids=repr, ) - def test_grouped_operations(self, func, variant, dtype): + def test_grouped_operations(self, func, variant, dtype, compute_backend): unit = unit_registry.m variants = { @@ -5250,6 +5253,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype): assert_units_equal(expected, actual) assert_equal(expected, actual) + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "func", ( @@ -5272,7 +5276,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype): "coords", ), ) - def test_computation(self, func, variant, dtype): + def test_computation(self, func, variant, dtype, compute_backend): variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), @@ -5404,6 +5408,7 @@ def test_resample(self, variant, dtype): assert_units_equal(expected, actual) assert_equal(expected, actual) + 
@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize( "func", ( @@ -5425,7 +5430,7 @@ def test_resample(self, variant, dtype): "coords", ), ) - def test_grouped_operations(self, func, variant, dtype): + def test_grouped_operations(self, func, variant, dtype, compute_backend): variants = { "data": ((unit_registry.degK, unit_registry.Pa), 1, 1), "dims": ((1, 1), unit_registry.m, 1), diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index fb083586415..2ce76c68103 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1842,13 +1842,15 @@ def test_quantile_chunked_dim_error(self): with pytest.raises(ValueError, match=r"consists of multiple chunks"): v.quantile(0.5, dim="x") + @pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True) @pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]]) - def test_quantile_out_of_bounds(self, q): + def test_quantile_out_of_bounds(self, q, compute_backend): v = Variable(["x", "y"], self.d) # escape special characters with pytest.raises( - ValueError, match=r"Quantiles must be in the range \[0, 1\]" + ValueError, + match=r"(Q|q)uantiles must be in the range \[0, 1\]", ): v.quantile(q, dim="x")