Skip to content

Commit

Permalink
Enable numbagg in calculation of quantiles (#8684)
Browse files Browse the repository at this point in the history
* Use `numbagg.nanquantile` by default when `method=linear` and `skipna=True`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add `"None"` option to `compute_backend`

* skip tests when `compute_backend == "numbagg"`

* adjust regex pattern to include numbagg error message

* skip test if `compute_backend == "numbagg"` and `q == -0.1`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* test quantile method w/o numbagg backend

* change `compute_backend` param `"None"` to `None`

* add numbagg `minversion` requirement in `quantile` method

* align `test_quantile_out_of_bounds` with numbagg>=0.7.2

* avoid using numbagg on pint arrays; remove exclusion from tests

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* move numbagg nanquantiles logic to `nputils`-module

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* fix logic related to numbagg `nanquantiles`

* fix logic related to numbagg `nanquantiles`

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* add `whats-new` entry

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
maawoo and pre-commit-ci[bot] authored Feb 7, 2024
1 parent 0f7a034 commit 0eb6658
Show file tree
Hide file tree
Showing 8 changed files with 41 additions and 14 deletions.
5 changes: 4 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,13 @@ New Features
By `Mathias Hauser <https://github.com/mathause>`_.
- Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to`
(:pull:`8380`) By `Anderson Banihirwe <https://github.com/andersy005>`_.

- Xarray now defers to flox's `heuristics <https://flox.readthedocs.io/en/latest/implementation.html#heuristics>`_
to set the default ``method`` for groupby problems. This only applies to ``flox>=0.9``.
By `Deepak Cherian <https://github.com/dcherian>`_.
- All ``quantile`` methods (e.g. :py:meth:`DataArray.quantile`) now use ``numbagg``
for the calculation of nanquantiles (i.e., ``skipna=True``) if it is installed
(requires ``numbagg>=0.8.0``).
This is currently limited to the linear interpolation method (``method='linear'``).
(:issue:`7377`, :pull:`8684`) By `Marco Wolsza <https://github.com/maawoo>`_.

Breaking changes
~~~~~~~~~~~~~~~~
Expand Down
12 changes: 12 additions & 0 deletions xarray/core/nputils.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,14 @@ def f(values, axis=None, **kwargs):
and values.dtype.kind in "uifc"
# and values.dtype.isnative
and (dtype is None or np.dtype(dtype) == values.dtype)
# numbagg.nanquantile is only available from 0.8.0 onwards and only supports the linear method
and (
name != "nanquantile"
or (
pycompat.mod_version("numbagg") >= Version("0.8.0")
and kwargs.get("method", "linear") == "linear"
)
)
):
import numbagg

Expand All @@ -206,6 +214,9 @@ def f(values, axis=None, **kwargs):
# to ddof=1 above.
if pycompat.mod_version("numbagg") < Version("0.7.0"):
kwargs.pop("ddof", None)
if name == "nanquantile":
kwargs["quantiles"] = kwargs.pop("q")
kwargs.pop("method", None)
return nba_func(values, axis=axis, **kwargs)
if (
_BOTTLENECK_AVAILABLE
Expand Down Expand Up @@ -285,3 +296,4 @@ def least_squares(lhs, rhs, rcond=None, skipna=False):
nancumprod = _create_method("nancumprod")
nanargmin = _create_method("nanargmin")
nanargmax = _create_method("nanargmax")
nanquantile = _create_method("nanquantile")
2 changes: 1 addition & 1 deletion xarray/core/variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1992,7 +1992,7 @@ def quantile(
method = interpolation

if skipna or (skipna is None and self.dtype.kind in "cfO"):
_quantile_func = np.nanquantile
_quantile_func = nputils.nanquantile
else:
_quantile_func = np.quantile

Expand Down
6 changes: 4 additions & 2 deletions xarray/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ def backend(request):
return request.param


@pytest.fixture(params=["numbagg", "bottleneck"])
@pytest.fixture(params=["numbagg", "bottleneck", None])
def compute_backend(request):
if request.param == "bottleneck":
if request.param is None:
options = dict(use_bottleneck=False, use_numbagg=False)
elif request.param == "bottleneck":
options = dict(use_bottleneck=True, use_numbagg=False)
elif request.param == "numbagg":
options = dict(use_bottleneck=False, use_numbagg=True)
Expand Down
3 changes: 2 additions & 1 deletion xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -2888,12 +2888,13 @@ def test_reduce_out(self) -> None:
with pytest.raises(TypeError):
orig.mean(out=np.ones(orig.shape))

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
@pytest.mark.parametrize(
"axis, dim", zip([None, 0, [0], [0, 1]], [None, "x", ["x"], ["x", "y"]])
)
def test_quantile(self, q, axis, dim, skipna) -> None:
def test_quantile(self, q, axis, dim, skipna, compute_backend) -> None:
va = self.va.copy(deep=True)
va[0, 0] = np.nan

Expand Down
6 changes: 4 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -5612,9 +5612,10 @@ def test_reduce_keepdims(self) -> None:
)
assert_identical(expected, actual)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("skipna", [True, False, None])
@pytest.mark.parametrize("q", [0.25, [0.50], [0.25, 0.75]])
def test_quantile(self, q, skipna) -> None:
def test_quantile(self, q, skipna, compute_backend) -> None:
ds = create_test_data(seed=123)
ds.var1.data[0, 0] = np.nan

Expand All @@ -5635,8 +5636,9 @@ def test_quantile(self, q, skipna) -> None:
assert "dim3" in ds_quantile.dims
assert all(d not in ds_quantile.dims for d in dim)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("skipna", [True, False])
def test_quantile_skipna(self, skipna) -> None:
def test_quantile_skipna(self, skipna, compute_backend) -> None:
q = 0.1
dim = "time"
ds = Dataset({"a": ([dim], np.arange(0, 11))})
Expand Down
15 changes: 10 additions & 5 deletions xarray/tests/test_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -2014,6 +2014,7 @@ def test_squeeze(self, dim, dtype):
assert_units_equal(expected, actual)
assert_identical(expected, actual)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize(
"func",
(
Expand All @@ -2035,7 +2036,7 @@ def test_squeeze(self, dim, dtype):
),
ids=repr,
)
def test_computation(self, func, dtype):
def test_computation(self, func, dtype, compute_backend):
base_unit = unit_registry.m
array = np.linspace(0, 5, 5 * 10).reshape(5, 10).astype(dtype) * base_unit
variable = xr.Variable(("x", "y"), array)
Expand Down Expand Up @@ -3767,6 +3768,7 @@ def test_differentiate_integrate(self, func, variant, dtype):
assert_units_equal(expected, actual)
assert_identical(expected, actual)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize(
"variant",
(
Expand All @@ -3787,7 +3789,7 @@ def test_differentiate_integrate(self, func, variant, dtype):
),
ids=repr,
)
def test_computation(self, func, variant, dtype):
def test_computation(self, func, variant, dtype, compute_backend):
unit = unit_registry.m

variants = {
Expand Down Expand Up @@ -3893,6 +3895,7 @@ def test_resample(self, dtype):
assert_units_equal(expected, actual)
assert_identical(expected, actual)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize(
"variant",
(
Expand All @@ -3913,7 +3916,7 @@ def test_resample(self, dtype):
),
ids=repr,
)
def test_grouped_operations(self, func, variant, dtype):
def test_grouped_operations(self, func, variant, dtype, compute_backend):
unit = unit_registry.m

variants = {
Expand Down Expand Up @@ -5250,6 +5253,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
assert_units_equal(expected, actual)
assert_equal(expected, actual)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize(
"func",
(
Expand All @@ -5272,7 +5276,7 @@ def test_interp_reindex_like_indexing(self, func, unit, error, dtype):
"coords",
),
)
def test_computation(self, func, variant, dtype):
def test_computation(self, func, variant, dtype, compute_backend):
variants = {
"data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
"dims": ((1, 1), unit_registry.m, 1),
Expand Down Expand Up @@ -5404,6 +5408,7 @@ def test_resample(self, variant, dtype):
assert_units_equal(expected, actual)
assert_equal(expected, actual)

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize(
"func",
(
Expand All @@ -5425,7 +5430,7 @@ def test_resample(self, variant, dtype):
"coords",
),
)
def test_grouped_operations(self, func, variant, dtype):
def test_grouped_operations(self, func, variant, dtype, compute_backend):
variants = {
"data": ((unit_registry.degK, unit_registry.Pa), 1, 1),
"dims": ((1, 1), unit_registry.m, 1),
Expand Down
6 changes: 4 additions & 2 deletions xarray/tests/test_variable.py
Original file line number Diff line number Diff line change
Expand Up @@ -1842,13 +1842,15 @@ def test_quantile_chunked_dim_error(self):
with pytest.raises(ValueError, match=r"consists of multiple chunks"):
v.quantile(0.5, dim="x")

@pytest.mark.parametrize("compute_backend", ["numbagg", None], indirect=True)
@pytest.mark.parametrize("q", [-0.1, 1.1, [2], [0.25, 2]])
def test_quantile_out_of_bounds(self, q):
def test_quantile_out_of_bounds(self, q, compute_backend):
v = Variable(["x", "y"], self.d)

# escape special characters
with pytest.raises(
ValueError, match=r"Quantiles must be in the range \[0, 1\]"
ValueError,
match=r"(Q|q)uantiles must be in the range \[0, 1\]",
):
v.quantile(q, dim="x")

Expand Down

0 comments on commit 0eb6658

Please sign in to comment.