-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ddof vs correction kwargs in std/var #8573
base: main
Are you sure you want to change the base?
Changes from 3 commits
d65a4bf
b4059c2
d3aedd4
d1b8f72
1ef5f42
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -11,6 +11,7 @@ | |||||||||||||
import warnings | ||||||||||||||
from functools import partial | ||||||||||||||
from importlib import import_module | ||||||||||||||
from typing import Any | ||||||||||||||
|
||||||||||||||
import numpy as np | ||||||||||||||
import pandas as pd | ||||||||||||||
|
@@ -37,6 +38,7 @@ | |||||||||||||
from xarray.core.options import OPTIONS | ||||||||||||||
from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array | ||||||||||||||
from xarray.core.pycompat import array_type, is_duck_dask_array | ||||||||||||||
from xarray.core.types import T_DuckArray | ||||||||||||||
from xarray.core.utils import is_duck_array, module_available | ||||||||||||||
|
||||||||||||||
dask_available = module_available("dask") | ||||||||||||||
|
@@ -387,6 +389,9 @@ def f(values, axis=None, skipna=None, **kwargs): | |||||||||||||
if coerce_strings and values.dtype.kind in "SU": | ||||||||||||||
values = astype(values, object) | ||||||||||||||
|
||||||||||||||
if name in ["std", "var"]: | ||||||||||||||
kwargs = _handle_ddof_vs_correction_kwargs(kwargs, values) | ||||||||||||||
|
||||||||||||||
func = None | ||||||||||||||
if skipna or (skipna is None and values.dtype.kind in "cfO"): | ||||||||||||||
nanname = "nan" + name | ||||||||||||||
|
@@ -417,6 +422,42 @@ def f(values, axis=None, skipna=None, **kwargs): | |||||||||||||
return f | ||||||||||||||
|
||||||||||||||
|
||||||||||||||
def _handle_ddof_vs_correction_kwargs( | ||||||||||||||
kwargs: dict[str, Any], values: T_DuckArray | ||||||||||||||
) -> dict[str, Any]: | ||||||||||||||
# handle ddof vs correction kwargs, see GH8566 | ||||||||||||||
|
||||||||||||||
ddof = kwargs.pop("ddof", None) | ||||||||||||||
correction = kwargs.pop("correction", None) | ||||||||||||||
|
||||||||||||||
if ddof is None and correction is None: | ||||||||||||||
degrees_of_freedom_correction_val = 0 # default to 0, see GH8566 | ||||||||||||||
elif ddof is not None and correction is not None: | ||||||||||||||
if ddof != correction: | ||||||||||||||
raise ValueError( | ||||||||||||||
"ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. " | ||||||||||||||
f"Got ddof={ddof} but correction={correction}" | ||||||||||||||
) | ||||||||||||||
else: | ||||||||||||||
# both kwargs were passed, but they are the same value | ||||||||||||||
warnings.warn( | ||||||||||||||
"ddof and correction both refer to the same thing - you don't need to pass them both" | ||||||||||||||
) | ||||||||||||||
Comment on lines
+537
to
+539
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||
degrees_of_freedom_correction_val = ddof | ||||||||||||||
else: | ||||||||||||||
# only one of the two kwargs passed | ||||||||||||||
degrees_of_freedom_correction_val = ddof if ddof is not None else correction | ||||||||||||||
|
||||||||||||||
# assume that only array-API-compliant libraries will implement correction over ddof | ||||||||||||||
# reasonable since array-API people made this name change, see https://github.com/data-apis/array-api/issues/695 | ||||||||||||||
if hasattr(values, "__array_namespace__"): | ||||||||||||||
kwargs["correction"] = degrees_of_freedom_correction_val | ||||||||||||||
else: | ||||||||||||||
kwargs["ddof"] = degrees_of_freedom_correction_val | ||||||||||||||
|
||||||||||||||
return kwargs | ||||||||||||||
|
||||||||||||||
|
||||||||||||||
# Attributes `numeric_only`, `available_min_count` is used for docs. | ||||||||||||||
# See ops.inject_reduce_methods | ||||||||||||||
argmax = _create_nan_agg_method("argmax", coerce_strings=True) | ||||||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -571,7 +571,7 @@ def std( | |
dim: Dims = None, | ||
*, | ||
skipna: bool | None = None, | ||
ddof: int = 0, | ||
ddof: int | None = None, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I know that I'm supposed to regenerate these files instead of changing them directly, but I don't understand why when I change this directly There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I can't reproduce that. What code do you use to see that behavior? However, if you tried |
||
**kwargs: Any, | ||
) -> Self: | ||
""" | ||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -122,3 +122,45 @@ | |||||||
actual = xr.where(xp_arr, 1, 0) | ||||||||
assert isinstance(actual.data, Array) | ||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
|
||||||||
def test_statistics(arrays) -> None: | ||||||||
with xr.set_options(use_bottleneck=False): | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Haven't tried to make this work with bottleneck / numbagg yet, so turning off bottleneck temporarily for simplicity. |
||||||||
np_arr_nans, xp_arr_nans = arrays | ||||||||
|
||||||||
# TODO this is not really a test of the array API compatibility specifically, just of the ddof vs correction kwarg handling | ||||||||
expected = np_arr_nans.std(ddof=1) | ||||||||
actual = np_arr_nans.std(correction=1) | ||||||||
Check failure on line 133 in xarray/tests/test_array_api.py
|
||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
# test the default value | ||||||||
expected = np_arr_nans.std() | ||||||||
actual = np_arr_nans.std(correction=0) | ||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
# TODO have to remove the NaNs in the example data because array API standard can't handle them yet, see https://github.com/pydata/xarray/pull/7424#issuecomment-1373979208 | ||||||||
np_arr, xp_arr = np_arr_nans.fillna(0), xp_arr_nans.fillna(0) | ||||||||
|
||||||||
expected = np_arr.std() | ||||||||
actual = xp_arr.std(skipna=False) | ||||||||
assert isinstance(actual.data, Array) | ||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
expected = np_arr.std(ddof=1) | ||||||||
actual = xp_arr.std(skipna=False, ddof=1) | ||||||||
assert isinstance(actual.data, Array) | ||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
expected = np_arr.std(ddof=1) | ||||||||
actual = xp_arr.std(skipna=False, correction=1) | ||||||||
assert isinstance(actual.data, Array) | ||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
# TODO check this warns | ||||||||
expected = np_arr.std(ddof=1) | ||||||||
actual = xp_arr.std(skipna=False, ddof=1, correction=1) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
assert isinstance(actual.data, Array) | ||||||||
assert_equal(actual, expected) | ||||||||
|
||||||||
with pytest.raises(ValueError): | ||||||||
xp_arr.std(skipna=False, ddof=1, correction=0) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should we be using
emit_user_level_warning
?