Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ddof vs correction kwargs in std/var #8573

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion xarray/core/_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -1961,7 +1961,7 @@ def var(
dim: Dims = None,
*,
skipna: bool | None = None,
ddof: int = 0,
ddof: int | None = None,
keep_attrs: bool | None = None,
**kwargs: Any,
) -> Self:
Expand Down
41 changes: 41 additions & 0 deletions xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import warnings
from functools import partial
from importlib import import_module
from typing import Any

import numpy as np
import pandas as pd
Expand All @@ -37,6 +38,7 @@
from xarray.core.options import OPTIONS
from xarray.core.parallelcompat import get_chunked_array_type, is_chunked_array
from xarray.core.pycompat import array_type, is_duck_dask_array
from xarray.core.types import T_DuckArray
from xarray.core.utils import is_duck_array, module_available

dask_available = module_available("dask")
Expand Down Expand Up @@ -387,6 +389,9 @@ def f(values, axis=None, skipna=None, **kwargs):
if coerce_strings and values.dtype.kind in "SU":
values = astype(values, object)

if name in ["std", "var"]:
kwargs = _handle_ddof_vs_correction_kwargs(kwargs, values)

func = None
if skipna or (skipna is None and values.dtype.kind in "cfO"):
nanname = "nan" + name
Expand Down Expand Up @@ -417,6 +422,42 @@ def f(values, axis=None, skipna=None, **kwargs):
return f


def _handle_ddof_vs_correction_kwargs(
kwargs: dict[str, Any], values: T_DuckArray
) -> dict[str, Any]:
# handle ddof vs correction kwargs, see GH8566

ddof = kwargs.pop("ddof", None)
correction = kwargs.pop("correction", None)

if ddof is None and correction is None:
degrees_of_freedom_correction_val = 0 # default to 0, see GH8566
elif ddof is not None and correction is not None:
if ddof != correction:
raise ValueError(
"ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. "
f"Got ddof={ddof} but correction={correction}"
)
else:
# both kwargs were passed, but they are the same value
warnings.warn(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we be using emit_user_level_warning?

"ddof and correction both refer to the same thing - you don't need to pass them both"
)
Comment on lines +537 to +539
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
warnings.warn(
"ddof and correction both refer to the same thing - you don't need to pass them both"
)
emit_user_level_warning(
"ddof and correction both refer to the same thing - you don't need to pass them both"
)

degrees_of_freedom_correction_val = ddof
else:
# only one of the two kwargs passed
degrees_of_freedom_correction_val = ddof if ddof is not None else correction

# assume that only array-API-compliant libraries will implement correction over ddof
# reasonable since array-API people made this name change, see https://github.com/data-apis/array-api/issues/695
if hasattr(values, "__array_namespace__"):
kwargs["correction"] = degrees_of_freedom_correction_val
else:
kwargs["ddof"] = degrees_of_freedom_correction_val

return kwargs


# Attributes `numeric_only`, `available_min_count` is used for docs.
# See ops.inject_reduce_methods
argmax = _create_nan_agg_method("argmax", coerce_strings=True)
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def nanvar(a, axis=None, dtype=None, out=None, ddof=0):
return nputils.nanvar(a, axis=axis, dtype=dtype, ddof=ddof)


def nanstd(a, axis=None, dtype=None, out=None, ddof=0):
def nanstd(a, axis=None, dtype=None, out=None, ddof=None):
return nputils.nanstd(a, axis=axis, dtype=dtype, ddof=ddof)


Expand Down
2 changes: 1 addition & 1 deletion xarray/namedarray/_aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ def std(
dim: Dims = None,
*,
skipna: bool | None = None,
ddof: int = 0,
ddof: int | None = None,
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know that I'm supposed to regenerate these files instead of changing them directly, but I don't understand why when I change this directly ddof=0 still gets passed down explicitly.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can't reproduce that. What code do you use to see that behavior? However, if you tried Dataset.std, then the reason is that you didn't update the defaults on DatasetAggregations, yet.

**kwargs: Any,
) -> Self:
"""
Expand Down
42 changes: 42 additions & 0 deletions xarray/tests/test_array_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,3 +122,45 @@
actual = xr.where(xp_arr, 1, 0)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)


def test_statistics(arrays) -> None:
with xr.set_options(use_bottleneck=False):
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Haven't tried to make this work with bottleneck / numbagg yet, so turning off bottleneck temporarily for simplicity.

np_arr_nans, xp_arr_nans = arrays

# TODO this is not really a test of the array API compatibility specifically, just of the ddof vs correction kwarg handling
expected = np_arr_nans.std(ddof=1)
actual = np_arr_nans.std(correction=1)

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9 bare-minimum

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.11

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 all-but-dask

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9 min-all-deps

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 flaky

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.11

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1

Check failure on line 133 in xarray/tests/test_array_api.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.9

test_statistics ValueError: ddof and correction both refer to the same thing, and so cannot be meaningfully set to different values. Got ddof=0 but correction=1
assert_equal(actual, expected)

# test the default value
expected = np_arr_nans.std()
actual = np_arr_nans.std(correction=0)
assert_equal(actual, expected)

# TODO have to remove the NaNs in the example data because array API standard can't handle them yet, see https://github.com/pydata/xarray/pull/7424#issuecomment-1373979208
np_arr, xp_arr = np_arr_nans.fillna(0), xp_arr_nans.fillna(0)

expected = np_arr.std()
actual = xp_arr.std(skipna=False)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)

expected = np_arr.std(ddof=1)
actual = xp_arr.std(skipna=False, ddof=1)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)

expected = np_arr.std(ddof=1)
actual = xp_arr.std(skipna=False, correction=1)
assert isinstance(actual.data, Array)
assert_equal(actual, expected)

# TODO check this warns
expected = np_arr.std(ddof=1)
actual = xp_arr.std(skipna=False, ddof=1, correction=1)
Copy link
Collaborator

@keewis keewis Dec 30, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
actual = xp_arr.std(skipna=False, ddof=1, correction=1)
with pytest.warns(UserWarning, match="ddof and correction both refer to the same thing"):
actual = xp_arr.std(skipna=False, ddof=1, correction=1)

assert isinstance(actual.data, Array)
assert_equal(actual, expected)

with pytest.raises(ValueError):
xp_arr.std(skipna=False, ddof=1, correction=0)
Loading