
Commit

Merge branch 'main' into map-blocks-indexes-fix

dcherian authored Jan 3, 2024
2 parents 7dd26bb + 41d33f5 commit 3989c08
Showing 10 changed files with 163 additions and 34 deletions.
9 changes: 4 additions & 5 deletions .github/workflows/ci-additional.yaml
@@ -76,11 +76,10 @@ jobs:
          # Raise an error if there are warnings in the doctests, with `-Werror`.
          # This is a trial; if it presents a problem, feel free to remove.
          # See https://github.com/pydata/xarray/issues/7164 for more info.
-         # ignores:
-         # 1. h5py: see https://github.com/pydata/xarray/issues/8537
-         python -m pytest --doctest-modules xarray --ignore xarray/tests -Werror \
-           -W "ignore:h5py is running against HDF5 1.14.3:UserWarning"
+         #
+         # If dependencies emit warnings we can't do anything about, add ignores to
+         # `xarray/tests/__init__.py`.
+         python -m pytest --doctest-modules xarray --ignore xarray/tests -Werror
  mypy:
    name: Mypy
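The inline `-W` flag removed above and the module-level filter added to `xarray/tests/__init__.py` later in this diff are equivalent; a minimal sketch of that mapping, with the filter message taken from this commit:

import warnings

# Equivalent to the removed pytest flag:
#   -W "ignore:h5py is running against HDF5 1.14.3:UserWarning"
warnings.filterwarnings(
    "ignore",
    message="h5py is running against HDF5 1.14.3",
    category=UserWarning,
)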
10 changes: 5 additions & 5 deletions .pre-commit-config.yaml
@@ -18,24 +18,24 @@ repos:
        files: ^xarray/
  - repo: https://github.com/astral-sh/ruff-pre-commit
    # Ruff version.
-   rev: 'v0.1.6'
+   rev: 'v0.1.9'
    hooks:
      - id: ruff
        args: ["--fix"]
  # https://github.com/python/black#version-control-integration
- - repo: https://github.com/psf/black
-   rev: 23.11.0
+ - repo: https://github.com/psf/black-pre-commit-mirror
+   rev: 23.12.1
    hooks:
      - id: black-jupyter
  - repo: https://github.com/keewis/blackdoc
    rev: v0.3.9
    hooks:
      - id: blackdoc
        exclude: "generate_aggregations.py"
-       additional_dependencies: ["black==23.11.0"]
+       additional_dependencies: ["black==23.12.1"]
      - id: blackdoc-autoupdate-black
  - repo: https://github.com/pre-commit/mirrors-mypy
-   rev: v1.7.1
+   rev: v1.8.0
    hooks:
      - id: mypy
        # Copied from setup.cfg
7 changes: 7 additions & 0 deletions doc/whats-new.rst
@@ -26,6 +26,10 @@ New Features

  - :py:meth:`xr.cov` and :py:meth:`xr.corr` now support using weights (:issue:`8527`, :pull:`7392`).
    By `Llorenç Lledó <https://github.com/lluritu>`_.
+ - Accept the compression arguments new in netCDF 1.6.0 in the netCDF4 backend.
+   See `netCDF4 documentation <https://unidata.github.io/netcdf4-python/#efficient-compression-of-netcdf-variables>`_ for details.
+   Note that some of the new compression filters need plugins to be installed, which may not be available in all netCDF distributions.
+   By `Markel García-Díez <https://github.com/markelg>`_. (:issue:`6929`, :pull:`7551`)

  Breaking changes
  ~~~~~~~~~~~~~~~~
@@ -38,6 +42,9 @@ Deprecations
  Bug fixes
  ~~~~~~~~~

+ - Reverse index output of bottleneck's rolling move_argmax/move_argmin functions (:issue:`8541`, :pull:`8552`).
+   By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.


  Documentation
  ~~~~~~~~~~~~~
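A usage sketch for the compression feature noted above, assuming a netCDF4-python build at or above 1.6.2 (the version the new test gate below requires) whose HDF5 stack ships the zstd filter plugin; the file name and variable are illustrative:

import numpy as np
import xarray as xr

ds = xr.Dataset({"var": (("x", "y"), np.random.rand(100, 100))})
# The new netCDF4 compression arguments pass through per-variable encoding.
ds.to_netcdf(
    "compressed.nc",
    engine="netcdf4",
    encoding={"var": {"compression": "zstd", "chunksizes": (50, 50)}},
)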
25 changes: 17 additions & 8 deletions xarray/backends/netCDF4_.py
@@ -257,6 +257,12 @@ def _extract_nc4_variable_encoding(
          "_FillValue",
          "dtype",
          "compression",
+         "significant_digits",
+         "quantize_mode",
+         "blosc_shuffle",
+         "szip_coding",
+         "szip_pixels_per_block",
+         "endian",
      }
      if lsd_okay:
          valid_encodings.add("least_significant_digit")
@@ -497,20 +503,23 @@ def prepare_variable(
          if name in self.ds.variables:
              nc4_var = self.ds.variables[name]
          else:
-             nc4_var = self.ds.createVariable(
+             default_args = dict(
                  varname=name,
                  datatype=datatype,
                  dimensions=variable.dims,
-                 zlib=encoding.get("zlib", False),
-                 complevel=encoding.get("complevel", 4),
-                 shuffle=encoding.get("shuffle", True),
-                 fletcher32=encoding.get("fletcher32", False),
-                 contiguous=encoding.get("contiguous", False),
-                 chunksizes=encoding.get("chunksizes"),
+                 zlib=False,
+                 complevel=4,
+                 shuffle=True,
+                 fletcher32=False,
+                 contiguous=False,
+                 chunksizes=None,
                  endian="native",
-                 least_significant_digit=encoding.get("least_significant_digit"),
+                 least_significant_digit=None,
                  fill_value=fill_value,
              )
+             default_args.update(encoding)
+             default_args.pop("_FillValue", None)
+             nc4_var = self.ds.createVariable(**default_args)

          nc4_var.setncatts(attrs)
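The refactor above replaces per-keyword encoding.get(...) lookups with a defaults-then-override pattern, so any valid encoding key (including the new compression arguments) is forwarded to createVariable without further backend changes. A minimal sketch of the pattern in isolation; the helper name is illustrative, not part of the commit:

def merged_createvariable_args(encoding: dict) -> dict:
    # Defaults mirror netCDF4's createVariable defaults.
    default_args = dict(zlib=False, complevel=4, shuffle=True, chunksizes=None)
    # Any user-supplied encoding key overrides its default wholesale.
    default_args.update(encoding)
    return default_args

assert merged_createvariable_args({"zlib": True})["complevel"] == 4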
2 changes: 1 addition & 1 deletion xarray/core/dataarray.py
@@ -84,7 +84,7 @@
  try:
      from dask.delayed import Delayed
  except ImportError:
-     Delayed = None  # type: ignore
+     Delayed = None  # type: ignore[misc,assignment]
  try:
      from iris.cube import Cube as iris_Cube
  except ImportError:
2 changes: 1 addition & 1 deletion xarray/core/dataset.py
@@ -167,7 +167,7 @@
  try:
      from dask.delayed import Delayed
  except ImportError:
-     Delayed = None  # type: ignore
+     Delayed = None  # type: ignore[misc,assignment]
  try:
      from dask.dataframe import DataFrame as DaskDataFrame
  except ImportError:
5 changes: 5 additions & 0 deletions xarray/core/rolling.py
@@ -596,6 +596,11 @@ def _bottleneck_reduce(self, func, keep_attrs, **kwargs):
          values = func(
              padded.data, window=self.window[0], min_count=min_count, axis=axis
          )
+         # bottleneck's move_argmin/move_argmax count index 0 from the
+         # rightmost edge of the window, so reverse the index here
+         # (see GH #8541)
+         if func in [bottleneck.move_argmin, bottleneck.move_argmax]:
+             values = self.window[0] - 1 - values

          if self.center[0]:
              values = values[valid]
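A short demonstration of the indexing convention being corrected, assuming bottleneck is installed; the values follow from bottleneck's documented right-edge indexing:

import bottleneck as bn
import numpy as np

a = np.array([1.0, 3.0, 2.0, 0.0])
window = 2
raw = bn.move_argmax(a, window=window)
# raw == [nan, 0., 1., 1.]: offsets counted from each window's right edge
fixed = window - 1 - raw
# fixed == [nan, 1., 0., 0.]: offsets counted from the window's left edge,
# matching np.argmax applied to each window's contents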
52 changes: 42 additions & 10 deletions xarray/tests/__init__.py
@@ -2,6 +2,7 @@

  import importlib
  import platform
+ import string
  import warnings
  from contextlib import contextmanager, nullcontext
  from unittest import mock  # noqa: F401
@@ -70,10 +71,17 @@ def _importorskip(
      message="'cgi' is deprecated and slated for removal in Python 3.13",
      category=DeprecationWarning,
  )

  has_pydap, requires_pydap = _importorskip("pydap.client")
  has_netCDF4, requires_netCDF4 = _importorskip("netCDF4")
- has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf")
+ with warnings.catch_warnings():
+     # see https://github.com/pydata/xarray/issues/8537
+     warnings.filterwarnings(
+         "ignore",
+         message="h5py is running against HDF5 1.14.3",
+         category=UserWarning,
+     )
+
+     has_h5netcdf, requires_h5netcdf = _importorskip("h5netcdf")
  has_pynio, requires_pynio = _importorskip("Nio")
  has_cftime, requires_cftime = _importorskip("cftime")
  has_dask, requires_dask = _importorskip("dask")
@@ -83,7 +91,14 @@ def _importorskip(
  has_fsspec, requires_fsspec = _importorskip("fsspec")
  has_iris, requires_iris = _importorskip("iris")
  has_numbagg, requires_numbagg = _importorskip("numbagg", "0.4.0")
- has_seaborn, requires_seaborn = _importorskip("seaborn")
+ with warnings.catch_warnings():
+     warnings.filterwarnings(
+         "ignore",
+         message="is_categorical_dtype is deprecated and will be removed in a future version.",
+         category=DeprecationWarning,
+     )
+     # seaborn uses the deprecated `pandas.is_categorical_dtype`
+     has_seaborn, requires_seaborn = _importorskip("seaborn")
  has_sparse, requires_sparse = _importorskip("sparse")
  has_cupy, requires_cupy = _importorskip("cupy")
  has_cartopy, requires_cartopy = _importorskip("cartopy")
@@ -112,6 +127,10 @@ def _importorskip(
      not has_h5netcdf_ros3[0], reason="requires h5netcdf 1.3.0"
  )

+ has_netCDF4_1_6_2_or_above, requires_netCDF4_1_6_2_or_above = _importorskip(
+     "netCDF4", "1.6.2"
+ )
+
  # change some global options for tests
  set_options(warn_for_unclosed_files=True)
@@ -262,28 +281,41 @@ def assert_allclose(a, b, check_default_indexes=True, **kwargs):
      xarray.testing._assert_internal_invariants(b, check_default_indexes)


- def create_test_data(seed: int | None = None, add_attrs: bool = True) -> Dataset:
+ _DEFAULT_TEST_DIM_SIZES = (8, 9, 10)
+
+
+ def create_test_data(
+     seed: int | None = None,
+     add_attrs: bool = True,
+     dim_sizes: tuple[int, int, int] = _DEFAULT_TEST_DIM_SIZES,
+ ) -> Dataset:
      rs = np.random.RandomState(seed)
      _vars = {
          "var1": ["dim1", "dim2"],
          "var2": ["dim1", "dim2"],
          "var3": ["dim3", "dim1"],
      }
-     _dims = {"dim1": 8, "dim2": 9, "dim3": 10}
+     _dims = {"dim1": dim_sizes[0], "dim2": dim_sizes[1], "dim3": dim_sizes[2]}

      obj = Dataset()
      obj["dim2"] = ("dim2", 0.5 * np.arange(_dims["dim2"]))
-     obj["dim3"] = ("dim3", list("abcdefghij"))
+     if _dims["dim3"] > 26:
+         raise RuntimeError(
+             f'Not enough letters for filling this dimension size ({_dims["dim3"]})'
+         )
+     obj["dim3"] = ("dim3", list(string.ascii_lowercase[0 : _dims["dim3"]]))
      obj["time"] = ("time", pd.date_range("2000-01-01", periods=20))
      for v, dims in sorted(_vars.items()):
          data = rs.normal(size=tuple(_dims[d] for d in dims))
          obj[v] = (dims, data)
          if add_attrs:
              obj[v].attrs = {"foo": "variable"}
-     obj.coords["numbers"] = (
-         "dim3",
-         np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64"),
-     )
+
+     if dim_sizes == _DEFAULT_TEST_DIM_SIZES:
+         numbers_values = np.array([0, 1, 2, 0, 0, 1, 1, 2, 2, 3], dtype="int64")
+     else:
+         numbers_values = np.random.randint(0, 3, _dims["dim3"], dtype="int64")
+     obj.coords["numbers"] = ("dim3", numbers_values)
      obj.encoding = {"foo": "bar"}
      assert all(obj.data.flags.writeable for obj in obj.variables.values())
      return obj
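A quick usage sketch of the new dim_sizes parameter (exercised by the compression test below); the shapes follow from the _vars/_dims mapping above:

from xarray.tests import create_test_data

ds = create_test_data(seed=0, dim_sizes=(20, 80, 10))
assert ds["var2"].shape == (20, 80)  # dims ("dim1", "dim2")
assert ds["var3"].shape == (10, 20)  # dims ("dim3", "dim1")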
73 changes: 71 additions & 2 deletions xarray/tests/test_backends.py
@@ -72,6 +72,7 @@
      requires_h5netcdf_ros3,
      requires_iris,
      requires_netCDF4,
+     requires_netCDF4_1_6_2_or_above,
      requires_pydap,
      requires_pynio,
      requires_scipy,
@@ -1486,7 +1487,7 @@ def test_dump_and_open_encodings(self) -> None:
          assert ds.variables["time"].getncattr("units") == units
          assert_array_equal(ds.variables["time"], np.arange(10) + 4)

-     def test_compression_encoding(self) -> None:
+     def test_compression_encoding_legacy(self) -> None:
          data = create_test_data()
          data["var2"].encoding.update(
              {
@@ -1767,6 +1768,74 @@ def test_setncattr_string(self) -> None:
          assert_array_equal(one_element_list_of_strings, totest.attrs["bar"])
          assert one_string == totest.attrs["baz"]

+     @pytest.mark.parametrize(
+         "compression",
+         [
+             None,
+             "zlib",
+             "szip",
+             "zstd",
+             "blosc_lz",
+             "blosc_lz4",
+             "blosc_lz4hc",
+             "blosc_zlib",
+             "blosc_zstd",
+         ],
+     )
+     @requires_netCDF4_1_6_2_or_above
+     @pytest.mark.xfail(ON_WINDOWS, reason="new compression not yet implemented")
+     def test_compression_encoding(self, compression: str | None) -> None:
+         data = create_test_data(dim_sizes=(20, 80, 10))
+         encoding_params: dict[str, Any] = dict(compression=compression, blosc_shuffle=1)
+         data["var2"].encoding.update(encoding_params)
+         data["var2"].encoding.update(
+             {
+                 "chunksizes": (20, 40),
+                 "original_shape": data.var2.shape,
+                 "blosc_shuffle": 1,
+                 "fletcher32": False,
+             }
+         )
+         with self.roundtrip(data) as actual:
+             expected_encoding = data["var2"].encoding.copy()
+             # compression does not appear in the retrieved encoding, which
+             # differs from the input encoding; shuffle also changes. Modify
+             # the expected encoding here to account for this.
+             compression = expected_encoding.pop("compression")
+             blosc_shuffle = expected_encoding.pop("blosc_shuffle")
+             if compression is not None:
+                 if "blosc" in compression and blosc_shuffle:
+                     expected_encoding["blosc"] = {
+                         "compressor": compression,
+                         "shuffle": blosc_shuffle,
+                     }
+                     expected_encoding["shuffle"] = False
+                 elif compression == "szip":
+                     expected_encoding["szip"] = {
+                         "coding": "nn",
+                         "pixels_per_block": 8,
+                     }
+                     expected_encoding["shuffle"] = False
+                 else:
+                     # This will set a key like zlib=true, which is what appears
+                     # in the encoding when we read it back.
+                     expected_encoding[compression] = True
+                     if compression == "zstd":
+                         expected_encoding["shuffle"] = False
+             else:
+                 expected_encoding["shuffle"] = False
+
+             actual_encoding = actual["var2"].encoding
+             assert expected_encoding.items() <= actual_encoding.items()
+         if (
+             encoding_params["compression"] is not None
+             and "blosc" not in encoding_params["compression"]
+         ):
+             # regression test for #156
+             expected = data.isel(dim1=0)
+             with self.roundtrip(expected) as actual:
+                 assert_equal(expected, actual)

      @pytest.mark.skip(reason="https://github.com/Unidata/netcdf4-python/issues/1195")
      def test_refresh_from_disk(self) -> None:
          super().test_refresh_from_disk()
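To make the expected-encoding munging above concrete, a small sketch of the mapping the test asserts for one blosc case, assuming a blosc-capable netCDF4 build; the retrieved encoding is checked as a superset:

input_encoding = {"compression": "blosc_zstd", "blosc_shuffle": 1}
expected_subset = {
    "blosc": {"compressor": "blosc_zstd", "shuffle": 1},
    "shuffle": False,
}
# the test then asserts: expected_subset.items() <= actual_encoding.items()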
@@ -4518,7 +4587,7 @@ def test_extract_nc4_variable_encoding(self) -> None:
          assert {} == encoding

      @requires_netCDF4
-     def test_extract_nc4_variable_encoding_netcdf4(self, monkeypatch):
+     def test_extract_nc4_variable_encoding_netcdf4(self):
          # New netCDF4 1.6.0 compression argument.
          var = xr.Variable(("x",), [1, 2, 3], {}, {"compression": "szlib"})
          _extract_nc4_variable_encoding(var, backend="netCDF4", raise_on_invalid=True)
12 changes: 10 additions & 2 deletions xarray/tests/test_rolling.py
@@ -120,7 +120,9 @@ def test_rolling_properties(self, da) -> None:
          ):
              da.rolling(foo=2)

-     @pytest.mark.parametrize("name", ("sum", "mean", "std", "min", "max", "median"))
+     @pytest.mark.parametrize(
+         "name", ("sum", "mean", "std", "min", "max", "median", "argmin", "argmax")
+     )
      @pytest.mark.parametrize("center", (True, False, None))
      @pytest.mark.parametrize("min_periods", (1, None))
      @pytest.mark.parametrize("backend", ["numpy"], indirect=True)
@@ -133,9 +135,15 @@ def test_rolling_wrapped_bottleneck(

          func_name = f"move_{name}"
          actual = getattr(rolling_obj, name)()
+         window = 7
          expected = getattr(bn, func_name)(
-             da.values, window=7, axis=1, min_count=min_periods
+             da.values, window=window, axis=1, min_count=min_periods
          )
+         # index 0 is at the rightmost edge of the window;
+         # reverse the index here (see GH #8541)
+         if func_name in ["move_argmin", "move_argmax"]:
+             expected = window - 1 - expected

          # Using assert_allclose because we get tiny (1e-17) differences in numbagg.
          np.testing.assert_allclose(actual.values, expected)
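And the user-facing effect of the fix, as a hedged sketch assuming bottleneck is installed so the rolling reduction dispatches to it:

import xarray as xr

da = xr.DataArray([1.0, 3.0, 2.0, 0.0], dims="x")
# Offsets are now counted from the left edge of each window,
# consistent with np.argmax over the window contents.
da.rolling(x=2).argmax()  # values: [nan, 1., 0., 0.]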
