From ac9bce13fd73b61580ed51dbfa779ac8a00636c0 Mon Sep 17 00:00:00 2001 From: Deepak Cherian Date: Wed, 12 Jun 2024 15:35:50 -0600 Subject: [PATCH] Updates --- doc/user-guide/dask.rst | 5 +++++ xarray/core/dataarray.py | 6 ++++-- xarray/core/dataset.py | 7 +++++-- xarray/tests/test_dataset.py | 3 +++ 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/doc/user-guide/dask.rst b/doc/user-guide/dask.rst index 27e7449b7c3..e56fc49036e 100644 --- a/doc/user-guide/dask.rst +++ b/doc/user-guide/dask.rst @@ -296,6 +296,11 @@ loaded into Dask or not: Automatic parallelization with ``apply_ufunc`` and ``map_blocks`` ----------------------------------------------------------------- +.. tip:: + + Some problems can become embarrassingly parallel and thus easy to parallelize automatically + by rechunking to a frequency, e.g. ``ds.chunk(time="YE")``. See :py:meth:`Dataset.chunk` for more. + Almost all of xarray's built-in operations work on Dask arrays. If you want to use a function that isn't wrapped by xarray, and have it applied in parallel on each block of your xarray object, you have three options: diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 4dc897c1878..431ead33246 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1356,11 +1356,13 @@ def chunk( sizes along that dimension will not be updated; non-dask arrays will be converted into dask arrays with a single block. + Along datetime-like dimensions, a pandas frequency string is also accepted. + Parameters ---------- - chunks : int, "auto", tuple of int or mapping of Hashable to int, optional + chunks : int, "auto", tuple of int or mapping of hashable to int or a pandas frequency string, optional Chunk sizes along each dimension, e.g., ``5``, ``"auto"``, ``(5, 5)`` or - ``{"x": 5, "y": 5}``. + ``{"x": 5, "y": 5}`` or ``{"x": 5, "time": "YE"}``. name_prefix : str, optional Prefix for the name of the new dask array. 
token : str, optional diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 469ea8f9d5c..282ca671129 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -159,6 +159,7 @@ QueryParserOptions, ReindexMethodOptions, SideOptions, + T_NormalizedChunks, T_Xarray, ) from xarray.core.weighted import DatasetWeighted @@ -2755,9 +2756,11 @@ def _resolve_frequency(name: Hashable, freq: str) -> tuple[int]: ) return chunks - chunks_mapping_ints = { + chunks_mapping_ints: T_NormalizedChunks = { name: ( - _resolve_frequency(name, chunks) if isinstance(chunks, str) else chunks + _resolve_frequency(name, chunks) + if isinstance(chunks, str) and chunks != "auto" + else chunks ) for name, chunks in chunks_mapping.items() } diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index eb15d82d5ab..2279118b202 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -1229,6 +1229,9 @@ def test_chunk_by_frequecy_errors(self): ds["x"] = ("x", [1, 2, 3]) with pytest.raises(ValueError, match="datetime variables"): ds.chunk(x="YE") + ds["x"] = ("x", xr.date_range("2001-01-01", periods=3, freq="D")) + with pytest.raises(ValueError, match="Invalid frequency"): + ds.chunk(x="foo") @requires_dask def test_dask_is_lazy(self) -> None: