From be8e17e4dc5da67d7cbb09db87d80c1bbc71a64e Mon Sep 17 00:00:00 2001 From: Martin Raspaud Date: Mon, 17 Jun 2024 16:25:18 +0200 Subject: [PATCH] Support duplicate dimensions in `.chunk` (#9099) * Allow duplicate dimensions in chunking * Address review comments * fix whats-new * add comment * Update xarray/tests/test_dask.py --------- Co-authored-by: Deepak Cherian Co-authored-by: Deepak Cherian --- doc/whats-new.rst | 4 ++-- xarray/namedarray/core.py | 7 ++++++- xarray/tests/test_dask.py | 7 +++++++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7ec6e08ef96..e7a48458ae2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,7 +22,8 @@ v2024.06.1 (unreleased) New Features ~~~~~~~~~~~~ - +- Allow chunking for arrays with duplicated dimension names (:issue:`8759`, :pull:`9099`). + By `Martin Raspaud `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -73,7 +74,6 @@ Bug fixes support arbitrary kwargs such as ``order`` for polynomial interpolation (:issue:`8762`). By `Nicolas Karasiak `_. - Documentation ~~~~~~~~~~~~~ - Add link to CF Conventions on packed data and sentence on type determination in the I/O user guide (:issue:`9041`, :pull:`9045`). 
diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 960ab9d4d1d..fe47bf50533 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -812,7 +812,12 @@ def chunk( chunks = either_dict_or_kwargs(chunks, chunks_kwargs, "chunk") if is_dict_like(chunks): - chunks = {self.get_axis_num(dim): chunk for dim, chunk in chunks.items()} + # This method of iteration allows for duplicated dimension names, GH8759 + chunks = { + dim_number: chunks[dim] + for dim_number, dim in enumerate(self.dims) + if dim in chunks + } chunkmanager = guess_chunkmanager(chunked_array_type) diff --git a/xarray/tests/test_dask.py b/xarray/tests/test_dask.py index 3ac638c3c5f..20491eca91a 100644 --- a/xarray/tests/test_dask.py +++ b/xarray/tests/test_dask.py @@ -638,6 +638,13 @@ def counting_get(*args, **kwargs): assert count[0] == 1 + def test_duplicate_dims(self): + data = np.random.normal(size=(4, 4)) + arr = DataArray(data, dims=("x", "x")) + chunked_array = arr.chunk({"x": 2}) + assert chunked_array.chunks == ((2, 2), (2, 2)) + assert chunked_array.chunksizes == {"x": (2, 2)} + def test_stack(self): data = da.random.normal(size=(2, 3, 4), chunks=(1, 3, 4)) arr = DataArray(data, dims=("w", "x", "y"))