From 1224b5479623169e015e217b97b66c986f6439b6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 16:13:11 -0700 Subject: [PATCH 01/53] add `.set_dims()`, `.transpose()` and `.T` to namedarray --- xarray/core/dataarray.py | 13 +--- xarray/core/dataset.py | 3 +- xarray/core/utils.py | 30 ---------- xarray/core/variable.py | 102 ------------------------------- xarray/namedarray/_typing.py | 4 ++ xarray/namedarray/core.py | 113 ++++++++++++++++++++++++++++++++++- xarray/namedarray/utils.py | 92 +++++++++++++++++++++++++--- xarray/tests/test_utils.py | 5 +- 8 files changed, 206 insertions(+), 156 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index b892cf595b5..eada9ca290c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4,15 +4,7 @@ import warnings from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence from os import PathLike -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Generic, - Literal, - NoReturn, - overload, -) +from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, NoReturn, overload import numpy as np import pandas as pd @@ -64,6 +56,7 @@ as_compatible_data, as_variable, ) +from xarray.namedarray.utils import infix_dims from xarray.plot.accessor import DataArrayPlotAccessor from xarray.plot.utils import _get_units_from_attrs from xarray.util.deprecation_helpers import _deprecate_positional_args @@ -2994,7 +2987,7 @@ def transpose( Dataset.transpose """ if dims: - dims = tuple(utils.infix_dims(dims, self.dims, missing_dims)) + dims = tuple(infix_dims(dims, self.dims, missing_dims)) variable = self.variable.transpose(*dims) if transpose_coords: coords: dict[Hashable, Variable] = {} diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 248f516b61b..661050b39c4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -110,8 +110,6 @@ decode_numpy_dict_values, drop_dims_from_indexers, either_dict_or_kwargs, - infix_dims, - is_dict_like, is_scalar, maybe_wrap_array, ) @@ -122,6 +120,7 @@ broadcast_variables, calculate_dimensions, ) +from xarray.namedarray.utils import infix_dims, is_dict_like from xarray.plot.accessor import DatasetPlotAccessor from xarray.util.deprecation_helpers import _deprecate_positional_args diff --git a/xarray/core/utils.py b/xarray/core/utils.py index ad86b2c7fec..aab02b1a141 100644 --- a/xarray/core/utils.py +++ b/xarray/core/utils.py @@ -789,36 +789,6 @@ def __len__(self) -> int: return len(self._data) - num_hidden -def infix_dims( - dims_supplied: Collection, - dims_all: Collection, - missing_dims: ErrorOptionsWithWarn = "raise", -) -> Iterator: - """ - Resolves a supplied list containing an ellipsis representing other items, to - a generator with the 'realized' list of all items - """ - if ... in dims_supplied: - if len(set(dims_all)) != len(dims_all): - raise ValueError("Cannot use ellipsis with repeated dims") - if list(dims_supplied).count(...) 
> 1: - raise ValueError("More than one ellipsis supplied") - other_dims = [d for d in dims_all if d not in dims_supplied] - existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) - for d in existing_dims: - if d is ...: - yield from other_dims - else: - yield d - else: - existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) - if set(existing_dims) ^ set(dims_all): - raise ValueError( - f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" - ) - yield from existing_dims - - def get_temp_dimname(dims: Container[Hashable], new_dim: Hashable) -> Hashable: """Get an new dimension name based on new_dim, that is not used in dims. If the same name exists, we add an underscore(s) in the head. diff --git a/xarray/core/variable.py b/xarray/core/variable.py index f18c4044f40..92904ea5130 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -42,7 +42,6 @@ drop_dims_from_indexers, either_dict_or_kwargs, ensure_us_time_resolution, - infix_dims, is_duck_array, maybe_coerce_to_str, ) @@ -1382,107 +1381,6 @@ def roll(self, shifts=None, **shifts_kwargs): result = result._roll_one_dim(dim, count) return result - def transpose( - self, - *dims: Hashable | ellipsis, - missing_dims: ErrorOptionsWithWarn = "raise", - ) -> Self: - """Return a new Variable object with transposed dimensions. - - Parameters - ---------- - *dims : Hashable, optional - By default, reverse the dimensions. Otherwise, reorder the - dimensions to this order. - missing_dims : {"raise", "warn", "ignore"}, default: "raise" - What to do if dimensions that should be selected from are not present in the - Variable: - - "raise": raise an exception - - "warn": raise a warning, and ignore the missing dimensions - - "ignore": ignore the missing dimensions - - Returns - ------- - transposed : Variable - The returned object has transposed data and dimensions with the - same attributes as the original. - - Notes - ----- - This operation returns a view of this variable's data. It is - lazy for dask-backed Variables but not for numpy-backed Variables. - - See Also - -------- - numpy.transpose - """ - if len(dims) == 0: - dims = self.dims[::-1] - else: - dims = tuple(infix_dims(dims, self.dims, missing_dims)) - - if len(dims) < 2 or dims == self.dims: - # no need to transpose if only one dimension - # or dims are in same order - return self.copy(deep=False) - - axes = self.get_axis_num(dims) - data = as_indexable(self._data).transpose(axes) - return self._replace(dims=dims, data=data) - - @property - def T(self) -> Self: - return self.transpose() - - def set_dims(self, dims, shape=None): - """Return a new variable with given set of dimensions. - This method might be used to attach new dimension(s) to variable. - - When possible, this operation does not copy this variable's data. - - Parameters - ---------- - dims : str or sequence of str or dict - Dimensions to include on the new variable. If a dict, values are - used to provide the sizes of new dimensions; otherwise, new - dimensions are inserted with length 1. 
- - Returns - ------- - Variable - """ - if isinstance(dims, str): - dims = [dims] - - if shape is None and utils.is_dict_like(dims): - shape = dims.values() - - missing_dims = set(self.dims) - set(dims) - if missing_dims: - raise ValueError( - f"new dimensions {dims!r} must be a superset of " - f"existing dimensions {self.dims!r}" - ) - - self_dims = set(self.dims) - expanded_dims = tuple(d for d in dims if d not in self_dims) + self.dims - - if self.dims == expanded_dims: - # don't use broadcast_to unless necessary so the result remains - # writeable if possible - expanded_data = self.data - elif shape is not None: - dims_map = dict(zip(dims, shape)) - tmp_shape = tuple(dims_map[d] for d in expanded_dims) - expanded_data = duck_array_ops.broadcast_to(self.data, tmp_shape) - else: - expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] - - expanded_var = Variable( - expanded_dims, expanded_data, self._attrs, self._encoding, fastpath=True - ) - return expanded_var.transpose(*dims) - def _stack_once(self, dims: list[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): raise ValueError(f"invalid existing dimensions: {dims}") diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 10314fe9440..db0c0c675a0 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -6,6 +6,7 @@ TYPE_CHECKING, Any, Callable, + Literal, Protocol, SupportsIndex, TypeVar, @@ -263,3 +264,6 @@ def todense(self) -> NDArray[_ScalarType_co]: # NamedArray can most likely use both __array_function__ and __array_namespace__: _sparsearrayfunction_or_api = (_sparsearrayfunction, _sparsearrayapi) + +ErrorOptions = Literal["raise", "ignore"] +ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index feff052101b..fcdaebad8bb 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -22,17 +22,25 @@ from xarray.core import dtypes, formatting, formatting_html from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray._typing import ( + ErrorOptionsWithWarn, _arrayapi, _arrayfunction_or_api, _chunkedarray, _dtype, _DType_co, _ScalarType_co, + _ShapeLike, _ShapeType_co, _SupportsImag, _SupportsReal, ) -from xarray.namedarray.utils import _default, is_duck_dask_array, to_0d_object_array +from xarray.namedarray.utils import ( + _default, + infix_dims, + is_dict_like, + is_duck_dask_array, + to_0d_object_array, +) if TYPE_CHECKING: from numpy.typing import ArrayLike, NDArray @@ -847,5 +855,108 @@ def _to_dense(self) -> Self: else: raise TypeError("self.data is not a sparse array") + def transpose( + self, + *dims: Hashable | ellipsis, + missing_dims: ErrorOptionsWithWarn = "raise", + ) -> Self: + """Return a new object with transposed dimensions. + + Parameters + ---------- + *dims : Hashable, optional + By default, reverse the dimensions. Otherwise, reorder the + dimensions to this order. + missing_dims : {"raise", "warn", "ignore"}, default: "raise" + What to do if dimensions that should be selected from are not present in the + NamedArray: + - "raise": raise an exception + - "warn": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions + + Returns + ------- + transposed : NamedArray + The returned object has transposed data and dimensions with the + same attributes as the original. + + Notes + ----- + This operation returns a view of this variable's data. 
It is + lazy for dask-backed Variables but not for numpy-backed Variables. + + See Also + -------- + numpy.transpose + """ + from xarray.core.indexing import as_indexable + + if len(dims) == 0: + dims = self.dims[::-1] + else: + dims = tuple(infix_dims(dims, self.dims, missing_dims)) + + if len(dims) < 2 or dims == self.dims: + # no need to transpose if only one dimension + # or dims are in same order + return self.copy(deep=False) + + axes = self.get_axis_num(dims) + data = as_indexable(self._data).transpose(axes) + return self._replace(dims=dims, data=data) + + @property + def T(self) -> Self: + return self.transpose() + + def set_dims(self, dims: _DimsLike, shape: _ShapeLike = None) -> Self: + """ + Return a new namedarray with given set of dimensions. + This method might be used to attach new dimension(s) to namedarray. + + When possible, this operation does not copy this namedarray's data. + + Parameters + ---------- + dims : str or sequence of str or dict + Dimensions to include on the new namedarray. If a dict, values are used to + provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + + shape : sequence of int, optional + Shape of the new namedarray. If not provided, the shape is inferred from the data. + """ + + from xarray.core import duck_array_ops # TODO: remove this import + + if isinstance(dims, str): + dims = [dims] + + if shape is None and is_dict_like(dims): + shape = dims.values() + + missing_dims = set(self.dims) - set(dims) + if missing_dims: + raise ValueError( + f"new dimensions {dims!r} must be a superset of " + f"existing dimensions {self.dims!r}" + ) + + self_dims = set(self.dims) + expanded_dims = tuple(dim for dim in dims if dim not in self_dims) + self.dims + + if self.dims == expanded_dims: + # don't use broadcast_to unless necessary so the result remains + # writeable if possible + expanded_data = self.data + elif shape is not None: + dims_map = dict(zip(dims, shape)) + temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) + expanded_data = duck_array_ops.broadcast_to(self.data, temporary_shape) + else: + expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] + + expanded_obj = self._replace(data=expanded_data, dims=expanded_dims) + return expanded_obj.transpose(*dims) + _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 03eb0134231..e5f0bafb95e 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -1,16 +1,15 @@ from __future__ import annotations import sys -from collections.abc import Hashable +import warnings +from collections.abc import Collection, Hashable, Iterable, Iterator, Mapping from enum import Enum -from typing import ( - TYPE_CHECKING, - Any, - Final, -) +from typing import TYPE_CHECKING, Any, Final import numpy as np +from xarray.namedarray._typing import ErrorOptionsWithWarn + if TYPE_CHECKING: if sys.version_info >= (3, 10): from typing import TypeGuard @@ -19,9 +18,7 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import ( - duckarray, - ) + from xarray.namedarray._typing import duckarray try: from dask.array.core import Array as DaskArray @@ -80,6 +77,83 @@ def to_0d_object_array( return result +def is_dict_like(value: Any) -> TypeGuard[Mapping]: + return hasattr(value, "keys") and hasattr(value, "__getitem__") + + +def drop_missing_dims( + supplied_dims: Iterable[Hashable], + dims: Iterable[Hashable], + missing_dims: ErrorOptionsWithWarn, +) -> 
Iterable[Hashable]: + """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that + are not present in dims. + + Parameters + ---------- + supplied_dims : Iterable of Hashable + dims : Iterable of Hashable + missing_dims : {"raise", "warn", "ignore"} + """ + + if missing_dims == "raise": + supplied_dims_set = {val for val in supplied_dims if val is not ...} + invalid = supplied_dims_set - set(dims) + if invalid: + raise ValueError( + f"Dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return supplied_dims + + elif missing_dims == "warn": + invalid = set(supplied_dims) - set(dims) + if invalid: + warnings.warn( + f"Dimensions {invalid} do not exist. Expected one or more of {dims}" + ) + + return [val for val in supplied_dims if val in dims or val is ...] + + elif missing_dims == "ignore": + return [val for val in supplied_dims if val in dims or val is ...] + + else: + raise ValueError( + f"Unrecognised option {missing_dims} for missing_dims argument" + ) + + +def infix_dims( + dims_supplied: Collection, + dims_all: Collection, + missing_dims: ErrorOptionsWithWarn = "raise", +) -> Iterator: + """ + Resolves a supplied list containing an ellipsis representing other items, to + a generator with the 'realized' list of all items + """ + if ... in dims_supplied: + if len(set(dims_all)) != len(dims_all): + raise ValueError("Cannot use ellipsis with repeated dims") + if list(dims_supplied).count(...) > 1: + raise ValueError("More than one ellipsis supplied") + other_dims = [d for d in dims_all if d not in dims_supplied] + existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) + for d in existing_dims: + if d is ...: + yield from other_dims + else: + yield d + else: + existing_dims = drop_missing_dims(dims_supplied, dims_all, missing_dims) + if set(existing_dims) ^ set(dims_all): + raise ValueError( + f"{dims_supplied} must be a permuted list of {dims_all}, unless `...` is included" + ) + yield from existing_dims + + class ReprObject: """Object that prints as the given value, for use with sentinel values.""" diff --git a/xarray/tests/test_utils.py b/xarray/tests/test_utils.py index 36f62fad71f..d46d27c3c34 100644 --- a/xarray/tests/test_utils.py +++ b/xarray/tests/test_utils.py @@ -8,6 +8,7 @@ from xarray.core import duck_array_ops, utils from xarray.core.utils import either_dict_or_kwargs, iterate_nested +from xarray.namedarray.utils import infix_dims from xarray.tests import assert_array_equal, requires_dask @@ -239,7 +240,7 @@ def test_either_dict_or_kwargs(): ], ) def test_infix_dims(supplied, all_, expected): - result = list(utils.infix_dims(supplied, all_)) + result = list(infix_dims(supplied, all_)) assert result == expected @@ -248,7 +249,7 @@ def test_infix_dims(supplied, all_, expected): ) def test_infix_dims_errors(supplied, all_): with pytest.raises(ValueError): - list(utils.infix_dims(supplied, all_)) + list(infix_dims(supplied, all_)) @pytest.mark.parametrize( From 409c5de0e369e646c7938477a5291e5584ba1e91 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 16:30:37 -0700 Subject: [PATCH 02/53] more typying fixes --- xarray/namedarray/core.py | 18 +++++++++--------- xarray/namedarray/utils.py | 8 ++++---- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index fcdaebad8bb..6f3ff8849ca 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -23,6 +23,7 @@ from xarray.namedarray._aggregations import 
NamedArrayAggregations from xarray.namedarray._typing import ( ErrorOptionsWithWarn, + SupportsIndex, _arrayapi, _arrayfunction_or_api, _chunkedarray, @@ -891,7 +892,7 @@ def transpose( """ from xarray.core.indexing import as_indexable - if len(dims) == 0: + if not dims: dims = self.dims[::-1] else: dims = tuple(infix_dims(dims, self.dims, missing_dims)) @@ -902,14 +903,14 @@ def transpose( return self.copy(deep=False) axes = self.get_axis_num(dims) - data = as_indexable(self._data).transpose(axes) + data = as_indexable(self._data).transpose(axes) # type: ignore return self._replace(dims=dims, data=data) @property def T(self) -> Self: return self.transpose() - def set_dims(self, dims: _DimsLike, shape: _ShapeLike = None) -> Self: + def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: """ Return a new namedarray with given set of dimensions. This method might be used to attach new dimension(s) to namedarray. @@ -932,13 +933,12 @@ def set_dims(self, dims: _DimsLike, shape: _ShapeLike = None) -> Self: dims = [dims] if shape is None and is_dict_like(dims): - shape = dims.values() + shape = list(dims.values()) - missing_dims = set(self.dims) - set(dims) - if missing_dims: + if missing_dims := set(self.dims) - set(dims): raise ValueError( f"new dimensions {dims!r} must be a superset of " - f"existing dimensions {self.dims!r}" + f"existing dimensions {self.dims!r}. missing dims: {missing_dims}" ) self_dims = set(self.dims) @@ -949,9 +949,9 @@ def set_dims(self, dims: _DimsLike, shape: _ShapeLike = None) -> Self: # writeable if possible expanded_data = self.data elif shape is not None: - dims_map = dict(zip(dims, shape)) + dims_map = dict(zip(dims, cast(Iterable[SupportsIndex], shape))) temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) - expanded_data = duck_array_ops.broadcast_to(self.data, temporary_shape) + expanded_data = duck_array_ops.broadcast_to(self.data, temporary_shape) # type: ignore else: expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index e5f0bafb95e..ca8d7f62176 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -77,7 +77,7 @@ def to_0d_object_array( return result -def is_dict_like(value: Any) -> TypeGuard[Mapping]: +def is_dict_like(value: Any) -> TypeGuard[Mapping[Any, Any]]: return hasattr(value, "keys") and hasattr(value, "__getitem__") @@ -125,10 +125,10 @@ def drop_missing_dims( def infix_dims( - dims_supplied: Collection, - dims_all: Collection, + dims_supplied: Collection[Any], + dims_all: Collection[Any], missing_dims: ErrorOptionsWithWarn = "raise", -) -> Iterator: +) -> Iterator[Any]: """ Resolves a supplied list containing an ellipsis representing other items, to a generator with the 'realized' list of all items From 43e10d8cd9b14cb51930def999b85451312ba413 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 17:36:02 -0700 Subject: [PATCH 03/53] more typing fixes --- xarray/core/dataset.py | 4 ++-- xarray/namedarray/core.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 661050b39c4..dc55f3c0b2d 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5142,7 +5142,7 @@ def _stack_once( add_dims = [d for d in dims if d not in var.dims] vdims = list(var.dims) + add_dims shape = [self.dims[d] for d in vdims] - exp_var = var.set_dims(vdims, shape) + exp_var = var.set_dims(vdims, shape) # type: ignore stacked_var 
= exp_var.stack(**{new_dim: dims}) new_variables[name] = stacked_var stacked_var_names.append(name) @@ -7337,7 +7337,7 @@ def to_dask_dataframe( # Broadcast then flatten the array: var_new_dims = var.set_dims(ordered_dims).chunk(ds_chunks) - dask_array = var_new_dims._data.reshape(-1) + dask_array = var_new_dims._data.reshape(-1) # type: ignore series = dd.from_dask_array(dask_array, columns=name, meta=df_meta) series_list.append(series) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 6f3ff8849ca..00ac253ee92 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -11,6 +11,7 @@ Callable, Generic, Literal, + SupportsIndex, TypeVar, cast, overload, @@ -23,7 +24,6 @@ from xarray.namedarray._aggregations import NamedArrayAggregations from xarray.namedarray._typing import ( ErrorOptionsWithWarn, - SupportsIndex, _arrayapi, _arrayfunction_or_api, _chunkedarray, @@ -953,7 +953,7 @@ def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) expanded_data = duck_array_ops.broadcast_to(self.data, temporary_shape) # type: ignore else: - expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] + expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] # type: ignore expanded_obj = self._replace(data=expanded_data, dims=expanded_dims) return expanded_obj.transpose(*dims) From d6c7758788076214cc44016feed81b152f43a18f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 18:03:27 -0700 Subject: [PATCH 04/53] override set_dims for IndexVariable --- xarray/core/dataset.py | 4 ++-- xarray/core/variable.py | 28 ++++++++++++++++++++++++++++ xarray/namedarray/core.py | 24 +++++++++++++++++++++++- 3 files changed, 53 insertions(+), 3 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index dc55f3c0b2d..66ad568ee28 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -5142,8 +5142,8 @@ def _stack_once( add_dims = [d for d in dims if d not in var.dims] vdims = list(var.dims) + add_dims shape = [self.dims[d] for d in vdims] - exp_var = var.set_dims(vdims, shape) # type: ignore - stacked_var = exp_var.stack(**{new_dim: dims}) + exp_var = var.set_dims(vdims, shape) + stacked_var = exp_var.stack(**{new_dim: dims}) # type: ignore new_variables[name] = stacked_var stacked_var_names.append(name) else: diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 92904ea5130..9051ecb24f0 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -45,6 +45,7 @@ is_duck_array, maybe_coerce_to_str, ) +from xarray.namedarray._typing import _DimsLike, _ShapeLike from xarray.namedarray.core import NamedArray NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -2749,6 +2750,33 @@ def _inplace_binary_op(self, other, f): "Values of an IndexVariable are immutable and can not be modified inplace" ) + def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Variable: + """ + Return a new variable with given set of dimensions. + This method might be used to attach new dimension(s) to variable. + + When possible, this operation does not copy this variable's data. + + Parameters + ---------- + dims : str or sequence of str or dict + Dimensions to include on the new variable. If a dict, values are used to + provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + + shape : sequence of int, optional + Shape of the new variable. 
If not provided, the shape is inferred from the data. + """ + + expanded_data, expanded_dims = self._get_expanded_data_and_dims(dims, shape) + expanded_obj = Variable( + data=expanded_data, + dims=expanded_dims, + attrs=self._attrs, + encoding=self._encoding, + fastpath=True, + ) + return expanded_obj.transpose(*dims) + def _unified_dims(variables): # validate dimensions diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 00ac253ee92..9e858b5416c 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -910,7 +910,9 @@ def transpose( def T(self) -> Self: return self.transpose() - def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: + def _get_expanded_data_and_dims( + self, dims: _DimsLike, shape: _ShapeLike + ) -> tuple[duckarray[Any, _DType_co], _Dims]: """ Return a new namedarray with given set of dimensions. This method might be used to attach new dimension(s) to namedarray. @@ -955,6 +957,26 @@ def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: else: expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] # type: ignore + return expanded_data, expanded_dims + + def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: + """ + Return a new namedarray with given set of dimensions. + This method might be used to attach new dimension(s) to namedarray. + + When possible, this operation does not copy this namedarray's data. + + Parameters + ---------- + dims : str or sequence of str or dict + Dimensions to include on the new namedarray. If a dict, values are used to + provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + + shape : sequence of int, optional + Shape of the new namedarray. If not provided, the shape is inferred from the data. + """ + + expanded_data, expanded_dims = self._get_expanded_data_and_dims(dims, shape) expanded_obj = self._replace(data=expanded_data, dims=expanded_dims) return expanded_obj.transpose(*dims) From a920e11cf8c1ef77f4ec5b7b5d57c877c8dc04e0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 19:54:24 -0700 Subject: [PATCH 05/53] fix dims --- xarray/core/variable.py | 2 +- xarray/namedarray/core.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 9051ecb24f0..2271c3145bc 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -2750,7 +2750,7 @@ def _inplace_binary_op(self, other, f): "Values of an IndexVariable are immutable and can not be modified inplace" ) - def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Variable: + def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Variable: # type: ignore """ Return a new variable with given set of dimensions. This method might be used to attach new dimension(s) to variable. diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 9e858b5416c..5d6b24e48ee 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -911,10 +911,10 @@ def T(self) -> Self: return self.transpose() def _get_expanded_data_and_dims( - self, dims: _DimsLike, shape: _ShapeLike + self, dims: _DimsLike, shape: _ShapeLike | None = None ) -> tuple[duckarray[Any, _DType_co], _Dims]: """ - Return a new namedarray with given set of dimensions. + Return a tuple of new namedarray with given set of dimensions and dims This method might be used to attach new dimension(s) to namedarray. 
When possible, this operation does not copy this namedarray's data. @@ -931,9 +931,6 @@ def _get_expanded_data_and_dims( from xarray.core import duck_array_ops # TODO: remove this import - if isinstance(dims, str): - dims = [dims] - if shape is None and is_dict_like(dims): shape = list(dims.values()) @@ -976,6 +973,9 @@ def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: Shape of the new namedarray. If not provided, the shape is inferred from the data. """ + if isinstance(dims, str): + dims = [dims] + expanded_data, expanded_dims = self._get_expanded_data_and_dims(dims, shape) expanded_obj = self._replace(data=expanded_data, dims=expanded_dims) return expanded_obj.transpose(*dims) From e765d7dee91411ae6611be5f3c23452b11523fe9 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 22:21:56 -0700 Subject: [PATCH 06/53] split `.set_dims()` into `.expand_dims()` and `broadcast_to()` --- xarray/core/alignment.py | 2 +- xarray/core/concat.py | 4 +- xarray/core/dataset.py | 10 ++--- xarray/core/groupby.py | 11 +----- xarray/core/merge.py | 2 +- xarray/core/variable.py | 38 +++++-------------- xarray/namedarray/core.py | 69 +++++++++++++++-------------------- xarray/tests/test_groupby.py | 4 +- xarray/tests/test_units.py | 4 +- xarray/tests/test_variable.py | 14 +++---- 10 files changed, 60 insertions(+), 98 deletions(-) diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 732ec5d3ea6..31daa6fbed8 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -1068,7 +1068,7 @@ def _set_dims(var): # ignore dim not in var.dims var_dims_map[dim] = var.shape[var.dims.index(dim)] - return var.set_dims(var_dims_map) + return var.expand_dims(var_dims_map) def _broadcast_array(array: T_DataArray) -> T_DataArray: data = _set_dims(array.variable) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index a136480b2fb..a7305776feb 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -552,7 +552,7 @@ def ensure_common_dims(vars, concat_dim_lengths): for var, dim_len in zip(vars, concat_dim_lengths): if var.dims != common_dims: common_shape = tuple(dims_sizes.get(d, dim_len) for d in common_dims) - var = var.set_dims(common_dims, common_shape) + var = var.expand_dims(common_dims, common_shape) yield var # get the indexes to concatenate together, create a PandasIndex @@ -567,7 +567,7 @@ def get_indexes(name): elif name == dim: var = ds._variables[name] if not var.dims: - data = var.set_dims(dim).values + data = var.expand_dims(dim).values yield PandasIndex(data, dim, coord_dtype=var.dtype) # create concatenation index, needed for later reindexing diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 66ad568ee28..0e79b924acd 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4614,12 +4614,12 @@ def expand_dims( all_dims = list(zip(v.dims, v.shape)) for d, c in zip_axis_dim: all_dims.insert(d, c) - variables[k] = v.set_dims(dict(all_dims)) + variables[k] = v.expand_dims(dict(all_dims)) else: if k not in variables: # If dims includes a label of a non-dimension coordinate, # it will be promoted to a 1D coordinate with a single value. 
- index, index_vars = create_default_index_implicit(v.set_dims(k)) + index, index_vars = create_default_index_implicit(v.expand_dims(k)) indexes[k] = index variables.update(index_vars) @@ -5142,7 +5142,7 @@ def _stack_once( add_dims = [d for d in dims if d not in var.dims] vdims = list(var.dims) + add_dims shape = [self.dims[d] for d in vdims] - exp_var = var.set_dims(vdims, shape) + exp_var = var.expand_dims(vdims, shape) stacked_var = exp_var.stack(**{new_dim: dims}) # type: ignore new_variables[name] = stacked_var stacked_var_names.append(name) @@ -7090,7 +7090,7 @@ def to_pandas(self) -> pd.Series | pd.DataFrame: def _to_dataframe(self, ordered_dims: Mapping[Any, int]): columns = [k for k in self.variables if k not in self.dims] data = [ - self._variables[k].set_dims(ordered_dims).values.reshape(-1) + self._variables[k].expand_dims(ordered_dims).values.reshape(-1) for k in columns ] index = self.coords.to_index([*ordered_dims]) @@ -7336,7 +7336,7 @@ def to_dask_dataframe( var = var.chunk() # Broadcast then flatten the array: - var_new_dims = var.set_dims(ordered_dims).chunk(ds_chunks) + var_new_dims = var.expand_dims(ordered_dims).chunk(ds_chunks) dask_array = var_new_dims._data.reshape(-1) # type: ignore series = dd.from_dask_array(dask_array, columns=name, meta=df_meta) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index 788e1efa80b..ded765780a3 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -5,14 +5,7 @@ from abc import ABC, abstractmethod from collections.abc import Hashable, Iterator, Mapping, Sequence from dataclasses import dataclass, field -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Generic, - Literal, - Union, -) +from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, Union import numpy as np import pandas as pd @@ -1052,7 +1045,7 @@ def _flox_reduce( # broadcast and restore non-numeric data variables (backcompat) for name, var in non_numeric.items(): if all(d not in var.dims for d in parsed_dim): - result[name] = var.variable.set_dims( + result[name] = var.variable.expand_dims( (grouper.name,) + var.dims, (result.sizes[grouper.name],) + var.shape, ) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index a8e54ad1231..a3c55e2600c 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -120,7 +120,7 @@ def unique_variable( if compat == "broadcast_equals": dim_lengths = broadcast_dimension_size(variables) - out = out.set_dims(dim_lengths) + out = out.expand_dims(dim_lengths) if compat == "no_conflicts": combine_method = "fillna" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 2271c3145bc..b3414069ec2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -45,7 +45,6 @@ is_duck_array, maybe_coerce_to_str, ) -from xarray.namedarray._typing import _DimsLike, _ShapeLike from xarray.namedarray.core import NamedArray NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -878,7 +877,7 @@ def __setitem__(self, key, value): else: value = Variable(dims[-value.ndim :], value) # broadcast to become assignable - value = value.set_dims(dims).data + value = value.expand_dims(dims).data if new_order: value = duck_array_ops.asarray(value) @@ -2750,32 +2749,10 @@ def _inplace_binary_op(self, other, f): "Values of an IndexVariable are immutable and can not be modified inplace" ) - def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Variable: # type: ignore - """ - Return a new variable with given set of dimensions. 
- This method might be used to attach new dimension(s) to variable. - - When possible, this operation does not copy this variable's data. - - Parameters - ---------- - dims : str or sequence of str or dict - Dimensions to include on the new variable. If a dict, values are used to - provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. - - shape : sequence of int, optional - Shape of the new variable. If not provided, the shape is inferred from the data. - """ - - expanded_data, expanded_dims = self._get_expanded_data_and_dims(dims, shape) - expanded_obj = Variable( - data=expanded_data, - dims=expanded_dims, - attrs=self._attrs, - encoding=self._encoding, - fastpath=True, + def _create_expanded_obj(self, expanded_data, expanded_dims) -> Variable: # type: ignore + return Variable( + expanded_dims, expanded_data, self._attrs, self._encoding, fastpath=True ) - return expanded_obj.transpose(*dims) def _unified_dims(variables): @@ -2806,7 +2783,9 @@ def _broadcast_compat_variables(*variables): dimensions of size 1 instead of the size of the broadcast dimension. """ dims = tuple(_unified_dims(variables)) - return tuple(var.set_dims(dims) if var.dims != dims else var for var in variables) + return tuple( + var.expand_dims(dims) if var.dims != dims else var for var in variables + ) def broadcast_variables(*variables: Variable) -> tuple[Variable, ...]: @@ -2822,7 +2801,8 @@ def broadcast_variables(*variables: Variable) -> tuple[Variable, ...]: dims_map = _unified_dims(variables) dims_tuple = tuple(dims_map) return tuple( - var.set_dims(dims_map) if var.dims != dims_tuple else var for var in variables + var.expand_dims(dims_map) if var.dims != dims_tuple else var + for var in variables ) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5d6b24e48ee..791d330d758 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -910,26 +910,36 @@ def transpose( def T(self) -> Self: return self.transpose() - def _get_expanded_data_and_dims( - self, dims: _DimsLike, shape: _ShapeLike | None = None - ) -> tuple[duckarray[Any, _DType_co], _Dims]: + def broadcast_to(self, shape: _ShapeLike) -> duckarray[Any, Any]: + from xarray.core import duck_array_ops # TODO: remove this import + + return duck_array_ops.broadcast_to(self.data, shape) # type: ignore + + def _create_expanded_obj( + self, expanded_data, expanded_dims + ) -> Self: # type: ignore + return self._replace(dims=expanded_dims, data=expanded_data) + + def expand_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: """ - Return a tuple of new namedarray with given set of dimensions and dims - This method might be used to attach new dimension(s) to namedarray. + Expand the dimensions of the object. - When possible, this operation does not copy this namedarray's data. + This method adds new dimensions to the object and optionally broadcasts + the data to the new shape if provided. - Parameters - ---------- - dims : str or sequence of str or dict - Dimensions to include on the new namedarray. If a dict, values are used to - provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + Parameters + ---------- + dims : str or sequence of str or dict + Dimensions to include on the new object (must be a superset of the existing dimensions). + If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. - shape : sequence of int, optional - Shape of the new namedarray. 
If not provided, the shape is inferred from the data. + shape : sequence of int, optional + Shape to broadcast the data to. Must be specified in the same order as `dims`. + If not provided, new dimensions are inserted with length 1. """ - from xarray.core import duck_array_ops # TODO: remove this import + if isinstance(dims, str): + dims = [dims] if shape is None and is_dict_like(dims): shape = list(dims.values()) @@ -947,38 +957,17 @@ def _get_expanded_data_and_dims( # don't use broadcast_to unless necessary so the result remains # writeable if possible expanded_data = self.data + elif shape is not None: dims_map = dict(zip(dims, cast(Iterable[SupportsIndex], shape))) temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) - expanded_data = duck_array_ops.broadcast_to(self.data, temporary_shape) # type: ignore + expanded_data = self.broadcast_to(temporary_shape) + else: expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] # type: ignore - return expanded_data, expanded_dims - - def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: - """ - Return a new namedarray with given set of dimensions. - This method might be used to attach new dimension(s) to namedarray. - - When possible, this operation does not copy this namedarray's data. - - Parameters - ---------- - dims : str or sequence of str or dict - Dimensions to include on the new namedarray. If a dict, values are used to - provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. - - shape : sequence of int, optional - Shape of the new namedarray. If not provided, the shape is inferred from the data. - """ - - if isinstance(dims, str): - dims = [dims] - - expanded_data, expanded_dims = self._get_expanded_data_and_dims(dims, shape) - expanded_obj = self._replace(data=expanded_data, dims=expanded_dims) - return expanded_obj.transpose(*dims) + expanded_obj = self._create_expanded_obj(expanded_data, expanded_dims) + return expanded_obj.transpose(*dims) # type: ignore _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 320ba999318..275e4df3186 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -746,7 +746,7 @@ def test_groupby_dataset_reduce() -> None: ) expected = data.mean("y") - expected["yonly"] = expected["yonly"].variable.set_dims({"x": 3}) + expected["yonly"] = expected["yonly"].variable.expand_dims({"x": 3}) actual = data.groupby("x").mean(...) 
assert_allclose(expected, actual) @@ -757,7 +757,7 @@ def test_groupby_dataset_reduce() -> None: expected = Dataset( { "xy": data["xy"].groupby(letters).mean(...), - "xonly": (data["xonly"].mean().variable.set_dims({"letters": 2})), + "xonly": (data["xonly"].mean().variable.expand_dims({"letters": 2})), "yonly": data["yonly"].groupby(letters).mean(), } ) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 7e1105e2e5d..a8a8f2993db 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2153,9 +2153,9 @@ def test_set_dims(self, dtype): dims = {"z": 6, "x": 3, "a": 1, "b": 4, "y": 10} expected = attach_units( - strip_units(variable).set_dims(dims), extract_units(variable) + strip_units(variable).expand_dims(dims), extract_units(variable) ) - actual = variable.set_dims(dims) + actual = variable.expand_dims(dims) assert_units_equal(expected, actual) assert_identical(expected, actual) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 8a73e435977..a8674afa623 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -466,7 +466,7 @@ def test_encoding_preserved(self): expected[...], expected.squeeze(), expected.isel(x=slice(None)), - expected.set_dims({"x": 3}), + expected.expand_dims({"x": 3}), expected.copy(deep=True), expected.copy(deep=False), ]: @@ -1608,28 +1608,28 @@ def test_get_axis_num(self): def test_set_dims(self): v = Variable(["x"], [0, 1]) - actual = v.set_dims(["x", "y"]) + actual = v.expand_dims(["x", "y"]) expected = Variable(["x", "y"], [[0], [1]]) assert_identical(actual, expected) - actual = v.set_dims(["y", "x"]) + actual = v.expand_dims(["y", "x"]) assert_identical(actual, expected.T) - actual = v.set_dims({"x": 2, "y": 2}) + actual = v.expand_dims({"x": 2, "y": 2}) expected = Variable(["x", "y"], [[0, 0], [1, 1]]) assert_identical(actual, expected) v = Variable(["foo"], [0, 1]) - actual = v.set_dims("foo") + actual = v.expand_dims("foo") expected = v assert_identical(actual, expected) with pytest.raises(ValueError, match=r"must be a superset"): - v.set_dims(["z"]) + v.expand_dims(["z"]) def test_set_dims_object_dtype(self): v = Variable([], ("a", 1)) - actual = v.set_dims(("x",), (3,)) + actual = v.expand_dims(("x",), (3,)) exp_values = np.empty((3,), dtype=object) for i in range(3): exp_values[i] = ("a", 1) From 00504f44c802945f15e9624bbfe213ee5541ff3f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 26 Oct 2023 22:26:29 -0700 Subject: [PATCH 07/53] more typing fixes --- xarray/namedarray/core.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 791d330d758..3c49ee80e10 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -916,8 +916,8 @@ def broadcast_to(self, shape: _ShapeLike) -> duckarray[Any, Any]: return duck_array_ops.broadcast_to(self.data, shape) # type: ignore def _create_expanded_obj( - self, expanded_data, expanded_dims - ) -> Self: # type: ignore + self, expanded_data: duckarray[Any, Any], expanded_dims: _DimsLike + ) -> Self: return self._replace(dims=expanded_dims, data=expanded_data) def expand_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: @@ -967,7 +967,7 @@ def expand_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] # type: ignore expanded_obj = self._create_expanded_obj(expanded_data, expanded_dims) - return 
expanded_obj.transpose(*dims) # type: ignore + return expanded_obj.transpose(*dims) _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] From 5a06decce721acac03d91f1073c1a3d0138bc880 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 11:21:27 -0700 Subject: [PATCH 08/53] update whats-new --- doc/api-hidden.rst | 3 ++- doc/whats-new.rst | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index c96b0aa5c3b..fcb911291b1 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -208,6 +208,7 @@ Variable.argsort Variable.astype Variable.broadcast_equals + Variable.broadcast_to Variable.chunk Variable.clip Variable.coarsen @@ -220,6 +221,7 @@ Variable.cumprod Variable.cumsum Variable.equals + Variable.expand_dims Variable.fillna Variable.get_axis_num Variable.identical @@ -242,7 +244,6 @@ Variable.rolling_window Variable.round Variable.searchsorted - Variable.set_dims Variable.shift Variable.squeeze Variable.stack diff --git a/doc/whats-new.rst b/doc/whats-new.rst index c88f685b0ba..77e4d45caa2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,6 +26,9 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- Split :py:meth:`Variable.set_dims` into :py:meth:`Variable.expand_dims` and :py:meth:`Variable.broadcast_to` + (:pull:`8380`) By `Anderson Banihirwe `_. + Deprecations ~~~~~~~~~~~~ From ec1748938a2db7fb661301178975b30854f52c09 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 11:37:31 -0700 Subject: [PATCH 09/53] update tests --- xarray/tests/test_sparse.py | 2 +- xarray/tests/test_units.py | 2 +- xarray/tests/test_variable.py | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index 489836b70fd..a9f52c995a4 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -109,7 +109,7 @@ def test_variable_property(prop): (do("notnull"), True), (do("roll"), True), (do("round"), True), - (do("set_dims", dims=("x", "y", "z")), True), + (do("expand_dims", dims=("x", "y", "z")), True), (do("stack", dimensions={"flat": ("x", "y")}), True), (do("to_base_variable"), True), (do("transpose"), True), diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index a8a8f2993db..b0b66c57d15 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2147,7 +2147,7 @@ def test_concat(self, unit, error, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) - def test_set_dims(self, dtype): + def test_expand_dims(self, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index a8674afa623..bccd0b216e4 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1606,7 +1606,7 @@ def test_get_axis_num(self): with pytest.raises(ValueError, match=r"not found in array dim"): v.get_axis_num("foobar") - def test_set_dims(self): + def test_expand_dims(self): v = Variable(["x"], [0, 1]) actual = v.expand_dims(["x", "y"]) expected = Variable(["x", "y"], [[0], [1]]) @@ -1627,7 +1627,7 @@ def test_set_dims(self): with pytest.raises(ValueError, match=r"must be a superset"): v.expand_dims(["z"]) - def test_set_dims_object_dtype(self): + def test_expand_dims_object_dtype(self): v = Variable([], ("a", 1)) actual = v.expand_dims(("x",), (3,)) exp_values = np.empty((3,), dtype=object) From 
a245021c7a4b5c148ad10909636d83a65d878446 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 12:01:23 -0700 Subject: [PATCH 10/53] doc fixes --- doc/api-hidden.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index fcb911291b1..873653ea920 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -295,6 +295,7 @@ IndexVariable.cumprod IndexVariable.cumsum IndexVariable.equals + IndexVariable.expand_dims IndexVariable.fillna IndexVariable.get_axis_num IndexVariable.get_level_variable @@ -318,7 +319,6 @@ IndexVariable.rolling_window IndexVariable.round IndexVariable.searchsorted - IndexVariable.set_dims IndexVariable.shift IndexVariable.squeeze IndexVariable.stack From 447f226751f96ed4902fe2ac26694352590a3bb6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 14:39:55 -0700 Subject: [PATCH 11/53] update whats-new --- doc/whats-new.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 77e4d45caa2..7fb995a0ec2 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,7 +26,7 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ -- Split :py:meth:`Variable.set_dims` into :py:meth:`Variable.expand_dims` and :py:meth:`Variable.broadcast_to` +- Split ``Variable.set_dims`` into :py:meth:`Variable.expand_dims` and :py:meth:`Variable.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. @@ -3533,7 +3533,7 @@ Breaking changes - ``xarray.broadcast_array`` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) - ``Variable.expand_dims`` is removed (previously deprecated in favor of - :py:meth:`Variable.set_dims`) + ``Variable.set_dims``) New functions/methods ~~~~~~~~~~~~~~~~~~~~~ From 9ad56a96997f22c6b8c3593a50cbf5d19e16db36 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 15:11:10 -0700 Subject: [PATCH 12/53] keep `.set_dims()` on `Variable()` --- doc/api-hidden.rst | 4 ++-- doc/whats-new.rst | 17 +++++++++-------- xarray/core/alignment.py | 2 +- xarray/core/concat.py | 6 +++--- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 20 ++++++++++---------- xarray/core/groupby.py | 4 ++-- xarray/core/merge.py | 2 +- xarray/core/variable.py | 9 +++------ xarray/namedarray/core.py | 17 +++++++++-------- xarray/tests/test_groupby.py | 4 ++-- xarray/tests/test_units.py | 4 ++-- xarray/tests/test_variable.py | 14 +++++++------- 13 files changed, 56 insertions(+), 57 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index 873653ea920..b4aaf617d5e 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -221,7 +221,7 @@ Variable.cumprod Variable.cumsum Variable.equals - Variable.expand_dims + Variable.set_dims Variable.fillna Variable.get_axis_num Variable.identical @@ -295,7 +295,7 @@ IndexVariable.cumprod IndexVariable.cumsum IndexVariable.equals - IndexVariable.expand_dims + IndexVariable.set_dims IndexVariable.fillna IndexVariable.get_axis_num IndexVariable.get_level_variable diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7fb995a0ec2..09298fa26df 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -22,12 +22,13 @@ v2023.10.2 (unreleased) New Features ~~~~~~~~~~~~ +- Add :py:meth:`NamedArray.expand_dims` and :py:meth:`NamedArray.broadcast_to` + (:pull:`8380`) By `Anderson Banihirwe `_. 
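# --- Editor's illustrative sketch (not part of the patch): rough usage of the
# NamedArray methods this series introduces -- expand_dims, broadcast_to,
# transpose and .T. Assumes the NamedArray(dims, data) constructor from
# xarray.namedarray.core; behaviour follows the implementation shown in the
# hunks of this series and may change in later commits of the PR.
import numpy as np

from xarray.namedarray.core import NamedArray

arr = NamedArray(("x",), np.array([0, 1, 2]))

# dims must be a superset of the existing dims; new dims are inserted with
# length 1 when no shape (or dict of sizes) is given.
expanded = arr.expand_dims(["y", "x"])      # dims ("y", "x"), shape (1, 3)

# Broadcast the underlying data to an explicit shape; as of this patch,
# broadcast_to returns a new NamedArray rather than a bare duck array
# (see the namedarray/core.py hunk further below).
wide = expanded.broadcast_to((4, 3))        # shape (4, 3)

# Reorder dimensions explicitly, or reverse them with .T.
transposed = expanded.transpose("x", "y")   # dims ("x", "y")
reversed_dims = expanded.T                  # same as expanded.transpose()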
+ Breaking changes ~~~~~~~~~~~~~~~~ -- Split ``Variable.set_dims`` into :py:meth:`Variable.expand_dims` and :py:meth:`Variable.broadcast_to` - (:pull:`8380`) By `Anderson Banihirwe `_. Deprecations @@ -351,7 +352,7 @@ Documentation - Added page on the internal design of xarray objects. (:pull:`7991`) By `Tom Nicholas `_. - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, - :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`,:py:meth:`Dataset.drop_vars` + :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.set_dims`,:py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. - Add docstrings for the :py:class:`Index` base class and add some documentation on how to create custom, Xarray-compatible indexes (:pull:`6975`) @@ -396,7 +397,7 @@ Documentation ~~~~~~~~~~~~~ - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, - :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`,:py:meth:`Dataset.drop_vars` + :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.set_dims`,:py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. - Added page on wrapping chunked numpy-like arrays as alternatives to dask arrays. (:pull:`7951`) By `Tom Nicholas `_. @@ -1128,7 +1129,7 @@ Documentation By `Zach Moon `_. - Raise a more informative error when trying to open a non-existent zarr store. (:issue:`6484`, :pull:`7060`) By `Sam Levang `_. -- Added examples to docstrings for :py:meth:`DataArray.expand_dims`, :py:meth:`DataArray.drop_duplicates`, :py:meth:`DataArray.reset_coords`, :py:meth:`DataArray.equals`, :py:meth:`DataArray.identical`, :py:meth:`DataArray.broadcast_equals`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.dropna`, :py:meth:`DataArray.drop_isel`, :py:meth:`DataArray.drop_sel`, :py:meth:`DataArray.head`, :py:meth:`DataArray.tail`. (:issue:`5816`, :pull:`7088`) +- Added examples to docstrings for :py:meth:`DataArray.set_dims`, :py:meth:`DataArray.drop_duplicates`, :py:meth:`DataArray.reset_coords`, :py:meth:`DataArray.equals`, :py:meth:`DataArray.identical`, :py:meth:`DataArray.broadcast_equals`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.dropna`, :py:meth:`DataArray.drop_isel`, :py:meth:`DataArray.drop_sel`, :py:meth:`DataArray.head`, :py:meth:`DataArray.tail`. (:issue:`5816`, :pull:`7088`) By `Patrick Naylor `_. - Add missing docstrings to various array properties. (:pull:`7090`) By `Tom Nicholas `_. @@ -3533,7 +3534,7 @@ Breaking changes - ``xarray.broadcast_array`` is removed (previously deprecated in favor of :py:func:`~xarray.broadcast`) - ``Variable.expand_dims`` is removed (previously deprecated in favor of - ``Variable.set_dims``) + :py:meth:`Variable.set_dims`) New functions/methods ~~~~~~~~~~~~~~~~~~~~~ @@ -3640,7 +3641,7 @@ Bug fixes - Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert unicode indices to dtype=object (:issue:`3094`). By `Guido Imperiale `_. -- Improved error handling and documentation for `.expand_dims()` +- Improved error handling and documentation for `.set_dims()` read-only view. - Fix tests for big-endian systems (:issue:`3125`). By `Graham Inggs `_. @@ -5464,7 +5465,7 @@ Enhancements enable persisting data in distributed memory when using Dask (:issue:`1344`). 
By `Matthew Rocklin `_. -- New :py:meth:`~xarray.DataArray.expand_dims` method for ``DataArray`` and +- New :py:meth:`~xarray.DataArray.set_dims` method for ``DataArray`` and ``Dataset`` (:issue:`1326`). By `Keisuke Fujii `_. diff --git a/xarray/core/alignment.py b/xarray/core/alignment.py index 31daa6fbed8..732ec5d3ea6 100644 --- a/xarray/core/alignment.py +++ b/xarray/core/alignment.py @@ -1068,7 +1068,7 @@ def _set_dims(var): # ignore dim not in var.dims var_dims_map[dim] = var.shape[var.dims.index(dim)] - return var.expand_dims(var_dims_map) + return var.set_dims(var_dims_map) def _broadcast_array(array: T_DataArray) -> T_DataArray: data = _set_dims(array.variable) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index a7305776feb..cfb87254f48 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -504,7 +504,7 @@ def _dataset_concat( # case where concat dimension is a coordinate or data_var but not a dimension if (dim in coord_names or dim in data_names) and dim not in dim_names: - datasets = [ds.expand_dims(dim) for ds in datasets] + datasets = [ds.set_dims(dim) for ds in datasets] # determine which variables to concatenate concat_over, equals, concat_dim_lengths = _calc_concat_over( @@ -552,7 +552,7 @@ def ensure_common_dims(vars, concat_dim_lengths): for var, dim_len in zip(vars, concat_dim_lengths): if var.dims != common_dims: common_shape = tuple(dims_sizes.get(d, dim_len) for d in common_dims) - var = var.expand_dims(common_dims, common_shape) + var = var.set_dims(common_dims, common_shape) yield var # get the indexes to concatenate together, create a PandasIndex @@ -567,7 +567,7 @@ def get_indexes(name): elif name == dim: var = ds._variables[name] if not var.dims: - data = var.expand_dims(dim).values + data = var.set_dims(dim).values yield PandasIndex(data, dim, coord_dtype=var.dtype) # create concatenation index, needed for later reindexing diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index eada9ca290c..8c9a08f2e37 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2537,7 +2537,7 @@ def expand_dims( See Also -------- - Dataset.expand_dims + Dataset.set_dims Examples -------- @@ -2549,13 +2549,13 @@ def expand_dims( Add new dimension of length 2: - >>> da.expand_dims(dim={"y": 2}) + >>> da.set_dims(dim={"y": 2}) array([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) Dimensions without coordinates: y, x - >>> da.expand_dims(dim={"y": 2}, axis=1) + >>> da.set_dims(dim={"y": 2}, axis=1) array([[0, 0], [1, 1], @@ -2566,7 +2566,7 @@ def expand_dims( Add a new dimension with coordinates from array: - >>> da.expand_dims(dim={"y": np.arange(5)}, axis=0) + >>> da.set_dims(dim={"y": np.arange(5)}, axis=0) array([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], @@ -2587,7 +2587,7 @@ def expand_dims( dim = {dim: 1} dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") - ds = self._to_temp_dataset().expand_dims(dim, axis) + ds = self._to_temp_dataset().set_dims(dim, axis) return self._from_temp_dataset(ds) def set_index( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 0e79b924acd..3280308cd95 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4482,7 +4482,7 @@ def expand_dims( # Expand the dataset with a new dimension called "time" - >>> dataset.expand_dims(dim="time") + >>> dataset.set_dims(dim="time") Dimensions: (time: 1) Dimensions without coordinates: time @@ -4502,7 +4502,7 @@ def expand_dims( # Expand the dataset with a new dimension called "time" using axis argument - >>> dataset_1d.expand_dims(dim="time", 
axis=0) + >>> dataset_1d.set_dims(dim="time", axis=0) Dimensions: (time: 1, x: 3) Dimensions without coordinates: time, x @@ -4522,7 +4522,7 @@ def expand_dims( # Expand the dataset with a new dimension called "time" using axis argument - >>> dataset_2d.expand_dims(dim="time", axis=2) + >>> dataset_2d.set_dims(dim="time", axis=2) Dimensions: (y: 3, x: 4, time: 1) Dimensions without coordinates: y, x, time @@ -4531,7 +4531,7 @@ def expand_dims( See Also -------- - DataArray.expand_dims + DataArray.set_dims """ if dim is None: pass @@ -4614,12 +4614,12 @@ def expand_dims( all_dims = list(zip(v.dims, v.shape)) for d, c in zip_axis_dim: all_dims.insert(d, c) - variables[k] = v.expand_dims(dict(all_dims)) + variables[k] = v.set_dims(dict(all_dims)) else: if k not in variables: # If dims includes a label of a non-dimension coordinate, # it will be promoted to a 1D coordinate with a single value. - index, index_vars = create_default_index_implicit(v.expand_dims(k)) + index, index_vars = create_default_index_implicit(v.set_dims(k)) indexes[k] = index variables.update(index_vars) @@ -5142,7 +5142,7 @@ def _stack_once( add_dims = [d for d in dims if d not in var.dims] vdims = list(var.dims) + add_dims shape = [self.dims[d] for d in vdims] - exp_var = var.expand_dims(vdims, shape) + exp_var = var.set_dims(vdims, shape) stacked_var = exp_var.stack(**{new_dim: dims}) # type: ignore new_variables[name] = stacked_var stacked_var_names.append(name) @@ -5327,7 +5327,7 @@ def stack_dataarray(da): return ( da.assign_coords(**missing_stack_coords) - .expand_dims(missing_stack_dims) + .set_dims(missing_stack_dims) .stack({new_dim: (variable_dim,) + stacking_dims}) ) @@ -7090,7 +7090,7 @@ def to_pandas(self) -> pd.Series | pd.DataFrame: def _to_dataframe(self, ordered_dims: Mapping[Any, int]): columns = [k for k in self.variables if k not in self.dims] data = [ - self._variables[k].expand_dims(ordered_dims).values.reshape(-1) + self._variables[k].set_dims(ordered_dims).values.reshape(-1) for k in columns ] index = self.coords.to_index([*ordered_dims]) @@ -7336,7 +7336,7 @@ def to_dask_dataframe( var = var.chunk() # Broadcast then flatten the array: - var_new_dims = var.expand_dims(ordered_dims).chunk(ds_chunks) + var_new_dims = var.set_dims(ordered_dims).chunk(ds_chunks) dask_array = var_new_dims._data.reshape(-1) # type: ignore series = dd.from_dask_array(dask_array, columns=name, meta=df_meta) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ded765780a3..af1d32fa6ac 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -872,7 +872,7 @@ def _binary_op(self, other, f, reflexive=False): for var in other.coords: if other[var].ndim == 0: other[var] = ( - other[var].drop_vars(var).expand_dims({name: other.sizes[name]}) + other[var].drop_vars(var).set_dims({name: other.sizes[name]}) ) # need to handle NaNs in group or elements that don't belong to any bins @@ -1045,7 +1045,7 @@ def _flox_reduce( # broadcast and restore non-numeric data variables (backcompat) for name, var in non_numeric.items(): if all(d not in var.dims for d in parsed_dim): - result[name] = var.variable.expand_dims( + result[name] = var.variable.set_dims( (grouper.name,) + var.dims, (result.sizes[grouper.name],) + var.shape, ) diff --git a/xarray/core/merge.py b/xarray/core/merge.py index a3c55e2600c..a8e54ad1231 100644 --- a/xarray/core/merge.py +++ b/xarray/core/merge.py @@ -120,7 +120,7 @@ def unique_variable( if compat == "broadcast_equals": dim_lengths = broadcast_dimension_size(variables) - out = 
out.expand_dims(dim_lengths) + out = out.set_dims(dim_lengths) if compat == "no_conflicts": combine_method = "fillna" diff --git a/xarray/core/variable.py b/xarray/core/variable.py index b3414069ec2..a675f3e2f0f 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -877,7 +877,7 @@ def __setitem__(self, key, value): else: value = Variable(dims[-value.ndim :], value) # broadcast to become assignable - value = value.expand_dims(dims).data + value = value.set_dims(dims).data if new_order: value = duck_array_ops.asarray(value) @@ -2783,9 +2783,7 @@ def _broadcast_compat_variables(*variables): dimensions of size 1 instead of the size of the broadcast dimension. """ dims = tuple(_unified_dims(variables)) - return tuple( - var.expand_dims(dims) if var.dims != dims else var for var in variables - ) + return tuple(var.set_dims(dims) if var.dims != dims else var for var in variables) def broadcast_variables(*variables: Variable) -> tuple[Variable, ...]: @@ -2801,8 +2799,7 @@ def broadcast_variables(*variables: Variable) -> tuple[Variable, ...]: dims_map = _unified_dims(variables) dims_tuple = tuple(dims_map) return tuple( - var.expand_dims(dims_map) if var.dims != dims_tuple else var - for var in variables + var.set_dims(dims_map) if var.dims != dims_tuple else var for var in variables ) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 3c49ee80e10..c2fe807c1d3 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -910,10 +910,11 @@ def transpose( def T(self) -> Self: return self.transpose() - def broadcast_to(self, shape: _ShapeLike) -> duckarray[Any, Any]: + def broadcast_to(self, shape: _ShapeLike) -> NamedArray[Any, _DType_co]: from xarray.core import duck_array_ops # TODO: remove this import - return duck_array_ops.broadcast_to(self.data, shape) # type: ignore + data = duck_array_ops.broadcast_to(self.data, shape) + return self._replace(data=data) def _create_expanded_obj( self, expanded_data: duckarray[Any, Any], expanded_dims: _DimsLike @@ -924,16 +925,16 @@ def expand_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: """ Expand the dimensions of the object. - This method adds new dimensions to the object and optionally broadcasts - the data to the new shape if provided. + This method adds new dimensions to the object and optionally broadcasts + the data to the new shape if provided. - Parameters - ---------- - dims : str or sequence of str or dict + Parameters + ---------- + dims : str or sequence of str or dict Dimensions to include on the new object (must be a superset of the existing dimensions). If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. - shape : sequence of int, optional + shape : sequence of int, optional Shape to broadcast the data to. Must be specified in the same order as `dims`. If not provided, new dimensions are inserted with length 1. """ diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index 275e4df3186..320ba999318 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -746,7 +746,7 @@ def test_groupby_dataset_reduce() -> None: ) expected = data.mean("y") - expected["yonly"] = expected["yonly"].variable.expand_dims({"x": 3}) + expected["yonly"] = expected["yonly"].variable.set_dims({"x": 3}) actual = data.groupby("x").mean(...) 
assert_allclose(expected, actual) @@ -757,7 +757,7 @@ def test_groupby_dataset_reduce() -> None: expected = Dataset( { "xy": data["xy"].groupby(letters).mean(...), - "xonly": (data["xonly"].mean().variable.expand_dims({"letters": 2})), + "xonly": (data["xonly"].mean().variable.set_dims({"letters": 2})), "yonly": data["yonly"].groupby(letters).mean(), } ) diff --git a/xarray/tests/test_units.py b/xarray/tests/test_units.py index b0b66c57d15..93fc1272d5b 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2153,9 +2153,9 @@ def test_expand_dims(self, dtype): dims = {"z": 6, "x": 3, "a": 1, "b": 4, "y": 10} expected = attach_units( - strip_units(variable).expand_dims(dims), extract_units(variable) + strip_units(variable).set_dims(dims), extract_units(variable) ) - actual = variable.expand_dims(dims) + actual = variable.set_dims(dims) assert_units_equal(expected, actual) assert_identical(expected, actual) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index bccd0b216e4..6e06c9e11d1 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -466,7 +466,7 @@ def test_encoding_preserved(self): expected[...], expected.squeeze(), expected.isel(x=slice(None)), - expected.expand_dims({"x": 3}), + expected.set_dims({"x": 3}), expected.copy(deep=True), expected.copy(deep=False), ]: @@ -1608,28 +1608,28 @@ def test_get_axis_num(self): def test_expand_dims(self): v = Variable(["x"], [0, 1]) - actual = v.expand_dims(["x", "y"]) + actual = v.set_dims(["x", "y"]) expected = Variable(["x", "y"], [[0], [1]]) assert_identical(actual, expected) - actual = v.expand_dims(["y", "x"]) + actual = v.set_dims(["y", "x"]) assert_identical(actual, expected.T) - actual = v.expand_dims({"x": 2, "y": 2}) + actual = v.set_dims({"x": 2, "y": 2}) expected = Variable(["x", "y"], [[0, 0], [1, 1]]) assert_identical(actual, expected) v = Variable(["foo"], [0, 1]) - actual = v.expand_dims("foo") + actual = v.set_dims("foo") expected = v assert_identical(actual, expected) with pytest.raises(ValueError, match=r"must be a superset"): - v.expand_dims(["z"]) + v.set_dims(["z"]) def test_expand_dims_object_dtype(self): v = Variable([], ("a", 1)) - actual = v.expand_dims(("x",), (3,)) + actual = v.set_dims(("x",), (3,)) exp_values = np.empty((3,), dtype=object) for i in range(3): exp_values[i] = ("a", 1) From a5918c3d47dad74589cd227c55423dcd0c6d1575 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 15:18:48 -0700 Subject: [PATCH 13/53] update docs --- doc/api-hidden.rst | 59 +++++++++++++++++++++++----------------------- doc/whats-new.rst | 12 +++++----- xarray/__init__.py | 2 ++ 3 files changed, 38 insertions(+), 35 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index b4aaf617d5e..a661dcdc41d 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -208,7 +208,6 @@ Variable.argsort Variable.astype Variable.broadcast_equals - Variable.broadcast_to Variable.chunk Variable.clip Variable.coarsen @@ -221,7 +220,6 @@ Variable.cumprod Variable.cumsum Variable.equals - Variable.set_dims Variable.fillna Variable.get_axis_num Variable.identical @@ -244,6 +242,7 @@ Variable.rolling_window Variable.round Variable.searchsorted + Variable.set_dims Variable.shift Variable.squeeze Variable.stack @@ -353,33 +352,35 @@ IndexVariable.values - namedarray.core.NamedArray.all - namedarray.core.NamedArray.any - namedarray.core.NamedArray.attrs - namedarray.core.NamedArray.chunks - namedarray.core.NamedArray.chunksizes - 
namedarray.core.NamedArray.copy - namedarray.core.NamedArray.count - namedarray.core.NamedArray.cumprod - namedarray.core.NamedArray.cumsum - namedarray.core.NamedArray.data - namedarray.core.NamedArray.dims - namedarray.core.NamedArray.dtype - namedarray.core.NamedArray.get_axis_num - namedarray.core.NamedArray.max - namedarray.core.NamedArray.mean - namedarray.core.NamedArray.median - namedarray.core.NamedArray.min - namedarray.core.NamedArray.nbytes - namedarray.core.NamedArray.ndim - namedarray.core.NamedArray.prod - namedarray.core.NamedArray.reduce - namedarray.core.NamedArray.shape - namedarray.core.NamedArray.size - namedarray.core.NamedArray.sizes - namedarray.core.NamedArray.std - namedarray.core.NamedArray.sum - namedarray.core.NamedArray.var + NamedArray.all + NamedArray.any + NamedArray.attrs + NamedArray.broadcast_to + NamedArray.chunks + NamedArray.chunksizes + NamedArray.copy + NamedArray.count + NamedArray.cumprod + NamedArray.cumsum + NamedArray.data + NamedArray.dims + NamedArray.dtype + NamedArray.expand_dims + NamedArray.get_axis_num + NamedArray.max + NamedArray.mean + NamedArray.median + NamedArray.min + NamedArray.nbytes + NamedArray.ndim + NamedArray.prod + NamedArray.reduce + NamedArray.shape + NamedArray.size + NamedArray.sizes + NamedArray.std + NamedArray.sum + NamedArray.var plot.plot diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 09298fa26df..7d2c554250e 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -26,11 +26,11 @@ New Features (:pull:`8380`) By `Anderson Banihirwe `_. + Breaking changes ~~~~~~~~~~~~~~~~ - Deprecations ~~~~~~~~~~~~ @@ -352,7 +352,7 @@ Documentation - Added page on the internal design of xarray objects. (:pull:`7991`) By `Tom Nicholas `_. - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, - :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.set_dims`,:py:meth:`Dataset.drop_vars` + :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`,:py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. - Add docstrings for the :py:class:`Index` base class and add some documentation on how to create custom, Xarray-compatible indexes (:pull:`6975`) @@ -397,7 +397,7 @@ Documentation ~~~~~~~~~~~~~ - Added examples to docstrings of :py:meth:`Dataset.assign_attrs`, :py:meth:`Dataset.broadcast_equals`, - :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.set_dims`,:py:meth:`Dataset.drop_vars` + :py:meth:`Dataset.equals`, :py:meth:`Dataset.identical`, :py:meth:`Dataset.expand_dims`,:py:meth:`Dataset.drop_vars` (:issue:`6793`, :pull:`7937`) By `Harshitha `_. - Added page on wrapping chunked numpy-like arrays as alternatives to dask arrays. (:pull:`7951`) By `Tom Nicholas `_. @@ -1129,7 +1129,7 @@ Documentation By `Zach Moon `_. - Raise a more informative error when trying to open a non-existent zarr store. (:issue:`6484`, :pull:`7060`) By `Sam Levang `_. -- Added examples to docstrings for :py:meth:`DataArray.set_dims`, :py:meth:`DataArray.drop_duplicates`, :py:meth:`DataArray.reset_coords`, :py:meth:`DataArray.equals`, :py:meth:`DataArray.identical`, :py:meth:`DataArray.broadcast_equals`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.dropna`, :py:meth:`DataArray.drop_isel`, :py:meth:`DataArray.drop_sel`, :py:meth:`DataArray.head`, :py:meth:`DataArray.tail`. 
(:issue:`5816`, :pull:`7088`) +- Added examples to docstrings for :py:meth:`DataArray.expand_dims`, :py:meth:`DataArray.drop_duplicates`, :py:meth:`DataArray.reset_coords`, :py:meth:`DataArray.equals`, :py:meth:`DataArray.identical`, :py:meth:`DataArray.broadcast_equals`, :py:meth:`DataArray.bfill`, :py:meth:`DataArray.ffill`, :py:meth:`DataArray.fillna`, :py:meth:`DataArray.dropna`, :py:meth:`DataArray.drop_isel`, :py:meth:`DataArray.drop_sel`, :py:meth:`DataArray.head`, :py:meth:`DataArray.tail`. (:issue:`5816`, :pull:`7088`) By `Patrick Naylor `_. - Add missing docstrings to various array properties. (:pull:`7090`) By `Tom Nicholas `_. @@ -3641,7 +3641,7 @@ Bug fixes - Fix regression introduced in v0.12.2 where ``copy(deep=True)`` would convert unicode indices to dtype=object (:issue:`3094`). By `Guido Imperiale `_. -- Improved error handling and documentation for `.set_dims()` +- Improved error handling and documentation for `.expand_dims()` read-only view. - Fix tests for big-endian systems (:issue:`3125`). By `Graham Inggs `_. @@ -5465,7 +5465,7 @@ Enhancements enable persisting data in distributed memory when using Dask (:issue:`1344`). By `Matthew Rocklin `_. -- New :py:meth:`~xarray.DataArray.set_dims` method for ``DataArray`` and +- New :py:meth:`~xarray.DataArray.expand_dims` method for ``DataArray`` and ``Dataset`` (:issue:`1326`). By `Keisuke Fujii `_. diff --git a/xarray/__init__.py b/xarray/__init__.py index 1fd3b0c4336..91613e8cbbc 100644 --- a/xarray/__init__.py +++ b/xarray/__init__.py @@ -41,6 +41,7 @@ from xarray.core.options import get_options, set_options from xarray.core.parallel import map_blocks from xarray.core.variable import IndexVariable, Variable, as_variable +from xarray.namedarray.core import NamedArray from xarray.util.print_versions import show_versions try: @@ -104,6 +105,7 @@ "IndexSelResult", "IndexVariable", "Variable", + "NamedArray", # Exceptions "MergeError", "SerializationWarning", From 80912656a289be14e0f15c852a7f5df529bafc47 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 15:28:54 -0700 Subject: [PATCH 14/53] revert to set_dims --- xarray/core/concat.py | 2 +- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 14 +++++++------- xarray/core/groupby.py | 2 +- xarray/tests/test_sparse.py | 2 +- xarray/tests/test_units.py | 2 +- xarray/tests/test_variable.py | 4 ++-- 7 files changed, 18 insertions(+), 18 deletions(-) diff --git a/xarray/core/concat.py b/xarray/core/concat.py index cfb87254f48..a136480b2fb 100644 --- a/xarray/core/concat.py +++ b/xarray/core/concat.py @@ -504,7 +504,7 @@ def _dataset_concat( # case where concat dimension is a coordinate or data_var but not a dimension if (dim in coord_names or dim in data_names) and dim not in dim_names: - datasets = [ds.set_dims(dim) for ds in datasets] + datasets = [ds.expand_dims(dim) for ds in datasets] # determine which variables to concatenate concat_over, equals, concat_dim_lengths = _calc_concat_over( diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 8c9a08f2e37..eada9ca290c 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -2537,7 +2537,7 @@ def expand_dims( See Also -------- - Dataset.set_dims + Dataset.expand_dims Examples -------- @@ -2549,13 +2549,13 @@ def expand_dims( Add new dimension of length 2: - >>> da.set_dims(dim={"y": 2}) + >>> da.expand_dims(dim={"y": 2}) array([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]) Dimensions without coordinates: y, x - >>> da.set_dims(dim={"y": 2}, axis=1) + >>> 
da.expand_dims(dim={"y": 2}, axis=1) array([[0, 0], [1, 1], @@ -2566,7 +2566,7 @@ def expand_dims( Add a new dimension with coordinates from array: - >>> da.set_dims(dim={"y": np.arange(5)}, axis=0) + >>> da.expand_dims(dim={"y": np.arange(5)}, axis=0) array([[0, 1, 2, 3, 4], [0, 1, 2, 3, 4], @@ -2587,7 +2587,7 @@ def expand_dims( dim = {dim: 1} dim = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") - ds = self._to_temp_dataset().set_dims(dim, axis) + ds = self._to_temp_dataset().expand_dims(dim, axis) return self._from_temp_dataset(ds) def set_index( diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 3280308cd95..661050b39c4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -4482,7 +4482,7 @@ def expand_dims( # Expand the dataset with a new dimension called "time" - >>> dataset.set_dims(dim="time") + >>> dataset.expand_dims(dim="time") Dimensions: (time: 1) Dimensions without coordinates: time @@ -4502,7 +4502,7 @@ def expand_dims( # Expand the dataset with a new dimension called "time" using axis argument - >>> dataset_1d.set_dims(dim="time", axis=0) + >>> dataset_1d.expand_dims(dim="time", axis=0) Dimensions: (time: 1, x: 3) Dimensions without coordinates: time, x @@ -4522,7 +4522,7 @@ def expand_dims( # Expand the dataset with a new dimension called "time" using axis argument - >>> dataset_2d.set_dims(dim="time", axis=2) + >>> dataset_2d.expand_dims(dim="time", axis=2) Dimensions: (y: 3, x: 4, time: 1) Dimensions without coordinates: y, x, time @@ -4531,7 +4531,7 @@ def expand_dims( See Also -------- - DataArray.set_dims + DataArray.expand_dims """ if dim is None: pass @@ -5143,7 +5143,7 @@ def _stack_once( vdims = list(var.dims) + add_dims shape = [self.dims[d] for d in vdims] exp_var = var.set_dims(vdims, shape) - stacked_var = exp_var.stack(**{new_dim: dims}) # type: ignore + stacked_var = exp_var.stack(**{new_dim: dims}) new_variables[name] = stacked_var stacked_var_names.append(name) else: @@ -5327,7 +5327,7 @@ def stack_dataarray(da): return ( da.assign_coords(**missing_stack_coords) - .set_dims(missing_stack_dims) + .expand_dims(missing_stack_dims) .stack({new_dim: (variable_dim,) + stacking_dims}) ) @@ -7337,7 +7337,7 @@ def to_dask_dataframe( # Broadcast then flatten the array: var_new_dims = var.set_dims(ordered_dims).chunk(ds_chunks) - dask_array = var_new_dims._data.reshape(-1) # type: ignore + dask_array = var_new_dims._data.reshape(-1) series = dd.from_dask_array(dask_array, columns=name, meta=df_meta) series_list.append(series) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index af1d32fa6ac..090cb4729bd 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -872,7 +872,7 @@ def _binary_op(self, other, f, reflexive=False): for var in other.coords: if other[var].ndim == 0: other[var] = ( - other[var].drop_vars(var).set_dims({name: other.sizes[name]}) + other[var].drop_vars(var).expand_dims({name: other.sizes[name]}) ) # need to handle NaNs in group or elements that don't belong to any bins diff --git a/xarray/tests/test_sparse.py b/xarray/tests/test_sparse.py index a9f52c995a4..489836b70fd 100644 --- a/xarray/tests/test_sparse.py +++ b/xarray/tests/test_sparse.py @@ -109,7 +109,7 @@ def test_variable_property(prop): (do("notnull"), True), (do("roll"), True), (do("round"), True), - (do("expand_dims", dims=("x", "y", "z")), True), + (do("set_dims", dims=("x", "y", "z")), True), (do("stack", dimensions={"flat": ("x", "y")}), True), (do("to_base_variable"), True), (do("transpose"), True), diff --git 
a/xarray/tests/test_units.py b/xarray/tests/test_units.py index 93fc1272d5b..7e1105e2e5d 100644 --- a/xarray/tests/test_units.py +++ b/xarray/tests/test_units.py @@ -2147,7 +2147,7 @@ def test_concat(self, unit, error, dtype): assert_units_equal(expected, actual) assert_identical(expected, actual) - def test_expand_dims(self, dtype): + def test_set_dims(self, dtype): array = np.linspace(0, 5, 3 * 10).reshape(3, 10).astype(dtype) * unit_registry.m variable = xr.Variable(("x", "y"), array) diff --git a/xarray/tests/test_variable.py b/xarray/tests/test_variable.py index 6e06c9e11d1..8a73e435977 100644 --- a/xarray/tests/test_variable.py +++ b/xarray/tests/test_variable.py @@ -1606,7 +1606,7 @@ def test_get_axis_num(self): with pytest.raises(ValueError, match=r"not found in array dim"): v.get_axis_num("foobar") - def test_expand_dims(self): + def test_set_dims(self): v = Variable(["x"], [0, 1]) actual = v.set_dims(["x", "y"]) expected = Variable(["x", "y"], [[0], [1]]) @@ -1627,7 +1627,7 @@ def test_expand_dims(self): with pytest.raises(ValueError, match=r"must be a superset"): v.set_dims(["z"]) - def test_expand_dims_object_dtype(self): + def test_set_dims_object_dtype(self): v = Variable([], ("a", 1)) actual = v.set_dims(("x",), (3,)) exp_values = np.empty((3,), dtype=object) From 2b932737b76c06232014138e6412aaf7b02ffe3a Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 16:01:10 -0700 Subject: [PATCH 15/53] revert to .set_dims on Variable --- xarray/core/variable.py | 20 ++++++++++++++++++++ xarray/namedarray/core.py | 3 +-- xarray/namedarray/utils.py | 22 ++++++++++------------ 3 files changed, 31 insertions(+), 14 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index a675f3e2f0f..897e3f2dcf1 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -45,6 +45,7 @@ is_duck_array, maybe_coerce_to_str, ) +from xarray.namedarray._typing import _DimsLike, _ShapeLike from xarray.namedarray.core import NamedArray NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -1381,6 +1382,25 @@ def roll(self, shifts=None, **shifts_kwargs): result = result._roll_one_dim(dim, count) return result + def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: + """ + Return a new variable with given set of dimensions. + This method might be used to attach new dimension(s) to variable. + When possible, this operation does not copy this variable's data. + + Parameters + ---------- + dims : str or sequence of str or dict + Dimensions to include on the new object (must be a superset of the existing dimensions). + If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + + shape : sequence of int, optional + Shape to broadcast the data to. Must be specified in the same order as `dims`. + If not provided, new dimensions are inserted with length 1. 
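A minimal doctest-style sketch of the `set_dims` behaviour documented above, with illustrative values only: a plain sequence of names inserts the new dimensions with length 1, while a dict of sizes broadcasts the data.

>>> import numpy as np
>>> import xarray as xr
>>> v = xr.Variable(("x",), np.array([1, 2, 3]))
>>> v.set_dims(("y", "x")).dims  # new dims default to length 1
('y', 'x')
>>> v.set_dims({"y": 2, "x": 3}).shape  # dict values broadcast the data
(2, 3)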
+ """ + + return self.expand_dims(dims, shape) + def _stack_once(self, dims: list[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): raise ValueError(f"invalid existing dimensions: {dims}") diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index c2fe807c1d3..e90858e4a9c 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -913,8 +913,7 @@ def T(self) -> Self: def broadcast_to(self, shape: _ShapeLike) -> NamedArray[Any, _DType_co]: from xarray.core import duck_array_ops # TODO: remove this import - data = duck_array_ops.broadcast_to(self.data, shape) - return self._replace(data=data) + return duck_array_ops.broadcast_to(self.data, shape) def _create_expanded_obj( self, expanded_data: duckarray[Any, Any], expanded_dims: _DimsLike diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index ca8d7f62176..b5119374fa0 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -2,13 +2,13 @@ import sys import warnings -from collections.abc import Collection, Hashable, Iterable, Iterator, Mapping +from collections.abc import Hashable, Iterator, Mapping from enum import Enum from typing import TYPE_CHECKING, Any, Final import numpy as np -from xarray.namedarray._typing import ErrorOptionsWithWarn +from xarray.namedarray._typing import ErrorOptionsWithWarn, _DimsLike if TYPE_CHECKING: if sys.version_info >= (3, 10): @@ -82,10 +82,10 @@ def is_dict_like(value: Any) -> TypeGuard[Mapping[Any, Any]]: def drop_missing_dims( - supplied_dims: Iterable[Hashable], - dims: Iterable[Hashable], + supplied_dims: _DimsLike, + dims: _DimsLike, missing_dims: ErrorOptionsWithWarn, -) -> Iterable[Hashable]: +) -> _DimsLike: """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that are not present in dims. @@ -98,8 +98,7 @@ def drop_missing_dims( if missing_dims == "raise": supplied_dims_set = {val for val in supplied_dims if val is not ...} - invalid = supplied_dims_set - set(dims) - if invalid: + if invalid := supplied_dims_set - set(dims): raise ValueError( f"Dimensions {invalid} do not exist. Expected one or more of {dims}" ) @@ -107,8 +106,7 @@ def drop_missing_dims( return supplied_dims elif missing_dims == "warn": - invalid = set(supplied_dims) - set(dims) - if invalid: + if invalid := set(supplied_dims) - set(dims): warnings.warn( f"Dimensions {invalid} do not exist. 
Expected one or more of {dims}" ) @@ -125,10 +123,10 @@ def drop_missing_dims( def infix_dims( - dims_supplied: Collection[Any], - dims_all: Collection[Any], + dims_supplied: _DimsLike, + dims_all: _DimsLike, missing_dims: ErrorOptionsWithWarn = "raise", -) -> Iterator[Any]: +) -> Iterator[_DimsLike]: """ Resolves a supplied list containing an ellipsis representing other items, to a generator with the 'realized' list of all items From 7ea1fb359eaa60f01bdf170f88b4349de78111b4 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Fri, 27 Oct 2023 16:02:21 -0700 Subject: [PATCH 16/53] Update xarray/namedarray/core.py Co-authored-by: Illviljan <14371165+Illviljan@users.noreply.github.com> --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index e90858e4a9c..75d888be966 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -860,7 +860,7 @@ def transpose( self, *dims: Hashable | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", - ) -> Self: + ) -> NamedArray[Any, _DType_co]: """Return a new object with transposed dimensions. Parameters From 456d57c4966d973e9890a454011f6910bcd97347 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 16:52:14 -0700 Subject: [PATCH 17/53] restore .transpose on variable --- xarray/core/variable.py | 39 ++++++++++++++++++++++++++++++++++++++- xarray/namedarray/core.py | 8 +++++--- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 897e3f2dcf1..1b51152f8fa 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1382,7 +1382,43 @@ def roll(self, shifts=None, **shifts_kwargs): result = result._roll_one_dim(dim, count) return result - def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: + def transpose( + self, + *dims: Hashable | ellipsis, + missing_dims: ErrorOptionsWithWarn = "raise", + ) -> Variable: + """Return a new object with transposed dimensions. + + Parameters + ---------- + *dims : Hashable, optional + By default, reverse the dimensions. Otherwise, reorder the + dimensions to this order. + missing_dims : {"raise", "warn", "ignore"}, default: "raise" + What to do if dimensions that should be selected from are not present in the + NamedArray: + - "raise": raise an exception + - "warn": raise a warning, and ignore the missing dimensions + - "ignore": ignore the missing dimensions + + Returns + ------- + transposed : NamedArray + The returned object has transposed data and dimensions with the + same attributes as the original. + + Notes + ----- + This operation returns a view of this variable's data. It is + lazy for dask-backed Variables but not for numpy-backed Variables. + + See Also + -------- + numpy.transpose + """ + return self.permute_dims(*dims, missing_dims=missing_dims) + + def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Variable: """ Return a new variable with given set of dimensions. This method might be used to attach new dimension(s) to variable. 
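With the wrapper restored above, `Variable.transpose` (and the `.T` property) simply delegates to the namedarray-level `permute_dims`; a rough sketch of the intended behaviour, using an illustrative 2x3 array:

>>> import numpy as np
>>> import xarray as xr
>>> v = xr.Variable(("x", "y"), np.arange(6).reshape(2, 3))
>>> v.transpose("y", "x").dims
('y', 'x')
>>> v.T.shape  # with no arguments the dimension order is reversed
(3, 2)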
@@ -2770,6 +2806,7 @@ def _inplace_binary_op(self, other, f): ) def _create_expanded_obj(self, expanded_data, expanded_dims) -> Variable: # type: ignore + # override NamedArray's version to use Variable constructor instead of cls return Variable( expanded_dims, expanded_data, self._attrs, self._encoding, fastpath=True ) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 75d888be966..eac0f9d8f8b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -856,7 +856,7 @@ def _to_dense(self) -> Self: else: raise TypeError("self.data is not a sparse array") - def transpose( + def permute_dims( self, *dims: Hashable | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", @@ -908,7 +908,7 @@ def transpose( @property def T(self) -> Self: - return self.transpose() + return self.permute_dims() def broadcast_to(self, shape: _ShapeLike) -> NamedArray[Any, _DType_co]: from xarray.core import duck_array_ops # TODO: remove this import @@ -920,7 +920,9 @@ def _create_expanded_obj( ) -> Self: return self._replace(dims=expanded_dims, data=expanded_data) - def expand_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Self: + def expand_dims( + self, dims: _DimsLike, shape: _ShapeLike | None = None + ) -> NamedArray[Any, _DType_co]: """ Expand the dimensions of the object. From 3f458c840853b590a984405a07be087e36ddbac0 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 22:34:58 -0700 Subject: [PATCH 18/53] revert to set_dims in Variable --- xarray/core/variable.py | 55 ++++++++++++++++++++++++---------- xarray/namedarray/core.py | 63 ++++++++++++++++++++++++++------------- 2 files changed, 81 insertions(+), 37 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 1b51152f8fa..03af59e40fd 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -45,7 +45,6 @@ is_duck_array, maybe_coerce_to_str, ) -from xarray.namedarray._typing import _DimsLike, _ShapeLike from xarray.namedarray.core import NamedArray NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( @@ -1418,24 +1417,54 @@ def transpose( """ return self.permute_dims(*dims, missing_dims=missing_dims) - def set_dims(self, dims: _DimsLike, shape: _ShapeLike | None = None) -> Variable: - """ - Return a new variable with given set of dimensions. + def set_dims(self, dims, shape=None): + """Return a new variable with given set of dimensions. This method might be used to attach new dimension(s) to variable. + When possible, this operation does not copy this variable's data. Parameters ---------- dims : str or sequence of str or dict - Dimensions to include on the new object (must be a superset of the existing dimensions). - If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + Dimensions to include on the new variable. If a dict, values are + used to provide the sizes of new dimensions; otherwise, new + dimensions are inserted with length 1. - shape : sequence of int, optional - Shape to broadcast the data to. Must be specified in the same order as `dims`. - If not provided, new dimensions are inserted with length 1. 
+ Returns + ------- + Variable """ + if isinstance(dims, str): + dims = [dims] + + if shape is None and utils.is_dict_like(dims): + shape = dims.values() + + missing_dims = set(self.dims) - set(dims) + if missing_dims: + raise ValueError( + f"new dimensions {dims!r} must be a superset of " + f"existing dimensions {self.dims!r}" + ) - return self.expand_dims(dims, shape) + self_dims = set(self.dims) + expanded_dims = tuple(d for d in dims if d not in self_dims) + self.dims + + if self.dims == expanded_dims: + # don't use broadcast_to unless necessary so the result remains + # writeable if possible + expanded_data = self.data + elif shape is not None: + dims_map = dict(zip(dims, shape)) + tmp_shape = tuple(dims_map[d] for d in expanded_dims) + expanded_data = duck_array_ops.broadcast_to(self.data, tmp_shape) + else: + expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] + + expanded_var = Variable( + expanded_dims, expanded_data, self._attrs, self._encoding, fastpath=True + ) + return expanded_var.transpose(*dims) def _stack_once(self, dims: list[Hashable], new_dim: Hashable): if not set(dims) <= set(self.dims): @@ -2805,12 +2834,6 @@ def _inplace_binary_op(self, other, f): "Values of an IndexVariable are immutable and can not be modified inplace" ) - def _create_expanded_obj(self, expanded_data, expanded_dims) -> Variable: # type: ignore - # override NamedArray's version to use Variable constructor instead of cls - return Variable( - expanded_dims, expanded_data, self._attrs, self._encoding, fastpath=True - ) - def _unified_dims(variables): # validate dimensions diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index eac0f9d8f8b..00c90038f48 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -907,18 +907,45 @@ def permute_dims( return self._replace(dims=dims, data=data) @property - def T(self) -> Self: + def T(self) -> NamedArray[Any, _DType_co]: return self.permute_dims() - def broadcast_to(self, shape: _ShapeLike) -> NamedArray[Any, _DType_co]: - from xarray.core import duck_array_ops # TODO: remove this import + def _check_dims(self, dims: Mapping[Any, _Dim] | _Dim): + if isinstance(dims, dict): + dims_keys = dims + else: + dims_keys = dims if isinstance(dims, str) else list(dims) + if missing_dims := set(self.dims) - set(dims_keys): + raise ValueError( + f"new dimensions {dims!r} must be a superset of " + f"existing dimensions {self.dims!r}. missing dims: {missing_dims}" + ) + + def _get_expanded_dims(self, dims: Mapping[Any, _Dim] | _Dim) -> _DimsLike: + self_dims = set(self.dims) + return tuple(dim for dim in dims if dim not in self_dims) + self.dims - return duck_array_ops.broadcast_to(self.data, shape) + def broadcast_to(self, dims: Mapping[Any, _Dim]) -> NamedArray[Any, _DType_co]: + """ + Broadcast namedarray to a new shape. - def _create_expanded_obj( - self, expanded_data: duckarray[Any, Any], expanded_dims: _DimsLike - ) -> Self: - return self._replace(dims=expanded_dims, data=expanded_data) + Parameters + ---------- + dims : dict + Dimensions to broadcast the array to. Keys are dimension names and values are the new sizes. 
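A rough sketch of the dict-based `broadcast_to` described above, with made-up values (`NamedArray` is exported at the package level earlier in this series):

>>> import numpy as np
>>> import xarray as xr
>>> arr = xr.NamedArray(("x", "y"), np.array([[1.5, 3.2]]))
>>> arr.broadcast_to({"z": 4, "x": 1, "y": 2}).dims
('z', 'x', 'y')
>>> arr.broadcast_to({"z": 4, "x": 1, "y": 2}).shape
(4, 1, 2)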
+ """ + + from xarray.core import duck_array_ops # TODO: Remove this import in the future + + self._check_dims(dims) + expanded_dims = self._get_expanded_dims(dims) + shape = list(dims.values()) + + dims_map = dict(zip(dims, cast(Iterable[SupportsIndex], shape))) + temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) + + data = duck_array_ops.broadcast_to(self.data, temporary_shape) # type: ignore + return self._new(data=data, dims=expanded_dims) def expand_dims( self, dims: _DimsLike, shape: _ShapeLike | None = None @@ -943,33 +970,27 @@ def expand_dims( if isinstance(dims, str): dims = [dims] + self._check_dims(dims) + if shape is None and is_dict_like(dims): shape = list(dims.values()) - if missing_dims := set(self.dims) - set(dims): - raise ValueError( - f"new dimensions {dims!r} must be a superset of " - f"existing dimensions {self.dims!r}. missing dims: {missing_dims}" - ) - - self_dims = set(self.dims) - expanded_dims = tuple(dim for dim in dims if dim not in self_dims) + self.dims + expanded_dims = self._get_expanded_dims(dims) if self.dims == expanded_dims: # don't use broadcast_to unless necessary so the result remains # writeable if possible expanded_data = self.data + expanded_obj = self._replace(dims=expanded_dims, data=expanded_data) elif shape is not None: dims_map = dict(zip(dims, cast(Iterable[SupportsIndex], shape))) - temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) - expanded_data = self.broadcast_to(temporary_shape) - + expanded_obj = self.broadcast_to(dims_map) else: expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] # type: ignore + expanded_obj = self._replace(dims=expanded_dims, data=expanded_data) - expanded_obj = self._create_expanded_obj(expanded_data, expanded_dims) - return expanded_obj.transpose(*dims) + return expanded_obj.permute_dims(*dims) _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] From aae28613a2c7bad49c967c39fc6af9bfc4586df4 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 28 Oct 2023 05:36:26 +0000 Subject: [PATCH 19/53] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- doc/whats-new.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 24dfa2e297d..d4d6a0327ad 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,7 +24,7 @@ New Features - Use `opt_einsum `_ for :py:func:`xarray.dot` by default if installed. By `Deepak Cherian `_. (:issue:`7764`, :pull:`8373`). - + - Add :py:meth:`NamedArray.expand_dims` and :py:meth:`NamedArray.broadcast_to` (:pull:`8380`) By `Anderson Banihirwe `_. From d6240de251082bcd8e90ee930b46f6ff409eff89 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 22:38:35 -0700 Subject: [PATCH 20/53] fix docstring --- xarray/core/variable.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 03af59e40fd..919ac1f77e2 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -1385,8 +1385,8 @@ def transpose( self, *dims: Hashable | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", - ) -> Variable: - """Return a new object with transposed dimensions. + ) -> Self: + """Return a new Variable with transposed dimensions. Parameters ---------- @@ -1395,14 +1395,14 @@ def transpose( dimensions to this order. 
missing_dims : {"raise", "warn", "ignore"}, default: "raise" What to do if dimensions that should be selected from are not present in the - NamedArray: + Variable: - "raise": raise an exception - "warn": raise a warning, and ignore the missing dimensions - "ignore": ignore the missing dimensions Returns ------- - transposed : NamedArray + transposed : Variable The returned object has transposed data and dimensions with the same attributes as the original. From 13643452823c4da0884e11ed409dc3bd759f8a17 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 23:09:58 -0700 Subject: [PATCH 21/53] update test_namedarray --- xarray/tests/test_namedarray.py | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 6e39a3aa94f..17992751153 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -19,9 +19,11 @@ from xarray.namedarray._typing import ( _AttrsLike, + _Dim, _DimsLike, _DType, _Shape, + _ShapeLike, duckarray, ) from xarray.namedarray.utils import Default @@ -61,6 +63,14 @@ def random_inputs() -> np.ndarray[Any, np.dtype[np.float32]]: return np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) +@pytest.fixture +def data() -> NamedArray[Any, np.dtype[np.float32]]: + dtype_float = np.dtype(np.float32) + narr_float: NamedArray[Any, np.dtype[np.float32]] + narr_float = NamedArray(("x",), np.array([1.5, 3.2], dtype=dtype_float)) + return narr_float + + def test_namedarray_init() -> None: dtype = np.dtype(np.int8) expected = np.array([1, 2], dtype=dtype) @@ -421,3 +431,63 @@ def _new( var_float2: Variable[Any, np.dtype[np.float32]] var_float2 = var_float._replace(("x",), np_val2) assert var_float2.dtype == dtype_float + + +@pytest.mark.parametrize( + "new_dims, new_shape", + [ + (["x", "y"], (2, 1)), # basic case, expanding along existing dimensions + (["x", "y", "z"], (2, 1, 1)), # adding a new dimension + (["z", "x", "y"], (1, 2, 1)), # adding a new dimension with different order + (["x"], (2,)), # reducing dimensions + ({"x": 2, "y": 1}, (2, 1)), # using dict for dims + ( + {"x": 2, "y": 1, "z": 1}, + (2, 1, 1), + ), # using dict for dims, adding new dimension + ], +) +def test_expand_dims( + data: NamedArray[Any, np.dtype[np.float32]], + new_dims: _DimsLike, + new_shape: _ShapeLike, +) -> None: + actual = data.expand_dims(new_dims) + # Ensure the expected dims match, especially when new dimensions are added + expected_dims = ( + tuple(new_dims) + if isinstance(new_dims, (list, tuple)) + else tuple(new_dims.keys()) + ) + expected = NamedArray(expected_dims, data._data.reshape(*new_shape)) + assert np.array_equal(actual.data, expected.data) + assert actual.dims == expected.dims + + +def test_expand_dims_object_dtype() -> None: + data: NamedArray[Any, np.dtype[object]] + x = np.empty([], dtype=object) + x[()] = ("a", 1) + data = NamedArray([], x) + actual = data.expand_dims(("x",), (3,)) + exp_values = np.empty((3,), dtype=object) + for i in range(3): + exp_values[i] = ("a", 1) + assert np.array_equal(actual.data, exp_values) + + +@pytest.mark.parametrize( + "dims", + [ + {"x": 2, "y": 1}, # basic case, broadcasting along existing dimensions + {"x": 2, "y": 3}, # increasing size of existing dimension + {"x": 2, "y": 1, "z": 1}, # adding a new dimension + {"z": 1, "x": 2, "y": 1}, # adding a new dimension with different order + ], +) +def test_broadcast_to( + data: NamedArray[Any, np.dtype[np.float32]], + dims: Mapping[Any, _Dim], +) -> None: + 
actual = data.broadcast_to(dims) + assert actual.sizes == dims From b793f744a29ce969604db7897a0db662af1760f2 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 27 Oct 2023 23:41:33 -0700 Subject: [PATCH 22/53] update tests --- doc/api-hidden.rst | 2 +- xarray/namedarray/core.py | 8 +++--- xarray/tests/test_namedarray.py | 43 +++++++++++++++++++++++++++++++++ 3 files changed, 48 insertions(+), 5 deletions(-) diff --git a/doc/api-hidden.rst b/doc/api-hidden.rst index a661dcdc41d..c711eb452b5 100644 --- a/doc/api-hidden.rst +++ b/doc/api-hidden.rst @@ -294,7 +294,6 @@ IndexVariable.cumprod IndexVariable.cumsum IndexVariable.equals - IndexVariable.set_dims IndexVariable.fillna IndexVariable.get_axis_num IndexVariable.get_level_variable @@ -318,6 +317,7 @@ IndexVariable.rolling_window IndexVariable.round IndexVariable.searchsorted + IndexVariable.set_dims IndexVariable.shift IndexVariable.squeeze IndexVariable.stack diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 00c90038f48..ed2d47543a4 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -959,12 +959,12 @@ def expand_dims( Parameters ---------- dims : str or sequence of str or dict - Dimensions to include on the new object (must be a superset of the existing dimensions). - If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + Dimensions to include on the new object (must be a superset of the existing dimensions). + If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. shape : sequence of int, optional - Shape to broadcast the data to. Must be specified in the same order as `dims`. - If not provided, new dimensions are inserted with length 1. + Shape to broadcast the data to. Must be specified in the same order as `dims`. + If not provided, new dimensions are inserted with length 1. 
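A short sketch of `expand_dims` under the `(dims, shape)` signature shown here (illustrative values; the signature is reworked again later in the series): new dimensions default to length 1, and a dict of sizes broadcasts the data.

>>> import numpy as np
>>> import xarray as xr
>>> arr = xr.NamedArray(("x",), np.array([1.5, 3.2]))
>>> arr.expand_dims(["y", "x"]).dims
('y', 'x')
>>> arr.expand_dims({"y": 3, "x": 2}).shape
(3, 2)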
""" if isinstance(dims, str): diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 17992751153..692607cc841 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -22,6 +22,7 @@ _Dim, _DimsLike, _DType, + _IntOrUnknown, _Shape, _ShapeLike, duckarray, @@ -71,6 +72,16 @@ def data() -> NamedArray[Any, np.dtype[np.float32]]: return narr_float +@pytest.fixture +def random_data() -> NamedArray[Any, np.dtype[np.float32]]: + dtype_float = np.dtype(np.float32) + narr_float: NamedArray[Any, np.dtype[np.float32]] + narr_float = NamedArray( + ("x", "y", "z"), np.arange(60).reshape(3, 4, 5).astype(dtype_float) + ) + return narr_float + + def test_namedarray_init() -> None: dtype = np.dtype(np.int8) expected = np.array([1, 2], dtype=dtype) @@ -491,3 +502,35 @@ def test_broadcast_to( ) -> None: actual = data.broadcast_to(dims) assert actual.sizes == dims + + +@pytest.mark.parametrize( + "dims, expected_sizes", + [ + # Basic case: reversing the dimensions + ((), {"z": 5, "y": 4, "x": 3}), + (["y", "x", "z"], {"y": 4, "x": 3, "z": 5}), + (["y", "x", ...], {"y": 4, "x": 3, "z": 5}), + ], +) +def test_permute_dims( + random_data: NamedArray[Any, np.dtype[np.float32]], + dims: _DimsLike, + expected_sizes: dict[_Dim, _IntOrUnknown], +) -> None: + actual = random_data.permute_dims(*dims) + assert actual.sizes == expected_sizes + + +@pytest.mark.parametrize( + "dims", + [ + (["y", "x"]), + ], +) +def test_permute_dims_errors( + random_data: NamedArray[Any, np.dtype[np.float32]], + dims: _DimsLike, +) -> None: + with pytest.raises(ValueError): + random_data.permute_dims(*dims) From cc67b63c992a5009da9d7f1e89dcbf184abcf4a6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 9 Nov 2023 19:21:40 +0000 Subject: [PATCH 23/53] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/_typing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 7b7af3bdb8f..d4c68e130c9 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -271,4 +271,3 @@ def todense(self) -> np.ndarray[Any, _DType_co]: ErrorOptions = Literal["raise", "ignore"] ErrorOptionsWithWarn = Literal["raise", "warn", "ignore"] - From ab6262be0c6acb9bb43ef31dfae556608a78d38d Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Mon, 27 Nov 2023 21:57:45 -0800 Subject: [PATCH 24/53] Apply suggestions from code review Co-authored-by: Deepak Cherian --- xarray/namedarray/core.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 2fd22227930..c93b25bd489 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -863,7 +863,7 @@ def permute_dims( Parameters ---------- *dims : Hashable, optional - By default, reverse the dimensions. Otherwise, reorder the + By default, reverse the order of the dimensions. Otherwise, reorder the dimensions to this order. 
missing_dims : {"raise", "warn", "ignore"}, default: "raise" What to do if dimensions that should be selected from are not present in the @@ -874,14 +874,10 @@ def permute_dims( Returns ------- - transposed : NamedArray - The returned object has transposed data and dimensions with the + NamedArray + The returned NamedArray has permuted dimensions and data with the same attributes as the original. - Notes - ----- - This operation returns a view of this variable's data. It is - lazy for dask-backed Variables but not for numpy-backed Variables. See Also -------- From 069c3534dda87e161cb7d2b5bcd1f1fe882a057f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 28 Nov 2023 14:49:19 -0800 Subject: [PATCH 25/53] fix formatting issue --- xarray/tests/test_namedarray.py | 168 -------------------------------- 1 file changed, 168 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index d70798b8886..fcdf063d106 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -21,12 +21,9 @@ from xarray.namedarray._typing import ( _AttrsLike, - _Dim, _DimsLike, _DType, - _IntOrUnknown, _Shape, - _ShapeLike, duckarray, ) from xarray.namedarray.utils import Default @@ -61,69 +58,6 @@ def __array_namespace__(self) -> ModuleType: return np -@pytest.fixture -def random_inputs() -> np.ndarray[Any, np.dtype[np.float32]]: - return np.arange(3 * 4 * 5, dtype=np.float32).reshape((3, 4, 5)) - - -@pytest.fixture -def data() -> NamedArray[Any, np.dtype[np.float32]]: - dtype_float = np.dtype(np.float32) - narr_float: NamedArray[Any, np.dtype[np.float32]] - narr_float = NamedArray(("x",), np.array([1.5, 3.2], dtype=dtype_float)) - return narr_float - - -@pytest.fixture -def random_data() -> NamedArray[Any, np.dtype[np.float32]]: - dtype_float = np.dtype(np.float32) - narr_float: NamedArray[Any, np.dtype[np.float32]] - narr_float = NamedArray( - ("x", "y", "z"), np.arange(60).reshape(3, 4, 5).astype(dtype_float) - ) - return narr_float - - -def test_namedarray_init() -> None: - dtype = np.dtype(np.int8) - expected = np.array([1, 2], dtype=dtype) - actual: NamedArray[Any, np.dtype[np.int8]] - actual = NamedArray(("x",), expected) - assert np.array_equal(np.asarray(actual.data), expected) - - with pytest.raises(AttributeError): - expected2 = [1, 2] - actual2: NamedArray[Any, Any] - actual2 = NamedArray(("x",), expected2) # type: ignore[arg-type] - assert np.array_equal(np.asarray(actual2.data), expected2) - - -@pytest.mark.parametrize( - "dims, data, expected, raise_error", - [ - (("x",), [1, 2, 3], np.array([1, 2, 3]), False), - ((1,), np.array([4, 5, 6]), np.array([4, 5, 6]), False), - ((), 2, np.array(2), False), - # Fail: - (("x",), NamedArray("time", np.array([1, 2, 3])), np.array([1, 2, 3]), True), - ], -) -def test_from_array( - dims: _DimsLike, - data: ArrayLike, - expected: np.ndarray[Any, Any], - raise_error: bool, -) -> None: - actual: NamedArray[Any, Any] - if raise_error: - with pytest.raises(TypeError, match="already a Named array"): - actual = from_array(dims, data) - - # Named arrays are not allowed: - from_array(actual) # type: ignore[call-overload] - else: - actual = from_array(dims, data) - class NamedArraySubclassobjects: @pytest.fixture def target(self, data: np.ndarray[Any, Any]) -> Any: @@ -386,108 +320,6 @@ def test_duck_array_typevar( if isinstance(b, _arrayfunction_or_api): return b else: - cls_ = cast("type[Variable[Any, _DType]]", type(self)) - return cls_(dims_, data, attrs_) - - var_float: Variable[Any, 
np.dtype[np.float32]] - var_float = Variable(("x",), np_val) - assert var_float.dtype == dtype_float - - var_float2: Variable[Any, np.dtype[np.float32]] - var_float2 = var_float._replace(("x",), np_val2) - assert var_float2.dtype == dtype_float - - -@pytest.mark.parametrize( - "new_dims, new_shape", - [ - (["x", "y"], (2, 1)), # basic case, expanding along existing dimensions - (["x", "y", "z"], (2, 1, 1)), # adding a new dimension - (["z", "x", "y"], (1, 2, 1)), # adding a new dimension with different order - (["x"], (2,)), # reducing dimensions - ({"x": 2, "y": 1}, (2, 1)), # using dict for dims - ( - {"x": 2, "y": 1, "z": 1}, - (2, 1, 1), - ), # using dict for dims, adding new dimension - ], -) -def test_expand_dims( - data: NamedArray[Any, np.dtype[np.float32]], - new_dims: _DimsLike, - new_shape: _ShapeLike, -) -> None: - actual = data.expand_dims(new_dims) - # Ensure the expected dims match, especially when new dimensions are added - expected_dims = ( - tuple(new_dims) - if isinstance(new_dims, (list, tuple)) - else tuple(new_dims.keys()) - ) - expected = NamedArray(expected_dims, data._data.reshape(*new_shape)) - assert np.array_equal(actual.data, expected.data) - assert actual.dims == expected.dims - - -def test_expand_dims_object_dtype() -> None: - data: NamedArray[Any, np.dtype[object]] - x = np.empty([], dtype=object) - x[()] = ("a", 1) - data = NamedArray([], x) - actual = data.expand_dims(("x",), (3,)) - exp_values = np.empty((3,), dtype=object) - for i in range(3): - exp_values[i] = ("a", 1) - assert np.array_equal(actual.data, exp_values) - - -@pytest.mark.parametrize( - "dims", - [ - {"x": 2, "y": 1}, # basic case, broadcasting along existing dimensions - {"x": 2, "y": 3}, # increasing size of existing dimension - {"x": 2, "y": 1, "z": 1}, # adding a new dimension - {"z": 1, "x": 2, "y": 1}, # adding a new dimension with different order - ], -) -def test_broadcast_to( - data: NamedArray[Any, np.dtype[np.float32]], - dims: Mapping[Any, _Dim], -) -> None: - actual = data.broadcast_to(dims) - assert actual.sizes == dims - - -@pytest.mark.parametrize( - "dims, expected_sizes", - [ - # Basic case: reversing the dimensions - ((), {"z": 5, "y": 4, "x": 3}), - (["y", "x", "z"], {"y": 4, "x": 3, "z": 5}), - (["y", "x", ...], {"y": 4, "x": 3, "z": 5}), - ], -) -def test_permute_dims( - random_data: NamedArray[Any, np.dtype[np.float32]], - dims: _DimsLike, - expected_sizes: dict[_Dim, _IntOrUnknown], -) -> None: - actual = random_data.permute_dims(*dims) - assert actual.sizes == expected_sizes - - -@pytest.mark.parametrize( - "dims", - [ - (["y", "x"]), - ], -) -def test_permute_dims_errors( - random_data: NamedArray[Any, np.dtype[np.float32]], - dims: _DimsLike, -) -> None: - with pytest.raises(ValueError): - random_data.permute_dims(*dims) raise TypeError( f"a ({type(a)}) is not a valid _arrayfunction or _arrayapi" ) From 5e1af7a17422f9122a98d549ad1fa70dd2b1d9a6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 28 Nov 2023 15:00:39 -0800 Subject: [PATCH 26/53] fix tests --- xarray/tests/test_namedarray.py | 94 +++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index fcdf063d106..690fb652796 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -21,9 +21,12 @@ from xarray.namedarray._typing import ( _AttrsLike, + _Dim, _DimsLike, _DType, + _IntOrUnknown, _Shape, + _ShapeLike, duckarray, ) from xarray.namedarray.utils import Default @@ -475,3 
+478,94 @@ def _new( var_float2: Variable[Any, np.dtype[np.float32]] var_float2 = var_float._replace(("x",), np_val2) assert var_float2.dtype == dtype_float + + @pytest.mark.parametrize( + "new_dims, new_shape", + [ + (["x", "y"], (2, 1)), # basic case, expanding along existing dimensions + (["x", "y", "z"], (2, 1, 1)), # adding a new dimension + (["z", "x", "y"], (1, 2, 1)), # adding a new dimension with different order + (["x"], (2,)), # reducing dimensions + ({"x": 2, "y": 1}, (2, 1)), # using dict for dims + ( + {"x": 2, "y": 1, "z": 1}, + (2, 1, 1), + ), # using dict for dims, adding new dimension + ], + ) + def test_expand_dims( + self, + target: NamedArray[Any, np.dtype[np.float32]], + new_dims: _DimsLike, + new_shape: _ShapeLike, + ) -> None: + actual = target.expand_dims(new_dims) + # Ensure the expected dims match, especially when new dimensions are added + expected_dims = ( + tuple(new_dims) + if isinstance(new_dims, (list, tuple)) + else tuple(new_dims.keys()) + ) + expected = NamedArray(expected_dims, target._data.reshape(*new_shape)) + assert np.array_equal(actual.data, expected.data) + assert actual.dims == expected.dims + + def test_expand_dims_object_dtype(self) -> None: + data: NamedArray[Any, np.dtype[object]] + x = np.empty([], dtype=object) + x[()] = ("a", 1) + data = NamedArray([], x) + actual = data.expand_dims(("x",), (3,)) + exp_values = np.empty((3,), dtype=object) + for i in range(3): + exp_values[i] = ("a", 1) + assert np.array_equal(actual.data, exp_values) + + @pytest.mark.parametrize( + "dims", + [ + {"x": 2, "y": 1}, # basic case, broadcasting along existing dimensions + {"x": 2, "y": 3}, # increasing size of existing dimension + {"x": 2, "y": 1, "z": 1}, # adding a new dimension + {"z": 1, "x": 2, "y": 1}, # adding a new dimension with different order + ], + ) + def test_broadcast_to( + self, + target: NamedArray[Any, np.dtype[np.float32]], + dims: Mapping[Any, _Dim], + ) -> None: + actual = target.broadcast_to(dims) + assert actual.sizes == dims + + @pytest.mark.parametrize( + "dims, expected_sizes", + [ + # Basic case: reversing the dimensions + ((), {"z": 5, "y": 4, "x": 3}), + (["y", "x", "z"], {"y": 4, "x": 3, "z": 5}), + (["y", "x", ...], {"y": 4, "x": 3, "z": 5}), + ], + ) + def test_permute_dims( + self, + random_data: NamedArray[Any, np.dtype[np.float32]], + dims: _DimsLike, + expected_sizes: dict[_Dim, _IntOrUnknown], + ) -> None: + actual = random_data.permute_dims(*dims) + assert actual.sizes == expected_sizes + + @pytest.mark.parametrize( + "dims", + [ + (["y", "x"]), + ], + ) + def test_permute_dims_errors( + self, + random_data: NamedArray[Any, np.dtype[np.float32]], + dims: _DimsLike, + ) -> None: + with pytest.raises(ValueError): + random_data.permute_dims(*dims) From 2499c9385a9157904707314836849d7800bf9690 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 29 Nov 2023 18:01:46 -0800 Subject: [PATCH 27/53] update expand_dims --- xarray/core/variable.py | 21 +++- xarray/namedarray/core.py | 108 ++++++++++++++------- xarray/namedarray/utils.py | 26 ++++- xarray/tests/test_namedarray.py | 163 +++++++++++++++----------------- 4 files changed, 194 insertions(+), 124 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index 44596f1062c..056570cfdfb 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -46,6 +46,7 @@ maybe_coerce_to_str, ) from xarray.namedarray.core import NamedArray +from xarray.namedarray.utils import infix_dims NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, @@ 
-1386,7 +1387,7 @@ def transpose( *dims: Hashable | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", ) -> Self: - """Return a new Variable with transposed dimensions. + """Return a new Variable object with transposed dimensions. Parameters ---------- @@ -1415,7 +1416,23 @@ def transpose( -------- numpy.transpose """ - return self.permute_dims(*dims, missing_dims=missing_dims) + if len(dims) == 0: + dims = self.dims[::-1] + else: + dims = tuple(infix_dims(dims, self.dims, missing_dims)) + + if len(dims) < 2 or dims == self.dims: + # no need to transpose if only one dimension + # or dims are in same order + return self.copy(deep=False) + + axes = self.get_axis_num(dims) + data = as_indexable(self._data).transpose(axes) + return self._replace(dims=dims, data=data) + + @property + def T(self) -> Self: + return self.transpose() def set_dims(self, dims, shape=None): """Return a new variable with given set of dimensions. diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index c93b25bd489..58778840ee5 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -30,7 +30,6 @@ _dtype, _DType_co, _ScalarType_co, - _ShapeLike, _ShapeType_co, _sparsearrayfunction_or_api, _SupportsImag, @@ -38,8 +37,8 @@ ) from xarray.namedarray.utils import ( _default, + either_dict_or_kwargs, infix_dims, - is_dict_like, is_duck_dask_array, to_0d_object_array, ) @@ -883,7 +882,6 @@ def permute_dims( -------- numpy.transpose """ - from xarray.core.indexing import as_indexable if not dims: dims = self.dims[::-1] @@ -896,14 +894,14 @@ def permute_dims( return self.copy(deep=False) axes = self.get_axis_num(dims) - data = as_indexable(self._data).transpose(axes) # type: ignore + data = self._data.transpose(axes) return self._replace(dims=dims, data=data) @property def T(self) -> NamedArray[Any, _DType_co]: return self.permute_dims() - def _check_dims(self, dims: Mapping[Any, _Dim] | _Dim): + def _check_dims(self, dims: Mapping[Any, _Dim] | _Dim) -> None: if isinstance(dims, dict): dims_keys = dims else: @@ -941,49 +939,91 @@ def broadcast_to(self, dims: Mapping[Any, _Dim]) -> NamedArray[Any, _DType_co]: return self._new(data=data, dims=expanded_dims) def expand_dims( - self, dims: _DimsLike, shape: _ShapeLike | None = None + self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any ) -> NamedArray[Any, _DType_co]: """ - Expand the dimensions of the object. + Expand the dimensions of the NamedArray. - This method adds new dimensions to the object and optionally broadcasts - the data to the new shape if provided. + This method adds new dimensions to the object. New dimensions can be added + at specific positions with a given size, which defaults to 1 if not specified. + The method handles both positional and keyword arguments for specifying new dimensions. Parameters ---------- - dims : str or sequence of str or dict - Dimensions to include on the new object (must be a superset of the existing dimensions). - If a dict, values are used to provide the sizes of new dimensions; otherwise, new dimensions are inserted with length 1. + dim : str, sequence of str, or dict, optional + Dimensions to include on the new object. It must be a superset of the existing dimensions. + If a dict, values are used to provide the axis position of dimensions; otherwise, new dimensions are inserted with length 1. + If not provided, a new dimension named 'dim_0', 'dim_1', etc., is added at the start, ensuring no name conflict with existing dimensions. 
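
A minimal NumPy-only sketch of the slicing trick this implementation relies on: indexing with None inserts a new length-1 axis at that position, so a tuple of None/slice(None) built from the target dimension order expands the array without copying. The dimension names below are purely illustrative.

import numpy as np

data = np.arange(10).reshape(2, 5)                  # conceptually dims ("x", "y")
new_dims = ["z", "x", "a", "y"]                     # hypothetical target order
added = {"z", "a"}                                  # names being introduced

slicer = tuple(None if d in added else slice(None) for d in new_dims)
expanded = data[slicer]                             # None adds a length-1 axis
print(expanded.shape)                               # (1, 2, 1, 5)
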
- shape : sequence of int, optional - Shape to broadcast the data to. Must be specified in the same order as `dims`. - If not provided, new dimensions are inserted with length 1. - """ + **dim_kwargs : Any + Additional dimensions specified as keyword arguments. Each keyword argument specifies the name of the new dimension and its position. - if isinstance(dims, str): - dims = [dims] + Returns + ------- + NamedArray + A new NamedArray with expanded dimensions. - self._check_dims(dims) + Raises + ------ + ValueError + If any of the specified new dimensions already exist in the NamedArray. - if shape is None and is_dict_like(dims): - shape = list(dims.values()) + Examples + -------- - expanded_dims = self._get_expanded_dims(dims) + >>> data = np.asarray([[1.0, 2.0], [3.0, 4.0]]) + >>> array = xr.NamedArray(("x", "y"), data) - if self.dims == expanded_dims: - # don't use broadcast_to unless necessary so the result remains - # writeable if possible - expanded_data = self.data - expanded_obj = self._replace(dims=expanded_dims, data=expanded_data) + # expand dimensions without specifying any name (adds 'dim_0') + >>> expanded = array.expand_dims() + >>> expanded.dims + ('dim_0', 'x', 'y') - elif shape is not None: - dims_map = dict(zip(dims, cast(Iterable[SupportsIndex], shape))) - expanded_obj = self.broadcast_to(dims_map) - else: - expanded_data = self.data[(None,) * (len(expanded_dims) - self.ndim)] # type: ignore - expanded_obj = self._replace(dims=expanded_dims, data=expanded_data) + # expand dimensions by specifying a new dimension name + >>> expanded = array.expand_dims(dim="z") + >>> expanded.dims + ('z', 'x', 'y') + + # expand dimensions with multiple new dimensions + >>> expanded = array.expand_dims(dim={"z": 0, "a": 2}) + >>> expanded.dims + ('z', 'x', 'a', 'y') + + # using keyword arguments to specify new dimensions + >>> expanded = array.expand_dims(z=0, a=2) + >>> expanded.dims + ('z', 'x', 'a', 'y') + """ + + if dim is None and not dim_kwargs: + # If no dimensions specified, find a unique default dimension name + dim_number = 0 + default_dim = f"dim_{dim_number}" + while default_dim in self.dims: + dim_number += 1 + default_dim = f"dim_{dim_number}" + dim = {default_dim: 0} + + elif isinstance(dim, (str, list, tuple)): + # If dim is a string or list/tuple, convert to a dict with default positions + dim = {d: 0 for d in (dim if isinstance(dim, (list, tuple)) else [dim])} + + combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") + + # create a list of all dimensions, placing new ones at their specified positions + new_dims = list(self.dims) + for d, pos in sorted(combined_dims.items(), key=lambda x: x[1]): + if d in new_dims: + raise ValueError(f"Dimension {d} already exists") + new_dims.insert(pos, d) + + slicing_tuple = tuple( + None if d in combined_dims else slice(None) for d in new_dims + ) - return expanded_obj.permute_dims(*dims) + expanded_data: duckarray[_ShapeType, _DType] = self.data[slicing_tuple] + # use slicing to expand dimensions + return self._new(dims=new_dims, data=expanded_data) _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index b5119374fa0..d086a3353d7 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -4,7 +4,7 @@ import warnings from collections.abc import Hashable, Iterator, Mapping from enum import Enum -from typing import TYPE_CHECKING, Any, Final +from typing import TYPE_CHECKING, Any, Final, TypeVar, cast import numpy as np @@ -28,6 
+28,11 @@ DaskCollection: Any = NDArray # type: ignore +K = TypeVar("K") +V = TypeVar("V") +T = TypeVar("T") + + # Singleton type, as per https://github.com/python/typing/pull/240 class Default(Enum): token: Final = 0 @@ -152,6 +157,25 @@ def infix_dims( yield from existing_dims +def either_dict_or_kwargs( + pos_kwargs: Mapping[Any, T] | None, + kw_kwargs: Mapping[str, T], + func_name: str, +) -> Mapping[Hashable, T]: + if pos_kwargs is None or pos_kwargs == {}: + # Need an explicit cast to appease mypy due to invariance; see + # https://github.com/python/mypy/issues/6228 + return cast(Mapping[Hashable, T], kw_kwargs) + + if not is_dict_like(pos_kwargs): + raise ValueError(f"the first argument to .{func_name} must be a dictionary") + if kw_kwargs: + raise ValueError( + f"cannot specify both keyword and positional arguments to .{func_name}" + ) + return pos_kwargs + + class ReprObject: """Object that prints as the given value, for use with sentinel values.""" diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 690fb652796..8caf1c8f106 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -21,12 +21,9 @@ from xarray.namedarray._typing import ( _AttrsLike, - _Dim, _DimsLike, _DType, - _IntOrUnknown, _Shape, - _ShapeLike, duckarray, ) from xarray.namedarray.utils import Default @@ -480,92 +477,84 @@ def _new( assert var_float2.dtype == dtype_float @pytest.mark.parametrize( - "new_dims, new_shape", + "dim,expected_ndim,expected_shape,expected_dims", [ - (["x", "y"], (2, 1)), # basic case, expanding along existing dimensions - (["x", "y", "z"], (2, 1, 1)), # adding a new dimension - (["z", "x", "y"], (1, 2, 1)), # adding a new dimension with different order - (["x"], (2,)), # reducing dimensions - ({"x": 2, "y": 1}, (2, 1)), # using dict for dims - ( - {"x": 2, "y": 1, "z": 1}, - (2, 1, 1), - ), # using dict for dims, adding new dimension + (None, 3, (1, 2, 5), ("dim_0", "x", "y")), + ("z", 3, (1, 2, 5), ("z", "x", "y")), + (["z", "a"], 4, (1, 1, 2, 5), ("z", "a", "x", "y")), + ({"z": 0, "a": 2}, 4, (1, 2, 1, 5), ("z", "x", "a", "y")), ], ) def test_expand_dims( self, - target: NamedArray[Any, np.dtype[np.float32]], - new_dims: _DimsLike, - new_shape: _ShapeLike, - ) -> None: - actual = target.expand_dims(new_dims) - # Ensure the expected dims match, especially when new dimensions are added - expected_dims = ( - tuple(new_dims) - if isinstance(new_dims, (list, tuple)) - else tuple(new_dims.keys()) - ) - expected = NamedArray(expected_dims, target._data.reshape(*new_shape)) - assert np.array_equal(actual.data, expected.data) - assert actual.dims == expected.dims - - def test_expand_dims_object_dtype(self) -> None: - data: NamedArray[Any, np.dtype[object]] - x = np.empty([], dtype=object) - x[()] = ("a", 1) - data = NamedArray([], x) - actual = data.expand_dims(("x",), (3,)) - exp_values = np.empty((3,), dtype=object) - for i in range(3): - exp_values[i] = ("a", 1) - assert np.array_equal(actual.data, exp_values) - - @pytest.mark.parametrize( - "dims", - [ - {"x": 2, "y": 1}, # basic case, broadcasting along existing dimensions - {"x": 2, "y": 3}, # increasing size of existing dimension - {"x": 2, "y": 1, "z": 1}, # adding a new dimension - {"z": 1, "x": 2, "y": 1}, # adding a new dimension with different order - ], - ) - def test_broadcast_to( - self, - target: NamedArray[Any, np.dtype[np.float32]], - dims: Mapping[Any, _Dim], - ) -> None: - actual = target.broadcast_to(dims) - assert actual.sizes == dims - - 
@pytest.mark.parametrize( - "dims, expected_sizes", - [ - # Basic case: reversing the dimensions - ((), {"z": 5, "y": 4, "x": 3}), - (["y", "x", "z"], {"y": 4, "x": 3, "z": 5}), - (["y", "x", ...], {"y": 4, "x": 3, "z": 5}), - ], - ) - def test_permute_dims( - self, - random_data: NamedArray[Any, np.dtype[np.float32]], - dims: _DimsLike, - expected_sizes: dict[_Dim, _IntOrUnknown], - ) -> None: - actual = random_data.permute_dims(*dims) - assert actual.sizes == expected_sizes - - @pytest.mark.parametrize( - "dims", - [ - (["y", "x"]), - ], - ) - def test_permute_dims_errors( - self, - random_data: NamedArray[Any, np.dtype[np.float32]], - dims: _DimsLike, - ) -> None: - with pytest.raises(ValueError): - random_data.permute_dims(*dims) + target: NamedArray[Any, Any], + dim, + expected_ndim, + expected_shape, + expected_dims, + ): + result = target.expand_dims(dim=dim) + assert result.ndim == expected_ndim + assert result.shape == expected_shape + assert result.dims == expected_dims + + # @pytest.mark.skip + # def test_expand_dims_object_dtype(self) -> None: + # data: NamedArray[Any, np.dtype[object]] + # x = np.empty([], dtype=object) + # x[()] = ("a", 1) + # data = NamedArray([], x) + # actual = data.expand_dims(("x",), (3,)) + # exp_values = np.empty((3,), dtype=object) + # for i in range(3): + # exp_values[i] = ("a", 1) + # assert np.array_equal(actual.data, exp_values) + # @pytest.mark.skip + # @pytest.mark.parametrize( + # "dims", + # [ + # {"x": 2, "y": 1}, # basic case, broadcasting along existing dimensions + # {"x": 2, "y": 3}, # increasing size of existing dimension + # {"x": 2, "y": 1, "z": 1}, # adding a new dimension + # {"z": 1, "x": 2, "y": 1}, # adding a new dimension with different order + # ], + # ) + # def test_broadcast_to( + # self, + # target: NamedArray[Any, np.dtype[np.float32]], + # dims: Mapping[Any, _Dim], + # ) -> None: + # actual = target.broadcast_to(dims) + # assert actual.sizes == dims + + # @pytest.mark.parametrize( + # "dims, expected_sizes", + # [ + # # Basic case: reversing the dimensions + # ((), {"z": 5, "y": 4, "x": 3}), + # (["y", "x", "z"], {"y": 4, "x": 3, "z": 5}), + # (["y", "x", ...], {"y": 4, "x": 3, "z": 5}), + # ], + # ) + # def test_permute_dims( + # self, + # random_data: NamedArray[Any, np.dtype[np.float32]], + # dims: _DimsLike, + # expected_sizes: dict[_Dim, _IntOrUnknown], + # ) -> None: + # actual = random_data.permute_dims(*dims) + # assert actual.sizes == expected_sizes + + # @pytest.mark.parametrize( + # "dims", + # [ + # (["y", "x"]), + # ], + # ) + # def test_permute_dims_errors( + # self, + # random_data: NamedArray[Any, np.dtype[np.float32]], + # dims: _DimsLike, + # ) -> None: + # with pytest.raises(ValueError): + # random_data.permute_dims(*dims) From 3e4d8fa8aa8ac036636a1d2976e1c5a3f66f9ac6 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 13:06:01 -0800 Subject: [PATCH 28/53] update tests --- xarray/namedarray/core.py | 40 +++++++++++++++++++++------- xarray/tests/test_namedarray.py | 47 +++++++-------------------------- 2 files changed, 41 insertions(+), 46 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 58778840ee5..7bfa9d4a5b4 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -1004,25 +1004,47 @@ def expand_dims( default_dim = f"dim_{dim_number}" dim = {default_dim: 0} - elif isinstance(dim, (str, list, tuple)): - # If dim is a string or list/tuple, convert to a dict with default positions - dim = {d: 0 for d in (dim if isinstance(dim, 
(list, tuple)) else [dim])} + if isinstance(dim, str): + dim = {dim: 0} + + elif isinstance(dim, (list, tuple)): + # if dim is a list/tuple, convert to a dict with default positions + dim = {d: idx for idx, d in (enumerate(dim))} combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") + # check for duplicate positions + positions = list(combined_dims.values()) + if len(positions) != len(set(positions)): + raise ValueError( + f"Cannot assign multiple new dimensions to the same position: {positions}" + ) + # create a list of all dimensions, placing new ones at their specified positions - new_dims = list(self.dims) - for d, pos in sorted(combined_dims.items(), key=lambda x: x[1]): - if d in new_dims: - raise ValueError(f"Dimension {d} already exists") - new_dims.insert(pos, d) + all_dims_with_pos = [(d, i) for i, d in enumerate(self.dims)] + + # adjust positions of existing dimensions based on new dimensions' positions + for new_dim, new_pos in combined_dims.items(): + for i, (existing_dim, existing_pos) in enumerate(all_dims_with_pos): + if existing_pos >= new_pos: + all_dims_with_pos[i] = (existing_dim, existing_pos + 1) + + # add new dimensions to the list + all_dims_with_pos.extend(combined_dims.items()) + # sort by position to get the final order + all_dims_with_pos.sort(key=lambda x: x[1]) + + # extract the ordered list of dimensions + new_dims = [dim[0] for dim in all_dims_with_pos] + + # use slicing to expand dimensions slicing_tuple = tuple( None if d in combined_dims else slice(None) for d in new_dims ) expanded_data: duckarray[_ShapeType, _DType] = self.data[slicing_tuple] - # use slicing to expand dimensions + return self._new(dims=new_dims, data=expanded_data) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 8caf1c8f106..81f77f861b9 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -21,9 +21,11 @@ from xarray.namedarray._typing import ( _AttrsLike, + _Dim, _DimsLike, _DType, _Shape, + _ShapeLike, duckarray, ) from xarray.namedarray.utils import Default @@ -487,46 +489,17 @@ def _new( ) def test_expand_dims( self, - target: NamedArray[Any, Any], - dim, - expected_ndim, - expected_shape, - expected_dims, - ): + target: NamedArray[Any, np.dtype[np.float32]], + dim: _Dim | _DimsLike | Mapping[_Dim, int] | None, + expected_ndim: int, + expected_shape: _ShapeLike, + expected_dims: _DimsLike, + ) -> None: result = target.expand_dims(dim=dim) assert result.ndim == expected_ndim assert result.shape == expected_shape assert result.dims == expected_dims - # @pytest.mark.skip - # def test_expand_dims_object_dtype(self) -> None: - # data: NamedArray[Any, np.dtype[object]] - # x = np.empty([], dtype=object) - # x[()] = ("a", 1) - # data = NamedArray([], x) - # actual = data.expand_dims(("x",), (3,)) - # exp_values = np.empty((3,), dtype=object) - # for i in range(3): - # exp_values[i] = ("a", 1) - # assert np.array_equal(actual.data, exp_values) - # @pytest.mark.skip - # @pytest.mark.parametrize( - # "dims", - # [ - # {"x": 2, "y": 1}, # basic case, broadcasting along existing dimensions - # {"x": 2, "y": 3}, # increasing size of existing dimension - # {"x": 2, "y": 1, "z": 1}, # adding a new dimension - # {"z": 1, "x": 2, "y": 1}, # adding a new dimension with different order - # ], - # ) - # def test_broadcast_to( - # self, - # target: NamedArray[Any, np.dtype[np.float32]], - # dims: Mapping[Any, _Dim], - # ) -> None: - # actual = target.broadcast_to(dims) - # assert actual.sizes == dims - # 
@pytest.mark.parametrize( # "dims, expected_sizes", # [ @@ -538,11 +511,11 @@ def test_expand_dims( # ) # def test_permute_dims( # self, - # random_data: NamedArray[Any, np.dtype[np.float32]], + # target: NamedArray[Any, np.dtype[np.float32]], # dims: _DimsLike, # expected_sizes: dict[_Dim, _IntOrUnknown], # ) -> None: - # actual = random_data.permute_dims(*dims) + # actual = target.permute_dims(*dims) # assert actual.sizes == expected_sizes # @pytest.mark.parametrize( From ff01bce97d1a3997a0ef576b0586a1eabef7ae77 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 13:13:55 -0800 Subject: [PATCH 29/53] update tests --- xarray/tests/test_namedarray.py | 62 ++++++++++++++++----------------- 1 file changed, 31 insertions(+), 31 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 81f77f861b9..36d05909d77 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -24,6 +24,7 @@ _Dim, _DimsLike, _DType, + _IntOrUnknown, _Shape, _ShapeLike, duckarray, @@ -500,34 +501,33 @@ def test_expand_dims( assert result.shape == expected_shape assert result.dims == expected_dims - # @pytest.mark.parametrize( - # "dims, expected_sizes", - # [ - # # Basic case: reversing the dimensions - # ((), {"z": 5, "y": 4, "x": 3}), - # (["y", "x", "z"], {"y": 4, "x": 3, "z": 5}), - # (["y", "x", ...], {"y": 4, "x": 3, "z": 5}), - # ], - # ) - # def test_permute_dims( - # self, - # target: NamedArray[Any, np.dtype[np.float32]], - # dims: _DimsLike, - # expected_sizes: dict[_Dim, _IntOrUnknown], - # ) -> None: - # actual = target.permute_dims(*dims) - # assert actual.sizes == expected_sizes - - # @pytest.mark.parametrize( - # "dims", - # [ - # (["y", "x"]), - # ], - # ) - # def test_permute_dims_errors( - # self, - # random_data: NamedArray[Any, np.dtype[np.float32]], - # dims: _DimsLike, - # ) -> None: - # with pytest.raises(ValueError): - # random_data.permute_dims(*dims) + @pytest.mark.parametrize( + "dims, expected_sizes", + [ + ((), {"y": 5, "x": 2}), + (["y", "x"], {"y": 5, "x": 2}), + (["y", ...], {"y": 5, "x": 2}), + ], + ) + def test_permute_dims( + self, + target: NamedArray[Any, np.dtype[np.float32]], + dims: _DimsLike, + expected_sizes: dict[_Dim, _IntOrUnknown], + ) -> None: + actual = target.permute_dims(*dims) + assert actual.sizes == expected_sizes + + @pytest.mark.parametrize( + "dims", + [ + (["y"]), + ], + ) + def test_permute_dims_errors( + self, + target: NamedArray[Any, np.dtype[np.float32]], + dims: _DimsLike, + ) -> None: + with pytest.raises(ValueError): + target.permute_dims(*dims) From 2c4b2b6a511d3285cf7996ca1bc44a9f7389081c Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 13:49:50 -0800 Subject: [PATCH 30/53] remove unnecessary guard conditions --- xarray/tests/test_namedarray.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 36d05909d77..274349b0c25 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -364,7 +364,6 @@ def test_new_namedarray(self) -> None: narr_int = narr_float._new(("x",), np.array([1, 3], dtype=dtype_int)) assert narr_int.dtype == dtype_int - # Test with a subclass: class Variable( NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co] ): @@ -402,9 +401,8 @@ def _new( if data is _default: return type(self)(dims_, copy.copy(self._data), attrs_) - else: - cls_ = cast("type[Variable[Any, _DType]]", type(self)) 
- return cls_(dims_, data, attrs_) + cls_ = cast("type[Variable[Any, _DType]]", type(self)) + return cls_(dims_, data, attrs_) var_float: Variable[Any, np.dtype[np.float32]] var_float = Variable(("x",), np.array([1.5, 3.2], dtype=dtype_float)) @@ -429,7 +427,6 @@ def test_replace_namedarray(self) -> None: narr_float2 = NamedArray(("x",), np_val2) assert narr_float2.dtype == dtype_float - # Test with a subclass: class Variable( NamedArray[_ShapeType_co, _DType_co], Generic[_ShapeType_co, _DType_co] ): @@ -467,9 +464,8 @@ def _new( if data is _default: return type(self)(dims_, copy.copy(self._data), attrs_) - else: - cls_ = cast("type[Variable[Any, _DType]]", type(self)) - return cls_(dims_, data, attrs_) + cls_ = cast("type[Variable[Any, _DType]]", type(self)) + return cls_(dims_, data, attrs_) var_float: Variable[Any, np.dtype[np.float32]] var_float = Variable(("x",), np_val) From c0aefaa3d36a535434fb880735bc327faa796f67 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 14:03:00 -0800 Subject: [PATCH 31/53] Update type hints in NamedArray class and test cases --- xarray/namedarray/core.py | 6 +++--- xarray/tests/test_namedarray.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 7bfa9d4a5b4..8e3a8e2f710 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -854,7 +854,7 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: def permute_dims( self, - *dims: Hashable | ellipsis, + *dims: _DimsLike | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", ) -> NamedArray[Any, _DType_co]: """Return a new object with transposed dimensions. @@ -901,7 +901,7 @@ def permute_dims( def T(self) -> NamedArray[Any, _DType_co]: return self.permute_dims() - def _check_dims(self, dims: Mapping[Any, _Dim] | _Dim) -> None: + def _check_dims(self, dims: _DimsLike | Mapping[_Dim, int]) -> None: if isinstance(dims, dict): dims_keys = dims else: @@ -1043,7 +1043,7 @@ def expand_dims( None if d in combined_dims else slice(None) for d in new_dims ) - expanded_data: duckarray[_ShapeType, _DType] = self.data[slicing_tuple] + expanded_data: duckarray[Any, _DType_co] = self.data[slicing_tuple] return self._new(dims=new_dims, data=expanded_data) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 274349b0c25..021e94c1fed 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -487,7 +487,7 @@ def _new( def test_expand_dims( self, target: NamedArray[Any, np.dtype[np.float32]], - dim: _Dim | _DimsLike | Mapping[_Dim, int] | None, + dim: _DimsLike | Mapping[_Dim, int] | None, expected_ndim: int, expected_shape: _ShapeLike, expected_dims: _DimsLike, From e2cce0570f067946fbc4ed6fd1744f2f25525685 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 14:23:15 -0800 Subject: [PATCH 32/53] Refactor NamedArray T property to handle non-2D arrays --- xarray/namedarray/core.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 8e3a8e2f710..92ff4855db8 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -899,7 +899,15 @@ def permute_dims( @property def T(self) -> NamedArray[Any, _DType_co]: - return self.permute_dims() + """Return a new object with transposed dimensions.""" + if self.ndim != 2: + raise ValueError( + f"x.T requires x to have 2 dimensions, got {self.ndim}. Use x.permute_dims() to permute dimensions." 
+ ) + + data = self._data.T + dims = self.dims[::-1] + return self._replace(dims=dims, data=data) def _check_dims(self, dims: _DimsLike | Mapping[_Dim, int]) -> None: if isinstance(dims, dict): @@ -1043,7 +1051,7 @@ def expand_dims( None if d in combined_dims else slice(None) for d in new_dims ) - expanded_data: duckarray[Any, _DType_co] = self.data[slicing_tuple] + expanded_data: duckarray[Any, _DType_co] = self._data[slicing_tuple] return self._new(dims=new_dims, data=expanded_data) From 3633a2ed93a1813fcdfe4d3431bc0130f642ce80 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 14:26:12 -0800 Subject: [PATCH 33/53] Reverse the order of dimensions in x.T --- xarray/namedarray/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 92ff4855db8..044828dfd33 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -905,7 +905,7 @@ def T(self) -> NamedArray[Any, _DType_co]: f"x.T requires x to have 2 dimensions, got {self.ndim}. Use x.permute_dims() to permute dimensions." ) - data = self._data.T + data = self._data[::-1, ::-1] dims = self.dims[::-1] return self._replace(dims=dims, data=data) From 326dad486a5f1ac738c34c200a3a3783106a956e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 17:54:58 -0800 Subject: [PATCH 34/53] Refactor broadcasting and dimension expansion in NamedArray --- xarray/namedarray/core.py | 42 ++++++++++++++++++----------- xarray/tests/test_namedarray.py | 47 ++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 044828dfd33..808f9a65f4a 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -11,7 +11,6 @@ Callable, Generic, Literal, - SupportsIndex, TypeVar, cast, overload, @@ -894,7 +893,9 @@ def permute_dims( return self.copy(deep=False) axes = self.get_axis_num(dims) - data = self._data.transpose(axes) + data = self._data.transpose( + axes + ) # TODO: replace this with use array-api-compat function return self._replace(dims=dims, data=data) @property @@ -920,11 +921,9 @@ def _check_dims(self, dims: _DimsLike | Mapping[_Dim, int]) -> None: f"existing dimensions {self.dims!r}. missing dims: {missing_dims}" ) - def _get_expanded_dims(self, dims: Mapping[Any, _Dim] | _Dim) -> _DimsLike: - self_dims = set(self.dims) - return tuple(dim for dim in dims if dim not in self_dims) + self.dims - - def broadcast_to(self, dims: Mapping[Any, _Dim]) -> NamedArray[Any, _DType_co]: + def broadcast_to( + self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any + ) -> NamedArray[Any, _DType_co]: """ Broadcast namedarray to a new shape. @@ -933,18 +932,25 @@ def broadcast_to(self, dims: Mapping[Any, _Dim]) -> NamedArray[Any, _DType_co]: dims : dict Dimensions to broadcast the array to. Keys are dimension names and values are the new sizes. 
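
As a hedged, NumPy-only illustration of the constraint this method inherits from the underlying broadcast: broadcasting can only stretch axes whose current length is 1, which is also why the error test added later in this series expects a failure when a length-5 dimension is asked to become length 2.

import numpy as np

a = np.arange(2).reshape(2, 1)                 # shape (2, 1)
print(np.broadcast_to(a, (2, 5)).shape)        # (2, 5): the length-1 axis stretches

b = np.ones((2, 5))
try:
    np.broadcast_to(b, (2, 2))                 # a length-5 axis cannot become 2
except ValueError as err:
    print(type(err).__name__)                  # ValueError
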
""" + from xarray.core import duck_array_ops - from xarray.core import duck_array_ops # TODO: Remove this import in the future + combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "broadcast_to") - self._check_dims(dims) - expanded_dims = self._get_expanded_dims(dims) - shape = list(dims.values()) + # check that the dimensions are valid + self._check_dims(list(combined_dims.keys())) - dims_map = dict(zip(dims, cast(Iterable[SupportsIndex], shape))) - temporary_shape = tuple(dims_map[dim] for dim in expanded_dims) + # create a dictionary of the current dimensions and their sizes + current_shape = dict(zip(self.dims, self._data.shape)) - data = duck_array_ops.broadcast_to(self.data, temporary_shape) # type: ignore - return self._new(data=data, dims=expanded_dims) + # update the current shape with the new dimensions, keeping the order of the original dimensions + broadcast_shape = {d: current_shape.get(d, 1) for d in self.dims} + broadcast_shape |= combined_dims + + # ensure the dimensions are in the correct order + ordered_dims = list(broadcast_shape.keys()) + ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) + data = duck_array_ops.broadcast_to(self._data, ordered_shape) + return self._new(data=data, dims=ordered_dims) def expand_dims( self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any @@ -1028,6 +1034,12 @@ def expand_dims( f"Cannot assign multiple new dimensions to the same position: {positions}" ) + for new_dim in combined_dims.keys(): + if new_dim in self.dims: + raise ValueError( + f"Dimension {new_dim} already exists. Please remove it from the specified dimensions: {combined_dims}" + ) + # create a list of all dimensions, placing new ones at their specified positions all_dims_with_pos = [(d, i) for i, d in enumerate(self.dims)] diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 021e94c1fed..6011a1f4f17 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -497,6 +497,19 @@ def test_expand_dims( assert result.shape == expected_shape assert result.dims == expected_dims + def test_expand_dims_errors( + self, target: NamedArray[Any, np.dtype[np.float32]] + ) -> None: + with pytest.raises(ValueError, match=r"Dimension.*already exists"): + dim = {"x": 0} + target.expand_dims(dim=dim) + + with pytest.raises( + ValueError, match=r"Cannot assign multiple new dimensions.*" + ): + dim = {"z": 0, "a": 0} + target.expand_dims(dim=dim) + @pytest.mark.parametrize( "dims, expected_sizes", [ @@ -514,16 +527,38 @@ def test_permute_dims( actual = target.permute_dims(*dims) assert actual.sizes == expected_sizes + def test_permute_dims_errors( + self, + target: NamedArray[Any, np.dtype[np.float32]], + ) -> None: + with pytest.raises(ValueError, match=r"'y'.*permuted list"): + dims = ["y"] + target.permute_dims(*dims) + @pytest.mark.parametrize( - "dims", + "broadcast_dims,expected_ndim", [ - (["y"]), + ({"x": 2, "y": 5}, 2), + ({"x": 2, "y": 5, "z": 2}, 3), + ({"w": 1, "x": 2, "y": 5}, 3), ], ) - def test_permute_dims_errors( + def test_broadcast_to( self, target: NamedArray[Any, np.dtype[np.float32]], - dims: _DimsLike, + broadcast_dims: Mapping[_Dim, int], + expected_ndim: int, ) -> None: - with pytest.raises(ValueError): - target.permute_dims(*dims) + expand_dims = set(broadcast_dims.keys()) - set(target.dims) + result = target.expand_dims(list(expand_dims)).broadcast_to(broadcast_dims) + assert result.ndim == expected_ndim + assert result.sizes == broadcast_dims + + def test_broadcast_to_errors( 
+ self, target: NamedArray[Any, np.dtype[np.float32]] + ) -> None: + with pytest.raises( + ValueError, + match=r"operands could not be broadcast together with remapped shapes", + ): + target.broadcast_to({"x": 2, "y": 2}) From 1c562a959912eb9a6396142334cc07e1a35f1dcc Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 18:05:11 -0800 Subject: [PATCH 35/53] update docstring --- xarray/namedarray/core.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 808f9a65f4a..ee9b687bf3b 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -925,13 +925,38 @@ def broadcast_to( self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any ) -> NamedArray[Any, _DType_co]: """ - Broadcast namedarray to a new shape. + Broadcast the NamedArray to a new shape by extending its dimensions. + + This method allows for the expansion of the array's dimensions to a specified shape. + New dimensions with specified sizes can be added, and existing dimensions can be resized. + It handles both positional and keyword arguments for specifying the dimensions to broadcast. Parameters ---------- - dims : dict - Dimensions to broadcast the array to. Keys are dimension names and values are the new sizes. + dim : dict, str, sequence of str, optional + Dimensions to broadcast the array to. If a dict, keys are dimension names and values are the new sizes. + If a string or sequence of strings, new dimensions are added with a size of 1. + + **dim_kwargs : Any + Additional dimensions specified as keyword arguments. Each keyword argument specifies the name of the new dimension and its size. + + Returns + ------- + NamedArray + A new NamedArray with the broadcasted dimensions. 
+ + Examples + -------- + >>> data = np.asarray([[1.0, 2.0], [3.0, 4.0]]) + >>> array = xr.NamedArray(("x", "y"), data) + >>> array.sizes + {'x': 2, 'y': 2} + + >>> broadcasted = array.expand_dims("lat").broadcast_to(x=2, y=2, lat=6) + >>> broadcasted.sizes + {'lat': 6, 'x': 2, 'y': 2} """ + from xarray.core import duck_array_ops combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "broadcast_to") From a7fd5c7d75608524291cd41130321650e6cae38b Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 18:06:07 -0800 Subject: [PATCH 36/53] add todo item --- xarray/namedarray/core.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index ee9b687bf3b..521c52af4b7 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -974,7 +974,9 @@ def broadcast_to( # ensure the dimensions are in the correct order ordered_dims = list(broadcast_shape.keys()) ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) - data = duck_array_ops.broadcast_to(self._data, ordered_shape) + data = duck_array_ops.broadcast_to( + self._data, ordered_shape + ) # TODO: use array-api-compat function return self._new(data=data, dims=ordered_dims) def expand_dims( From 82e89c0325d7039633292b97c41aed0893f78ede Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 02:09:58 +0000 Subject: [PATCH 37/53] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/variable.py | 3 +-- xarray/tests/test_namedarray.py | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/core/variable.py b/xarray/core/variable.py index ec5d3eacf33..f9f814bc0a6 100644 --- a/xarray/core/variable.py +++ b/xarray/core/variable.py @@ -45,9 +45,8 @@ is_duck_array, maybe_coerce_to_str, ) -from xarray.namedarray.core import NamedArray -from xarray.namedarray.utils import infix_dims from xarray.namedarray.core import NamedArray, _raise_if_any_duplicate_dimensions +from xarray.namedarray.utils import infix_dims NON_NUMPY_SUPPORTED_ARRAY_TYPES = ( indexing.ExplicitlyIndexed, diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 57fcb85e16f..5a29437e71f 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -562,6 +562,7 @@ def test_broadcast_to_errors( match=r"operands could not be broadcast together with remapped shapes", ): target.broadcast_to({"x": 2, "y": 2}) + def test_warn_on_repeated_dimension_names(self) -> None: with pytest.warns(UserWarning, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2)) From 31acb3be7f24075ec3e27d3f0958310aa065a954 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 21:07:40 -0800 Subject: [PATCH 38/53] use comprehension --- xarray/namedarray/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index a4ee0add49a..5696fe57bcc 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -489,7 +489,7 @@ def _parse_dimensions(self, dims: _DimsLike) -> _Dims: f"number of data dimensions, ndim={self.ndim}" ) if len(set(dims)) < len(dims): - repeated_dims = set([d for d in dims if dims.count(d) > 1]) + repeated_dims = {d for d in dims if dims.count(d) > 1} warnings.warn( f"Duplicate dimension names present: dimensions {repeated_dims} appear more than once in 
dims={dims}. " "We do not yet support duplicate dimension names, but we do allow initial construction of the object. " @@ -1112,7 +1112,7 @@ def _raise_if_any_duplicate_dimensions( dims: _Dims, err_context: str = "This function" ) -> None: if len(set(dims)) < len(dims): - repeated_dims = set([d for d in dims if dims.count(d) > 1]) + repeated_dims = {d for d in dims if dims.count(d) > 1} raise ValueError( f"{err_context} cannot handle duplicate dimensions, but dimensions {repeated_dims} appear more than once on this object's dims: {dims}" ) From 6fdbad63adf13125ad949fcf59e1a0a6b7043e1f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 30 Nov 2023 21:12:56 -0800 Subject: [PATCH 39/53] use dim --- xarray/namedarray/core.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 5696fe57bcc..56a7645829c 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -863,14 +863,14 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: def permute_dims( self, - *dims: _DimsLike | ellipsis, + *dim: _DimsLike | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", ) -> NamedArray[Any, _DType_co]: """Return a new object with transposed dimensions. Parameters ---------- - *dims : Hashable, optional + *dim : Hashable, optional By default, reverse the order of the dimensions. Otherwise, reorder the dimensions to this order. missing_dims : {"raise", "warn", "ignore"}, default: "raise" @@ -892,10 +892,10 @@ def permute_dims( numpy.transpose """ - if not dims: + if not dim: dims = self.dims[::-1] else: - dims = tuple(infix_dims(dims, self.dims, missing_dims)) + dims = tuple(infix_dims(dim, self.dims, missing_dims)) if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension From 401b6d54e26a7ed13d84ef271d2f054edc81b035 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 18:54:30 +0000 Subject: [PATCH 40/53] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/namedarray/_typing.py | 2 +- xarray/namedarray/core.py | 1 - xarray/namedarray/utils.py | 2 -- 3 files changed, 1 insertion(+), 4 deletions(-) diff --git a/xarray/namedarray/_typing.py b/xarray/namedarray/_typing.py index 84bbdd4db3d..8dfbaabbfc4 100644 --- a/xarray/namedarray/_typing.py +++ b/xarray/namedarray/_typing.py @@ -6,8 +6,8 @@ from typing import ( Any, Callable, - Literal, Final, + Literal, Protocol, SupportsIndex, TypeVar, diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 181030d8744..70603f1a151 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -42,7 +42,6 @@ is_duck_dask_array, to_0d_object_array, ) -from xarray.namedarray.utils import is_duck_dask_array, to_0d_object_array if TYPE_CHECKING: from numpy.typing import ArrayLike, NDArray diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index ea2bf911e50..d086a3353d7 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -5,8 +5,6 @@ from collections.abc import Hashable, Iterator, Mapping from enum import Enum from typing import TYPE_CHECKING, Any, Final, TypeVar, cast -from collections.abc import Hashable -from typing import TYPE_CHECKING, Any import numpy as np From c7696395b1b6e4b59ff6cb3e44cffd28515f219e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Fri, 1 Dec 2023 12:00:35 -0800 Subject: [PATCH 41/53] fix imports --- 
xarray/namedarray/core.py | 1 - xarray/namedarray/utils.py | 11 +---------- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 70603f1a151..0584ea15ca2 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -36,7 +36,6 @@ _SupportsReal, ) from xarray.namedarray.utils import ( - _default, either_dict_or_kwargs, infix_dims, is_duck_dask_array, diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index d086a3353d7..66c06f14c16 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -3,8 +3,7 @@ import sys import warnings from collections.abc import Hashable, Iterator, Mapping -from enum import Enum -from typing import TYPE_CHECKING, Any, Final, TypeVar, cast +from typing import TYPE_CHECKING, Any, TypeVar, cast import numpy as np @@ -33,14 +32,6 @@ T = TypeVar("T") -# Singleton type, as per https://github.com/python/typing/pull/240 -class Default(Enum): - token: Final = 0 - - -_default = Default.token - - def module_available(module: str) -> bool: """Checks whether a module is installed without importing it. From 959d97c673f0a20f4ba5735f07cee63a92865152 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 4 Dec 2023 20:35:35 -0800 Subject: [PATCH 42/53] formatting only --- xarray/core/dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index f6f4f35ad9f..7fd0add45b4 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -112,8 +112,6 @@ drop_dims_from_indexers, either_dict_or_kwargs, emit_user_level_warning, - infix_dims, - is_dict_like, is_scalar, maybe_wrap_array, ) From 7be6a2d4b9d32fb7c0cb68102cdf9e5cd5002fea Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe <13301940+andersy005@users.noreply.github.com> Date: Mon, 4 Dec 2023 20:39:54 -0800 Subject: [PATCH 43/53] Apply suggestions from code review Co-authored-by: Deepak Cherian --- xarray/namedarray/core.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 0584ea15ca2..4c24bb0a518 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -916,7 +916,7 @@ def T(self) -> NamedArray[Any, _DType_co]: f"x.T requires x to have 2 dimensions, got {self.ndim}. Use x.permute_dims() to permute dimensions." ) - data = self._data[::-1, ::-1] + data = self._data.T dims = self.dims[::-1] return self._replace(dims=dims, data=data) @@ -938,7 +938,6 @@ def broadcast_to( Broadcast the NamedArray to a new shape by extending its dimensions. This method allows for the expansion of the array's dimensions to a specified shape. - New dimensions with specified sizes can be added, and existing dimensions can be resized. It handles both positional and keyword arguments for specifying the dimensions to broadcast. 
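
A simplified, standalone sketch of the dict-or-kwargs convention referred to above; the real helper is either_dict_or_kwargs (shown earlier in this series), and this version leaves out its mypy-related cast and dict-likeness check.

from __future__ import annotations

from collections.abc import Mapping
from typing import Any


def dict_or_kwargs(pos: Mapping[Any, int] | None, kw: dict[str, int], name: str) -> Mapping[Any, int]:
    # Accept either a positional mapping or keyword arguments, but never both.
    if not pos:
        return kw
    if kw:
        raise ValueError(f"cannot specify both keyword and positional arguments to .{name}")
    return pos


print(dict_or_kwargs({"x": 2, "y": 5}, {}, "broadcast_to"))    # {'x': 2, 'y': 5}
print(dict_or_kwargs(None, {"x": 2, "y": 5}, "broadcast_to"))  # {'x': 2, 'y': 5}
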
Parameters @@ -975,7 +974,7 @@ def broadcast_to( self._check_dims(list(combined_dims.keys())) # create a dictionary of the current dimensions and their sizes - current_shape = dict(zip(self.dims, self._data.shape)) + current_shape = self.sizes # update the current shape with the new dimensions, keeping the order of the original dimensions broadcast_shape = {d: current_shape.get(d, 1) for d in self.dims} @@ -1025,10 +1024,6 @@ def expand_dims( >>> data = np.asarray([[1.0, 2.0], [3.0, 4.0]]) >>> array = xr.NamedArray(("x", "y"), data) - # expand dimensions without specifying any name (adds 'dim_0') - >>> expanded = array.expand_dims() - >>> expanded.dims - ('dim_0', 'x', 'y') # expand dimensions by specifying a new dimension name >>> expanded = array.expand_dims(dim="z") @@ -1046,14 +1041,6 @@ def expand_dims( ('z', 'x', 'a', 'y') """ - if dim is None and not dim_kwargs: - # If no dimensions specified, find a unique default dimension name - dim_number = 0 - default_dim = f"dim_{dim_number}" - while default_dim in self.dims: - dim_number += 1 - default_dim = f"dim_{dim_number}" - dim = {default_dim: 0} if isinstance(dim, str): dim = {dim: 0} @@ -1071,10 +1058,10 @@ def expand_dims( f"Cannot assign multiple new dimensions to the same position: {positions}" ) - for new_dim in combined_dims.keys(): - if new_dim in self.dims: - raise ValueError( - f"Dimension {new_dim} already exists. Please remove it from the specified dimensions: {combined_dims}" + conflicts = set(self.dims) & set(combined_dims) + if conflicts: + raise ValueError( + f"Dimensions {conflicts!r} already exists. Please remove it from the specified dimensions: {combined_dims}" ) # create a list of all dimensions, placing new ones at their specified positions From ea24613f58ac77797ae9ad831fed0c58b3f5c579 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 7 Dec 2023 11:56:30 -0800 Subject: [PATCH 44/53] [skip-rtd] fix indentation --- xarray/namedarray/core.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 4c24bb0a518..9def5260b2d 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -1041,7 +1041,6 @@ def expand_dims( ('z', 'x', 'a', 'y') """ - if isinstance(dim, str): dim = {dim: 0} @@ -1058,11 +1057,10 @@ def expand_dims( f"Cannot assign multiple new dimensions to the same position: {positions}" ) - conflicts = set(self.dims) & set(combined_dims) - if conflicts: + if conflicts := set(self.dims) & set(combined_dims): raise ValueError( - f"Dimensions {conflicts!r} already exists. Please remove it from the specified dimensions: {combined_dims}" - ) + f"Dimensions {conflicts!r} already exists. 
Please remove it from the specified dimensions: {combined_dims}" + ) # create a list of all dimensions, placing new ones at their specified positions all_dims_with_pos = [(d, i) for i, d in enumerate(self.dims)] From f2a5989c37ff2ac1bdb1af4f0116f15f2cf6f937 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 06:55:40 +0000 Subject: [PATCH 45/53] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/core/dataarray.py | 1 - xarray/tests/test_namedarray.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 23f2dd130b4..099a94592fa 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -4,7 +4,6 @@ import warnings from collections.abc import Hashable, Iterable, Mapping, MutableMapping, Sequence from os import PathLike -from typing import TYPE_CHECKING, Any, Callable, Generic, Literal, NoReturn, overload from typing import ( TYPE_CHECKING, Any, diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 2b381d618ee..ac65ace76c6 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -29,8 +29,8 @@ _Dim, _DimsLike, _DType, - _IntOrUnknown, _IndexKeyLike, + _IntOrUnknown, _Shape, _ShapeLike, duckarray, From 5f7e5c5d842379650ee3992f5ed5929078ee123e Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 24 Jan 2024 16:41:42 -0800 Subject: [PATCH 46/53] refactor expand_dims to simplify API simplified the `expand_dims` method of the NamedArray class by removing support for multiple and keyword arguments for new dimensions. the change updates the method signature to only accept a single dimension name, enhancing clarity and maintainability. this shift focuses on the common use case of adding a single dimension and moves extended functionality to a separate API. --- xarray/namedarray/core.py | 77 +++------------------------------ xarray/tests/test_namedarray.py | 27 ++++-------- 2 files changed, 15 insertions(+), 89 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 9def5260b2d..dc7c0539835 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -989,34 +989,24 @@ def broadcast_to( return self._new(data=data, dims=ordered_dims) def expand_dims( - self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any + self, + dim: _Dim | Default = _default, ) -> NamedArray[Any, _DType_co]: """ Expand the dimensions of the NamedArray. - This method adds new dimensions to the object. New dimensions can be added - at specific positions with a given size, which defaults to 1 if not specified. - The method handles both positional and keyword arguments for specifying new dimensions. + This method adds new dimensions to the object. The new dimensions are added at the beginning of the array. Parameters ---------- - dim : str, sequence of str, or dict, optional - Dimensions to include on the new object. It must be a superset of the existing dimensions. - If a dict, values are used to provide the axis position of dimensions; otherwise, new dimensions are inserted with length 1. - If not provided, a new dimension named 'dim_0', 'dim_1', etc., is added at the start, ensuring no name conflict with existing dimensions. - - **dim_kwargs : Any - Additional dimensions specified as keyword arguments. Each keyword argument specifies the name of the new dimension and its position. 
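
A plain-NumPy analogue of what the simplified method now does: a single new length-1 dimension inserted at the front of the array (the name "z" is only an example).

import numpy as np

data = np.asarray([[1.0, 2.0], [3.0, 4.0]])    # conceptually dims ("x", "y")
expanded = np.expand_dims(data, axis=0)        # becomes ("z", "x", "y")
print(expanded.shape)                          # (1, 2, 2)
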
+ dim : Hashable, optional + Dimension name to expand the array to. This dimension will be added at the beginning of the array. Returns ------- NamedArray A new NamedArray with expanded dimensions. - Raises - ------ - ValueError - If any of the specified new dimensions already exist in the NamedArray. Examples -------- @@ -1030,64 +1020,11 @@ def expand_dims( >>> expanded.dims ('z', 'x', 'y') - # expand dimensions with multiple new dimensions - >>> expanded = array.expand_dims(dim={"z": 0, "a": 2}) - >>> expanded.dims - ('z', 'x', 'a', 'y') - - # using keyword arguments to specify new dimensions - >>> expanded = array.expand_dims(z=0, a=2) - >>> expanded.dims - ('z', 'x', 'a', 'y') """ - if isinstance(dim, str): - dim = {dim: 0} - - elif isinstance(dim, (list, tuple)): - # if dim is a list/tuple, convert to a dict with default positions - dim = {d: idx for idx, d in (enumerate(dim))} - - combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "expand_dims") - - # check for duplicate positions - positions = list(combined_dims.values()) - if len(positions) != len(set(positions)): - raise ValueError( - f"Cannot assign multiple new dimensions to the same position: {positions}" - ) - - if conflicts := set(self.dims) & set(combined_dims): - raise ValueError( - f"Dimensions {conflicts!r} already exists. Please remove it from the specified dimensions: {combined_dims}" - ) - - # create a list of all dimensions, placing new ones at their specified positions - all_dims_with_pos = [(d, i) for i, d in enumerate(self.dims)] - - # adjust positions of existing dimensions based on new dimensions' positions - for new_dim, new_pos in combined_dims.items(): - for i, (existing_dim, existing_pos) in enumerate(all_dims_with_pos): - if existing_pos >= new_pos: - all_dims_with_pos[i] = (existing_dim, existing_pos + 1) - - # add new dimensions to the list - all_dims_with_pos.extend(combined_dims.items()) - - # sort by position to get the final order - all_dims_with_pos.sort(key=lambda x: x[1]) - - # extract the ordered list of dimensions - new_dims = [dim[0] for dim in all_dims_with_pos] - - # use slicing to expand dimensions - slicing_tuple = tuple( - None if d in combined_dims else slice(None) for d in new_dims - ) - - expanded_data: duckarray[Any, _DType_co] = self._data[slicing_tuple] + from xarray.namedarray._array_api import expand_dims - return self._new(dims=new_dims, data=expanded_data) + return expand_dims(self, dim=dim) _NamedArray = NamedArray[Any, np.dtype[_ScalarType_co]] diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index ac65ace76c6..1e2ad32cf14 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -332,7 +332,7 @@ def test_duck_array_class( self, ) -> None: def test_duck_array_typevar( - a: duckarray[Any, _DType] + a: duckarray[Any, _DType], ) -> duckarray[Any, _DType]: # Mypy checks a is valid: b: duckarray[Any, _DType] = a @@ -496,16 +496,15 @@ def _new( @pytest.mark.parametrize( "dim,expected_ndim,expected_shape,expected_dims", [ - (None, 3, (1, 2, 5), ("dim_0", "x", "y")), + (None, 3, (1, 2, 5), (None, "x", "y")), + (_default, 3, (1, 2, 5), ("dim_2", "x", "y")), ("z", 3, (1, 2, 5), ("z", "x", "y")), - (["z", "a"], 4, (1, 1, 2, 5), ("z", "a", "x", "y")), - ({"z": 0, "a": 2}, 4, (1, 2, 1, 5), ("z", "x", "a", "y")), ], ) def test_expand_dims( self, target: NamedArray[Any, np.dtype[np.float32]], - dim: _DimsLike | Mapping[_Dim, int] | None, + dim: _Dim | _default, expected_ndim: int, expected_shape: _ShapeLike, expected_dims: 
_DimsLike, @@ -515,19 +514,6 @@ def test_expand_dims( assert result.shape == expected_shape assert result.dims == expected_dims - def test_expand_dims_errors( - self, target: NamedArray[Any, np.dtype[np.float32]] - ) -> None: - with pytest.raises(ValueError, match=r"Dimension.*already exists"): - dim = {"x": 0} - target.expand_dims(dim=dim) - - with pytest.raises( - ValueError, match=r"Cannot assign multiple new dimensions.*" - ): - dim = {"z": 0, "a": 0} - target.expand_dims(dim=dim) - @pytest.mark.parametrize( "dims, expected_sizes", [ @@ -568,7 +554,10 @@ def test_broadcast_to( expected_ndim: int, ) -> None: expand_dims = set(broadcast_dims.keys()) - set(target.dims) - result = target.expand_dims(list(expand_dims)).broadcast_to(broadcast_dims) + # loop over expand_dims and call .expand_dims(dim=dim) in a loop + for dim in expand_dims: + target = target.expand_dims(dim=dim) + result = target.broadcast_to(broadcast_dims) assert result.ndim == expected_ndim assert result.sizes == broadcast_dims From 0fb44436665cbe0faaea9c35dfd1b67ca02d3f14 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Wed, 24 Jan 2024 19:18:19 -0800 Subject: [PATCH 47/53] fix type hint for `dim` parameter in test. --- xarray/tests/test_namedarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index 1e2ad32cf14..db49aa80066 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -504,7 +504,7 @@ def _new( def test_expand_dims( self, target: NamedArray[Any, np.dtype[np.float32]], - dim: _Dim | _default, + dim: _Dim | Default, expected_ndim: int, expected_shape: _ShapeLike, expected_dims: _DimsLike, From a831c148aec218b323a950c0b573f090cf07f717 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 25 Jan 2024 15:50:24 -0800 Subject: [PATCH 48/53] fix typing issues --- xarray/namedarray/_array_api.py | 25 +++++++++++++++++ xarray/namedarray/core.py | 48 ++++++++++++++++++--------------- xarray/namedarray/utils.py | 17 ++++++------ 3 files changed, 60 insertions(+), 30 deletions(-) diff --git a/xarray/namedarray/_array_api.py b/xarray/namedarray/_array_api.py index b5c320e0b96..15569ad0e52 100644 --- a/xarray/namedarray/_array_api.py +++ b/xarray/namedarray/_array_api.py @@ -9,6 +9,7 @@ from xarray.namedarray._typing import ( Default, _arrayapi, + _Axes, _Axis, _default, _Dim, @@ -196,3 +197,27 @@ def expand_dims( d.insert(axis, dim) out = x._new(dims=tuple(d), data=xp.expand_dims(x._data, axis=axis)) return out + + +def permute_dims(x: NamedArray[Any, _DType], axes: _Axes) -> NamedArray[Any, _DType]: + """ + Permutes the dimensions of an array. + + Parameters + ---------- + x : + Array to permute. + axes : + Permutation of the dimensions of x. + + Returns + ------- + out : + An array with permuted dimensions. The returned array must have the same + data type as x. + + """ + xp = _get_data_namespace(x) + dims = x.dims + out = x._new(dims=tuple(dims[i] for i in axes), data=xp.permute_dims(x._data, axes)) + return out diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index dc7c0539835..f8c3b4afa22 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -863,7 +863,7 @@ def _to_dense(self) -> NamedArray[Any, _DType_co]: def permute_dims( self, - *dim: _DimsLike | ellipsis, + *dim: Iterable[_Dim] | ellipsis, missing_dims: ErrorOptionsWithWarn = "raise", ) -> NamedArray[Any, _DType_co]: """Return a new object with transposed dimensions. 
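
A rough sketch of the name-to-axis translation that happens before the underlying transpose/permute call; in the real class get_axis_num performs this lookup, and the inline index() below is only a stand-in.

import numpy as np

dims = ("x", "y", "z")
data = np.zeros((3, 4, 5))

order = ("y", "x", "z")
axes = tuple(dims.index(d) for d in order)     # (1, 0, 2)
print(data.transpose(axes).shape)              # (4, 3, 5)
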
@@ -892,21 +892,29 @@ def permute_dims( numpy.transpose """ + from xarray.namedarray._array_api import permute_dims + if not dim: dims = self.dims[::-1] else: - dims = tuple(infix_dims(dim, self.dims, missing_dims)) + # Flatten the tuple and handle ellipsis + flattened_dims: list[_Dim] = [] + for item in dim: + if item is ...: + flattened_dims.extend(self.dims) + else: + flattened_dims.extend(item) + dims = tuple(infix_dims(flattened_dims, self.dims, missing_dims)) if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension # or dims are in same order return self.copy(deep=False) - axes = self.get_axis_num(dims) - data = self._data.transpose( - axes - ) # TODO: replace this with use array-api-compat function - return self._replace(dims=dims, data=data) + axes_result = self.get_axis_num(dims) + axes = (axes_result,) if isinstance(axes_result, int) else axes_result + + return permute_dims(self, axes) @property def T(self) -> NamedArray[Any, _DType_co]: @@ -916,20 +924,7 @@ def T(self) -> NamedArray[Any, _DType_co]: f"x.T requires x to have 2 dimensions, got {self.ndim}. Use x.permute_dims() to permute dimensions." ) - data = self._data.T - dims = self.dims[::-1] - return self._replace(dims=dims, data=data) - - def _check_dims(self, dims: _DimsLike | Mapping[_Dim, int]) -> None: - if isinstance(dims, dict): - dims_keys = dims - else: - dims_keys = dims if isinstance(dims, str) else list(dims) - if missing_dims := set(self.dims) - set(dims_keys): - raise ValueError( - f"new dimensions {dims!r} must be a superset of " - f"existing dimensions {self.dims!r}. missing dims: {missing_dims}" - ) + return self.permute_dims() def broadcast_to( self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any @@ -971,7 +966,16 @@ def broadcast_to( combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "broadcast_to") # check that the dimensions are valid - self._check_dims(list(combined_dims.keys())) + dims = list(combined_dims.keys()) + if isinstance(dims, dict): + dims_keys = dims + else: + dims_keys = dims if isinstance(dims, str) else list(dims) + if missing_dims := set(self.dims) - set(dims_keys): + raise ValueError( + f"new dimensions {dims!r} must be a superset of " + f"existing dimensions {self.dims!r}. 
missing dims: {missing_dims}" + ) # create a dictionary of the current dimensions and their sizes current_shape = self.sizes diff --git a/xarray/namedarray/utils.py b/xarray/namedarray/utils.py index 66c06f14c16..3097cd820a0 100644 --- a/xarray/namedarray/utils.py +++ b/xarray/namedarray/utils.py @@ -2,7 +2,7 @@ import sys import warnings -from collections.abc import Hashable, Iterator, Mapping +from collections.abc import Hashable, Iterable, Iterator, Mapping from typing import TYPE_CHECKING, Any, TypeVar, cast import numpy as np @@ -17,7 +17,7 @@ from numpy.typing import NDArray - from xarray.namedarray._typing import duckarray + from xarray.namedarray._typing import _Dim, duckarray try: from dask.array.core import Array as DaskArray @@ -78,8 +78,8 @@ def is_dict_like(value: Any) -> TypeGuard[Mapping[Any, Any]]: def drop_missing_dims( - supplied_dims: _DimsLike, - dims: _DimsLike, + supplied_dims: Iterable[_Dim], + dims: Iterable[_Dim], missing_dims: ErrorOptionsWithWarn, ) -> _DimsLike: """Depending on the setting of missing_dims, drop any dimensions from supplied_dims that @@ -119,16 +119,17 @@ def drop_missing_dims( def infix_dims( - dims_supplied: _DimsLike, - dims_all: _DimsLike, + dims_supplied: Iterable[_Dim], + dims_all: Iterable[_Dim], missing_dims: ErrorOptionsWithWarn = "raise", -) -> Iterator[_DimsLike]: +) -> Iterator[_Dim]: """ Resolves a supplied list containing an ellipsis representing other items, to a generator with the 'realized' list of all items """ if ... in dims_supplied: - if len(set(dims_all)) != len(dims_all): + dims_all_list = list(dims_all) + if len(set(dims_all)) != len(dims_all_list): raise ValueError("Cannot use ellipsis with repeated dims") if list(dims_supplied).count(...) > 1: raise ValueError("More than one ellipsis supplied") From 15245195d2ce9c9cfcb270d5720c31b4da6e8aca Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 25 Jan 2024 16:04:30 -0800 Subject: [PATCH 49/53] fix UnboundLocalError: local variable 'flattened_dims' referenced before assignment --- xarray/namedarray/_array_api.py | 9 +++++++-- xarray/namedarray/core.py | 2 +- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/xarray/namedarray/_array_api.py b/xarray/namedarray/_array_api.py index 15569ad0e52..2ad539bad18 100644 --- a/xarray/namedarray/_array_api.py +++ b/xarray/namedarray/_array_api.py @@ -217,7 +217,12 @@ def permute_dims(x: NamedArray[Any, _DType], axes: _Axes) -> NamedArray[Any, _DT data type as x. 
""" - xp = _get_data_namespace(x) + dims = x.dims - out = x._new(dims=tuple(dims[i] for i in axes), data=xp.permute_dims(x._data, axes)) + new_dims = tuple(dims[i] for i in axes) + if isinstance(x._data, _arrayapi): + xp = _get_data_namespace(x) + out = x._new(dims=new_dims, data=xp.permute_dims(x._data, axes)) + else: + out = x._new(dims=new_dims, data=x._data.transpose(axes)) # type: ignore[attr-defined] return out diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index f8c3b4afa22..e13ca510b44 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -904,7 +904,7 @@ def permute_dims( flattened_dims.extend(self.dims) else: flattened_dims.extend(item) - dims = tuple(infix_dims(flattened_dims, self.dims, missing_dims)) + dims = tuple(infix_dims(flattened_dims, self.dims, missing_dims)) if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension From e6af928ac57ef76cb99a1cb60a98c04161e0d249 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 25 Jan 2024 18:27:43 -0800 Subject: [PATCH 50/53] fix type hint --- xarray/namedarray/core.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index e13ca510b44..1d1dcfe2c2a 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -927,7 +927,7 @@ def T(self) -> NamedArray[Any, _DType_co]: return self.permute_dims() def broadcast_to( - self, dim: _DimsLike | Mapping[_Dim, int] | None = None, **dim_kwargs: Any + self, dim: Mapping[_Dim, int] | None = None, **dim_kwargs: Any ) -> NamedArray[Any, _DType_co]: """ Broadcast the NamedArray to a new shape by extending its dimensions. @@ -987,9 +987,7 @@ def broadcast_to( # ensure the dimensions are in the correct order ordered_dims = list(broadcast_shape.keys()) ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) - data = duck_array_ops.broadcast_to( - self._data, ordered_shape - ) # TODO: use array-api-compat function + data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore # TODO: use array-api-compat function return self._new(data=data, dims=ordered_dims) def expand_dims( From a6ad9b87adb1115db53a23dddba8ddd37e2c4638 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 25 Jan 2024 22:36:54 -0800 Subject: [PATCH 51/53] ignore typing --- xarray/namedarray/core.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index 1d1dcfe2c2a..a94bdb17002 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -897,14 +897,7 @@ def permute_dims( if not dim: dims = self.dims[::-1] else: - # Flatten the tuple and handle ellipsis - flattened_dims: list[_Dim] = [] - for item in dim: - if item is ...: - flattened_dims.extend(self.dims) - else: - flattened_dims.extend(item) - dims = tuple(infix_dims(flattened_dims, self.dims, missing_dims)) + dims = tuple(infix_dims(dim, self.dims, missing_dims)) # type: ignore if len(dims) < 2 or dims == self.dims: # no need to transpose if only one dimension From fd1c0d3a56a3856a14ac1c70695fac7cfaaed771 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Thu, 25 Jan 2024 22:48:41 -0800 Subject: [PATCH 52/53] update whats-new --- doc/whats-new.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 7cbd083a5b2..d42be5695a0 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -23,6 +23,9 @@ v2024.02.0 (unreleased) New Features 
~~~~~~~~~~~~ +- Add :py:meth:`NamedArray.expand_dims`, :py:meth:`NamedArray.permute_dims` and :py:meth:`NamedArray.broadcast_to` + (:pull:`8380`) By `Anderson Banihirwe `_. + Breaking changes ~~~~~~~~~~~~~~~~ @@ -212,9 +215,6 @@ New Features (:pull:`8511`). By `Maximilian Roos `_. -- Add :py:meth:`NamedArray.expand_dims` and :py:meth:`NamedArray.broadcast_to` - (:pull:`8380`) By `Anderson Banihirwe `_. - Breaking changes ~~~~~~~~~~~~~~~~ From e84abf9687ad8b960739d8d5184003601f53a01b Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Sat, 27 Jan 2024 19:29:10 -0800 Subject: [PATCH 53/53] adjust the `broadcast_to` method to prohibit adding new dimensions, allowing only the broadcasting of existing ones --- xarray/namedarray/core.py | 30 +++++++++++++----------------- xarray/tests/test_namedarray.py | 3 +++ 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/xarray/namedarray/core.py b/xarray/namedarray/core.py index a94bdb17002..ea653c01498 100644 --- a/xarray/namedarray/core.py +++ b/xarray/namedarray/core.py @@ -923,19 +923,20 @@ def broadcast_to( self, dim: Mapping[_Dim, int] | None = None, **dim_kwargs: Any ) -> NamedArray[Any, _DType_co]: """ - Broadcast the NamedArray to a new shape by extending its dimensions. + Broadcast the NamedArray to a new shape. New dimensions are not allowed. This method allows for the expansion of the array's dimensions to a specified shape. It handles both positional and keyword arguments for specifying the dimensions to broadcast. + An error is raised if new dimensions are attempted to be added. Parameters ---------- dim : dict, str, sequence of str, optional Dimensions to broadcast the array to. If a dict, keys are dimension names and values are the new sizes. - If a string or sequence of strings, new dimensions are added with a size of 1. + If a string or sequence of strings, existing dimensions are matched with a size of 1. **dim_kwargs : Any - Additional dimensions specified as keyword arguments. Each keyword argument specifies the name of the new dimension and its size. + Additional dimensions specified as keyword arguments. Each keyword argument specifies the name of an existing dimension and its size. Returns ------- @@ -949,35 +950,30 @@ def broadcast_to( >>> array.sizes {'x': 2, 'y': 2} - >>> broadcasted = array.expand_dims("lat").broadcast_to(x=2, y=2, lat=6) + >>> broadcasted = array.broadcast_to(x=2, y=2) >>> broadcasted.sizes - {'lat': 6, 'x': 2, 'y': 2} + {'x': 2, 'y': 2} """ from xarray.core import duck_array_ops combined_dims = either_dict_or_kwargs(dim, dim_kwargs, "broadcast_to") - # check that the dimensions are valid - dims = list(combined_dims.keys()) - if isinstance(dims, dict): - dims_keys = dims - else: - dims_keys = dims if isinstance(dims, str) else list(dims) - if missing_dims := set(self.dims) - set(dims_keys): + # Check that no new dimensions are added + if new_dims := set(combined_dims) - set(self.dims): raise ValueError( - f"new dimensions {dims!r} must be a superset of " - f"existing dimensions {self.dims!r}. missing dims: {missing_dims}" + f"Cannot add new dimensions: {new_dims}. Only existing dimensions are allowed. " + "Use `expand_dims` method to add new dimensions." 
) - # create a dictionary of the current dimensions and their sizes + # Create a dictionary of the current dimensions and their sizes current_shape = self.sizes - # update the current shape with the new dimensions, keeping the order of the original dimensions + # Update the current shape with the new dimensions, keeping the order of the original dimensions broadcast_shape = {d: current_shape.get(d, 1) for d in self.dims} broadcast_shape |= combined_dims - # ensure the dimensions are in the correct order + # Ensure the dimensions are in the correct order ordered_dims = list(broadcast_shape.keys()) ordered_shape = tuple(broadcast_shape[d] for d in ordered_dims) data = duck_array_ops.broadcast_to(self._data, ordered_shape) # type: ignore # TODO: use array-api-compat function diff --git a/xarray/tests/test_namedarray.py b/xarray/tests/test_namedarray.py index db49aa80066..20652f4cc3b 100644 --- a/xarray/tests/test_namedarray.py +++ b/xarray/tests/test_namedarray.py @@ -570,6 +570,9 @@ def test_broadcast_to_errors( ): target.broadcast_to({"x": 2, "y": 2}) + with pytest.raises(ValueError, match=r"Cannot add new dimensions"): + target.broadcast_to({"x": 2, "y": 2, "z": 2}) + def test_warn_on_repeated_dimension_names(self) -> None: with pytest.warns(UserWarning, match="Duplicate dimension names"): NamedArray(("x", "x"), np.arange(4).reshape(2, 2))
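
Taken together, patches 46-53 leave `NamedArray` with `expand_dims`, `permute_dims`/`T` and a `broadcast_to` that only broadcasts existing dimensions. Below is a minimal doctest-style usage sketch of the resulting API, mirroring the docstring and test examples in this series; the exact `sizes` repr, the keyword form `expand_dims(dim=...)`, and the prepend position of the new dimension are taken from those examples and are assumptions about the merged behaviour, not guarantees:

    >>> import numpy as np
    >>> from xarray.namedarray.core import NamedArray
    >>> array = NamedArray(("x", "y"), np.zeros((2, 2), dtype=np.float32))
    >>> array.sizes
    {'x': 2, 'y': 2}
    >>> array.permute_dims("y", "x").dims  # reorder existing dimensions
    ('y', 'x')
    >>> array.T.dims  # .T is restricted to 2-D arrays; otherwise use permute_dims()
    ('y', 'x')
    >>> expanded = array.expand_dims(dim="z")  # new length-1 dimension is prepended
    >>> expanded.broadcast_to(z=6, x=2, y=2).sizes  # broadcast existing dimensions only
    {'z': 6, 'x': 2, 'y': 2}
    >>> array.broadcast_to(x=2, y=2, z=2)  # adding a dimension here now raises
    Traceback (most recent call last):
    ...
    ValueError: Cannot add new dimensions: {'z'}. Only existing dimensions are allowed. Use `expand_dims` method to add new dimensions.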