From 18de1e61954fa190e9ad333c636203b1ad8eb492 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Sun, 20 Oct 2024 18:24:40 +0200 Subject: [PATCH 1/5] Do not require dimensions ("y","x") on UgridDataArray.from_structured --- docs/changelog.rst | 12 +++++++++++- tests/test_ugrid_dataset.py | 8 +++++++- xugrid/core/wrap.py | 36 ++++++++++++++++++++++++------------ 3 files changed, 42 insertions(+), 14 deletions(-) diff --git a/docs/changelog.rst b/docs/changelog.rst index 1b948855f..c1497f9a3 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -6,6 +6,16 @@ All notable changes to this project will be documented in this file. The format is based on `Keep a Changelog`_, and this project adheres to `Semantic Versioning`_. +Unreleased +---------- + +Changed +~~~~~~~ + +- :meth:`xugrid.UgridDataArrayAccessor.from_structured` previously required the + literal dimensions ``("y", "x")``. This requirement has been relaxed, it will + now infer the dimensions from the provided coordinates. + [0.12.1] 2024-09-09 ------------------- @@ -827,4 +837,4 @@ Added ------------------ .. _Keep a Changelog: https://keepachangelog.com/en/1.0.0/ -.. _Semantic Versioning: https://semver.org/spec/v2.0.0.html \ No newline at end of file +.. _Semantic Versioning: https://semver.org/spec/v2.0.0.html diff --git a/tests/test_ugrid_dataset.py b/tests/test_ugrid_dataset.py index c0db9e52e..61d5132b0 100644 --- a/tests/test_ugrid_dataset.py +++ b/tests/test_ugrid_dataset.py @@ -164,7 +164,9 @@ def test_rename(self): def test_from_structured(self): da = xr.DataArray([0.0, 1.0, 2.0], {"x": [5.0, 10.0, 15.0]}, ["x"]) - with pytest.raises(ValueError, match="Last two dimensions of da"): + with pytest.raises( + ValueError, match="DataArray must have at least two spatial dimensions" + ): xugrid.UgridDataArray.from_structured(da) da = xr.DataArray( @@ -173,6 +175,10 @@ def test_from_structured(self): dims=["layer", "y", "x"], name="grid", ) + + with pytest.raises(ValueError, match="Last two dimensions of da"): + xugrid.UgridDataArray.from_structured(da.transpose()) + uda = xugrid.UgridDataArray.from_structured(da) assert isinstance(uda, xugrid.UgridDataArray) assert uda.name == "grid" diff --git a/xugrid/core/wrap.py b/xugrid/core/wrap.py index 131649933..1a4a90e6e 100644 --- a/xugrid/core/wrap.py +++ b/xugrid/core/wrap.py @@ -16,7 +16,7 @@ from pandas import RangeIndex import xugrid -from xugrid.conversion import grid_from_dataset, grid_from_geodataframe +from xugrid.conversion import grid_from_dataset, grid_from_geodataframe, infer_xy_coords from xugrid.core.utils import unique_grids from xugrid.ugrid.ugrid2d import Ugrid2d from xugrid.ugrid.ugridbase import AbstractUgrid, UgridType, align @@ -260,21 +260,33 @@ def from_structured( ------- unstructured: UgridDataArray """ - if da.dims[-2:] != ("y", "x"): - raise ValueError('Last two dimensions of da must be ("y", "x")') + if da.ndim < 2: + raise ValueError( + "DataArray must have at least two spatial dimensions. " + f"Found: {da.dims}." + ) if (x is None) ^ (y is None): raise ValueError("Provide both x and y, or neither.") + # Infer x, y coords: works only for 1D coords. if x is None: - grid = Ugrid2d.from_structured(da) + x, y = infer_xy_coords(da) + + # Find out if it's multi-dimensional + ndim = da[x].ndim + lastdims = da.dims[-2:] + if ndim == 1: + grid = Ugrid2d.from_structured(da, x=x, y=y) + expected = (da["y"].dims[0], da["x"].dims[0]) + elif ndim == 2: + grid = Ugrid2d.from_structured_multicoord(da, x=x, y=y) + expected = da[x].dims else: - # Find out if it's multi-dimensional - xdim = da[x].ndim - if xdim == 1: - grid = Ugrid2d.from_structured(da, x=x, y=y) - elif xdim == 2: - grid = Ugrid2d.from_structured_multicoord(da, x=x, y=y) - else: - raise ValueError(f"x and y must be 1D or 2D. Found: {xdim}") + raise ValueError(f"x and y must be 1D or 2D. Found: {ndim}") + + if da.dims[-2:] != expected: + raise ValueError( + f"Last two dimensions of da must be {expected}, received: {lastdims}" + ) dims = da.dims[:-2] coords = {k: da.coords[k] for k in dims} From e6ab0774821b5e13ff867adc59f579d8587baa92 Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Sun, 20 Oct 2024 21:03:05 +0200 Subject: [PATCH 2/5] Add 1D/2D logic to Ugrid2d.from_structured. Use .stack() on DataArray. --- docs/api.rst | 1 - docs/changelog.rst | 6 +- tests/test_ugrid2d.py | 4 +- tests/test_ugrid_dataset.py | 14 +++- xugrid/core/wrap.py | 75 ++++++++++++--------- xugrid/ugrid/ugrid2d.py | 130 +++++++++++++++++++++++------------- 6 files changed, 145 insertions(+), 85 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 73f7c04b9..62540dfe5 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -348,7 +348,6 @@ UGRID2D Topology Ugrid2d.to_dataset Ugrid2d.from_geodataframe Ugrid2d.from_structured - Ugrid2d.from_structured_multicoord Ugrid2d.from_structured_bounds Ugrid2d.from_structured_intervals1d Ugrid2d.from_structured_intervals2d diff --git a/docs/changelog.rst b/docs/changelog.rst index c1497f9a3..8d2e44d84 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,6 +15,10 @@ Changed - :meth:`xugrid.UgridDataArrayAccessor.from_structured` previously required the literal dimensions ``("y", "x")``. This requirement has been relaxed, it will now infer the dimensions from the provided coordinates. +- :meth:`xugrid.Ugrid2d.from_structured` previously only supported 1D coordinates; + it now detects whether coordinates are 1D or 2D automatically. Accordingly, + :meth:`xugrid.Ugrid2d.from_structured_multicoord` should no longer be used, + and calling it will give a FutureWarning. [0.12.1] 2024-09-09 ------------------- @@ -88,7 +92,7 @@ Changed - Selection operations such as :meth:`UgridDataArrayAccessor.sel_points` will now also return points that are located on the edges of 2D topologies. - :attr:`xugrid.Ugrid1d.dimensions` and :attr:`xugrid.Ugrid2d.dimensions` now - raise a FutureWarning; use ``.dims`` or ``.sizes`` instead. + give a FutureWarning; use ``.dims`` or ``.sizes`` instead. - Improved performance of :func:`xugrid.open_dataset` and :func:`xugrid.merge_partitions` when handling datasets with a large number of variables (>100). diff --git a/tests/test_ugrid2d.py b/tests/test_ugrid2d.py index adf3fda8d..8e10d437c 100644 --- a/tests/test_ugrid2d.py +++ b/tests/test_ugrid2d.py @@ -1217,7 +1217,7 @@ def test_from_structured(): assert grid.n_face == 4 -def test_form_structured_multicoord(): +def test_from_structured_multicoord(): da = xr.DataArray( data=np.ones((2, 2)), coords={ @@ -1226,7 +1226,7 @@ def test_form_structured_multicoord(): }, dims=("y", "x"), ) - grid = xugrid.Ugrid2d.from_structured_multicoord(da, x="xc", y="yc") + grid = xugrid.Ugrid2d._from_structured_multicoord(da, x="xc", y="yc") assert isinstance(grid, xugrid.Ugrid2d) assert grid.n_face == 4 diff --git a/tests/test_ugrid_dataset.py b/tests/test_ugrid_dataset.py index 61d5132b0..8a1df92f2 100644 --- a/tests/test_ugrid_dataset.py +++ b/tests/test_ugrid_dataset.py @@ -176,8 +176,10 @@ def test_from_structured(self): name="grid", ) - with pytest.raises(ValueError, match="Last two dimensions of da"): - xugrid.UgridDataArray.from_structured(da.transpose()) + with pytest.raises(ValueError, match="Provide both x and y, or neither."): + xugrid.UgridDataArray.from_structured(da, x="this") + with pytest.raises(ValueError, match="Coordinates xc and yc are not present."): + xugrid.UgridDataArray.from_structured(da, x="xc", y="yc") uda = xugrid.UgridDataArray.from_structured(da) assert isinstance(uda, xugrid.UgridDataArray) @@ -190,6 +192,14 @@ def test_from_structured(self): uda = xugrid.UgridDataArray.from_structured(flipped) assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) + daT = da.transpose() + uda = xugrid.UgridDataArray.from_structured(daT) + assert isinstance(uda, xugrid.UgridDataArray) + assert uda.name == "grid" + assert uda.dims == ("layer", "mesh2d_nFaces") + assert uda.shape == (2, 12) + assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) + def test_from_structured_multicoord(self): da = xr.DataArray( data=[[0, 1], [2, 3]], diff --git a/xugrid/core/wrap.py b/xugrid/core/wrap.py index 1a4a90e6e..eab165921 100644 --- a/xugrid/core/wrap.py +++ b/xugrid/core/wrap.py @@ -16,7 +16,7 @@ from pandas import RangeIndex import xugrid -from xugrid.conversion import grid_from_dataset, grid_from_geodataframe, infer_xy_coords +from xugrid.conversion import grid_from_dataset, grid_from_geodataframe from xugrid.core.utils import unique_grids from xugrid.ugrid.ugrid2d import Ugrid2d from xugrid.ugrid.ugridbase import AbstractUgrid, UgridType, align @@ -250,7 +250,7 @@ def from_structured( Parameters ---------- da: xr.DataArray - Last two dimensions must be ``("y", "x")``. + Last two dimensions must be the y and x dimension (in that order!). x: str, default: None Which coordinate to use as the UGRID x-coordinate. y: str, default: None @@ -265,37 +265,10 @@ def from_structured( "DataArray must have at least two spatial dimensions. " f"Found: {da.dims}." ) - if (x is None) ^ (y is None): - raise ValueError("Provide both x and y, or neither.") - # Infer x, y coords: works only for 1D coords. - if x is None: - x, y = infer_xy_coords(da) - - # Find out if it's multi-dimensional - ndim = da[x].ndim - lastdims = da.dims[-2:] - if ndim == 1: - grid = Ugrid2d.from_structured(da, x=x, y=y) - expected = (da["y"].dims[0], da["x"].dims[0]) - elif ndim == 2: - grid = Ugrid2d.from_structured_multicoord(da, x=x, y=y) - expected = da[x].dims - else: - raise ValueError(f"x and y must be 1D or 2D. Found: {ndim}") - - if da.dims[-2:] != expected: - raise ValueError( - f"Last two dimensions of da must be {expected}, received: {lastdims}" - ) - - dims = da.dims[:-2] - coords = {k: da.coords[k] for k in dims} - face_da = xr.DataArray( - da.data.reshape(*da.shape[:-2], -1), - coords=coords, - dims=[*dims, grid.face_dimension], - name=da.name, - ) + grid, stackdims = Ugrid2d.from_structured(da, x, y, return_dims=True) + face_da = da.stack( # noqa: PD013 + {grid.face_dimension: stackdims}, create_index=False + ).drop_vars(stackdims, errors="ignore") return UgridDataArray(face_da, grid) @staticmethod @@ -433,3 +406,39 @@ def from_geodataframe(geodataframe: "geopandas.GeoDataFrame"): # type: ignore # grid = grid_from_geodataframe(geodataframe) ds = xr.Dataset.from_dataframe(geodataframe.drop("geometry", axis=1)) return UgridDataset(ds, [grid]) + + @staticmethod + def from_structured( + ds: xr.DataArray, topology: dict | None = None + ) -> "UgridDataArray": + """ + Create a UgridDataset from a (structured) xarray Dataset. + + The spatial dimensions are flattened into a single UGRID face dimension. + + By default, this method looks for the "x" and "y" coordinates and assumes + they are one-dimensional. To convert rotated or curvilinear coordinates, + provide the names of the x and y coordinates. + + Parameters + ---------- + da: xr.DataArray + Last two dimensions must be ``("y", "x")``. + topology: dict, optional, default is None. + Mapping of topology name to x and y coordinate variables. + If None, a single mesh2d topology is assumed. + + Returns + ------- + unstructured: UgridDataset + """ + if topology is None: + topology = {"mesh2d": ("x", "y")} + return + + +# grids = [] +# datasets = [] +# for key, (xcoord, ycoord) in topology.items(): +# grid = Ugrid2d.from_structured +# selection = ds.isel() diff --git a/xugrid/ugrid/ugrid2d.py b/xugrid/ugrid/ugrid2d.py index 7bcee102e..bc30b73c6 100644 --- a/xugrid/ugrid/ugrid2d.py +++ b/xugrid/ugrid/ugrid2d.py @@ -2116,10 +2116,7 @@ def from_shapely(geometry: PolygonArray, crs=None) -> "Ugrid2d": @staticmethod def _from_intervals_helper( - node_x: np.ndarray, - node_y: np.ndarray, - nx: int, - ny: int, + node_x: np.ndarray, node_y: np.ndarray, nx: int, ny: int, name: str ) -> "Ugrid2d": linear_index = np.arange(node_x.size, dtype=IntDType).reshape((ny + 1, nx + 1)) # Allocate face_node_connectivity @@ -2135,12 +2132,13 @@ def _from_intervals_helper( face_nodes[:, 1] = linear_index[lower, right].ravel() face_nodes[:, 2] = linear_index[upper, right].ravel() face_nodes[:, 3] = linear_index[upper, left].ravel() - return Ugrid2d(node_x, node_y, -1, face_nodes) + return Ugrid2d(node_x, node_y, -1, face_nodes, name=name) @staticmethod def from_structured_intervals1d( x_intervals: np.ndarray, y_intervals: np.ndarray, + name: str = "mesh2d", ) -> "Ugrid2d": """ Create a Ugrid2d topology from a structured topology based on 1D intervals. @@ -2151,6 +2149,7 @@ def from_structured_intervals1d( x-coordinate interval values for N row and M columns. y_intervals: np.ndarray of shape (N + 1,) y-coordinate interval values for N row and M columns. + name: str """ x_intervals = np.asarray(x_intervals) y_intervals = np.asarray(y_intervals) @@ -2159,12 +2158,13 @@ def from_structured_intervals1d( node_y, node_x = ( a.ravel() for a in np.meshgrid(y_intervals, x_intervals, indexing="ij") ) - return Ugrid2d._from_intervals_helper(node_x, node_y, nx, ny) + return Ugrid2d._from_intervals_helper(node_x, node_y, nx, ny, name=name) @staticmethod def from_structured_intervals2d( x_intervals: np.ndarray, y_intervals: np.ndarray, + name: str = "mesh2d", ) -> "Ugrid2d": """ Create a Ugrid2d topology from a structured topology based on 2D intervals. @@ -2175,6 +2175,7 @@ def from_structured_intervals2d( x-coordinate interval values for N row and M columns. y_intervals: np.ndarray of shape shape (N + 1, M + 1) y-coordinate interval values for N row and M columns. + name: str """ x_intervals = np.asarray(x_intervals) y_intervals = np.asarray(y_intervals) @@ -2190,12 +2191,13 @@ def from_structured_intervals2d( ny = shape[0] - 1 node_x = x_intervals.ravel() node_y = y_intervals.ravel() - return Ugrid2d._from_intervals_helper(node_x, node_y, nx, ny) + return Ugrid2d._from_intervals_helper(node_x, node_y, nx, ny, name=name) @staticmethod def from_structured_bounds( x_bounds: np.ndarray, y_bounds: np.ndarray, + name: str = "mesh2d", ) -> "Ugrid2d": """ Create a Ugrid2d topology from a structured topology based on 1D bounds. @@ -2208,6 +2210,7 @@ def from_structured_bounds( x-coordinate bounds for N row and M columns. y_bounds: np.ndarray of shape (N, 2) y-coordinate bounds for N row and M columns. + name: str Returns ------- @@ -2218,34 +2221,16 @@ def from_structured_bounds( x = conversion.bounds_to_vertices(x_bounds) y = conversion.bounds_to_vertices(y_bounds) node_y, node_x = (a.ravel() for a in np.meshgrid(y, x, indexing="ij")) - return Ugrid2d._from_intervals_helper(node_x, node_y, nx, ny) + return Ugrid2d._from_intervals_helper(node_x, node_y, nx, ny, name) @staticmethod - def from_structured( + def _from_structured_singlecoord( data: Union[xr.DataArray, xr.Dataset], x: str | None = None, y: str | None = None, + name: str = "mesh2d", ) -> "Ugrid2d": - """ - Create a Ugrid2d topology from an axis-aligned rectilinear structured topology. - - This method assumes the coordinates are 1D. - - Use ``from_structured_multicoord`` for 2D x and y coordinates, e.g. for - (approximated) curvilinear and rotated structured topologies. - - Parameters - ---------- - data: xr.DataArray or xr.Dataset - x: str, optional - Name of the 1D coordinate to use as the UGRID x-coordinate. - y: str, optional - Name of the 1D coordinate to use as the UGRID y-coordinate. - - Returns - ------- - grid: Ugrid2d - """ + # This method assumes the coordinates are 1D. if x is None or y is None: x, y = conversion.infer_xy_coords(data) if x is None or y is None: @@ -2255,40 +2240,93 @@ def from_structured( x_intervals = conversion.infer_interval_breaks1d(data, x) y_intervals = conversion.infer_interval_breaks1d(data, y) - return Ugrid2d.from_structured_intervals1d(x_intervals, y_intervals) + return Ugrid2d.from_structured_intervals1d(x_intervals, y_intervals, name) @staticmethod - def from_structured_multicoord( + def _from_structured_multicoord( data: Union[xr.DataArray, xr.Dataset], x: str, y: str, + name: str = "mesh2d", ) -> "Ugrid2d": - """ - Create a Ugrid2d topology from a structured topology, including rotated - and (approximated) curvilinear topologies. + # This method assumes the coordinates are 2D and thereby supports rotated + # or (approximated) curvilinear topologies. + xv = conversion.infer_interval_breaks(data[x], axis=1, check_monotonic=True) + xv = conversion.infer_interval_breaks(xv, axis=0) + yv = conversion.infer_interval_breaks(data[y], axis=1) + yv = conversion.infer_interval_breaks(yv, axis=0, check_monotonic=True) + return Ugrid2d.from_structured_intervals2d(xv, yv, name) - This method assumes the coordinates are 2D. + @staticmethod + def from_structured_multicoord( + data: Union[xr.DataArray, xr.Dataset], + x: str | None = None, + y: str | None = None, + name: str = "mesh2d", + ) -> "Ugrid2d": + warnings.warn( + "Ugrid2d.from_structured_multicoord has been deprecated. " + "Use Ugrid2d.from_structured instead.", + FutureWarning, + ) + return Ugrid2d.from_structured(data, x, y, name) - Use ``from_structured`` for 1D x and y coordinates, which is generally - the case for axis-aligned rectilinear topologies (most rasters). + @staticmethod + def from_structured( + data: Union[xr.DataArray, xr.Dataset], + x: str | None = None, + y: str | None = None, + name: str = "mesh2d", + return_dims: bool = False, + ): + """ + Create a Ugrid2d topology from a structured topology axis-aligned rectilinear, rotated + or (approximated) curvilinear topologies. Parameters ---------- data: xr.DataArray or xr.Dataset - x: str - Name of the 2D coordinate to use as the UGRID x-coordinate. - y: str - Name of the 2D coordinate to use as the UGRID y-coordinate. + x: str, optional + Name of the 1D or 2D coordinate to use as the UGRID x-coordinate. + y: str, optional + Name of the 1D or 2D coordinate to use as the UGRID y-coordinate. + return_dims: bool + If True, returns a tuple containing the name of the y and x dimensions. Returns ------- grid: Ugrid2d + dims: tuple of str, optional + Provided if ``return_dims`` is True. """ - xv = conversion.infer_interval_breaks(data[x], axis=1, check_monotonic=True) - xv = conversion.infer_interval_breaks(xv, axis=0) - yv = conversion.infer_interval_breaks(data[y], axis=1) - yv = conversion.infer_interval_breaks(yv, axis=0, check_monotonic=True) - return Ugrid2d.from_structured_intervals2d(xv, yv) + if (x is None) ^ (y is None): + raise ValueError("Provide both x and y, or neither.") + if x is None: + x, y = conversion.infer_xy_coords(data) + else: + coords = set(data.coords) + missing_coords = coords - {x, y} + if missing_coords: + raise ValueError( + f"Coordinates {x} and {y} are not present, " + f"expected one of: {coords}" + ) + + # Find out if it's multi-dimensional + ndim = data[x].ndim + if ndim == 1: + grid = Ugrid2d._from_structured_singlecoord(data, x=x, y=y, name=name) + dims = (data[y].dims[0], data[x].dims[0]) + elif ndim == 2: + grid = Ugrid2d._from_structured_multicoord(data, x=x, y=y, name=name) + dims = tuple(data[x].dims) + else: + raise ValueError(f"x and y must be 1D or 2D. Found: {ndim}") + + if return_dims: + return grid, dims + else: + return grid def to_shapely(self, dim): """ From a237b0de8afc052446000f11e191f5c92541364f Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Sun, 20 Oct 2024 21:12:31 +0200 Subject: [PATCH 3/5] Basic implementation for UgridDataset.from_structured -- WIP --- xugrid/core/wrap.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/xugrid/core/wrap.py b/xugrid/core/wrap.py index eab165921..f47ec9fff 100644 --- a/xugrid/core/wrap.py +++ b/xugrid/core/wrap.py @@ -408,9 +408,7 @@ def from_geodataframe(geodataframe: "geopandas.GeoDataFrame"): # type: ignore # return UgridDataset(ds, [grid]) @staticmethod - def from_structured( - ds: xr.DataArray, topology: dict | None = None - ) -> "UgridDataArray": + def from_structured(ds: xr.Dataset, topology: dict | None = None) -> "UgridDataset": """ Create a UgridDataset from a (structured) xarray Dataset. @@ -422,11 +420,11 @@ def from_structured( Parameters ---------- - da: xr.DataArray - Last two dimensions must be ``("y", "x")``. + ds: xr.Dataset topology: dict, optional, default is None. Mapping of topology name to x and y coordinate variables. - If None, a single mesh2d topology is assumed. + If None, searches for "x" and "y" coordinates and creates a Ugrid2d + topology with name "mesh2d". Returns ------- @@ -434,11 +432,14 @@ def from_structured( """ if topology is None: topology = {"mesh2d": ("x", "y")} - return + dataset = ds + grids = [] + for name, (x, y) in topology.items(): + stackdims, grid = Ugrid2d.from_structured( + ds, x=x, y=y, name=name, return_dims=True + ) + dataset = dataset.stack(*stackdims).drop_vars(stackdims, errors="ignore") # noqa: PD013 + grids.append(grid) -# grids = [] -# datasets = [] -# for key, (xcoord, ycoord) in topology.items(): -# grid = Ugrid2d.from_structured -# selection = ds.isel() + return UgridDataset(dataset, grids) From 8c5535fda27691e87575346055e803203561692c Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 21 Oct 2024 10:07:50 +0200 Subject: [PATCH 4/5] Finish UgridDataset.from_structured. --- docs/api.rst | 1 + docs/changelog.rst | 14 ++- tests/test_ugrid_dataset.py | 164 +++++++++++++++++++++++------------- xugrid/core/wrap.py | 67 ++++++++++++--- xugrid/ugrid/ugrid2d.py | 2 +- 5 files changed, 173 insertions(+), 75 deletions(-) diff --git a/docs/api.rst b/docs/api.rst index 62540dfe5..be9e2df9e 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -55,6 +55,7 @@ UgridDataset UgridDataset UgridDataset.ugrid UgridDataset.from_geodataframe + UgridDataset.from_structured UGRID Accessor -------------- diff --git a/docs/changelog.rst b/docs/changelog.rst index 8d2e44d84..a56f084ed 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -15,10 +15,16 @@ Changed - :meth:`xugrid.UgridDataArrayAccessor.from_structured` previously required the literal dimensions ``("y", "x")``. This requirement has been relaxed, it will now infer the dimensions from the provided coordinates. -- :meth:`xugrid.Ugrid2d.from_structured` previously only supported 1D coordinates; - it now detects whether coordinates are 1D or 2D automatically. Accordingly, - :meth:`xugrid.Ugrid2d.from_structured_multicoord` should no longer be used, - and calling it will give a FutureWarning. +- :meth:`xugrid.Ugrid2d.from_structured` previously only supported 1D + coordinates; it now detects whether coordinates are 1D or 2D automatically. + Accordingly, :meth:`xugrid.Ugrid2d.from_structured_multicoord` should no + longer be used, and calling it will give a FutureWarning. + +Added +~~~~~ + +- :meth:`xugrid.UgridDataset.from_structured` has been added to create + UgriDatasets from xarray Datasets. [0.12.1] 2024-09-09 ------------------- diff --git a/tests/test_ugrid_dataset.py b/tests/test_ugrid_dataset.py index 8a1df92f2..8ce49d347 100644 --- a/tests/test_ugrid_dataset.py +++ b/tests/test_ugrid_dataset.py @@ -162,63 +162,6 @@ def test_rename(self): renamed = self.uda.ugrid.rename("renamed") assert "renamed_nFaces" in renamed.dims - def test_from_structured(self): - da = xr.DataArray([0.0, 1.0, 2.0], {"x": [5.0, 10.0, 15.0]}, ["x"]) - with pytest.raises( - ValueError, match="DataArray must have at least two spatial dimensions" - ): - xugrid.UgridDataArray.from_structured(da) - - da = xr.DataArray( - data=np.arange(2 * 3 * 4).reshape((2, 3, 4)), - coords={"layer": [1, 2], "y": [5.0, 10.0, 15.0], "x": [2.0, 4.0, 6.0, 8.0]}, - dims=["layer", "y", "x"], - name="grid", - ) - - with pytest.raises(ValueError, match="Provide both x and y, or neither."): - xugrid.UgridDataArray.from_structured(da, x="this") - with pytest.raises(ValueError, match="Coordinates xc and yc are not present."): - xugrid.UgridDataArray.from_structured(da, x="xc", y="yc") - - uda = xugrid.UgridDataArray.from_structured(da) - assert isinstance(uda, xugrid.UgridDataArray) - assert uda.name == "grid" - assert uda.dims == ("layer", "mesh2d_nFaces") - assert uda.shape == (2, 12) - assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) - # Check whether flipping the y-axis doesn't cause any problems - flipped = da.isel(y=slice(None, None, -1)) - uda = xugrid.UgridDataArray.from_structured(flipped) - assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) - - daT = da.transpose() - uda = xugrid.UgridDataArray.from_structured(daT) - assert isinstance(uda, xugrid.UgridDataArray) - assert uda.name == "grid" - assert uda.dims == ("layer", "mesh2d_nFaces") - assert uda.shape == (2, 12) - assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) - - def test_from_structured_multicoord(self): - da = xr.DataArray( - data=[[0, 1], [2, 3]], - coords={ - "yc": (("y", "x"), [[12.0, 11.0], [12.0, 11.0]]), - "xc": (("y", "x"), [[10.0, 12.0], [10.0, 12.0]]), - }, - dims=("y", "x"), - ) - uda = xugrid.UgridDataArray.from_structured(da) - assert isinstance(uda, xugrid.UgridDataArray) - assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [-0.5, 0.5, 1.5]) - assert np.array_equal(uda.data, [0, 1, 2, 3]) - - uda = xugrid.UgridDataArray.from_structured(da, x="xc", y="yc") - assert isinstance(uda, xugrid.UgridDataArray) - assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [9.0, 11.0, 13.0]) - assert np.array_equal(uda.data, [0, 1, 2, 3]) - def test_unary_op(self): alltrue = self.uda.astype(bool) allfalse = alltrue.copy() @@ -822,6 +765,113 @@ def test_reindex_like(self): assert isinstance(back, xugrid.UgridDataset) +class TestFromStructured: + @pytest.fixture(autouse=True) + def setup(self): + self.da1d = xr.DataArray( + [0.0, 1.0, 2.0, 3.0], {"x": [2.0, 4.0, 6.0, 8.0]}, ["x"] + ) + self.da2d = xr.DataArray( + data=np.arange(2 * 3 * 4).reshape((2, 3, 4)), + coords={"layer": [1, 2], "y": [5.0, 10.0, 15.0], "x": [2.0, 4.0, 6.0, 8.0]}, + dims=["layer", "y", "x"], + name="grid", + ) + self.da_coords2d = xr.DataArray( + data=[[0, 1], [2, 3]], + coords={ + "yc": (("y", "x"), [[12.0, 11.0], [12.0, 11.0]]), + "xc": (("y", "x"), [[10.0, 12.0], [10.0, 12.0]]), + }, + dims=("y", "x"), + ) + self.ds = xr.Dataset( + { + "a": self.da2d, + "b": self.da1d, + "c": 1.0, + } + ) + + def test_error_1d(self): + with pytest.raises( + ValueError, match="DataArray must have at least two spatial dimensions" + ): + xugrid.UgridDataArray.from_structured(self.da1d) + + def test_error_x_xor_y(self): + with pytest.raises(ValueError, match="Provide both x and y, or neither."): + xugrid.UgridDataArray.from_structured(self.da2d, x="this") + + def test_missing_xy(self): + with pytest.raises(ValueError, match="Coordinates xc and yc are not present."): + xugrid.UgridDataArray.from_structured(self.da2d, x="xc", y="yc") + + def test_from_dataarray(self): + uda = xugrid.UgridDataArray.from_structured(self.da2d) + assert isinstance(uda, xugrid.UgridDataArray) + assert uda.name == "grid" + assert uda.dims == ("layer", "mesh2d_nFaces") + assert uda.shape == (2, 12) + assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) + # Check whether flipping the y-axis doesn't cause any problems + flipped = self.da2d.isel(y=slice(None, None, -1)) + uda = xugrid.UgridDataArray.from_structured(flipped) + assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) + + # And test transposed. + daT = self.da2d.transpose() + uda = xugrid.UgridDataArray.from_structured(daT) + assert isinstance(uda, xugrid.UgridDataArray) + assert uda.name == "grid" + assert uda.dims == ("layer", "mesh2d_nFaces") + assert uda.shape == (2, 12) + assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) + + def test_from_multicoord(self): + uda = xugrid.UgridDataArray.from_structured(self.da_coords2d) + assert isinstance(uda, xugrid.UgridDataArray) + assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [-0.5, 0.5, 1.5]) + assert np.array_equal(uda.data, [0, 1, 2, 3]) + + uda = xugrid.UgridDataArray.from_structured(self.da_coords2d, x="xc", y="yc") + assert isinstance(uda, xugrid.UgridDataArray) + assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [9.0, 11.0, 13.0]) + assert np.array_equal(uda.data, [0, 1, 2, 3]) + + def test_from_dataset(self): + uds = xugrid.UgridDataset.from_structured(self.ds) + assert isinstance(uds, xugrid.UgridDataset) + assert set(uds.data_vars) == {"a", "b", "c"} + assert uds["a"].dims == ("layer", "mesh2d_nFaces") + assert uds["b"].dims == ("x",) + assert uds["c"].dims == () + uda = uds["a"] + assert uda.shape == (2, 12) + assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]]) + + def test_from_multicoord_dataset(self): + ds = self.ds.copy() + da = self.da_coords2d.rename({"x": "x1", "y": "y1"}) + ds["d"] = da + # Unspecified: it'll only infer x and y. + uds = xugrid.UgridDataset.from_structured(ds) + assert isinstance(uds, xugrid.UgridDataset) + assert uds["a"].dims == ("layer", "mesh2d_nFaces") + assert uds["d"].dims == ("y1", "x1") + assert len(uds.ugrid.grids) == 1 + # Now specify separate topologies. + uds = xugrid.UgridDataset.from_structured( + ds, {"mesh2d_0": ("x", "y"), "mesh2d_1": ("xc", "yc")} + ) + assert isinstance(uds, xugrid.UgridDataset) + assert uds["a"].dims == ("layer", "mesh2d_0_nFaces") + assert uds["b"].dims == ("x",) + assert uds["c"].dims == () + assert uds["d"].dims == ("mesh2d_1_nFaces",) + assert len(uds.ugrid.grids) == 2 + + def test_multiple_coordinates(): grid = GRID() ds = UGRID_DS() diff --git a/xugrid/core/wrap.py b/xugrid/core/wrap.py index f47ec9fff..7ece17cc6 100644 --- a/xugrid/core/wrap.py +++ b/xugrid/core/wrap.py @@ -408,38 +408,79 @@ def from_geodataframe(geodataframe: "geopandas.GeoDataFrame"): # type: ignore # return UgridDataset(ds, [grid]) @staticmethod - def from_structured(ds: xr.Dataset, topology: dict | None = None) -> "UgridDataset": + def from_structured( + dataset: xr.Dataset, topology: dict | None = None + ) -> "UgridDataset": """ Create a UgridDataset from a (structured) xarray Dataset. The spatial dimensions are flattened into a single UGRID face dimension. - By default, this method looks for the "x" and "y" coordinates and assumes - they are one-dimensional. To convert rotated or curvilinear coordinates, - provide the names of the x and y coordinates. + By default, this method looks for the ``"x"`` and ``"y"`` coordinates + and assumes they are one-dimensional. To convert rotated or curvilinear + coordinates, provide the names of the x and y coordinates. Parameters ---------- - ds: xr.Dataset + dataset: xr.Dataset topology: dict, optional, default is None. Mapping of topology name to x and y coordinate variables. - If None, searches for "x" and "y" coordinates and creates a Ugrid2d - topology with name "mesh2d". + If None, defaults to ``{"mesh2d": ("x", "y")}``. Returns ------- unstructured: UgridDataset + + Examples + -------- + By default, this method will look for 1D ``"x"`` and ``"y"`` + coordinates and returns a UgriDataset with a Ugrid topology named + mesh2d: + + >>> uds = xugrid.UgridDataset.from_structured(dataset) + + In case of rotated or curvilinear coordinates, the name of the + resulting UGRID topology and the x and y coordinates must be specified: + + >>> uds = xugrid.UgridDataset.from_structured( + >>> dataset, + >>> topology={"mesh2d": ("xc", "yc")}, + >>> ) + + In case of multiple grid topologies in a single dataset, these must be + specified as well: + + >>> uds = xugrid.UgridDataset.from_structured( + >>> dataset, + >>> topology={"mesh2d_xy": ("x", "y"), "mesh2d_lonlat": {"lon", "lat"}, + >>> ) """ if topology is None: topology = {"mesh2d": ("x", "y")} - dataset = ds grids = [] + dss = [] for name, (x, y) in topology.items(): - stackdims, grid = Ugrid2d.from_structured( - ds, x=x, y=y, name=name, return_dims=True + grid, stackdims = Ugrid2d.from_structured( + dataset, x=x, y=y, name=name, return_dims=True + ) + # Use subset to check that ALL dims of stackdims are present in the + # variable. + checkdims = set(stackdims) + ugrid_vars = [ + name + for name, var in dataset.data_vars.items() + if checkdims.issubset(var.dims) + ] + dss.append( + dataset[ugrid_vars] # noqa: PD013 + .stack({grid.face_dimension: stackdims}) + .drop_vars(stackdims + (grid.face_dimension,)) ) - dataset = dataset.stack(*stackdims).drop_vars(stackdims, errors="ignore") # noqa: PD013 grids.append(grid) - - return UgridDataset(dataset, grids) + # Add the original dataset to include all non-UGRID variables. + dss.append(dataset) + # Then merge with compat="override". This'll pick the first available + # variable: i.e. it will prioritize the UGRID form. + merged = xr.merge(dss, compat="override") + return UgridDataset(merged, grids) diff --git a/xugrid/ugrid/ugrid2d.py b/xugrid/ugrid/ugrid2d.py index bc30b73c6..e9c21a34a 100644 --- a/xugrid/ugrid/ugrid2d.py +++ b/xugrid/ugrid/ugrid2d.py @@ -2305,7 +2305,7 @@ def from_structured( x, y = conversion.infer_xy_coords(data) else: coords = set(data.coords) - missing_coords = coords - {x, y} + missing_coords = {x, y} - coords if missing_coords: raise ValueError( f"Coordinates {x} and {y} are not present, " From e111a647e0a6c41207131ce3c973d1243ca74e1e Mon Sep 17 00:00:00 2001 From: Huite Bootsma Date: Mon, 21 Oct 2024 10:26:44 +0200 Subject: [PATCH 5/5] Tweak docstrings --- xugrid/core/wrap.py | 43 +++++++++++++++++++++++++++-------------- xugrid/ugrid/ugrid2d.py | 11 +++++++++++ 2 files changed, 40 insertions(+), 14 deletions(-) diff --git a/xugrid/core/wrap.py b/xugrid/core/wrap.py index 7ece17cc6..8e183ac09 100644 --- a/xugrid/core/wrap.py +++ b/xugrid/core/wrap.py @@ -243,9 +243,16 @@ def from_structured( The spatial dimensions are flattened into a single UGRID face dimension. - By default, this method looks for the "x" and "y" coordinates and assumes - they are one-dimensional. To convert rotated or curvilinear coordinates, - provide the names of the x and y coordinates. + By default, this method looks for: + + 1. ``"x"`` and ``"y"`` dimensions. + 2. ``"longitude"`` and ``"latitude"`` dimensions. + 3. ``"axis"`` attributes of "X" or "Y" on coordinates. + 4. ``"standard_name"`` attributes of "longitude", "latitude", + "projection_x_coordinate", or "project_y_coordinate" on coordinate + variables. + + Specify the x and y coordinate names explicitly otherwise. Parameters ---------- @@ -416,16 +423,24 @@ def from_structured( The spatial dimensions are flattened into a single UGRID face dimension. - By default, this method looks for the ``"x"`` and ``"y"`` coordinates - and assumes they are one-dimensional. To convert rotated or curvilinear - coordinates, provide the names of the x and y coordinates. + By default, this method looks for: + + 1. ``"x"`` and ``"y"`` dimensions. + 2. ``"longitude"`` and ``"latitude"`` dimensions. + 3. ``"axis"`` attributes of "X" or "Y" on coordinates. + 4. ``"standard_name"`` attributes of "longitude", "latitude", + "projection_x_coordinate", or "project_y_coordinate" on coordinate + variables. + + Specify the x and y coordinate names explicitly otherwise, see the + examples. Parameters ---------- dataset: xr.Dataset topology: dict, optional, default is None. Mapping of topology name to x and y coordinate variables. - If None, defaults to ``{"mesh2d": ("x", "y")}``. + If None, defaults to ``{"mesh2d": (None, None)}``. Returns ------- @@ -433,22 +448,22 @@ def from_structured( Examples -------- - By default, this method will look for 1D ``"x"`` and ``"y"`` + By default, this method will look for ``"x"`` and ``"y"`` coordinates and returns a UgriDataset with a Ugrid topology named mesh2d: >>> uds = xugrid.UgridDataset.from_structured(dataset) - In case of rotated or curvilinear coordinates, the name of the - resulting UGRID topology and the x and y coordinates must be specified: + In case of other names, the name of the resulting UGRID topology and + the x and y coordinates must be specified: >>> uds = xugrid.UgridDataset.from_structured( >>> dataset, - >>> topology={"mesh2d": ("xc", "yc")}, + >>> topology={"my_mesh2d": ("xc", "yc")}, >>> ) - In case of multiple grid topologies in a single dataset, these must be - specified as well: + In case of multiple grid topologies in a single dataset, the names must + be specified as well: >>> uds = xugrid.UgridDataset.from_structured( >>> dataset, @@ -456,7 +471,7 @@ def from_structured( >>> ) """ if topology is None: - topology = {"mesh2d": ("x", "y")} + topology = {"mesh2d": (None, None)} grids = [] dss = [] diff --git a/xugrid/ugrid/ugrid2d.py b/xugrid/ugrid/ugrid2d.py index e9c21a34a..547262f28 100644 --- a/xugrid/ugrid/ugrid2d.py +++ b/xugrid/ugrid/ugrid2d.py @@ -2283,6 +2283,17 @@ def from_structured( Create a Ugrid2d topology from a structured topology axis-aligned rectilinear, rotated or (approximated) curvilinear topologies. + By default, this method looks for: + + 1. ``"x"`` and ``"y"`` dimensions. + 2. ``"longitude"`` and ``"latitude"`` dimensions. + 3. ``"axis"`` attributes of "X" or "Y" on coordinates. + 4. ``"standard_name"`` attributes of "longitude", "latitude", + "projection_x_coordinate", or "project_y_coordinate" on coordinate + variables. + + Specify the x and y coordinate names explicitly otherwise. + Parameters ---------- data: xr.DataArray or xr.Dataset