Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

From structured dataset #303

Merged
merged 5 commits into from
Oct 21, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ UgridDataset
UgridDataset
UgridDataset.ugrid
UgridDataset.from_geodataframe
UgridDataset.from_structured

UGRID Accessor
--------------
Expand Down Expand Up @@ -348,7 +349,6 @@ UGRID2D Topology
Ugrid2d.to_dataset
Ugrid2d.from_geodataframe
Ugrid2d.from_structured
Ugrid2d.from_structured_multicoord
Ugrid2d.from_structured_bounds
Ugrid2d.from_structured_intervals1d
Ugrid2d.from_structured_intervals2d
Expand Down
24 changes: 22 additions & 2 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,26 @@ All notable changes to this project will be documented in this file.
The format is based on `Keep a Changelog`_, and this project adheres to
`Semantic Versioning`_.

Unreleased
----------

Changed
~~~~~~~

- :meth:`xugrid.UgridDataArrayAccessor.from_structured` previously required the
literal dimensions ``("y", "x")``. This requirement has been relaxed, it will
now infer the dimensions from the provided coordinates.
- :meth:`xugrid.Ugrid2d.from_structured` previously only supported 1D
coordinates; it now detects whether coordinates are 1D or 2D automatically.
Accordingly, :meth:`xugrid.Ugrid2d.from_structured_multicoord` should no
longer be used, and calling it will give a FutureWarning.

Added
~~~~~

- :meth:`xugrid.UgridDataset.from_structured` has been added to create
UgriDatasets from xarray Datasets.

[0.12.1] 2024-09-09
-------------------

Expand Down Expand Up @@ -78,7 +98,7 @@ Changed
- Selection operations such as :meth:`UgridDataArrayAccessor.sel_points` will
now also return points that are located on the edges of 2D topologies.
- :attr:`xugrid.Ugrid1d.dimensions` and :attr:`xugrid.Ugrid2d.dimensions` now
raise a FutureWarning; use ``.dims`` or ``.sizes`` instead.
give a FutureWarning; use ``.dims`` or ``.sizes`` instead.
- Improved performance of :func:`xugrid.open_dataset` and
:func:`xugrid.merge_partitions` when handling datasets with a large number
of variables (>100).
Expand Down Expand Up @@ -827,4 +847,4 @@ Added
------------------

.. _Keep a Changelog: https://keepachangelog.com/en/1.0.0/
.. _Semantic Versioning: https://semver.org/spec/v2.0.0.html
.. _Semantic Versioning: https://semver.org/spec/v2.0.0.html
4 changes: 2 additions & 2 deletions tests/test_ugrid2d.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,7 +1217,7 @@ def test_from_structured():
assert grid.n_face == 4


def test_form_structured_multicoord():
def test_from_structured_multicoord():
da = xr.DataArray(
data=np.ones((2, 2)),
coords={
Expand All @@ -1226,7 +1226,7 @@ def test_form_structured_multicoord():
},
dims=("y", "x"),
)
grid = xugrid.Ugrid2d.from_structured_multicoord(da, x="xc", y="yc")
grid = xugrid.Ugrid2d._from_structured_multicoord(da, x="xc", y="yc")
assert isinstance(grid, xugrid.Ugrid2d)
assert grid.n_face == 4

Expand Down
148 changes: 107 additions & 41 deletions tests/test_ugrid_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,47 +162,6 @@ def test_rename(self):
renamed = self.uda.ugrid.rename("renamed")
assert "renamed_nFaces" in renamed.dims

def test_from_structured(self):
da = xr.DataArray([0.0, 1.0, 2.0], {"x": [5.0, 10.0, 15.0]}, ["x"])
with pytest.raises(ValueError, match="Last two dimensions of da"):
xugrid.UgridDataArray.from_structured(da)

da = xr.DataArray(
data=np.arange(2 * 3 * 4).reshape((2, 3, 4)),
coords={"layer": [1, 2], "y": [5.0, 10.0, 15.0], "x": [2.0, 4.0, 6.0, 8.0]},
dims=["layer", "y", "x"],
name="grid",
)
uda = xugrid.UgridDataArray.from_structured(da)
assert isinstance(uda, xugrid.UgridDataArray)
assert uda.name == "grid"
assert uda.dims == ("layer", "mesh2d_nFaces")
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])
# Check whether flipping the y-axis doesn't cause any problems
flipped = da.isel(y=slice(None, None, -1))
uda = xugrid.UgridDataArray.from_structured(flipped)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

def test_from_structured_multicoord(self):
da = xr.DataArray(
data=[[0, 1], [2, 3]],
coords={
"yc": (("y", "x"), [[12.0, 11.0], [12.0, 11.0]]),
"xc": (("y", "x"), [[10.0, 12.0], [10.0, 12.0]]),
},
dims=("y", "x"),
)
uda = xugrid.UgridDataArray.from_structured(da)
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [-0.5, 0.5, 1.5])
assert np.array_equal(uda.data, [0, 1, 2, 3])

uda = xugrid.UgridDataArray.from_structured(da, x="xc", y="yc")
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [9.0, 11.0, 13.0])
assert np.array_equal(uda.data, [0, 1, 2, 3])

def test_unary_op(self):
alltrue = self.uda.astype(bool)
allfalse = alltrue.copy()
Expand Down Expand Up @@ -806,6 +765,113 @@ def test_reindex_like(self):
assert isinstance(back, xugrid.UgridDataset)


class TestFromStructured:
@pytest.fixture(autouse=True)
def setup(self):
self.da1d = xr.DataArray(
[0.0, 1.0, 2.0, 3.0], {"x": [2.0, 4.0, 6.0, 8.0]}, ["x"]
)
self.da2d = xr.DataArray(
data=np.arange(2 * 3 * 4).reshape((2, 3, 4)),
coords={"layer": [1, 2], "y": [5.0, 10.0, 15.0], "x": [2.0, 4.0, 6.0, 8.0]},
dims=["layer", "y", "x"],
name="grid",
)
self.da_coords2d = xr.DataArray(
data=[[0, 1], [2, 3]],
coords={
"yc": (("y", "x"), [[12.0, 11.0], [12.0, 11.0]]),
"xc": (("y", "x"), [[10.0, 12.0], [10.0, 12.0]]),
},
dims=("y", "x"),
)
self.ds = xr.Dataset(
{
"a": self.da2d,
"b": self.da1d,
"c": 1.0,
}
)

def test_error_1d(self):
with pytest.raises(
ValueError, match="DataArray must have at least two spatial dimensions"
):
xugrid.UgridDataArray.from_structured(self.da1d)

def test_error_x_xor_y(self):
with pytest.raises(ValueError, match="Provide both x and y, or neither."):
xugrid.UgridDataArray.from_structured(self.da2d, x="this")

def test_missing_xy(self):
with pytest.raises(ValueError, match="Coordinates xc and yc are not present."):
xugrid.UgridDataArray.from_structured(self.da2d, x="xc", y="yc")

def test_from_dataarray(self):
uda = xugrid.UgridDataArray.from_structured(self.da2d)
assert isinstance(uda, xugrid.UgridDataArray)
assert uda.name == "grid"
assert uda.dims == ("layer", "mesh2d_nFaces")
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])
# Check whether flipping the y-axis doesn't cause any problems
flipped = self.da2d.isel(y=slice(None, None, -1))
uda = xugrid.UgridDataArray.from_structured(flipped)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

# And test transposed.
daT = self.da2d.transpose()
uda = xugrid.UgridDataArray.from_structured(daT)
assert isinstance(uda, xugrid.UgridDataArray)
assert uda.name == "grid"
assert uda.dims == ("layer", "mesh2d_nFaces")
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

def test_from_multicoord(self):
uda = xugrid.UgridDataArray.from_structured(self.da_coords2d)
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [-0.5, 0.5, 1.5])
assert np.array_equal(uda.data, [0, 1, 2, 3])

uda = xugrid.UgridDataArray.from_structured(self.da_coords2d, x="xc", y="yc")
assert isinstance(uda, xugrid.UgridDataArray)
assert np.array_equal(np.unique(uda.ugrid.grid.node_x), [9.0, 11.0, 13.0])
assert np.array_equal(uda.data, [0, 1, 2, 3])

def test_from_dataset(self):
uds = xugrid.UgridDataset.from_structured(self.ds)
assert isinstance(uds, xugrid.UgridDataset)
assert set(uds.data_vars) == {"a", "b", "c"}
assert uds["a"].dims == ("layer", "mesh2d_nFaces")
assert uds["b"].dims == ("x",)
assert uds["c"].dims == ()
uda = uds["a"]
assert uda.shape == (2, 12)
assert np.allclose(uda.ugrid.sel(x=2.0, y=5.0), [[0], [12]])

def test_from_multicoord_dataset(self):
ds = self.ds.copy()
da = self.da_coords2d.rename({"x": "x1", "y": "y1"})
ds["d"] = da
# Unspecified: it'll only infer x and y.
uds = xugrid.UgridDataset.from_structured(ds)
assert isinstance(uds, xugrid.UgridDataset)
assert uds["a"].dims == ("layer", "mesh2d_nFaces")
assert uds["d"].dims == ("y1", "x1")
assert len(uds.ugrid.grids) == 1
# Now specify separate topologies.
uds = xugrid.UgridDataset.from_structured(
ds, {"mesh2d_0": ("x", "y"), "mesh2d_1": ("xc", "yc")}
)
assert isinstance(uds, xugrid.UgridDataset)
assert uds["a"].dims == ("layer", "mesh2d_0_nFaces")
assert uds["b"].dims == ("x",)
assert uds["c"].dims == ()
assert uds["d"].dims == ("mesh2d_1_nFaces",)
assert len(uds.ugrid.grids) == 2


def test_multiple_coordinates():
grid = GRID()
ds = UGRID_DS()
Expand Down
134 changes: 106 additions & 28 deletions xugrid/core/wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,14 +243,21 @@ def from_structured(

The spatial dimensions are flattened into a single UGRID face dimension.

By default, this method looks for the "x" and "y" coordinates and assumes
they are one-dimensional. To convert rotated or curvilinear coordinates,
provide the names of the x and y coordinates.
By default, this method looks for:

1. ``"x"`` and ``"y"`` dimensions.
2. ``"longitude"`` and ``"latitude"`` dimensions.
3. ``"axis"`` attributes of "X" or "Y" on coordinates.
4. ``"standard_name"`` attributes of "longitude", "latitude",
"projection_x_coordinate", or "project_y_coordinate" on coordinate
variables.

Specify the x and y coordinate names explicitly otherwise.

Parameters
----------
da: xr.DataArray
Last two dimensions must be ``("y", "x")``.
Last two dimensions must be the y and x dimension (in that order!).
x: str, default: None
Which coordinate to use as the UGRID x-coordinate.
y: str, default: None
Expand All @@ -260,30 +267,15 @@ def from_structured(
-------
unstructured: UgridDataArray
"""
if da.dims[-2:] != ("y", "x"):
raise ValueError('Last two dimensions of da must be ("y", "x")')
if (x is None) ^ (y is None):
raise ValueError("Provide both x and y, or neither.")
if x is None:
grid = Ugrid2d.from_structured(da)
else:
# Find out if it's multi-dimensional
xdim = da[x].ndim
if xdim == 1:
grid = Ugrid2d.from_structured(da, x=x, y=y)
elif xdim == 2:
grid = Ugrid2d.from_structured_multicoord(da, x=x, y=y)
else:
raise ValueError(f"x and y must be 1D or 2D. Found: {xdim}")

dims = da.dims[:-2]
coords = {k: da.coords[k] for k in dims}
face_da = xr.DataArray(
da.data.reshape(*da.shape[:-2], -1),
coords=coords,
dims=[*dims, grid.face_dimension],
name=da.name,
)
if da.ndim < 2:
raise ValueError(
"DataArray must have at least two spatial dimensions. "
f"Found: {da.dims}."
)
grid, stackdims = Ugrid2d.from_structured(da, x, y, return_dims=True)
face_da = da.stack( # noqa: PD013
{grid.face_dimension: stackdims}, create_index=False
).drop_vars(stackdims, errors="ignore")
return UgridDataArray(face_da, grid)

@staticmethod
Expand Down Expand Up @@ -421,3 +413,89 @@ def from_geodataframe(geodataframe: "geopandas.GeoDataFrame"): # type: ignore #
grid = grid_from_geodataframe(geodataframe)
ds = xr.Dataset.from_dataframe(geodataframe.drop("geometry", axis=1))
return UgridDataset(ds, [grid])

@staticmethod
def from_structured(
dataset: xr.Dataset, topology: dict | None = None
) -> "UgridDataset":
"""
Create a UgridDataset from a (structured) xarray Dataset.

The spatial dimensions are flattened into a single UGRID face dimension.

By default, this method looks for:

1. ``"x"`` and ``"y"`` dimensions.
2. ``"longitude"`` and ``"latitude"`` dimensions.
3. ``"axis"`` attributes of "X" or "Y" on coordinates.
4. ``"standard_name"`` attributes of "longitude", "latitude",
"projection_x_coordinate", or "project_y_coordinate" on coordinate
variables.

Specify the x and y coordinate names explicitly otherwise, see the
examples.

Parameters
----------
dataset: xr.Dataset
topology: dict, optional, default is None.
Mapping of topology name to x and y coordinate variables.
If None, defaults to ``{"mesh2d": (None, None)}``.

Returns
-------
unstructured: UgridDataset

Examples
--------
By default, this method will look for ``"x"`` and ``"y"``
coordinates and returns a UgriDataset with a Ugrid topology named
mesh2d:

>>> uds = xugrid.UgridDataset.from_structured(dataset)

In case of other names, the name of the resulting UGRID topology and
the x and y coordinates must be specified:

>>> uds = xugrid.UgridDataset.from_structured(
>>> dataset,
>>> topology={"my_mesh2d": ("xc", "yc")},
>>> )

In case of multiple grid topologies in a single dataset, the names must
be specified as well:

>>> uds = xugrid.UgridDataset.from_structured(
>>> dataset,
>>> topology={"mesh2d_xy": ("x", "y"), "mesh2d_lonlat": {"lon", "lat"},
>>> )
"""
if topology is None:
topology = {"mesh2d": (None, None)}

grids = []
dss = []
for name, (x, y) in topology.items():
grid, stackdims = Ugrid2d.from_structured(
dataset, x=x, y=y, name=name, return_dims=True
)
# Use subset to check that ALL dims of stackdims are present in the
# variable.
checkdims = set(stackdims)
ugrid_vars = [
name
for name, var in dataset.data_vars.items()
if checkdims.issubset(var.dims)
]
dss.append(
dataset[ugrid_vars] # noqa: PD013
.stack({grid.face_dimension: stackdims})
.drop_vars(stackdims + (grid.face_dimension,))
)
grids.append(grid)
# Add the original dataset to include all non-UGRID variables.
dss.append(dataset)
# Then merge with compat="override". This'll pick the first available
# variable: i.e. it will prioritize the UGRID form.
merged = xr.merge(dss, compat="override")
return UgridDataset(merged, grids)
Loading
Loading