From fa0b07c684fa061c1e2848076df947c88edea9a0 Mon Sep 17 00:00:00 2001 From: Florian Pinault Date: Thu, 27 Feb 2025 10:37:28 +0000 Subject: [PATCH] feat: fix documentation to refer to anemoi datasets instead of zarr datasets --- .../graphs/node_attributes/boolean_operations.rst | 2 +- .../docs/graphs/node_attributes/zarr_attribute.rst | 6 +++--- .../docs/graphs/node_coordinates/zarr_dataset.rst | 14 +++++++------- graphs/docs/index.rst | 4 ++-- graphs/docs/overview.rst | 4 ++-- graphs/src/anemoi/graphs/nodes/attributes.py | 6 +++--- .../src/anemoi/graphs/nodes/builders/from_file.py | 2 +- models/docs/modules/data_indices.rst | 2 +- training/src/anemoi/training/data/dataset.py | 4 ++-- .../schemas/graphs/node_attributes_schemas.py | 7 +++++-- .../anemoi/training/schemas/graphs/node_schemas.py | 2 +- 11 files changed, 28 insertions(+), 25 deletions(-) diff --git a/graphs/docs/graphs/node_attributes/boolean_operations.rst b/graphs/docs/graphs/node_attributes/boolean_operations.rst index 53d7e162..307fa4d6 100644 --- a/graphs/docs/graphs/node_attributes/boolean_operations.rst +++ b/graphs/docs/graphs/node_attributes/boolean_operations.rst @@ -6,7 +6,7 @@ of boolean opearations to support these operations when defining node attributes. Below, an attribute `mask` is computed as the intersection of two other masks, that are generated as the non-missing values in 2 -different variables in a Zarr dataset. +different variables in an anemoi dataset. .. 
literalinclude:: ../yaml/attributes_boolean_operation.yaml :language: yaml diff --git a/graphs/docs/graphs/node_attributes/zarr_attribute.rst b/graphs/docs/graphs/node_attributes/zarr_attribute.rst index 643aade9..deb3cdae 100644 --- a/graphs/docs/graphs/node_attributes/zarr_attribute.rst +++ b/graphs/docs/graphs/node_attributes/zarr_attribute.rst @@ -1,6 +1,6 @@ -################### - From Zarr dataset -################### +##################### + From anemoi dataset +##################### Zarr datasets are the standard format to define data nodes in :ref:`anemoi-graphs `. The user can define diff --git a/graphs/docs/graphs/node_coordinates/zarr_dataset.rst b/graphs/docs/graphs/node_coordinates/zarr_dataset.rst index a3c0cb26..54b799d3 100644 --- a/graphs/docs/graphs/node_coordinates/zarr_dataset.rst +++ b/graphs/docs/graphs/node_coordinates/zarr_dataset.rst @@ -1,15 +1,15 @@ .. _zarr-file: -################### - From Zarr dataset -################### +##################### + From anemoi dataset +##################### -This class builds a set of nodes from a Zarr dataset. The nodes are +This class builds a set of nodes from an anemoi dataset. The nodes are defined by the coordinates of the dataset. The ZarrDataset class supports operations compatible with :ref:`anemoi-datasets `. -To define the `node coordinates` based on a Zarr dataset, you can use +To define the `node coordinates` based on an anemoi dataset, you can use the following YAML configuration: .. code:: yaml @@ -21,13 +21,13 @@ the following YAML configuration: dataset: /path/to/dataset.zarr attributes: ... -where `dataset` is the path to the Zarr dataset. +where `dataset` is the path to the anemoi dataset. The ``ZarrDatasetNodes`` class supports operations over multiple datasets. For example, the `cutout` operation supports combining a regional dataset and a global dataset to enable both limited area and stretched grids. 
To define the `node coordinates` that combine multiple -Zarr datasets, you can use the following YAML configuration: +anemoi datasets, you can use the following YAML configuration: .. code:: yaml diff --git a/graphs/docs/index.rst b/graphs/docs/index.rst index 8b6070f5..b7e3a092 100644 --- a/graphs/docs/index.rst +++ b/graphs/docs/index.rst @@ -41,8 +41,8 @@ recipe file, which can be used to build graphs for the input, hidden and output layers. For each layer, the package allows you to: - :ref:`Define graph nodes ` based on - coordinates defined in a dataset (Zarr and NPZ) or via algorithmic - approaches such as the triangular refined icosahedron. + coordinates defined in a dataset (anemoi dataset and NPZ) or via + algorithmic approaches such as the triangular refined icosahedron. - :ref:`Define edges ` (connections between nodes) based on methods such as the cut-off radius or K nearest-neighbours. diff --git a/graphs/docs/overview.rst b/graphs/docs/overview.rst index 15b0fe3b..46a922fa 100644 --- a/graphs/docs/overview.rst +++ b/graphs/docs/overview.rst @@ -34,7 +34,7 @@ categories: data nodes A set of nodes representing one or multiple datasets. The `data nodes` may correspond to the input/output of our data-driven model. - They can be defined from Zarr datasets and this method supports all + They can be defined from anemoi datasets and this method supports all :ref:`anemoi-datasets ` operations such as `cutout` or `thinning`. @@ -42,7 +42,7 @@ hidden nodes The `hidden nodes` capture intermediate representations of the model, which are used to learn the dynamics of the system considered (atmosphere, ocean, etc, ...). These nodes can be generated from - existing locations (Zarr datasets or NPZ files) or algorithmically + existing locations (Anemoi datasets or NPZ files) or algorithmically from iterative refinements of polygons over the globe. 
Another important term that can refer to both data and hidden nodes is diff --git a/graphs/src/anemoi/graphs/nodes/attributes.py b/graphs/src/anemoi/graphs/nodes/attributes.py index 14c4c3d9..b46c9c16 100644 --- a/graphs/src/anemoi/graphs/nodes/attributes.py +++ b/graphs/src/anemoi/graphs/nodes/attributes.py @@ -229,14 +229,14 @@ def __init__(self) -> None: class NonmissingZarrVariable(BooleanBaseNodeAttribute): - """Mask of valid (not missing) values of a Zarr dataset variable. + """Mask of valid (not missing) values of an Anemoi dataset variable. - It reads a variable from a Zarr dataset and returns a boolean mask of nonmissing values in the first timestep. + It reads a variable from an Anemoi dataset and returns a boolean mask of nonmissing values in the first timestep. Attributes ---------- variable : str - Variable to read from the Zarr dataset. + Variable to read from the Anemoi dataset. Methods ------- diff --git a/graphs/src/anemoi/graphs/nodes/builders/from_file.py b/graphs/src/anemoi/graphs/nodes/builders/from_file.py index 9b7e62c2..ec61a048 100644 --- a/graphs/src/anemoi/graphs/nodes/builders/from_file.py +++ b/graphs/src/anemoi/graphs/nodes/builders/from_file.py @@ -26,7 +26,7 @@ class ZarrDatasetNodes(BaseNodeBuilder): - """Nodes from Zarr dataset. + """Nodes from an anemoi dataset. Attributes ---------- diff --git a/models/docs/modules/data_indices.rst b/models/docs/modules/data_indices.rst index c546795e..cc664cfa 100644 --- a/models/docs/modules/data_indices.rst +++ b/models/docs/modules/data_indices.rst @@ -59,7 +59,7 @@ remapper-preprocessor. There are two main Index-levels: -- Data: The data at "Zarr"-level provided by Anemoi-Datasets +- Data: The data at "anemoi-datasets"-level provided by Anemoi-Datasets - Model: The "squeezed" tensors with irrelevant parts missing. 
Additionally, there are two internal model levels (After preprocessor diff --git a/training/src/anemoi/training/data/dataset.py b/training/src/anemoi/training/data/dataset.py index a3c249d8..ce26122f 100644 --- a/training/src/anemoi/training/data/dataset.py +++ b/training/src/anemoi/training/data/dataset.py @@ -51,7 +51,7 @@ def __init__( Parameters ---------- data_reader : Callable - user function that opens and returns the zarr array data + user function that opens and returns the anemoi-datasets array data grid_indices : Type[BaseGridIndices] indices of the grid to keep. Defaults to None, which keeps all spatial indices. rollout : int, optional @@ -246,7 +246,7 @@ def per_worker_init(self, n_workers: int, worker_id: int) -> None: def __iter__(self) -> torch.Tensor: """Return an iterator over the dataset. - The datasets are retrieved by Anemoi Datasets from zarr files. This iterator yields + The datasets are retrieved by anemoi.datasets from anemoi datasets. This iterator yields chunked batches for DDP and sharded training. Currently it receives data with an ensemble dimension, which is discarded for diff --git a/training/src/anemoi/training/schemas/graphs/node_attributes_schemas.py b/training/src/anemoi/training/schemas/graphs/node_attributes_schemas.py index 19beb770..44920f77 100644 --- a/training/src/anemoi/training/schemas/graphs/node_attributes_schemas.py +++ b/training/src/anemoi/training/schemas/graphs/node_attributes_schemas.py @@ -47,9 +47,12 @@ class CutOutMaskSchema(BaseModel): class NonmissingZarrVariableSchema(BaseModel): target_: Literal["anemoi.graphs.nodes.attributes.NonmissingZarrVariable"] = Field(..., alias="_target_") - "Implementation of a mask from the nonmissing values of a Zarr variable from anemoi.graphs.nodes.attributes." + ( + "Implementation of a mask from the nonmissing values of an anemoi-datasets variable " + "from anemoi.graphs.nodes.attributes." + ) variable: str - "The Zarr variable to use." 
+ "The anemoi-datasets variable to use." class BooleanOperationSchema(BaseModel): diff --git a/training/src/anemoi/training/schemas/graphs/node_schemas.py b/training/src/anemoi/training/schemas/graphs/node_schemas.py index 8c89a26f..7a65aa1e 100644 --- a/training/src/anemoi/training/schemas/graphs/node_schemas.py +++ b/training/src/anemoi/training/schemas/graphs/node_schemas.py @@ -27,7 +27,7 @@ class ZarrNodeSchema(BaseModel): target_: Literal["anemoi.graphs.nodes.ZarrDatasetNodes"] = Field(..., alias="_target_") - "Nodes from Zarr dataset class implementation from anemoi.graphs.nodes." + "Nodes from Anemoi dataset class implementation from anemoi.graphs.nodes." dataset: Union[str, dict] # TODO(Helen): Discuss schema with Baudouin "The dataset containing the nodes."