From c5b90c54d55edd3021415427d9a26666b88da7b6 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 3 Apr 2024 01:52:31 +0200 Subject: [PATCH] Update docstring for compute and persist (#8903) * Update docstring for compute and persist * Add compute reference to load docstring for dask array * Use new wording for persist Co-authored-by: Justus Magin * Update xarray/core/dataarray.py Co-authored-by: Justus Magin * Apply suggestions from code review Co-authored-by: Justus Magin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add "all" to persist docstring * Apply suggestions from code review Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --------- Co-authored-by: Justus Magin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Maximilian Roos <5635139+max-sixty@users.noreply.github.com> --- xarray/core/dataarray.py | 18 ++++++++++++++++-- xarray/core/dataset.py | 11 +++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 80dcfe1302c..509962ff80d 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -1126,6 +1126,8 @@ def load(self, **kwargs) -> Self: """Manually trigger loading of this array's data from disk or a remote source into memory and return this array. + Unlike compute, the original array is modified and returned. + Normally, it should not be necessary to call this method in user code, because all xarray functions should either work on deferred data or load data automatically. However, this method can be necessary when @@ -1148,8 +1150,9 @@ def load(self, **kwargs) -> Self: def compute(self, **kwargs) -> Self: """Manually trigger loading of this array's data from disk or a - remote source into memory and return a new array. The original is - left unaltered. + remote source into memory and return a new array. 
+ + Unlike load, the original is left unaltered. Normally, it should not be necessary to call this method in user code, because all xarray functions should either work on deferred data or @@ -1161,6 +1164,11 @@ def compute(self, **kwargs) -> Self: **kwargs : dict Additional keyword arguments passed on to ``dask.compute``. + Returns + ------- + object : DataArray + New object with the data and all coordinates as in-memory arrays. + See Also -------- dask.compute @@ -1174,12 +1182,18 @@ def persist(self, **kwargs) -> Self: This keeps them as dask arrays but encourages them to keep data in memory. This is particularly useful when on a distributed machine. When on a single machine consider using ``.compute()`` instead. + Like compute (but unlike load), the original array is left unaltered. Parameters ---------- **kwargs : dict Additional keyword arguments passed on to ``dask.persist``. + Returns + ------- + object : DataArray + New object with all dask-backed data and coordinates as persisted dask arrays. + See Also -------- dask.persist diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 2c0b3e89722..4c80a47209e 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -1005,6 +1005,11 @@ def compute(self, **kwargs) -> Self: **kwargs : dict Additional keyword arguments passed on to ``dask.compute``. + Returns + ------- + object : Dataset + New object with lazy data variables and coordinates as in-memory arrays. + See Also -------- dask.compute @@ -1037,12 +1042,18 @@ def persist(self, **kwargs) -> Self: operation keeps the data as dask arrays. This is particularly useful when using the dask.distributed scheduler and you want to load a large amount of data into distributed memory. + Like compute (but unlike load), the original dataset is left unaltered. Parameters ---------- **kwargs : dict Additional keyword arguments passed on to ``dask.persist``. 
+ Returns + ------- + object : Dataset + New object with all dask-backed coordinates and data variables as persisted dask arrays. + See Also -------- dask.persist