Skip to content

Commit

Permalink
Merge branch 'main' into datatree_from_dict_no_copy
Browse files Browse the repository at this point in the history
  • Loading branch information
Illviljan committed Aug 5, 2024
2 parents 7604934 + 1ac19c4 commit ec49577
Show file tree
Hide file tree
Showing 107 changed files with 3,306 additions and 2,277 deletions.
5 changes: 5 additions & 0 deletions .github/release.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
changelog:
exclude:
authors:
- dependabot
- pre-commit-ci
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repos:
- id: mixed-line-ending
- repo: https://github.com/astral-sh/ruff-pre-commit
# Ruff version.
rev: 'v0.4.7'
rev: 'v0.5.0'
hooks:
- id: ruff
args: ["--fix", "--show-fixes"]
Expand All @@ -30,7 +30,7 @@ repos:
additional_dependencies: ["black==24.4.2"]
- id: blackdoc-autoupdate-black
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.10.0
rev: v1.10.1
hooks:
- id: mypy
# Copied from setup.cfg
Expand Down
11 changes: 5 additions & 6 deletions HOW_TO_RELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,13 @@ upstream https://github.com/pydata/xarray (push)
```sh
git fetch upstream --tags
```
This will return a list of all the contributors since the last release:
Then run
```sh
git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | perl -pe 's/\n/$1, /'
```
This will return the total number of contributors:
```sh
git log "$(git tag --sort=v:refname | tail -1).." --format=%aN | sort -u | wc -l
python ci/release_contributors.py
```
(needs `gitpython` and `toolz` / `cytoolz`)

and copy the output.
3. Write a release summary: ~50 words describing the high level features. This
will be used in the release emails, tweets, GitHub release notes, etc.
4. Look over whats-new.rst and the docs. Make sure "What's New" is complete
Expand Down
18 changes: 18 additions & 0 deletions asv_bench/benchmarks/coding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
import numpy as np

import xarray as xr

from . import parameterized


@parameterized(["calendar"], [("standard", "noleap")])
class EncodeCFDatetime:
def setup(self, calendar):
self.units = "days since 2000-01-01"
self.dtype = np.dtype("int64")
self.times = xr.date_range(
"2000", freq="D", periods=10000, calendar=calendar
).values

def time_encode_cf_datetime(self, calendar):
xr.coding.times.encode_cf_datetime(self.times, self.units, calendar, self.dtype)
113 changes: 112 additions & 1 deletion asv_bench/benchmarks/dataset_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import pandas as pd

import xarray as xr
from xarray.backends.api import open_datatree
from xarray.core.datatree import DataTree

from . import _skip_slow, parameterized, randint, randn, requires_dask

Expand All @@ -16,7 +18,6 @@
except ImportError:
pass


os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

_ENGINES = tuple(xr.backends.list_engines().keys() - {"store"})
Expand Down Expand Up @@ -469,6 +470,116 @@ def create_delayed_write():
return ds.to_netcdf("file.nc", engine="netcdf4", compute=False)


class IONestedDataTree:
"""
A few examples that benchmark reading/writing a heavily nested netCDF datatree with
xarray
"""

timeout = 300.0
repeat = 1
number = 5

def make_datatree(self, nchildren=10):
# multiple Dataset
self.ds = xr.Dataset()
self.nt = 1000
self.nx = 90
self.ny = 45
self.nchildren = nchildren

self.block_chunks = {
"time": self.nt / 4,
"lon": self.nx / 3,
"lat": self.ny / 3,
}

self.time_chunks = {"time": int(self.nt / 36)}

times = pd.date_range("1970-01-01", periods=self.nt, freq="D")
lons = xr.DataArray(
np.linspace(0, 360, self.nx),
dims=("lon",),
attrs={"units": "degrees east", "long_name": "longitude"},
)
lats = xr.DataArray(
np.linspace(-90, 90, self.ny),
dims=("lat",),
attrs={"units": "degrees north", "long_name": "latitude"},
)
self.ds["foo"] = xr.DataArray(
randn((self.nt, self.nx, self.ny), frac_nan=0.2),
coords={"lon": lons, "lat": lats, "time": times},
dims=("time", "lon", "lat"),
name="foo",
attrs={"units": "foo units", "description": "a description"},
)
self.ds["bar"] = xr.DataArray(
randn((self.nt, self.nx, self.ny), frac_nan=0.2),
coords={"lon": lons, "lat": lats, "time": times},
dims=("time", "lon", "lat"),
name="bar",
attrs={"units": "bar units", "description": "a description"},
)
self.ds["baz"] = xr.DataArray(
randn((self.nx, self.ny), frac_nan=0.2).astype(np.float32),
coords={"lon": lons, "lat": lats},
dims=("lon", "lat"),
name="baz",
attrs={"units": "baz units", "description": "a description"},
)

self.ds.attrs = {"history": "created for xarray benchmarking"}

self.oinds = {
"time": randint(0, self.nt, 120),
"lon": randint(0, self.nx, 20),
"lat": randint(0, self.ny, 10),
}
self.vinds = {
"time": xr.DataArray(randint(0, self.nt, 120), dims="x"),
"lon": xr.DataArray(randint(0, self.nx, 120), dims="x"),
"lat": slice(3, 20),
}
root = {f"group_{group}": self.ds for group in range(self.nchildren)}
nested_tree1 = {
f"group_{group}/subgroup_1": xr.Dataset() for group in range(self.nchildren)
}
nested_tree2 = {
f"group_{group}/subgroup_2": xr.DataArray(np.arange(1, 10)).to_dataset(
name="a"
)
for group in range(self.nchildren)
}
nested_tree3 = {
f"group_{group}/subgroup_2/sub-subgroup_1": self.ds
for group in range(self.nchildren)
}
dtree = root | nested_tree1 | nested_tree2 | nested_tree3
self.dtree = DataTree.from_dict(dtree)


class IOReadDataTreeNetCDF4(IONestedDataTree):
def setup(self):
# TODO: Lazily skipped in CI as it is very demanding and slow.
# Improve times and remove errors.
_skip_slow()

requires_dask()

self.make_datatree()
self.format = "NETCDF4"
self.filepath = "datatree.nc4.nc"
dtree = self.dtree
dtree.to_netcdf(filepath=self.filepath)

def time_load_datatree_netcdf4(self):
open_datatree(self.filepath, engine="netcdf4").load()

def time_open_datatree_netcdf4(self):
open_datatree(self.filepath, engine="netcdf4")


class IOWriteNetCDFDask:
timeout = 60
repeat = 1
Expand Down
5 changes: 2 additions & 3 deletions ci/install-upstream-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse
# temporarily remove numexpr
$conda remove -y numexpr
# temporarily remove backends
$conda remove -y cf_units hdf5 h5py netcdf4 pydap
$conda remove -y pydap
# forcibly remove packages to avoid artifacts
$conda remove -y --force \
numpy \
Expand All @@ -37,8 +37,7 @@ python -m pip install \
numpy \
scipy \
matplotlib \
pandas \
h5py
pandas
# for some reason pandas depends on pyarrow already.
# Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge`
python -m pip install \
Expand Down
49 changes: 49 additions & 0 deletions ci/release_contributors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import re
import textwrap

import git
from tlz.itertoolz import last, unique

co_author_re = re.compile(r"Co-authored-by: (?P<name>[^<]+?) <(?P<email>.+)>")


def main():
repo = git.Repo(".")

most_recent_release = last(repo.tags)

# extract information from commits
contributors = {}
for commit in repo.iter_commits(f"{most_recent_release.name}.."):
matches = co_author_re.findall(commit.message)
if matches:
contributors.update({email: name for name, email in matches})
contributors[commit.author.email] = commit.author.name

# deduplicate and ignore
# TODO: extract ignores from .github/release.yml
ignored = ["dependabot", "pre-commit-ci"]
unique_contributors = unique(
contributor
for contributor in contributors.values()
if contributor.removesuffix("[bot]") not in ignored
)

sorted_ = sorted(unique_contributors)
if len(sorted_) > 1:
names = f"{', '.join(sorted_[:-1])} and {sorted_[-1]}"
else:
names = "".join(sorted_)

statement = textwrap.dedent(
f"""\
Thanks to the {len(sorted_)} contributors to this release:
{names}
""".rstrip()
)

print(statement)


if __name__ == "__main__":
main()
2 changes: 1 addition & 1 deletion ci/requirements/all-but-dask.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ dependencies:
- netcdf4
- numba
- numbagg
- numpy<2
- numpy
- packaging
- pandas
- pint>=0.22
Expand Down
3 changes: 2 additions & 1 deletion ci/requirements/doc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ dependencies:
- bottleneck
- cartopy
- cfgrib
- kerchunk
- dask-core>=2022.1
- dask-expr
- hypothesis>=6.75.8
Expand All @@ -21,7 +22,7 @@ dependencies:
- nbsphinx
- netcdf4>=1.5
- numba
- numpy>=1.21,<2
- numpy>=2
- packaging>=21.3
- pandas>=1.4,!=2.1.0
- pooch
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- netcdf4
- numba
- numbagg
- numpy<2
- numpy
- packaging
- pandas
# - pint>=0.22
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies:
- numba
- numbagg
- numexpr
- numpy<2
- numpy
- opt_einsum
- packaging
- pandas
Expand Down
4 changes: 4 additions & 0 deletions doc/api-hidden.rst
Original file line number Diff line number Diff line change
Expand Up @@ -693,3 +693,7 @@

coding.times.CFTimedeltaCoder
coding.times.CFDatetimeCoder

groupers.Grouper
groupers.Resampler
groupers.EncodedGroups
25 changes: 21 additions & 4 deletions doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ Dataset contents
Dataset.drop_duplicates
Dataset.drop_dims
Dataset.drop_encoding
Dataset.drop_attrs
Dataset.set_coords
Dataset.reset_coords
Dataset.convert_calendar
Expand Down Expand Up @@ -306,6 +307,7 @@ DataArray contents
DataArray.drop_indexes
DataArray.drop_duplicates
DataArray.drop_encoding
DataArray.drop_attrs
DataArray.reset_coords
DataArray.copy
DataArray.convert_calendar
Expand Down Expand Up @@ -801,6 +803,18 @@ DataArray
DataArrayGroupBy.dims
DataArrayGroupBy.groups

Grouper Objects
---------------

.. currentmodule:: xarray

.. autosummary::
:toctree: generated/

groupers.BinGrouper
groupers.UniqueGrouper
groupers.TimeResampler


Rolling objects
===============
Expand Down Expand Up @@ -1026,17 +1040,20 @@ DataArray
Accessors
=========

.. currentmodule:: xarray
.. currentmodule:: xarray.core

.. autosummary::
:toctree: generated/

core.accessor_dt.DatetimeAccessor
core.accessor_dt.TimedeltaAccessor
core.accessor_str.StringAccessor
accessor_dt.DatetimeAccessor
accessor_dt.TimedeltaAccessor
accessor_str.StringAccessor


Custom Indexes
==============
.. currentmodule:: xarray

.. autosummary::
:toctree: generated/

Expand Down
Loading

0 comments on commit ec49577

Please sign in to comment.