Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dynamic time selection #2055

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
5 changes: 5 additions & 0 deletions .zenodo.json
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,11 @@
"name": "Lehner, Sebastian",
"affiliation": "GeoSphere Austria, Vienna, Austria",
"orcid": "0000-0002-7562-8172"
},
{
"name": "Hamon, Baptiste",
"affiliation": "University of Canterbury, Christchurch, New Zealand",
"orcid": "0009-0007-4530-9772"
}
],
"keywords": [
Expand Down
1 change: 1 addition & 0 deletions AUTHORS.rst
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,4 @@ Contributors
* Adrien Lamarche `@LamAdr <https://github.com/LamAdr>`_
* Faisal Mahmood <mahmood.faisal@ouranos.ca> <faisal.shovon.007@gmail.com> `@faimahsho <https://github.com/faimahsho>`_
* Sebastian Lehner <sebastian.lehner@geosphere.at> `@seblehner <https://github.com/seblehner>`_
* Baptiste Hamon <baptiste.hamon@pg.canterbury.ac.nz> `@baptistehamon <https://github.com/baptistehamon>`_
145 changes: 129 additions & 16 deletions src/xclim/core/calendar.py
Original file line number Diff line number Diff line change
Expand Up @@ -1037,7 +1037,7 @@ def doy_to_days_since(
# 2cases:
# val is a day in the same year as its index : da - offset
# val is a day in the next year : da + doy_max - offset
out = xr.where(dac > base_doy, dac, dac + doy_max) - start_doy
out = xr.where(dac >= base_doy, dac, dac + doy_max) - start_doy
out.attrs.update(da.attrs)
if start is not None:
out.attrs.update(units=f"days after {start}")
Expand Down Expand Up @@ -1117,12 +1117,135 @@ def days_since_to_doy(
return out.convert_calendar(base_calendar).rename(da.name)


def _get_doys(start: int, end: int, inclusive: tuple[bool, bool]):
"""Get the day of year list from start to end.

Parameters
----------
start : int
Start day of year.
end : int
End day of year.
inclusive : 2-tuple of booleans
Whether the bounds should be inclusive or not.

Returns
-------
np.ndarray
Array of day of year between the start and end.
"""
if start <= end:
doys = np.arange(start, end + 1)
else:
doys = np.concatenate((np.arange(start, 367), np.arange(0, end + 1)))
if not inclusive[0]:
doys = doys[1:]
if not inclusive[1]:
doys = doys[:-1]
return doys


def mask_between_doys(
da: xr.DataArray,
doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray],
include_bounds: tuple[bool, bool] = [True, True],
) -> xr.DataArray | xr.Dataset:
"""
Mask the data outside the day of year bounds.

Parameters
----------
da : xr.DataArray or xr.Dataset
Input data. It must have a time coordinate.
doy_bounds : 2-tuple of integers or DataArray
The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from
1 (January 1st) to 365 or 366 (December 31st).
If DataArrays are passed, they must have the same coordinates on the dimensions they share.
They may have a time dimension, in which case the masking is done independently for each period defined by the coordinate,
which means the time coordinate must have an inferable frequency (see :py:func:`xr.infer_freq`).
Timesteps of the input not appearing in the time coordinate of the bounds are masked as "outside the bounds".
Missing values (nan) in the bounds are treated as an open bound (same as a None in a slice).
include_bounds : 2-tuple of booleans
Whether the bounds of `doy_bounds` should be inclusive or not.

Returns
-------
xr.DataArray
Boolean array with the same time coordinate as `da` and any other dimension present on the bounds.
True value inside the period of interest and False outside.
"""
if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): # Simple case
mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds))
else:
start, end = doy_bounds
# convert ints to DataArrays
if isinstance(start, int):
start = xr.full_like(end, start)
elif isinstance(end, int):
end = xr.full_like(start, end)
# Ensure they both have the same dims
# align join='exact' will fail on common but different coords, broadcast will add missing coords
start, end = xr.broadcast(*xr.align(start, end, join="exact"))

if not include_bounds[0]:
start += 1
if not include_bounds[1]:
end -= 1

if "time" in start.dims:
freq = xr.infer_freq(start.time)
# Convert the doy bounds to a duration since the beginning of each period defined in the bound's time coordinate
# Also ensures the bounds share the sime time calendar as the input
# Any missing value is replaced with the min/max of possible values
calkws = dict(
calendar=da.time.dt.calendar, use_cftime=(da.time.dtype == "O")
)
start = doy_to_days_since(start.convert_calendar(**calkws)).fillna(0)
end = doy_to_days_since(end.convert_calendar(**calkws)).fillna(366)

out = []
# For each period, mask the days since between start and end
for base_time, indexes in da.resample(time=freq).groups.items():
group = da.isel(time=indexes)

if base_time in start.time:
start_d = start.sel(time=base_time)
end_d = end.sel(time=base_time)

# select days between start and end for group
days = (group.time - base_time).dt.days
days = days.where(days >= 0)
mask = (days >= start_d) & (days <= end_d)
else: # This group has no defined bounds : put False in the mask
# Array with the same shape as the "mask" in the other case : broadcast of time and bounds dims
template = xr.broadcast(
group.time.dt.day, start.isel(time=0, drop=True)
)[0]
mask = xr.full_like(template, False, dtype="bool")
out.append(mask)
mask = xr.concat(out, dim="time")
else: # Only "Spatial" dims, we can't constrain as in days since, so there are two cases
doys = da.time.dt.dayofyear # for readability
# Any missing value is replaced with the min/max of possible values
start = start.fillna(1)
end = end.fillna(366)
mask = xr.where(
start <= end,
(doys >= start)
& (doys <= end), # case 1 : start <= end, ROI is within a calendar year
~(
(doys >= end) & (doys <= start)
), # case 2 : start > end, ROI crosses the new year
)
return mask


def select_time(
da: xr.DataArray | xr.Dataset,
drop: bool = False,
season: str | Sequence[str] | None = None,
month: int | Sequence[int] | None = None,
doy_bounds: tuple[int, int] | None = None,
doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray] | None = None,
date_bounds: tuple[str, str] | None = None,
include_bounds: bool | tuple[bool, bool] = True,
) -> DataType:
Expand All @@ -1143,9 +1266,10 @@ def select_time(
One or more of 'DJF', 'MAM', 'JJA' and 'SON'.
month : int or sequence of int, optional
Sequence of month numbers (January = 1 ... December = 12).
doy_bounds : 2-tuple of int, optional
doy_bounds : 2-tuple of int or xr.DataArray, optional
The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from
1 (January 1st) to 365 or 366 (December 31st).
1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given,
the int day-of-year corresponds to the year of the xr.DataArray.
If calendar awareness is needed, consider using ``date_bounds`` instead.
date_bounds : 2-tuple of str, optional
The bounds as (start, end) of the period of interest expressed as dates in the month-day (%m-%d) format.
Expand Down Expand Up @@ -1187,17 +1311,6 @@ def select_time(
if N == 0:
return da

def _get_doys(_start, _end, _inclusive):
if _start <= _end:
_doys = np.arange(_start, _end + 1)
else:
_doys = np.concatenate((np.arange(_start, 367), np.arange(0, _end + 1)))
if not _inclusive[0]:
_doys = _doys[1:]
if not _inclusive[1]:
_doys = _doys[:-1]
return _doys

if isinstance(include_bounds, bool):
include_bounds = (include_bounds, include_bounds)

Expand All @@ -1212,7 +1325,7 @@ def _get_doys(_start, _end, _inclusive):
mask = da.time.dt.month.isin(month)

elif doy_bounds is not None:
mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds))
mask = mask_between_doys(da, doy_bounds, include_bounds)

elif date_bounds is not None:
# This one is a bit trickier.
Expand Down