From afddae6e314556c52dd5ffb43cc812b383f2a79f Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Thu, 19 Dec 2024 14:34:34 +1300 Subject: [PATCH 1/9] Add time selection between dynamic doy --- xclim/core/calendar.py | 132 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 117 insertions(+), 15 deletions(-) diff --git a/xclim/core/calendar.py b/xclim/core/calendar.py index 14b47084f..3a0a6caf3 100644 --- a/xclim/core/calendar.py +++ b/xclim/core/calendar.py @@ -1078,12 +1078,124 @@ def days_since_to_doy( return out.convert_calendar(base_calendar).rename(da.name) +def _get_doys(start: int, end: int, inclusive: tuple[bool, bool]): + """Get the day of year list from start to end. + + Parameters + ---------- + start : int + Start day of year. + end : int + End day of year. + inclusive : 2-tuple of booleans + Whether the bounds should be inclusive or not. + + Returns + ------- + np.ndarray + Array of day of year between the start and end. + """ + if start <= end: + doys = np.arange(start, end + 1) + else: + doys = np.concatenate((np.arange(start, 367), np.arange(0, end + 1))) + if not inclusive[0]: + doys = doys[1:] + if not inclusive[1]: + doys = doys[:-1] + return doys + + +def mask_between_doys( + da: xr.DataArray, + doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray], + include_bounds: tuple[bool, bool] = [True, True], +) -> xr.DataArray | xr.Dataset: + """Mask the data outside the day of year bounds. + + Parameters + ---------- + da : xr.DataArray or xr.Dataset + Input data. + doy_bounds : 2-tuple of integers or xr.DataArray + The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from + 1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given, + the int day-of-year corresponds to the year of the xr.DataArray. + include_bounds : 2-tuple of booleans + Whether the bounds of `doy_bounds` should be inclusive or not. + + Returns + ------- + xr.DataArray or xr.Dataset + Boolean mask array with the same shape as `da` with True value inside the period of + interest and False outside. + """ + if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): + mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, [True, True])) + + else: + cal = get_calendar(da, dim="time") + + start, end = doy_bounds + if isinstance(start, int): + start = xr.where(end.isnull(), np.nan, start) + start = start.convert_calendar(cal) + start.attrs["calendar"] = cal + else: + start = start.convert_calendar(cal) + start.attrs["calendar"] = cal + start = doy_to_days_since(start) + + if isinstance(end, int): + end = xr.where(start.isnull(), np.nan, end) + end = end.convert_calendar(cal) + end.attrs["calendar"] = cal + else: + end = end.convert_calendar(cal) + end.attrs["calendar"] = cal + end = doy_to_days_since(end) + + freq = xr.infer_freq(start.time) + out = [] + for base_time, indexes in da.resample(time=freq).groups.items(): + # get group slice + group = da.isel(time=indexes) + + if base_time in start.time: + start_d = start.sel(time=base_time) + else: + start_d = None + if base_time in end.time: + end_d = end.sel(time=base_time) + else: + end_d = None + + if start_d is not None and end_d is not None: + if not include_bounds[0]: + start_d += 1 + if not include_bounds[1]: + end_d -= 1 + + # select days between start and end for group + days = (group.time - base_time).dt.days + days[days < 0] = np.nan + + mask = (days >= start_d) & (days <= end_d) + else: + # Get an array with the good shape and put False + mask = xr.where(group.isel(time=0), False, False) + + out.append(mask) + mask = xr.concat(out, dim="time") + return mask + + def select_time( da: xr.DataArray | xr.Dataset, drop: bool = False, season: str | Sequence[str] | None = None, month: int | Sequence[int] | None = None, - doy_bounds: tuple[int, int] | None = None, + doy_bounds: tuple[int | xr.DataArray, int | xr.DataArray] | None = None, date_bounds: tuple[str, str] | None = None, include_bounds: bool | tuple[bool, bool] = True, ) -> DataType: @@ -1104,9 +1216,10 @@ def select_time( One or more of 'DJF', 'MAM', 'JJA' and 'SON'. month : integer or sequence of integers, optional Sequence of month numbers (January = 1 ... December = 12) - doy_bounds : 2-tuple of integers, optional + doy_bounds : 2-tuple of integers or xr.DataArray, optional The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from - 1 (January 1st) to 365 or 366 (December 31st). + 1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given, + the int day-of-year corresponds to the year of the xr.DataArray. If calendar awareness is needed, consider using ``date_bounds`` instead. date_bounds : 2-tuple of strings, optional The bounds as (start, end) of the period of interest expressed as dates in the month-day (%m-%d) format. @@ -1148,17 +1261,6 @@ def select_time( if N == 0: return da - def _get_doys(_start, _end, _inclusive): - if _start <= _end: - _doys = np.arange(_start, _end + 1) - else: - _doys = np.concatenate((np.arange(_start, 367), np.arange(0, _end + 1))) - if not _inclusive[0]: - _doys = _doys[1:] - if not _inclusive[1]: - _doys = _doys[:-1] - return _doys - if isinstance(include_bounds, bool): include_bounds = (include_bounds, include_bounds) @@ -1173,7 +1275,7 @@ def _get_doys(_start, _end, _inclusive): mask = da.time.dt.month.isin(month) elif doy_bounds is not None: - mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds)) + mask = mask_between_doys(da, doy_bounds, include_bounds) elif date_bounds is not None: # This one is a bit trickier. From 5ec7ec657638adb8ec9fb1e4f0de1d823a541ec6 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Mon, 20 Jan 2025 16:21:31 +1300 Subject: [PATCH 2/9] Fix the inclusion of bounds for int doys --- src/xclim/core/calendar.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 0de782d27..b7d09bcaf 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1171,7 +1171,7 @@ def mask_between_doys( interest and False outside. """ if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): - mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, [True, True])) + mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds)) else: cal = get_calendar(da, dim="time") From ac76379e9aa66128663cc54acae6a514949cfb66 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Wed, 22 Jan 2025 15:55:35 +1300 Subject: [PATCH 3/9] Add details to AUTHORS.rst --- AUTHORS.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index 14cb18707..0fd1dc9fa 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -48,3 +48,4 @@ Contributors * Adrien Lamarche `@LamAdr `_ * Faisal Mahmood `@faimahsho `_ * Sebastian Lehner `@seblehner `_ +* Baptiste Hamon `@baptistehamon `_ From 2e40c9f0b6baa628718f3e72e20f43c3002e330b Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Wed, 22 Jan 2025 16:15:18 +1300 Subject: [PATCH 4/9] Add details to zenodo file --- .zenodo.json | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.zenodo.json b/.zenodo.json index bf4fc7f19..e53f692ac 100644 --- a/.zenodo.json +++ b/.zenodo.json @@ -153,6 +153,11 @@ "name": "Lehner, Sebastian", "affiliation": "GeoSphere Austria, Vienna, Austria", "orcid": "0000-0002-7562-8172" + }, + { + "name": "Hamon, Baptiste", + "affiliation": "University of Canterbury, Christchurch, New Zealand", + "orcid": "0009-0007-4530-9772" } ], "keywords": [ From 0c815b9429760d1935c1be09ebe80a8fd66c3607 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Tue, 28 Jan 2025 13:39:24 +1300 Subject: [PATCH 5/9] Add a check to ensure that doy bounds have same freq in mask_between_doys --- src/xclim/core/calendar.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index b7d09bcaf..471fffae2 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1195,7 +1195,21 @@ def mask_between_doys( end.attrs["calendar"] = cal end = doy_to_days_since(end) - freq = xr.infer_freq(start.time) + freq = [] + for bound in [start, end]: + try: + freq.append(xr.infer_freq(bound.time)) + except ValueError: + freq.append(None) + freq = set(freq) - {None} + if len(freq) != 1: + raise ValueError( + f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}." + " Please consider providing `freq` manually." + ) + else: + freq = freq.pop() + out = [] for base_time, indexes in da.resample(time=freq).groups.items(): # get group slice From c52cd63b259b4ead6b2736a523a1ed683ffb984d Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Tue, 28 Jan 2025 14:44:43 +1300 Subject: [PATCH 6/9] Fix bug when masking between doys on dataset --- src/xclim/core/calendar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 471fffae2..69327e9cd 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1237,7 +1237,8 @@ def mask_between_doys( mask = (days >= start_d) & (days <= end_d) else: # Get an array with the good shape and put False - mask = xr.where(group.isel(time=0), False, False) + mask = start.isel(time=0).drop_vars("time").expand_dims(time=group.time) + mask = xr.full_like(mask, False) out.append(mask) mask = xr.concat(out, dim="time") From 36b1862afd2732d45f09ad68b7c9f9e2699e2ac9 Mon Sep 17 00:00:00 2001 From: Baptiste Hamon Date: Tue, 28 Jan 2025 14:56:58 +1300 Subject: [PATCH 7/9] Edit freq error message in mask_between_doys --- src/xclim/core/calendar.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index 69327e9cd..e3790ddb2 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1205,7 +1205,6 @@ def mask_between_doys( if len(freq) != 1: raise ValueError( f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}." - " Please consider providing `freq` manually." ) else: freq = freq.pop() From ac20249ee8524fec32524ab5a5abbb9aa189f3bc Mon Sep 17 00:00:00 2001 From: Pascal Bourgault Date: Fri, 31 Jan 2025 17:54:43 -0500 Subject: [PATCH 8/9] Rewrite mask_between_doys to add spatial dims support --- src/xclim/core/calendar.py | 144 ++++++++++++++++++------------------- 1 file changed, 70 insertions(+), 74 deletions(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index e3790ddb2..bbb579cd3 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1037,7 +1037,7 @@ def doy_to_days_since( # 2cases: # val is a day in the same year as its index : da - offset # val is a day in the next year : da + doy_max - offset - out = xr.where(dac > base_doy, dac, dac + doy_max) - start_doy + out = xr.where(dac >= base_doy, dac, dac + doy_max) - start_doy out.attrs.update(da.attrs) if start is not None: out.attrs.update(units=f"days after {start}") @@ -1156,91 +1156,87 @@ def mask_between_doys( Parameters ---------- da : xr.DataArray or xr.Dataset - Input data. - doy_bounds : 2-tuple of integers or xr.DataArray + Input data. It must have a time coordinate. + doy_bounds : 2-tuple of integers or DataArray The bounds as (start, end) of the period of interest expressed in day-of-year, integers going from - 1 (January 1st) to 365 or 366 (December 31st). If a combination of int and xr.DataArray is given, - the int day-of-year corresponds to the year of the xr.DataArray. + 1 (January 1st) to 365 or 366 (December 31st). + If DataArrays are passed, they must have the same coordinates on the dimensions they share. + They may have a time dimension, in which case the masking is done independently for each period defined by the coordinate, + which means the time coordinate must have an inferable frequency (see :py:func:`xr.infer_freq`). + Timesteps of the input not appearing in the time coordinate of the bounds are masked as "outside the bounds". + Missing values (nan) in the bounds are treated as an open bound (same as a None in a slice). include_bounds : 2-tuple of booleans Whether the bounds of `doy_bounds` should be inclusive or not. Returns ------- - xr.DataArray or xr.Dataset - Boolean mask array with the same shape as `da` with True value inside the period of - interest and False outside. + xr.DataArray + Boolean array with the same time coordinate as `da` and any other dimension present on the bounds. + True value inside the period of interest and False outside. """ - if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): + if isinstance(doy_bounds[0], int) and isinstance(doy_bounds[1], int): # Simple case mask = da.time.dt.dayofyear.isin(_get_doys(*doy_bounds, include_bounds)) - else: - cal = get_calendar(da, dim="time") - start, end = doy_bounds + # convert ints to DataArrays if isinstance(start, int): - start = xr.where(end.isnull(), np.nan, start) - start = start.convert_calendar(cal) - start.attrs["calendar"] = cal - else: - start = start.convert_calendar(cal) - start.attrs["calendar"] = cal - start = doy_to_days_since(start) - - if isinstance(end, int): - end = xr.where(start.isnull(), np.nan, end) - end = end.convert_calendar(cal) - end.attrs["calendar"] = cal - else: - end = end.convert_calendar(cal) - end.attrs["calendar"] = cal - end = doy_to_days_since(end) - - freq = [] - for bound in [start, end]: - try: - freq.append(xr.infer_freq(bound.time)) - except ValueError: - freq.append(None) - freq = set(freq) - {None} - if len(freq) != 1: - raise ValueError( - f"Non-inferrable resampling frequency or inconsistent frequencies. Got start, end = {freq}." + start = xr.full_like(end, start) + elif isinstance(end, int): + end = xr.full_like(start, end) + # Ensure they both have the same dims + # align join='exact' will fail on common but different coords, broadcast will add missing coords + start, end = xr.broadcast(*xr.align(start, end, join="exact")) + + if not include_bounds[0]: + start += 1 + if not include_bounds[1]: + end -= 1 + + if "time" in start.dims: + freq = xr.infer_freq(start.time) + # Convert the doy bounds to a duration since the beginning of each period defined in the bound's time coordinate + # Also ensures the bounds share the sime time calendar as the input + # Any missing value is replaced with the min/max of possible values + calkws = dict( + calendar=da.time.dt.calendar, use_cftime=(da.time.dtype == "O") + ) + start = doy_to_days_since(start.convert_calendar(**calkws)).fillna(0) + end = doy_to_days_since(end.convert_calendar(**calkws)).fillna(366) + + out = [] + # For each period, mask the days since between start and end + for base_time, indexes in da.resample(time=freq).groups.items(): + group = da.isel(time=indexes) + + if base_time in start.time: + start_d = start.sel(time=base_time) + end_d = end.sel(time=base_time) + + # select days between start and end for group + days = (group.time - base_time).dt.days + days = days.where(days >= 0) + mask = (days >= start_d) & (days <= end_d) + else: # This group has no defined bounds : put False in the mask + # Array with the same shape as the "mask" in the other case : broadcast of time and bounds dims + template = xr.broadcast( + group.time.dt.day, start.isel(time=0, drop=True) + )[0] + mask = xr.full_like(template, False, dtype="bool") + out.append(mask) + mask = xr.concat(out, dim="time") + else: # Only "Spatial" dims, we can't constrain as in days since, so there are two cases + doys = da.time.dt.dayofyear # for readability + # Any missing value is replaced with the min/max of possible values + start = start.fillna(1) + end = end.fillna(366) + mask = xr.where( + start <= end, + (doys >= start) + & (doys <= end), # case 1 : start <= end, ROI is within a calendar year + ~( + (doys >= end) & (doys <= start) + ), # case 2 : start > end, ROI crosses the new year ) - else: - freq = freq.pop() - - out = [] - for base_time, indexes in da.resample(time=freq).groups.items(): - # get group slice - group = da.isel(time=indexes) - - if base_time in start.time: - start_d = start.sel(time=base_time) - else: - start_d = None - if base_time in end.time: - end_d = end.sel(time=base_time) - else: - end_d = None - - if start_d is not None and end_d is not None: - if not include_bounds[0]: - start_d += 1 - if not include_bounds[1]: - end_d -= 1 - - # select days between start and end for group - days = (group.time - base_time).dt.days - days[days < 0] = np.nan - - mask = (days >= start_d) & (days <= end_d) - else: - # Get an array with the good shape and put False - mask = start.isel(time=0).drop_vars("time").expand_dims(time=group.time) - mask = xr.full_like(mask, False) - - out.append(mask) - mask = xr.concat(out, dim="time") return mask From 5c91ef33984717f90f5fa0635ce43604085c8c18 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 5 Feb 2025 20:42:50 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/xclim/core/calendar.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/xclim/core/calendar.py b/src/xclim/core/calendar.py index bbb579cd3..61881924e 100644 --- a/src/xclim/core/calendar.py +++ b/src/xclim/core/calendar.py @@ -1118,7 +1118,8 @@ def days_since_to_doy( def _get_doys(start: int, end: int, inclusive: tuple[bool, bool]): - """Get the day of year list from start to end. + """ + Get the day of year list from start to end. Parameters ----------