From e8ba3f042d15e57b757fdbab48ce09ea09d04f73 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 11 Dec 2024 12:17:07 +0100 Subject: [PATCH 01/21] Correctly interpolate seasonal in Grouper --- src/xclim/sdba/base.py | 41 ++++++++++++++++++++++++------------ src/xclim/testing/helpers.py | 8 ++++--- tests/test_bootstrapping.py | 32 ++++++++++++++-------------- tests/test_calendar.py | 8 +++---- tests/test_helpers.py | 8 +++---- tests/test_sdba/test_base.py | 18 ++++++++++++---- 6 files changed, 71 insertions(+), 44 deletions(-) diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index c060a054b..a41fcca9d 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -295,21 +295,36 @@ def get_index( return da[self.dim].rename("group") ind = da.indexes[self.dim] - if self.prop == "week": - i = da[self.dim].copy(data=ind.isocalendar().week).astype(int) - elif self.prop == "season": - i = da[self.dim].copy(data=ind.month % 12 // 3) - else: - i = getattr(ind, self.prop) - if not np.issubdtype(i.dtype, np.integer): - raise ValueError( - f"Index {self.name} is not of type int (rather {i.dtype}), " - f"but {self.__class__.__name__} requires integer indexes." - ) + if interp and self.dim == "time": + if interp and self.dim == "time" and self.prop == "month": + i = ind.month - 0.5 + ind.day / ind.days_in_month + + elif interp and self.dim == "time" and self.prop == "season": + calendar = ind.calendar if hasattr(ind, "calendar") else "standard" + length_year = ( + 360 + if calendar == "360_day" + else 365 + (0 if calendar == "noleap" else ind.is_leap_year) + ) + i = ind.dayofyear / length_year * 4 - 1 / 6 + else: + raise ValueError( + f"Interpolation is not supported for {self.dim}.{self.prop}." + ) + else: + if self.prop == "week": + i = da[self.dim].copy(data=ind.isocalendar().week).astype(int) + elif self.prop == "season": + i = da[self.dim].copy(data=ind.month % 12 // 3) + else: + i = getattr(ind, self.prop) - if interp and self.dim == "time" and self.prop == "month": - i = ind.month - 0.5 + ind.day / ind.days_in_month + if not np.issubdtype(i.dtype, np.integer): + raise ValueError( + f"Index {self.name} is not of type int (rather {i.dtype}), " + f"but {self.__class__.__name__} requires integer indexes." + ) xi = xr.DataArray( i, diff --git a/src/xclim/testing/helpers.py b/src/xclim/testing/helpers.py index 11fb0ac2e..63a3b34c5 100644 --- a/src/xclim/testing/helpers.py +++ b/src/xclim/testing/helpers.py @@ -190,7 +190,7 @@ def test_timeseries( units: str | None = None, freq: str = "D", as_dataset: bool = False, - cftime: bool = False, + calendar: str | None = None, ) -> xr.DataArray | xr.Dataset: """ Create a generic timeseries object based on pre-defined dictionaries of existing variables. @@ -217,8 +217,10 @@ def test_timeseries( xr.DataArray or xr.Dataset A DataArray or Dataset with time, lon and lat dimensions. 
""" - if cftime: - coords = xr.cftime_range(start, periods=len(values), freq=freq) + if calendar: + coords = xr.cftime_range( + start, periods=len(values), freq=freq, calendar=calendar + ) else: coords = pd.date_range(start, periods=len(values), freq=freq) diff --git a/tests/test_bootstrapping.py b/tests/test_bootstrapping.py index 484f7f163..fb98eaf34 100644 --- a/tests/test_bootstrapping.py +++ b/tests/test_bootstrapping.py @@ -23,30 +23,30 @@ class Test_bootstrap: @pytest.mark.slow @pytest.mark.parametrize("use_dask", [True, False]) @pytest.mark.parametrize( - "var,p,index,freq, cftime", + "var,p,index,freq, calendar", ( - ["tas", 98, tg90p, "MS", False], - ["tasmin", 98, tn90p, "YS-JUL", False], - ["tasmax", 98, tx90p, "QS-APR", False], - ["tasmax", 98, tx90p, "QS-APR", True], - ["tasmin", 2, tn10p, "MS", False], - ["tasmax", 2, tx10p, "YS", False], - ["tasmax", 2, tx10p, "YS", True], - ["tas", 2, tg10p, "MS", False], - ["tasmax", 98, warm_spell_duration_index, "MS", False], - ["tasmin", 2, cold_spell_duration_index, "MS", False], - ["pr", 99, days_over_precip_thresh, "MS", False], - ["pr", 98, fraction_over_precip_thresh, "MS", False], - ["pr", 98, fraction_over_precip_thresh, "MS", True], + ["tas", 98, tg90p, "MS", None], + ["tasmin", 98, tn90p, "YS-JUL", None], + ["tasmax", 98, tx90p, "QS-APR", None], + ["tasmax", 98, tx90p, "QS-APR", "standard"], + ["tasmin", 2, tn10p, "MS", None], + ["tasmax", 2, tx10p, "YS", None], + ["tasmax", 2, tx10p, "YS", "standard"], + ["tas", 2, tg10p, "MS", None], + ["tasmax", 98, warm_spell_duration_index, "MS", None], + ["tasmin", 2, cold_spell_duration_index, "MS", None], + ["pr", 99, days_over_precip_thresh, "MS", None], + ["pr", 98, fraction_over_precip_thresh, "MS", None], + ["pr", 98, fraction_over_precip_thresh, "MS", "standard"], ), ) - def test_bootstrap(self, var, p, index, freq, cftime, use_dask, random): + def test_bootstrap(self, var, p, index, freq, calendar, use_dask, random): # -- GIVEN arr = self.ar1( alpha=0.8, n=int(4 * 365.25), random=random, positive_values=(var == "pr") ) climate_var = _test_timeseries( - arr, start="2000-01-01", variable=var, cftime=cftime + arr, start="2000-01-01", variable=var, calendar=calendar ) if use_dask: climate_var = climate_var.chunk(dict(time=50)) diff --git a/tests/test_calendar.py b/tests/test_calendar.py index 58b8d5857..7d40e75c9 100644 --- a/tests/test_calendar.py +++ b/tests/test_calendar.py @@ -482,13 +482,13 @@ def test_convert_doy(): ) -@pytest.mark.parametrize("cftime", [True, False]) +@pytest.mark.parametrize("calendar", ["standard", None]) @pytest.mark.parametrize( "w,s,m,f,ss", [(30, 10, None, "YS", 0), (3, 1, None, "QS-DEC", 60), (6, None, None, "MS", 0)], ) -def test_stack_periods(tas_series, cftime, w, s, m, f, ss): - da = tas_series(np.arange(365 * 50), start="2000-01-01", cftime=cftime) +def test_stack_periods(tas_series, calendar, w, s, m, f, ss): + da = tas_series(np.arange(365 * 50), start="2000-01-01", calendar=calendar) da_stck = stack_periods( da, window=w, stride=s, min_length=m, freq=f, align_days=False @@ -502,7 +502,7 @@ def test_stack_periods(tas_series, cftime, w, s, m, f, ss): def test_stack_periods_special(tas_series): - da = tas_series(np.arange(365 * 48 + 12), cftime=True, start="2004-01-01") + da = tas_series(np.arange(365 * 48 + 12), calendar="standard", start="2004-01-01") with pytest.raises(ValueError, match="unaligned day-of-year"): stack_periods(da) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 8ac4e262f..beee15c25 100644 --- 
a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -179,10 +179,10 @@ def test_resample_map_passthrough(tas_series): assert not uses_dask(out) -@pytest.mark.parametrize("cftime", [False, True]) -def test_make_hourly_temperature(tasmax_series, tasmin_series, cftime): - tasmax = tasmax_series(np.array([20]), units="degC", cftime=cftime) - tasmin = tasmin_series(np.array([0]), units="degC", cftime=cftime).expand_dims( +@pytest.mark.parametrize("calendar", [None, "standard"]) +def test_make_hourly_temperature(tasmax_series, tasmin_series, calendar): + tasmax = tasmax_series(np.array([20]), units="degC", calendar="standard") + tasmin = tasmin_series(np.array([0]), units="degC", calendar=calendar).expand_dims( lat=[0] ) diff --git a/tests/test_sdba/test_base.py b/tests/test_sdba/test_base.py index 255ea061e..5619b5d47 100644 --- a/tests/test_sdba/test_base.py +++ b/tests/test_sdba/test_base.py @@ -50,11 +50,21 @@ def test_grouper_group(tas_series, group, window, nvals): @pytest.mark.parametrize( - "group,interp,val90", - [("time", False, True), ("time.month", False, 3), ("time.month", True, 3.5)], + "group,interp,val90,calendar", + [ + ("time", False, True, None), + ("time.month", False, 3, None), + ("time.month", True, 3.5, None), + ("time.season", False, 1, None), + ("time.season", True, 0.8278688524590164, None), + ("time.month", True, 3.533333333333333, "360_day"), + ("time.month", True, 3.533333333333333, "noleap"), + ("time.season", True, 0.8444444444444444, "360_day"), + ("time.season", True, 0.8305936073059361, "noleap"), + ], ) -def test_grouper_get_index(tas_series, group, interp, val90): - tas = tas_series(np.ones(366), start="2000-01-01") +def test_grouper_get_index(tas_series, group, interp, val90, calendar): + tas = tas_series(np.ones(366), start="2000-01-01", calendar=calendar) grouper = Grouper(group) indx = grouper.get_index(tas, interp=interp) # 90 is March 31st From 26a9b0fa725546b597affd618d0832ce1a214f97 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 11 Dec 2024 12:17:17 +0100 Subject: [PATCH 02/21] Update Changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index ffd7ab3fb..01085ecb9 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,6 +21,7 @@ Bug fixes * Fixed an issue with ``nimbus`` that was causing URL path components to be improperly joined. (:pull:`1997`). * `base_kws_vars` in `MBCn` is now copied inside the `adjust` function so that in-place changes do not change the dict globally. (:pull:`1999`). * Fixed a bug in the logic of ``xclim.testing.utils.load_registry`` that impacted the ability to load a `registry.txt` from a non-default repository. (:pull:`2001`). +* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didnt correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`). 
Internal changes ^^^^^^^^^^^^^^^^ From 86613139fb4180065656620778ce0091b66652a3 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 11 Dec 2024 12:24:01 +0100 Subject: [PATCH 03/21] Fix test_make_hourly_temps --- tests/test_helpers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index beee15c25..064227052 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -181,7 +181,7 @@ def test_resample_map_passthrough(tas_series): @pytest.mark.parametrize("calendar", [None, "standard"]) def test_make_hourly_temperature(tasmax_series, tasmin_series, calendar): - tasmax = tasmax_series(np.array([20]), units="degC", calendar="standard") + tasmax = tasmax_series(np.array([20]), units="degC", calendar=calendar) tasmin = tasmin_series(np.array([0]), units="degC", calendar=calendar).expand_dims( lat=[0] ) From 2c18c222e680e4b18e397091a8da1165bcf1d92b Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 11 Dec 2024 12:28:44 +0100 Subject: [PATCH 04/21] Fix linting --- src/xclim/testing/helpers.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/xclim/testing/helpers.py b/src/xclim/testing/helpers.py index 63a3b34c5..1c813f8b6 100644 --- a/src/xclim/testing/helpers.py +++ b/src/xclim/testing/helpers.py @@ -209,8 +209,8 @@ def test_timeseries( The frequency of the time dimension. Default is daily/"D". as_dataset : bool Whether to return a Dataset or a DataArray. Default is False. - cftime : bool - Whether to use cftime or not. Default is False. + calendar : str or None + Whether to use a calendar. If a calendar is provided, cftime is used. Returns ------- From 18687b36b90fc010391dfcbec1a9c11e85a4cf47 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 11 Dec 2024 16:59:53 +0100 Subject: [PATCH 05/21] Add comment explaining the factor 1/6 --- src/xclim/sdba/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index a41fcca9d..fbbcaa740 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -307,6 +307,9 @@ def get_index( if calendar == "360_day" else 365 + (0 if calendar == "noleap" else ind.is_leap_year) ) + # This is assuming that seasons have the same length. The factor 1/6 comes from the fact that + # the first season is shifted by 1 month the but the middle of the season is shifted in the other direction + # by half a month so -(1/12-1/24)*4 = -1/6 i = ind.dayofyear / length_year * 4 - 1 / 6 else: raise ValueError( From ca2fc5f7ff9571a2d13c0b78ec3b6fbde101c839 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 11 Dec 2024 17:05:15 +0100 Subject: [PATCH 06/21] Fix typo in Changelog --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 01085ecb9..be434c3ce 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -21,7 +21,7 @@ Bug fixes * Fixed an issue with ``nimbus`` that was causing URL path components to be improperly joined. (:pull:`1997`). * `base_kws_vars` in `MBCn` is now copied inside the `adjust` function so that in-place changes do not change the dict globally. (:pull:`1999`). * Fixed a bug in the logic of ``xclim.testing.utils.load_registry`` that impacted the ability to load a `registry.txt` from a non-default repository. (:pull:`2001`). -* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didnt correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`). 
+* Fixed a bug in ``xclim.sdba.Grouper.get_index`` that didn't correctly interpolate seasonal values (:issue:`2014`, :pull:`2019`). Internal changes ^^^^^^^^^^^^^^^^ From c111abdfcacbcf1ff6a6df4307a193fbccf40b21 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Thu, 12 Dec 2024 23:44:36 +0100 Subject: [PATCH 07/21] Change order of params for pip install --- docs/installation.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/installation.rst b/docs/installation.rst index d83d5d7fd..d045b99fa 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -160,4 +160,4 @@ To create a conda environment including `xclim`'s dependencies and several optio conda env create -n my_xclim_env python=3.10 --file=environment.yml conda activate my_xclim_env - (my_xclim_env) python -m pip install -e --no-deps . + (my_xclim_env) python -m pip install --no-deps -e . From 1046620e0d065a2e1f67a2bf621b9c2d06b0ecdf Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Tue, 7 Jan 2025 16:17:50 +0100 Subject: [PATCH 08/21] Fix xp creation when extrapolating on quantiles --- src/xclim/sdba/base.py | 4 ++-- src/xclim/sdba/nbutils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index fbbcaa740..352d170ae 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -297,10 +297,10 @@ def get_index( ind = da.indexes[self.dim] if interp and self.dim == "time": - if interp and self.dim == "time" and self.prop == "month": + if self.prop == "month": i = ind.month - 0.5 + ind.day / ind.days_in_month - elif interp and self.dim == "time" and self.prop == "season": + elif self.prop == "season": calendar = ind.calendar if hasattr(ind, "calendar") else "standard" length_year = ( 360 diff --git a/src/xclim/sdba/nbutils.py b/src/xclim/sdba/nbutils.py index 28fad5647..e96c6e5cd 100644 --- a/src/xclim/sdba/nbutils.py +++ b/src/xclim/sdba/nbutils.py @@ -393,7 +393,7 @@ def _extrapolate_on_quantiles( Arguments are the same as _interp_on_quantiles_2D. """ bnds = _first_and_last_nonnull(oldx) - xp = np.arange(bnds.shape[0]) + xp = oldg[:, 0] toolow = newx < np.interp(newg, xp, bnds[:, 0]) toohigh = newx > np.interp(newg, xp, bnds[:, 1]) if method == "constant": From 0aebbb40508f170128abe547f9f5836f700ff714 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Tue, 14 Jan 2025 18:00:26 +0100 Subject: [PATCH 09/21] Add warning log when using linear interp in qdm_adjust --- src/xclim/sdba/_adjustment.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/xclim/sdba/_adjustment.py b/src/xclim/sdba/_adjustment.py index 4398cec0e..791f7f294 100644 --- a/src/xclim/sdba/_adjustment.py +++ b/src/xclim/sdba/_adjustment.py @@ -587,6 +587,13 @@ def qdm_adjust(ds: xr.Dataset, *, group, interp, extrapolation, kind) -> xr.Data sim : Data to adjust. """ sim_q = group.apply(u.rank, ds.sim, main_only=True, pct=True) + if group.prop and interp != "nearest": + import logging + + logging.getLogger("xclim").warning( + f"Using a {interp} interrpolation with QuantileDeltaMapping might create sudden jumps between different" + " groups. 
See discussion https://github.com/Ouranosinc/xclim/discussions/2048 for more information " + ) af = u.interp_on_quantiles( sim_q, ds.quantiles, From b93ae6d40e80a77a5a574fd7eed5c9aa86851192 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 15 Jan 2025 11:23:42 +0100 Subject: [PATCH 10/21] Use warnings library instead of logging.warning --- src/xclim/sdba/_adjustment.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/xclim/sdba/_adjustment.py b/src/xclim/sdba/_adjustment.py index 791f7f294..291659e12 100644 --- a/src/xclim/sdba/_adjustment.py +++ b/src/xclim/sdba/_adjustment.py @@ -8,6 +8,7 @@ from __future__ import annotations +import warnings from collections.abc import Callable, Sequence import numpy as np @@ -588,12 +589,12 @@ def qdm_adjust(ds: xr.Dataset, *, group, interp, extrapolation, kind) -> xr.Data """ sim_q = group.apply(u.rank, ds.sim, main_only=True, pct=True) if group.prop and interp != "nearest": - import logging - logging.getLogger("xclim").warning( - f"Using a {interp} interrpolation with QuantileDeltaMapping might create sudden jumps between different" - " groups. See discussion https://github.com/Ouranosinc/xclim/discussions/2048 for more information " + msg = ( + f"Using a {interp} interpolation with QuantileDeltaMapping might create sudden jumps between different" + " groups. See discussion https://github.com/Ouranosinc/xclim/discussions/2048 for more information." ) + warnings.warn(msg) af = u.interp_on_quantiles( sim_q, ds.quantiles, From 90d834509953a0b16c6455be99a9ae38a9376de6 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Thu, 16 Jan 2025 10:45:08 +0100 Subject: [PATCH 11/21] Fix QDM with 360_day calendar --- src/xclim/sdba/base.py | 7 ++++--- src/xclim/sdba/utils.py | 5 +++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index 352d170ae..094ebcca5 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -179,7 +179,9 @@ def prop_name(self): """Create a significant name for the grouping.""" return "year" if self.prop == "group" else self.prop - def get_coordinate(self, ds: xr.Dataset | None = None) -> xr.DataArray: + def get_coordinate( + self, ds: xr.Dataset | xr.DataArray | None = None + ) -> xr.DataArray: """Return the coordinate as in the output of group.apply. Currently, only implemented for groupings with prop == `month` or `dayofyear`. 
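The signature change above lets get_coordinate receive the array being adjusted, so the dayofyear coordinate can follow that array's calendar (the utils.py hunk of this commit passes `newx` through). A minimal sketch of why the calendar matters, assuming only numpy and xarray; it is illustrative only and not part of the patch:

    import numpy as np
    import xarray as xr

    # In a 360_day calendar the day of year never exceeds 360, so the "dayofyear"
    # coordinate used to expand the quantiles must stop at 360 rather than 365.
    time_360 = xr.cftime_range("2000-01-01", periods=720, freq="D", calendar="360_day")
    da = xr.DataArray(np.arange(720), coords={"time": time_360}, dims="time")
    print(int(da.time.dt.dayofyear.max()))  # 360
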
@@ -196,8 +198,7 @@ def get_coordinate(self, ds: xr.Dataset | None = None) -> xr.DataArray: if ds is not None: cal = get_calendar(ds, dim=self.dim) mdoy = max( - xr.coding.calendar_ops._days_in_year(yr, cal) - for yr in np.unique(ds[self.dim].dt.year) + xr.coding.calendar_ops._days_in_year(ds[self.dim].dt.year, cal) ) else: mdoy = 365 diff --git a/src/xclim/sdba/utils.py b/src/xclim/sdba/utils.py index f8c2a6d2f..3a40a4858 100644 --- a/src/xclim/sdba/utils.py +++ b/src/xclim/sdba/utils.py @@ -475,9 +475,10 @@ def interp_on_quantiles( return out if prop not in xq.dims: - xq = xq.expand_dims({prop: group.get_coordinate()}) + prop_coords = group.get_coordinate(newx) + xq = xq.expand_dims({prop: prop_coords}) if prop not in yq.dims: - yq = yq.expand_dims({prop: group.get_coordinate()}) + yq = yq.expand_dims({prop: prop_coords}) # Adding the cyclic bounds fails for string coordinates like seasons # That's why we map the seasons to integers From 835ee1658efadbf99af41ca65bebeb9ee26a7c9b Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Thu, 16 Jan 2025 13:14:00 +0100 Subject: [PATCH 12/21] Enable seasonal scaling --- src/xclim/sdba/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xclim/sdba/utils.py b/src/xclim/sdba/utils.py index 3a40a4858..81f56f8a9 100644 --- a/src/xclim/sdba/utils.py +++ b/src/xclim/sdba/utils.py @@ -219,6 +219,8 @@ def broadcast( sel.update({group.prop: group.get_index(x, interp=interp != "nearest")}) if sel: + if group.prop == "season": + grouped = grouped.assign_coords(season=map_season_to_int(grouped.season)) # Extract the correct mean factor for each time step. if interp == "nearest": # Interpolate both the time group and the quantile. grouped = grouped.sel(sel, method="nearest") From f410e0a091777da35bddb09bd9a2df2a71f3cf1d Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 22 Jan 2025 09:35:35 +0100 Subject: [PATCH 13/21] Aulemahal comments --- src/xclim/sdba/utils.py | 3 +-- src/xclim/testing/helpers.py | 7 +++++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/xclim/sdba/utils.py b/src/xclim/sdba/utils.py index 81f56f8a9..45a1c000b 100644 --- a/src/xclim/sdba/utils.py +++ b/src/xclim/sdba/utils.py @@ -475,9 +475,8 @@ def interp_on_quantiles( output_dtypes=[yq.dtype], ) return out - + prop_coords = group.get_coordinate(newx) if prop not in xq.dims: - prop_coords = group.get_coordinate(newx) xq = xq.expand_dims({prop: prop_coords}) if prop not in yq.dims: yq = yq.expand_dims({prop: prop_coords}) diff --git a/src/xclim/testing/helpers.py b/src/xclim/testing/helpers.py index c44cfcbed..7b1df4a90 100644 --- a/src/xclim/testing/helpers.py +++ b/src/xclim/testing/helpers.py @@ -189,6 +189,7 @@ def test_timeseries( units: str | None = None, freq: str = "D", as_dataset: bool = False, + cftime: bool = False, calendar: str | None = None, ) -> xr.DataArray | xr.Dataset: """ @@ -208,6 +209,8 @@ def test_timeseries( The frequency of the time dimension. Default is daily/"D". as_dataset : bool Whether to return a Dataset or a DataArray. Default is False. + cftime : bool + Whether to use cftime or not. Default is False. calendar : str or None Whether to use a calendar. If a calendar is provided, cftime is used. @@ -216,9 +219,9 @@ def test_timeseries( xr.DataArray or xr.Dataset A DataArray or Dataset with time, lon and lat dimensions. 
""" - if calendar: + if calendar or cftime: coords = xr.cftime_range( - start, periods=len(values), freq=freq, calendar=calendar + start, periods=len(values), freq=freq, calendar=calendar or "standard" ) else: coords = pd.date_range(start, periods=len(values), freq=freq) From 5a4a31f2016f6e57dfa8e5412b27b3f76e24822e Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 22 Jan 2025 09:39:23 +0100 Subject: [PATCH 14/21] Add to changelog --- CHANGELOG.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index df31dce22..0a203a4cd 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,6 +16,7 @@ New features and enhancements * New function ``ensemble.partition.general_partition`` (:pull:`2035`) * Added a new ``xclim.indices.generic.bivariate_count_occurrences`` function to count instances where operations and performed and validated for two variables. (:pull:`2030`). * `xclim` now tracks energy usage and carbon emissions ("last run", "average", and "total") during CI workflows using the `eco-ci-energy-estimation` GitHub Action. (:pull:`2046`). +* ``xclim.testing.helpers.test_timeseries`` (:pull:`2019`) now accepts a `calendar` argument thats forwarded to ``xr.cftime_range``. Internal changes ^^^^^^^^^^^^^^^^ From 10a4c91cc2dceb24d6d3a615e62ba029e30e6b31 Mon Sep 17 00:00:00 2001 From: Trevor James Smith <10819524+Zeitsperre@users.noreply.github.com> Date: Wed, 22 Jan 2025 09:51:22 -0500 Subject: [PATCH 15/21] Update CHANGELOG.rst --- CHANGELOG.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 0a203a4cd..81d881886 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -16,7 +16,7 @@ New features and enhancements * New function ``ensemble.partition.general_partition`` (:pull:`2035`) * Added a new ``xclim.indices.generic.bivariate_count_occurrences`` function to count instances where operations and performed and validated for two variables. (:pull:`2030`). * `xclim` now tracks energy usage and carbon emissions ("last run", "average", and "total") during CI workflows using the `eco-ci-energy-estimation` GitHub Action. (:pull:`2046`). -* ``xclim.testing.helpers.test_timeseries`` (:pull:`2019`) now accepts a `calendar` argument thats forwarded to ``xr.cftime_range``. +* ``xclim.testing.helpers.test_timeseries`` now accepts a `calendar` argument that is forwarded to ``xr.cftime_range``. (:pull:`2019`). Internal changes ^^^^^^^^^^^^^^^^ From 4c8a4688afcc42569ba6f05123a459e822b4eb76 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Mon, 27 Jan 2025 10:59:44 +0100 Subject: [PATCH 16/21] Fix real_data test by allowing linear interp for dayofyear --- src/xclim/sdba/base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index 0c2d3f626..4baa9edf0 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -311,6 +311,8 @@ def get_index( # the first season is shifted by 1 month the but the middle of the season is shifted in the other direction # by half a month so -(1/12-1/24)*4 = -1/6 i = ind.dayofyear / length_year * 4 - 1 / 6 + elif self.prop == "dayofyear": + i = ind.dayofyear else: raise ValueError( f"Interpolation is not supported for {self.dim}.{self.prop}." 
From d501a98b1539437016931db8ff408e5bdd3a310c Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Tue, 28 Jan 2025 16:28:59 +0100 Subject: [PATCH 17/21] Revert allowing linear interp with dayofyear, change test to use nearest --- src/xclim/sdba/base.py | 2 -- tests/test_sdba/test_adjustment.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/xclim/sdba/base.py b/src/xclim/sdba/base.py index 4baa9edf0..0c2d3f626 100644 --- a/src/xclim/sdba/base.py +++ b/src/xclim/sdba/base.py @@ -311,8 +311,6 @@ def get_index( # the first season is shifted by 1 month the but the middle of the season is shifted in the other direction # by half a month so -(1/12-1/24)*4 = -1/6 i = ind.dayofyear / length_year * 4 - 1 / 6 - elif self.prop == "dayofyear": - i = ind.dayofyear else: raise ValueError( f"Interpolation is not supported for {self.dim}.{self.prop}." diff --git a/tests/test_sdba/test_adjustment.py b/tests/test_sdba/test_adjustment.py index d9a348ea7..670b24cd0 100644 --- a/tests/test_sdba/test_adjustment.py +++ b/tests/test_sdba/test_adjustment.py @@ -842,7 +842,7 @@ def test_real_data(self, open_dataset): ref, hist, group=Grouper("time.dayofyear", window=31), nquantiles=quantiles ) - scen = EQM.adjust(hist, interp="linear", extrapolation="constant") + scen = EQM.adjust(hist, extrapolation="constant") EX = ExtremeValues.train(ref, hist, cluster_thresh="1 mm/day", q_thresh=0.97) new_scen = EX.adjust(scen, hist, frac=0.000000001) From 4ff05c47ea995ebcce545001010f38a9e0297e51 Mon Sep 17 00:00:00 2001 From: saschahofmann Date: Wed, 29 Jan 2025 16:31:32 +0100 Subject: [PATCH 18/21] Use nearest intero in sdba-advanced EQM adjustment --- docs/notebooks/sdba-advanced.ipynb | 206 ++++++++++------------------- 1 file changed, 68 insertions(+), 138 deletions(-) diff --git a/docs/notebooks/sdba-advanced.ipynb b/docs/notebooks/sdba-advanced.ipynb index 1acd134dc..e1216a185 100644 --- a/docs/notebooks/sdba-advanced.ipynb +++ b/docs/notebooks/sdba-advanced.ipynb @@ -16,20 +16,17 @@ "\n", "Some `xclim.sdba`-specific tips:\n", "\n", - "* Most adjustment method will need to perform operation on the whole `time` coordinate, so it is best to optimize chunking along the other dimensions. This is often different from how public data is shared, where more universal 3D chunks are used.\n", + "- Most adjustment method will need to perform operation on the whole `time` coordinate, so it is best to optimize chunking along the other dimensions. This is often different from how public data is shared, where more universal 3D chunks are used.\n", "\n", - " Chunking of outputs can be controlled in xarray's [to_netcdf](https://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html?highlight=to_netcdf#xarray.Dataset.to_netcdf). We also suggest using [Zarr](https://zarr.readthedocs.io/en/stable/) files. According to [its creators](https://ui.adsabs.harvard.edu/abs/2018AGUFMIN33A..06A/abstract), `zarr` stores should give better performances, especially because of their better ability for parallel I/O. See [Dataset.to_zarr](https://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_zarr.html?highlight=to_zarr#xarray.Dataset.to_zarr) and this useful [rechunking package](https://rechunker.readthedocs.io).\n", + " Chunking of outputs can be controlled in xarray's [to_netcdf](https://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_netcdf.html?highlight=to_netcdf#xarray.Dataset.to_netcdf). We also suggest using [Zarr](https://zarr.readthedocs.io/en/stable/) files. 
According to [its creators](https://ui.adsabs.harvard.edu/abs/2018AGUFMIN33A..06A/abstract), `zarr` stores should give better performances, especially because of their better ability for parallel I/O. See [Dataset.to_zarr](https://xarray.pydata.org/en/stable/generated/xarray.Dataset.to_zarr.html?highlight=to_zarr#xarray.Dataset.to_zarr) and this useful [rechunking package](https://rechunker.readthedocs.io).\n", "\n", + "- One of the main bottleneck for adjustments with small groups is that dask needs to build and optimize an enormous task graph. This issue has been greatly reduced with xclim 0.27 and the use of `map_blocks` in the adjustment methods. However, not all adjustment methods use this optimized syntax.\n", "\n", - "* One of the main bottleneck for adjustments with small groups is that dask needs to build and optimize an enormous task graph. This issue has been greatly reduced with xclim 0.27 and the use of `map_blocks` in the adjustment methods. However, not all adjustment methods use this optimized syntax.\n", + " In order to help dask, one can split the processing in parts. For splitting training and adjustment, see [the section below](#Initializing-an-Adjustment-object-from-a-training-dataset).\n", "\n", - " In order to help dask, one can split the processing in parts. For splitting training and adjustment, see [the section below](#Initializing-an-Adjustment-object-from-a-training-dataset).\n", + "- Another massive bottleneck of parallelization of `xarray` is the thread-locking behaviour of some methods. It is quite difficult to isolate and avoid these locking instances, so one of the best workarounds is to use `dask` configurations with many _processes_ and few _threads_. The former do not share memory and thus are not impacted when a lock is activated from a thread in another worker. However, this adds many memory transfer operations and, by experience, reduces dask's ability to parallelize some pipelines. Such a dask Client is usually created with a large `n_workers` and a small `threads_per_worker`.\n", "\n", - "\n", - "* Another massive bottleneck of parallelization of `xarray` is the thread-locking behaviour of some methods. It is quite difficult to isolate and avoid these locking instances, so one of the best workarounds is to use `dask` configurations with many _processes_ and few _threads_. The former do not share memory and thus are not impacted when a lock is activated from a thread in another worker. However, this adds many memory transfer operations and, by experience, reduces dask's ability to parallelize some pipelines. Such a dask Client is usually created with a large `n_workers` and a small `threads_per_worker`.\n", - "\n", - "\n", - "* Sometimes, datasets have auxiliary coordinates (for example : lat / lon in a rotated pole dataset). Xarray handles these variables as data variables and will **not** load them if dask is used. However, in some operations, `xclim` or `xarray` will trigger access to those variables, triggering computations each time, since they are `dask`-based. To avoid this behaviour, one can load the coordinates, or simply remove them from the inputs." + "- Sometimes, datasets have auxiliary coordinates (for example : lat / lon in a rotated pole dataset). Xarray handles these variables as data variables and will **not** load them if dask is used. However, in some operations, `xclim` or `xarray` will trigger access to those variables, triggering computations each time, since they are `dask`-based. 
To avoid this behaviour, one can load the coordinates, or simply remove them from the inputs.\n" ] }, { @@ -42,7 +39,7 @@ "\n", "In xclim's implementation, the user can choose between local _constancy_ ($d=0$, local estimates are weighted averages) and local _linearity_ ($d=1$, local estimates are taken from linear regressions). Two weighting functions are currently implemented : \"tricube\" ($w(x) = (1 - x^3)^3$) and \"gaussian\" ($w(x) = e^{-x^2 / 2\\sigma^2}$). Finally, the number of Cleveland's _robustifying iterations_ is controllable through `niter`. After computing an estimate of $y(x)$, the weights are modulated by a function of the distance between the estimate and the points and the procedure is started over. These iterations are made to weaken the effect of outliers on the estimate.\n", "\n", - "The next example shows the application of the LOESS to daily temperature data. The black line and dot are the estimated $y$, outputs of the `sdba.loess.loess_smoothing` function, using local linear regression (passing $d = 1$), a window spanning 20% ($f = 0.2$) of the domain, the \"tricube\" weighting function and only one iteration. The red curve illustrates the weighting function on January 1st 2014, where the red circles are the nearest-neighbours used in the estimation." + "The next example shows the application of the LOESS to daily temperature data. The black line and dot are the estimated $y$, outputs of the `sdba.loess.loess_smoothing` function, using local linear regression (passing $d = 1$), a window spanning 20% ($f = 0.2$) of the domain, the \"tricube\" weighting function and only one iteration. The red curve illustrates the weighting function on January 1st 2014, where the red circles are the nearest-neighbours used in the estimation.\n" ] }, { @@ -99,9 +96,7 @@ "# Plot nearest neighbors and weighing function\n", "wax = ax.twinx()\n", "wax.plot(tas.time, weights, color=\"indianred\")\n", - "ax.plot(\n", - " tas.time, tas.where(tas * weights > 0), \"o\", color=\"lightcoral\", fillstyle=\"none\"\n", - ")\n", + "ax.plot(tas.time, tas.where(tas * weights > 0), \"o\", color=\"lightcoral\", fillstyle=\"none\")\n", "\n", "ax.plot(ti, ys[366], \"ko\")\n", "wax.set_ylabel(\"Weights\")\n", @@ -114,12 +109,11 @@ "source": [ "LOESS smoothing can suffer from heavy boundary effects. On the previous graph, we can associate the strange bend on the left end of the line to them. The next example shows a stronger case. Usually, $\\frac{f}{2}N$ points on each side should be discarded. On the other hand, LOESS has the advantage of always staying within the bounds of the data.\n", "\n", - "\n", "### LOESS Detrending\n", "\n", "In climate science, it can be used in the detrending process. `xclim` provides `sdba.detrending.LoessDetrend` in order to compute trend with the LOESS smoothing and remove them from timeseries.\n", "\n", - "First we create some toy data with a sinusoidal annual cycle, random noise and a linear temperature increase." + "First we create some toy data with a sinusoidal annual cycle, random noise and a linear temperature increase.\n" ] }, { @@ -147,7 +141,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Then we compute the trend on the data. Here, we compute on the whole timeseries (`group='time'`) with the parameters suggested above." + "Then we compute the trend on the data. 
Here, we compute on the whole timeseries (`group='time'`) with the parameters suggested above.\n" ] }, { @@ -184,7 +178,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As said earlier, this example shows how the Loess has strong boundary effects. It is recommended to remove the $\\frac{f}{2}\\cdot N$ outermost points on each side, as shown by the gray bars in the graph above." + "As said earlier, this example shows how the Loess has strong boundary effects. It is recommended to remove the $\\frac{f}{2}\\cdot N$ outermost points on each side, as shown by the gray bars in the graph above.\n" ] }, { @@ -193,7 +187,7 @@ "source": [ "## Initializing an Adjustment object from a training dataset\n", "\n", - "For large scale uses, when the training step deserves its own computation and write to disk, or simply when there are multiples `sim` to be adjusted with the same training, it is helpful to be able to instantiate the Adjustment objects from the training dataset itself. This trick relies on a global attribute \"adj_params\" set on the training dataset." + "For large scale uses, when the training step deserves its own computation and write to disk, or simply when there are multiples `sim` to be adjusted with the same training, it is helpful to be able to instantiate the Adjustment objects from the training dataset itself. This trick relies on a global attribute \"adj_params\" set on the training dataset.\n" ] }, { @@ -209,10 +203,7 @@ "t = xr.cftime_range(\"2000-01-01\", \"2030-12-31\", freq=\"D\", calendar=\"noleap\")\n", "ref = xr.DataArray(\n", " (\n", - " -20 * np.cos(2 * np.pi * t.dayofyear / 365)\n", - " + 2 * np.random.random_sample((t.size,))\n", - " + 273.15\n", - " + 0.1 * (t - t[0]).days / 365\n", + " -20 * np.cos(2 * np.pi * t.dayofyear / 365) + 2 * np.random.random_sample((t.size,)) + 273.15 + 0.1 * (t - t[0]).days / 365\n", " ), # \"warming\" of 1K per decade,\n", " dims=(\"time\",),\n", " coords={\"time\": t},\n", @@ -220,10 +211,7 @@ ")\n", "sim = xr.DataArray(\n", " (\n", - " -18 * np.cos(2 * np.pi * t.dayofyear / 365)\n", - " + 2 * np.random.random_sample((t.size,))\n", - " + 273.15\n", - " + 0.11 * (t - t[0]).days / 365\n", + " -18 * np.cos(2 * np.pi * t.dayofyear / 365) + 2 * np.random.random_sample((t.size,)) + 273.15 + 0.11 * (t - t[0]).days / 365\n", " ), # \"warming\" of 1.1K per decade\n", " dims=(\"time\",),\n", " coords={\"time\": t},\n", @@ -242,9 +230,7 @@ "source": [ "from xclim.sdba.adjustment import QuantileDeltaMapping\n", "\n", - "QDM = QuantileDeltaMapping.train(\n", - " ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\"\n", - ")\n", + "QDM = QuantileDeltaMapping.train(ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\")\n", "QDM" ] }, @@ -252,7 +238,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The trained `QDM` exposes the training data in the `ds` attribute, Here, we will write it to disk, read it back and initialize a new object from it. Notice the `adj_params` in the dataset, that has the same value as the repr string printed just above. Also, notice the `_xclim_adjustment` attribute that contains a JSON string, so we can rebuild the adjustment object later." + "The trained `QDM` exposes the training data in the `ds` attribute, Here, we will write it to disk, read it back and initialize a new object from it. Notice the `adj_params` in the dataset, that has the same value as the repr string printed just above. 
Also, notice the `_xclim_adjustment` attribute that contains a JSON string, so we can rebuild the adjustment object later.\n" ] }, { @@ -284,7 +270,7 @@ "source": [ "In the case above, creating a full object from the dataset doesn't make the most sense since we are in the same python session, with the \"old\" object still available. This method effective when we reload the training data in a different python session, say on another computer. **However, take note that there is no retro-compatibility insurance.** If the `QuantileDeltaMapping` class was to change in a new xclim version, one would not be able to create the new object from a dataset saved with the old one.\n", "\n", - "For the case where we stay in the same python session, it is still useful to trigger the dask computations. For small datasets, that could mean a simple `QDM.ds.load()`, but sometimes even the training data is too large to be full loaded in memory. In that case, we could also do:" + "For the case where we stay in the same python session, it is still useful to trigger the dask computations. For small datasets, that could mean a simple `QDM.ds.load()`, but sometimes even the training data is too large to be full loaded in memory. In that case, we could also do:\n" ] }, { @@ -319,7 +305,7 @@ "\n", "For the moment, this feature is still under construction and only a few `Adjustment` actually provide these extra outputs. Please open issues on the GitHub repo if you have needs or ideas of interesting diagnostic variables.\n", "\n", - "For example, `QDM.adjust` adds `sim_q`, which gives the quantile of each element of `sim` within its group." + "For example, `QDM.adjust` adds `sim_q`, which gives the quantile of each element of `sim` within its group.\n" ] }, { @@ -331,9 +317,7 @@ "from xclim import set_options\n", "\n", "with set_options(sdba_extra_output=True):\n", - " QDM = QuantileDeltaMapping.train(\n", - " ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\"\n", - " )\n", + " QDM = QuantileDeltaMapping.train(ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\")\n", " out = QDM.adjust(sim)\n", "\n", "out.sim_q" @@ -364,7 +348,7 @@ "\n", "In the following example, `QDM` is configured with `group=\"time.dayofyear\"` which will perform the adjustment for each day of year (doy) separately. When using `stack_periods` the extracted windows are all concatenated along the new `period` axis, and they all share the same time coordinate. As such, for the `doy` information to make sense, we must use a calendar with uniform year lengths. Otherwise, the `doy` values would shift one day at each leap year.\n", "\n", - "" + "\n" ] }, { @@ -373,9 +357,7 @@ "metadata": {}, "outputs": [], "source": [ - "QDM = QuantileDeltaMapping.train(\n", - " ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\"\n", - ")\n", + "QDM = QuantileDeltaMapping.train(ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\")\n", "\n", "scen_nowin = QDM.adjust(sim)" ] @@ -396,7 +378,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here, we retrieve the full timeseries (minus the lasy year that couldn't fit in any window)." + "Here, we retrieve the full timeseries (minus the lasy year that couldn't fit in any window).\n" ] }, { @@ -419,7 +401,7 @@ "\n", "We will transform the variables that need it to the additive space, adding some jitter in the process to avoid $log(0)$ situations. 
Then, we will stack the different variables into a single `DataArray`, allowing us to use `PrincipalComponents` in a multi-variate way. Following the PCA, a simple quantile-mapping method is used, both adjustment acting on the residuals, while the mean of the simulated trend is adjusted on its own. Each step will be explained.\n", "\n", - "First, open the data, convert the calendar and the units. Because we will perform adjustments on \"dayofyear\" groups (with a window), keeping standard calendars results in an extra \"dayofyear\" with only a quarter of the data. It's usual to transform to a \"noleap\" calendar, which drops the 29th of February, as it only has a small impact on the data." + "First, open the data, convert the calendar and the units. Because we will perform adjustments on \"dayofyear\" groups (with a window), keeping standard calendars results in an extra \"dayofyear\" with only a quarter of the data. It's usual to transform to a \"noleap\" calendar, which drops the 29th of February, as it only has a small impact on the data.\n" ] }, { @@ -436,9 +418,7 @@ "\n", "group = sdba.Grouper(\"time.dayofyear\", window=31)\n", "\n", - "dref = convert_calendar(open_dataset(\"sdba/ahccd_1950-2013.nc\"), \"noleap\").sel(\n", - " time=slice(\"1981\", \"2010\")\n", - ")\n", + "dref = convert_calendar(open_dataset(\"sdba/ahccd_1950-2013.nc\"), \"noleap\").sel(time=slice(\"1981\", \"2010\"))\n", "dsim = open_dataset(\"sdba/CanESM2_1950-2100.nc\")\n", "\n", "dref = dref.assign(\n", @@ -452,13 +432,14 @@ "metadata": {}, "source": [ "### 1. Jitter, additive space transformation and variable stacking\n", + "\n", "Here, `tasmax` is already ready to be adjusted in an additive way, because all data points are far from the physical zero (0 K). This is not the case for `pr`, which is why we want to transform that variable to the additive space, to avoid splitting our workflow in two. For `pr` the \"log\" transformation is simply:\n", "\n", "$$ pr' = \\ln\\left(pr - b\\right) $$\n", "\n", "Where $b$ is the lower bound, here 0 mm/d. However, we could have exact zeros (0 mm/d) in the datasets, which will translate into $-\\infty$. To avoid this, we simply replace the smallest values by a random distribution of very small, but not problematic, values. In the following, all values below 0.1 mm/d are replaced by a uniform random distribution of values within the range (0, 0.1) mm/d (bounds excluded).\n", "\n", - "Finally, the variables are stacked together into a single `DataArray`." + "Finally, the variables are stacked together into a single `DataArray`.\n" ] }, { @@ -492,9 +473,10 @@ "metadata": {}, "source": [ "### 2. Get residuals and trends\n", - "The adjustment will be performed on residuals only. The adjusted timeseries `sim` will be detrended with the LOESS routine described above. Because of the short length of `ref` and `hist` and the potential boundary effects of using LOESS with them, we compute the 30-year mean. In other words, instead of _detrending_ inputs, we are _normalizing_ those inputs.\n", "\n", - "While the residuals are adjusted with `PrincipalComponents` and `EmpiricalQuantileMapping`, the trend of `sim` still needs to be offset according to the means of `ref` and `hist`. This is similar to what `DetrendedQuantileMapping` does. The offset step could have been done on the trend itself or at the end on `scen`, it doesn't really matter. We do it here because it keeps it close to where the `scaling` is computed." + "The adjustment will be performed on residuals only. 
The adjusted timeseries `sim` will be detrended with the LOESS routine described above. Because of the short length of `ref` and `hist` and the potential boundary effects of using LOESS with them, we compute the 30-year mean. In other words, instead of _detrending_ inputs, we are _normalizing_ those inputs.\n", + "\n", + "While the residuals are adjusted with `PrincipalComponents` and `EmpiricalQuantileMapping`, the trend of `sim` still needs to be offset according to the means of `ref` and `hist`. This is similar to what `DetrendedQuantileMapping` does. The offset step could have been done on the trend itself or at the end on `scen`, it doesn't really matter. We do it here because it keeps it close to where the `scaling` is computed.\n" ] }, { @@ -504,9 +486,7 @@ "outputs": [], "source": [ "ref_res, ref_norm = sdba.processing.normalize(ref, group=group, kind=\"+\")\n", - "hist_res, hist_norm = sdba.processing.normalize(\n", - " sim.sel(time=slice(\"1981\", \"2010\")), group=group, kind=\"+\"\n", - ")\n", + "hist_res, hist_norm = sdba.processing.normalize(sim.sel(time=slice(\"1981\", \"2010\")), group=group, kind=\"+\")\n", "scaling = sdba.utils.get_correction(hist_norm, ref_norm, kind=\"+\")" ] }, @@ -516,9 +496,7 @@ "metadata": {}, "outputs": [], "source": [ - "sim_scaled = sdba.utils.apply_correction(\n", - " sim, sdba.utils.broadcast(scaling, sim, group=group), kind=\"+\"\n", - ")\n", + "sim_scaled = sdba.utils.apply_correction(sim, sdba.utils.broadcast(scaling, sim, group=group), kind=\"+\")\n", "\n", "loess = sdba.detrending.LoessDetrend(group=group, f=0.2, d=0, kind=\"+\", niter=1)\n", "simfit = loess.fit(sim_scaled)\n", @@ -530,7 +508,8 @@ "metadata": {}, "source": [ "### 3. Adjustments\n", - "Following, Alavoine et Grenier (2022), we decided to perform the multivariate Principal Components adjustment first and then re-adjust with the simple Quantile-Mapping." + "\n", + "Following, Alavoine et Grenier (2022), we decided to perform the multivariate Principal Components adjustment first and then re-adjust with the simple Quantile-Mapping.\n" ] }, { @@ -539,9 +518,7 @@ "metadata": {}, "outputs": [], "source": [ - "PCA = sdba.adjustment.PrincipalComponents.train(\n", - " ref_res, hist_res, group=group, crd_dim=\"multivar\", best_orientation=\"simple\"\n", - ")\n", + "PCA = sdba.adjustment.PrincipalComponents.train(ref_res, hist_res, group=group, crd_dim=\"multivar\", best_orientation=\"simple\")\n", "\n", "scen1_res = PCA.adjust(sim_res)" ] @@ -560,7 +537,7 @@ " kind=\"+\",\n", ")\n", "\n", - "scen2_res = EQM.adjust(scen1_res, interp=\"linear\", extrapolation=\"constant\")" + "scen2_res = EQM.adjust(scen1_res, extrapolation=\"constant\")" ] }, { @@ -568,7 +545,8 @@ "metadata": {}, "source": [ "### 4. Re-trend and transform back to the physical space\n", - "Add back the trend (which includes the scaling), unstack the variables to a dataset and transform `pr` back to the physical space. All functions have conserved and handled the attributes, so we don't need to repeat the additive space bounds. The annual cycle of both variables on the reference period in Vancouver is plotted to confirm the adjustment adds a positive effect." + "\n", + "Add back the trend (which includes the scaling), unstack the variables to a dataset and transform `pr` back to the physical space. All functions have conserved and handled the attributes, so we don't need to repeat the additive space bounds. 
The annual cycle of both variables on the reference period in Vancouver is plotted to confirm the adjustment adds a positive effect.\n" ] }, { @@ -588,15 +566,9 @@ "metadata": {}, "outputs": [], "source": [ - "dref.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", - " \"time.dayofyear\"\n", - ").mean().plot(label=\"obs\")\n", - "dsim.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", - " \"time.dayofyear\"\n", - ").mean().plot(label=\"raw\")\n", - "dscen.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", - " \"time.dayofyear\"\n", - ").mean().plot(label=\"scen\")\n", + "dref.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"obs\")\n", + "dsim.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"raw\")\n", + "dscen.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"scen\")\n", "plt.legend()" ] }, @@ -606,15 +578,9 @@ "metadata": {}, "outputs": [], "source": [ - "dref.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", - " \"time.dayofyear\"\n", - ").mean().plot(label=\"obs\")\n", - "dsim.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", - " \"time.dayofyear\"\n", - ").mean().plot(label=\"raw\")\n", - "dscen.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", - " \"time.dayofyear\"\n", - ").mean().plot(label=\"scen\")\n", + "dref.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"obs\")\n", + "dsim.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"raw\")\n", + "dscen.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"scen\")\n", "plt.legend()" ] }, @@ -622,14 +588,14 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Frequency adaption with a rolling window" + "# Frequency adaption with a rolling window\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In the previous example, we performed bias adjustment with a rolling window. Here we show how to include frequency adaptation (see `sdba.ipynb` for the simple case `group=\"time\"`). We first generate the same precipitation dataset used in `sdba.ipynb`" + "In the previous example, we performed bias adjustment with a rolling window. Here we show how to include frequency adaptation (see `sdba.ipynb` for the simple case `group=\"time\"`). 
We first generate the same precipitation dataset used in `sdba.ipynb`\n" ] }, { @@ -645,19 +611,11 @@ "\n", "vals = np.random.randint(0, 1000, size=(t.size,)) / 100\n", "vals_ref = (4 ** np.where(vals < 9, vals / 100, vals)) / 3e6\n", - "vals_sim = (\n", - " (1 + 0.1 * np.random.random_sample((t.size,)))\n", - " * (4 ** np.where(vals < 9.5, vals / 100, vals))\n", - " / 3e6\n", - ")\n", + "vals_sim = (1 + 0.1 * np.random.random_sample((t.size,))) * (4 ** np.where(vals < 9.5, vals / 100, vals)) / 3e6\n", "\n", - "pr_ref = xr.DataArray(\n", - " vals_ref, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"}\n", - ")\n", + "pr_ref = xr.DataArray(vals_ref, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"})\n", "pr_ref = pr_ref.sel(time=slice(\"2000\", \"2015\"))\n", - "pr_sim = xr.DataArray(\n", - " vals_sim, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"}\n", - ")\n", + "pr_sim = xr.DataArray(vals_sim, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"})\n", "pr_hist = pr_sim.sel(time=slice(\"2000\", \"2015\"))" ] }, @@ -665,7 +623,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Bias adjustment on a rolling window can be performed in the same way as shown in `sdba.ipynb`, but instead of being a single string precising the time grouping (e.g. `time.month`), the `group` argument is built with `sdba.Grouper` function" + "Bias adjustment on a rolling window can be performed in the same way as shown in `sdba.ipynb`, but instead of being a single string precising the time grouping (e.g. `time.month`), the `group` argument is built with `sdba.Grouper` function\n" ] }, { @@ -680,12 +638,8 @@ "from xclim import sdba\n", "\n", "group = sdba.Grouper(\"time.dayofyear\", window=31)\n", - "hist_ad, pth, dP0 = sdba.processing.adapt_freq(\n", - " pr_ref, pr_hist, thresh=\"0.05 mm d-1\", group=group\n", - ")\n", - "QM_ad = sdba.EmpiricalQuantileMapping.train(\n", - " pr_ref, hist_ad, nquantiles=15, kind=\"*\", group=group\n", - ")\n", + "hist_ad, pth, dP0 = sdba.processing.adapt_freq(pr_ref, pr_hist, thresh=\"0.05 mm d-1\", group=group)\n", + "QM_ad = sdba.EmpiricalQuantileMapping.train(pr_ref, hist_ad, nquantiles=15, kind=\"*\", group=group)\n", "scen_ad = QM_ad.adjust(pr_sim)\n", "\n", "pr_ref.sel(time=\"2010\").plot(alpha=0.9, label=\"Reference\")\n", @@ -698,9 +652,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "In the figure above, `scen` occasionally has small peaks where `sim` is 0, indicating that there are more \"dry days\" (days with almost no precipitation) in `hist` than in `ref`. The frequency-adaptation [Themeßl et al. (2010)](https://doi.org/10.1007/s10584-011-0224-4) performed in the step above only worked partially. \n", + "In the figure above, `scen` occasionally has small peaks where `sim` is 0, indicating that there are more \"dry days\" (days with almost no precipitation) in `hist` than in `ref`. The frequency-adaptation [Themeßl et al. (2010)](https://doi.org/10.1007/s10584-011-0224-4) performed in the step above only worked partially.\n", "\n", - "The reason for this is the following. The first step above combines precipitations in 365 overlapping blocks of 31 days * Y years, one block for each day of the year. Each block is adapted, and the 16th day-of-year slice (at the center of the block) is assigned to the corresponding day-of-year in the adapted dataset `hist_ad`. 
As we proceed to the training, we re-form those 31 days * Y years blocks, but this step does not invert the last one: There can still be more zeroes in the simulation than in the reference. \n", + "The reason for this is the following. The first step above combines precipitations in 365 overlapping blocks of 31 days _ Y years, one block for each day of the year. Each block is adapted, and the 16th day-of-year slice (at the center of the block) is assigned to the corresponding day-of-year in the adapted dataset `hist_ad`. As we proceed to the training, we re-form those 31 days _ Y years blocks, but this step does not invert the last one: There can still be more zeroes in the simulation than in the reference.\n", "\n", "To alleviate this issue, another way of proceeding is to perform a frequency adaptation on the blocks, and then use the same blocks in the training step, as we show below.\n" ] @@ -738,9 +692,9 @@ "\n", "It can be useful to perform diagnostic tests on adjusted simulations to assess if the bias correction method is working properly, or to compare two different bias correction techniques.\n", "\n", - "A diagnostic test includes calculations of a property (mean, 20-year return value, annual cycle amplitude, ...) on the simulation and on the scenario (adjusted simulation), then a measure (bias, relative bias, ratio, ...) of the difference. Usually, the property collapse the time dimension of the simulation/scenario and returns one value by grid point.\n", + "A diagnostic test includes calculations of a property (mean, 20-year return value, annual cycle amplitude, ...) on the simulation and on the scenario (adjusted simulation), then a measure (bias, relative bias, ratio, ...) of the difference. Usually, the property collapse the time dimension of the simulation/scenario and returns one value by grid point.\n", "\n", - "You'll find those in ``xclim.sdba.properties`` and ``xclim.sdba.measures``, where they are implemented as special subclasses of xclim's ``Indicator``, which means they can be worked with the same way as conventional indicators (used in YAML modules for example)." 
+ "You'll find those in `xclim.sdba.properties` and `xclim.sdba.measures`, where they are implemented as special subclasses of xclim's `Indicator`, which means they can be worked with the same way as conventional indicators (used in YAML modules for example).\n" ] }, { @@ -757,20 +711,14 @@ "# load test data\n", "hist = open_dataset(\"sdba/CanESM2_1950-2100.nc\").sel(time=slice(\"1950\", \"1980\")).tasmax\n", "ref = open_dataset(\"sdba/nrcan_1950-2013.nc\").sel(time=slice(\"1950\", \"1980\")).tasmax\n", - "sim = (\n", - " open_dataset(\"sdba/CanESM2_1950-2100.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax\n", - ") # biased\n", + "sim = open_dataset(\"sdba/CanESM2_1950-2100.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax # biased\n", "\n", "# learn the bias in historical simulation compared to reference\n", - "QM = sdba.EmpiricalQuantileMapping.train(\n", - " ref, hist, nquantiles=50, group=\"time\", kind=\"+\"\n", - ")\n", + "QM = sdba.EmpiricalQuantileMapping.train(ref, hist, nquantiles=50, group=\"time\", kind=\"+\")\n", "\n", "# correct the bias in the future\n", "scen = QM.adjust(sim, extrapolation=\"constant\", interp=\"nearest\")\n", - "ref_future = (\n", - " open_dataset(\"sdba/nrcan_1950-2013.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax\n", - ") # truth\n", + "ref_future = open_dataset(\"sdba/nrcan_1950-2013.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax # truth\n", "\n", "plt.figure(figsize=(15, 5))\n", "lw = 0.3\n", @@ -789,18 +737,12 @@ "outputs": [], "source": [ "# calculate the mean warm Spell Length Distribution\n", - "sim_prop = sdba.properties.spell_length_distribution(\n", - " da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\"\n", - ")\n", + "sim_prop = sdba.properties.spell_length_distribution(da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\")\n", "\n", "\n", - "scen_prop = sdba.properties.spell_length_distribution(\n", - " da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\"\n", - ")\n", + "scen_prop = sdba.properties.spell_length_distribution(da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\")\n", "\n", - "ref_prop = sdba.properties.spell_length_distribution(\n", - " da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\"\n", - ")\n", + "ref_prop = sdba.properties.spell_length_distribution(da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\")\n", "# measure the difference between the prediction and the reference with an absolute bias of the properties\n", "measure_sim = sdba.measures.bias(sim_prop, ref_prop)\n", "measure_scen = sdba.measures.bias(scen_prop, ref_prop)\n", @@ -808,9 +750,7 @@ "plt.figure(figsize=(5, 3))\n", "plt.plot(measure_sim.location, measure_sim.values, \".\", label=\"biased model (sim)\")\n", "plt.plot(measure_scen.location, measure_scen.values, \".\", label=\"adjusted model (scen)\")\n", - "plt.title(\n", - " \"Bias of the mean of the warm spell \\n length distribution compared to observations\"\n", - ")\n", + "plt.title(\"Bias of the mean of the warm spell \\n length distribution compared to observations\")\n", "plt.legend()\n", "plt.ylim(-2.5, 2.5)" ] @@ -820,7 +760,7 @@ "metadata": {}, "source": [ "It is possible the change the 'group' of the property from 'time' to 'time.season' or 'time.month'.\n", - " This will return 4 or 12 values per grid point, respectively." 
+ "This will return 4 or 12 values per grid point, respectively.\n" ] }, { @@ -830,17 +770,11 @@ "outputs": [], "source": [ "# calculate the mean warm Spell Length Distribution\n", - "sim_prop = sdba.properties.spell_length_distribution(\n", - " da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\"\n", - ")\n", + "sim_prop = sdba.properties.spell_length_distribution(da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\")\n", "\n", - "scen_prop = sdba.properties.spell_length_distribution(\n", - " da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\"\n", - ")\n", + "scen_prop = sdba.properties.spell_length_distribution(da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\")\n", "\n", - "ref_prop = sdba.properties.spell_length_distribution(\n", - " da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\"\n", - ")\n", + "ref_prop = sdba.properties.spell_length_distribution(da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\")\n", "# Properties are often associated with the same measures. This correspondence is implemented in xclim:\n", "measure = sdba.properties.spell_length_distribution.get_measure()\n", "measure_sim = measure(sim_prop, ref_prop)\n", @@ -849,9 +783,7 @@ "fig, axs = plt.subplots(2, 2, figsize=(9, 6))\n", "axs = axs.ravel()\n", "for i in range(4):\n", - " axs[i].plot(\n", - " measure_sim.location, measure_sim.values[:, i], \".\", label=\"biased model (sim)\"\n", - " )\n", + " axs[i].plot(measure_sim.location, measure_sim.values[:, i], \".\", label=\"biased model (sim)\")\n", " axs[i].plot(\n", " measure_scen.location,\n", " measure_scen.isel(season=i).values,\n", @@ -861,9 +793,7 @@ " axs[i].set_title(measure_scen.season.values[i])\n", " axs[i].legend(loc=\"lower right\")\n", " axs[i].set_ylim(-2.5, 2.5)\n", - "fig.suptitle(\n", - " \"Bias of the mean of the warm spell length distribution compared to observations\"\n", - ")\n", + "fig.suptitle(\"Bias of the mean of the warm spell length distribution compared to observations\")\n", "plt.tight_layout()" ] } From cf45fd87ae1ef9c47b9432ce54baee87a902e7fe Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Jan 2025 15:32:53 +0000 Subject: [PATCH 19/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- docs/notebooks/sdba-advanced.ipynb | 136 ++++++++++++++++++++++------- 1 file changed, 103 insertions(+), 33 deletions(-) diff --git a/docs/notebooks/sdba-advanced.ipynb b/docs/notebooks/sdba-advanced.ipynb index e1216a185..3e55fe3c8 100644 --- a/docs/notebooks/sdba-advanced.ipynb +++ b/docs/notebooks/sdba-advanced.ipynb @@ -96,7 +96,9 @@ "# Plot nearest neighbors and weighing function\n", "wax = ax.twinx()\n", "wax.plot(tas.time, weights, color=\"indianred\")\n", - "ax.plot(tas.time, tas.where(tas * weights > 0), \"o\", color=\"lightcoral\", fillstyle=\"none\")\n", + "ax.plot(\n", + " tas.time, tas.where(tas * weights > 0), \"o\", color=\"lightcoral\", fillstyle=\"none\"\n", + ")\n", "\n", "ax.plot(ti, ys[366], \"ko\")\n", "wax.set_ylabel(\"Weights\")\n", @@ -203,7 +205,10 @@ "t = xr.cftime_range(\"2000-01-01\", \"2030-12-31\", freq=\"D\", calendar=\"noleap\")\n", "ref = xr.DataArray(\n", " (\n", - " -20 * np.cos(2 * np.pi * t.dayofyear / 365) + 2 * np.random.random_sample((t.size,)) + 273.15 + 0.1 * (t - t[0]).days / 365\n", + " -20 * np.cos(2 * np.pi * t.dayofyear / 365)\n", 
+ " + 2 * np.random.random_sample((t.size,))\n", + " + 273.15\n", + " + 0.1 * (t - t[0]).days / 365\n", " ), # \"warming\" of 1K per decade,\n", " dims=(\"time\",),\n", " coords={\"time\": t},\n", @@ -211,7 +216,10 @@ ")\n", "sim = xr.DataArray(\n", " (\n", - " -18 * np.cos(2 * np.pi * t.dayofyear / 365) + 2 * np.random.random_sample((t.size,)) + 273.15 + 0.11 * (t - t[0]).days / 365\n", + " -18 * np.cos(2 * np.pi * t.dayofyear / 365)\n", + " + 2 * np.random.random_sample((t.size,))\n", + " + 273.15\n", + " + 0.11 * (t - t[0]).days / 365\n", " ), # \"warming\" of 1.1K per decade\n", " dims=(\"time\",),\n", " coords={\"time\": t},\n", @@ -230,7 +238,9 @@ "source": [ "from xclim.sdba.adjustment import QuantileDeltaMapping\n", "\n", - "QDM = QuantileDeltaMapping.train(ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\")\n", + "QDM = QuantileDeltaMapping.train(\n", + " ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\"\n", + ")\n", "QDM" ] }, @@ -317,7 +327,9 @@ "from xclim import set_options\n", "\n", "with set_options(sdba_extra_output=True):\n", - " QDM = QuantileDeltaMapping.train(ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\")\n", + " QDM = QuantileDeltaMapping.train(\n", + " ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\"\n", + " )\n", " out = QDM.adjust(sim)\n", "\n", "out.sim_q" @@ -357,7 +369,9 @@ "metadata": {}, "outputs": [], "source": [ - "QDM = QuantileDeltaMapping.train(ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\")\n", + "QDM = QuantileDeltaMapping.train(\n", + " ref, hist, nquantiles=15, kind=\"+\", group=\"time.dayofyear\"\n", + ")\n", "\n", "scen_nowin = QDM.adjust(sim)" ] @@ -418,7 +432,9 @@ "\n", "group = sdba.Grouper(\"time.dayofyear\", window=31)\n", "\n", - "dref = convert_calendar(open_dataset(\"sdba/ahccd_1950-2013.nc\"), \"noleap\").sel(time=slice(\"1981\", \"2010\"))\n", + "dref = convert_calendar(open_dataset(\"sdba/ahccd_1950-2013.nc\"), \"noleap\").sel(\n", + " time=slice(\"1981\", \"2010\")\n", + ")\n", "dsim = open_dataset(\"sdba/CanESM2_1950-2100.nc\")\n", "\n", "dref = dref.assign(\n", @@ -486,7 +502,9 @@ "outputs": [], "source": [ "ref_res, ref_norm = sdba.processing.normalize(ref, group=group, kind=\"+\")\n", - "hist_res, hist_norm = sdba.processing.normalize(sim.sel(time=slice(\"1981\", \"2010\")), group=group, kind=\"+\")\n", + "hist_res, hist_norm = sdba.processing.normalize(\n", + " sim.sel(time=slice(\"1981\", \"2010\")), group=group, kind=\"+\"\n", + ")\n", "scaling = sdba.utils.get_correction(hist_norm, ref_norm, kind=\"+\")" ] }, @@ -496,7 +514,9 @@ "metadata": {}, "outputs": [], "source": [ - "sim_scaled = sdba.utils.apply_correction(sim, sdba.utils.broadcast(scaling, sim, group=group), kind=\"+\")\n", + "sim_scaled = sdba.utils.apply_correction(\n", + " sim, sdba.utils.broadcast(scaling, sim, group=group), kind=\"+\"\n", + ")\n", "\n", "loess = sdba.detrending.LoessDetrend(group=group, f=0.2, d=0, kind=\"+\", niter=1)\n", "simfit = loess.fit(sim_scaled)\n", @@ -518,7 +538,9 @@ "metadata": {}, "outputs": [], "source": [ - "PCA = sdba.adjustment.PrincipalComponents.train(ref_res, hist_res, group=group, crd_dim=\"multivar\", best_orientation=\"simple\")\n", + "PCA = sdba.adjustment.PrincipalComponents.train(\n", + " ref_res, hist_res, group=group, crd_dim=\"multivar\", best_orientation=\"simple\"\n", + ")\n", "\n", "scen1_res = PCA.adjust(sim_res)" ] @@ -566,9 +588,15 @@ "metadata": {}, "outputs": [], "source": [ - "dref.tasmax.sel(time=slice(\"1981\", \"2010\"), 
location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"obs\")\n", - "dsim.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"raw\")\n", - "dscen.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"scen\")\n", + "dref.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", + " \"time.dayofyear\"\n", + ").mean().plot(label=\"obs\")\n", + "dsim.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", + " \"time.dayofyear\"\n", + ").mean().plot(label=\"raw\")\n", + "dscen.tasmax.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", + " \"time.dayofyear\"\n", + ").mean().plot(label=\"scen\")\n", "plt.legend()" ] }, @@ -578,9 +606,15 @@ "metadata": {}, "outputs": [], "source": [ - "dref.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"obs\")\n", - "dsim.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"raw\")\n", - "dscen.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\"time.dayofyear\").mean().plot(label=\"scen\")\n", + "dref.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", + " \"time.dayofyear\"\n", + ").mean().plot(label=\"obs\")\n", + "dsim.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", + " \"time.dayofyear\"\n", + ").mean().plot(label=\"raw\")\n", + "dscen.pr.sel(time=slice(\"1981\", \"2010\"), location=\"Vancouver\").groupby(\n", + " \"time.dayofyear\"\n", + ").mean().plot(label=\"scen\")\n", "plt.legend()" ] }, @@ -611,11 +645,19 @@ "\n", "vals = np.random.randint(0, 1000, size=(t.size,)) / 100\n", "vals_ref = (4 ** np.where(vals < 9, vals / 100, vals)) / 3e6\n", - "vals_sim = (1 + 0.1 * np.random.random_sample((t.size,))) * (4 ** np.where(vals < 9.5, vals / 100, vals)) / 3e6\n", + "vals_sim = (\n", + " (1 + 0.1 * np.random.random_sample((t.size,)))\n", + " * (4 ** np.where(vals < 9.5, vals / 100, vals))\n", + " / 3e6\n", + ")\n", "\n", - "pr_ref = xr.DataArray(vals_ref, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"})\n", + "pr_ref = xr.DataArray(\n", + " vals_ref, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"}\n", + ")\n", "pr_ref = pr_ref.sel(time=slice(\"2000\", \"2015\"))\n", - "pr_sim = xr.DataArray(vals_sim, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"})\n", + "pr_sim = xr.DataArray(\n", + " vals_sim, coords={\"time\": t}, dims=(\"time\",), attrs={\"units\": \"mm/day\"}\n", + ")\n", "pr_hist = pr_sim.sel(time=slice(\"2000\", \"2015\"))" ] }, @@ -638,8 +680,12 @@ "from xclim import sdba\n", "\n", "group = sdba.Grouper(\"time.dayofyear\", window=31)\n", - "hist_ad, pth, dP0 = sdba.processing.adapt_freq(pr_ref, pr_hist, thresh=\"0.05 mm d-1\", group=group)\n", - "QM_ad = sdba.EmpiricalQuantileMapping.train(pr_ref, hist_ad, nquantiles=15, kind=\"*\", group=group)\n", + "hist_ad, pth, dP0 = sdba.processing.adapt_freq(\n", + " pr_ref, pr_hist, thresh=\"0.05 mm d-1\", group=group\n", + ")\n", + "QM_ad = sdba.EmpiricalQuantileMapping.train(\n", + " pr_ref, hist_ad, nquantiles=15, kind=\"*\", group=group\n", + ")\n", "scen_ad = QM_ad.adjust(pr_sim)\n", "\n", "pr_ref.sel(time=\"2010\").plot(alpha=0.9, label=\"Reference\")\n", @@ -711,14 +757,20 @@ "# load test data\n", "hist = 
open_dataset(\"sdba/CanESM2_1950-2100.nc\").sel(time=slice(\"1950\", \"1980\")).tasmax\n", "ref = open_dataset(\"sdba/nrcan_1950-2013.nc\").sel(time=slice(\"1950\", \"1980\")).tasmax\n", - "sim = open_dataset(\"sdba/CanESM2_1950-2100.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax # biased\n", + "sim = (\n", + " open_dataset(\"sdba/CanESM2_1950-2100.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax\n", + ") # biased\n", "\n", "# learn the bias in historical simulation compared to reference\n", - "QM = sdba.EmpiricalQuantileMapping.train(ref, hist, nquantiles=50, group=\"time\", kind=\"+\")\n", + "QM = sdba.EmpiricalQuantileMapping.train(\n", + " ref, hist, nquantiles=50, group=\"time\", kind=\"+\"\n", + ")\n", "\n", "# correct the bias in the future\n", "scen = QM.adjust(sim, extrapolation=\"constant\", interp=\"nearest\")\n", - "ref_future = open_dataset(\"sdba/nrcan_1950-2013.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax # truth\n", + "ref_future = (\n", + " open_dataset(\"sdba/nrcan_1950-2013.nc\").sel(time=slice(\"1980\", \"2010\")).tasmax\n", + ") # truth\n", "\n", "plt.figure(figsize=(15, 5))\n", "lw = 0.3\n", @@ -737,12 +789,18 @@ "outputs": [], "source": [ "# calculate the mean warm Spell Length Distribution\n", - "sim_prop = sdba.properties.spell_length_distribution(da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\")\n", + "sim_prop = sdba.properties.spell_length_distribution(\n", + " da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\"\n", + ")\n", "\n", "\n", - "scen_prop = sdba.properties.spell_length_distribution(da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\")\n", + "scen_prop = sdba.properties.spell_length_distribution(\n", + " da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\"\n", + ")\n", "\n", - "ref_prop = sdba.properties.spell_length_distribution(da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\")\n", + "ref_prop = sdba.properties.spell_length_distribution(\n", + " da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time\"\n", + ")\n", "# measure the difference between the prediction and the reference with an absolute bias of the properties\n", "measure_sim = sdba.measures.bias(sim_prop, ref_prop)\n", "measure_scen = sdba.measures.bias(scen_prop, ref_prop)\n", @@ -750,7 +808,9 @@ "plt.figure(figsize=(5, 3))\n", "plt.plot(measure_sim.location, measure_sim.values, \".\", label=\"biased model (sim)\")\n", "plt.plot(measure_scen.location, measure_scen.values, \".\", label=\"adjusted model (scen)\")\n", - "plt.title(\"Bias of the mean of the warm spell \\n length distribution compared to observations\")\n", + "plt.title(\n", + " \"Bias of the mean of the warm spell \\n length distribution compared to observations\"\n", + ")\n", "plt.legend()\n", "plt.ylim(-2.5, 2.5)" ] @@ -770,11 +830,17 @@ "outputs": [], "source": [ "# calculate the mean warm Spell Length Distribution\n", - "sim_prop = sdba.properties.spell_length_distribution(da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\")\n", + "sim_prop = sdba.properties.spell_length_distribution(\n", + " da=sim, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\"\n", + ")\n", "\n", - "scen_prop = sdba.properties.spell_length_distribution(da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\")\n", + "scen_prop = sdba.properties.spell_length_distribution(\n", + " da=scen, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\"\n", + ")\n", "\n", - 
"ref_prop = sdba.properties.spell_length_distribution(da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\")\n", + "ref_prop = sdba.properties.spell_length_distribution(\n", + " da=ref_future, thresh=\"28 degC\", op=\">\", stat=\"mean\", group=\"time.season\"\n", + ")\n", "# Properties are often associated with the same measures. This correspondence is implemented in xclim:\n", "measure = sdba.properties.spell_length_distribution.get_measure()\n", "measure_sim = measure(sim_prop, ref_prop)\n", @@ -783,7 +849,9 @@ "fig, axs = plt.subplots(2, 2, figsize=(9, 6))\n", "axs = axs.ravel()\n", "for i in range(4):\n", - " axs[i].plot(measure_sim.location, measure_sim.values[:, i], \".\", label=\"biased model (sim)\")\n", + " axs[i].plot(\n", + " measure_sim.location, measure_sim.values[:, i], \".\", label=\"biased model (sim)\"\n", + " )\n", " axs[i].plot(\n", " measure_scen.location,\n", " measure_scen.isel(season=i).values,\n", @@ -793,7 +861,9 @@ " axs[i].set_title(measure_scen.season.values[i])\n", " axs[i].legend(loc=\"lower right\")\n", " axs[i].set_ylim(-2.5, 2.5)\n", - "fig.suptitle(\"Bias of the mean of the warm spell length distribution compared to observations\")\n", + "fig.suptitle(\n", + " \"Bias of the mean of the warm spell length distribution compared to observations\"\n", + ")\n", "plt.tight_layout()" ] } From 2849533e2ab305681fdf1ec5229f4795a2e5c5ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=89ric=20Dupuis?= <71575674+coxipi@users.noreply.github.com> Date: Tue, 4 Feb 2025 10:07:47 -0500 Subject: [PATCH 20/21] remove warning msg for quantile interp --- src/xclim/sdba/_adjustment.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/xclim/sdba/_adjustment.py b/src/xclim/sdba/_adjustment.py index 291659e12..f1ef5772b 100644 --- a/src/xclim/sdba/_adjustment.py +++ b/src/xclim/sdba/_adjustment.py @@ -588,13 +588,6 @@ def qdm_adjust(ds: xr.Dataset, *, group, interp, extrapolation, kind) -> xr.Data sim : Data to adjust. """ sim_q = group.apply(u.rank, ds.sim, main_only=True, pct=True) - if group.prop and interp != "nearest": - - msg = ( - f"Using a {interp} interpolation with QuantileDeltaMapping might create sudden jumps between different" - " groups. See discussion https://github.com/Ouranosinc/xclim/discussions/2048 for more information." - ) - warnings.warn(msg) af = u.interp_on_quantiles( sim_q, ds.quantiles, From f518971994a87763e1b6ddbbd22f2f4e642cace6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 4 Feb 2025 15:08:40 +0000 Subject: [PATCH 21/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/xclim/sdba/_adjustment.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/xclim/sdba/_adjustment.py b/src/xclim/sdba/_adjustment.py index f1ef5772b..4398cec0e 100644 --- a/src/xclim/sdba/_adjustment.py +++ b/src/xclim/sdba/_adjustment.py @@ -8,7 +8,6 @@ from __future__ import annotations -import warnings from collections.abc import Callable, Sequence import numpy as np