From 4a3ad2269b9d6b33ec028acd0b3cdb358663878c Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Fri, 19 Jan 2024 11:55:42 +0100 Subject: [PATCH] unify freq strings (independent of pd version) --- xarray/coding/cftime_offsets.py | 75 +++++++++++++++++++++-------- xarray/core/groupby.py | 4 +- xarray/tests/test_cftime_offsets.py | 17 +++---- xarray/tests/test_dataset.py | 2 +- xarray/tests/test_groupby.py | 6 +-- xarray/tests/test_missing.py | 2 +- xarray/tests/test_plot.py | 2 +- 7 files changed, 69 insertions(+), 39 deletions(-) diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py index 100f3b249d2..77094f4f2c7 100644 --- a/xarray/coding/cftime_offsets.py +++ b/xarray/coding/cftime_offsets.py @@ -746,7 +746,7 @@ def _emit_freq_deprecation_warning(deprecated_freq): emit_user_level_warning(message, FutureWarning) -def to_offset(freq): +def to_offset(freq, warn=True): """Convert a frequency string to the appropriate subclass of BaseCFTimeOffset.""" if isinstance(freq, BaseCFTimeOffset): @@ -758,7 +758,7 @@ def to_offset(freq): raise ValueError("Invalid frequency string provided") freq = freq_data["freq"] - if freq in _DEPRECATED_FREQUENICES: + if warn and freq in _DEPRECATED_FREQUENICES: _emit_freq_deprecation_warning(freq) multiples = freq_data["multiple"] multiples = 1 if multiples is None else int(multiples) @@ -1226,7 +1226,8 @@ def date_range( start=start, end=end, periods=periods, - freq=freq, + # TODO remove translation once requiring pandas >= 2.2 + freq=_new_to_legacy_freq(freq), tz=tz, normalize=normalize, name=name, @@ -1254,6 +1255,54 @@ def date_range( ) +def _new_to_legacy_freq(freq): + # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd + # frequencies, but older versions of pandas do not support these as + # frequency strings. Until xarray's minimum pandas version is 2.2 or above, + # we add logic to continue using the deprecated "M" and "Q" frequency + # strings in these circumstances. 
+ + # TODO: remove once requiring pandas >= 2.2 + + if Version(pd.__version__) < Version("2.2"): + freq_as_offset = to_offset(freq) + if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: + freq = freq.replace("ME", "M") + elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: + freq = freq.replace("QE", "Q") + elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: + freq = freq.replace("YS", "AS") + elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq: + # Check for and replace "Y-" instead of just "Y" to prevent + # corrupting anchored offsets that contain "Y" in the month + # abbreviation, e.g. "Y-MAY" -> "A-MAY". + freq = freq.replace("Y-", "A-") + + return freq + + +def _legacy_to_new_freq(freq): + # to avoid internal deprecation warnings when freq is determined using pandas < 2.2 + + # TODO: remove once requiring pandas >= 2.2 + + if Version(pd.__version__) < Version("2.2"): + freq_as_offset = to_offset(freq, warn=False) + if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq: + freq = freq.replace("M", "ME") + elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq: + freq = freq.replace("Q", "QE") + elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq: + freq = freq.replace("AS", "YS") + elif isinstance(freq_as_offset, YearEnd) and "Y-" not in freq: + # Check for and replace "A-" instead of just "A" to prevent + # corrupting anchored offsets that contain "A" in the month + # abbreviation, e.g. "A-MAY" -> "Y-MAY". + freq = freq.replace("A-", "Y-") + + return freq + + def date_range_like(source, calendar, use_cftime=None): """Generate a datetime array with the same frequency, start and end as another one, but in a different calendar. @@ -1298,24 +1347,8 @@ def date_range_like(source, calendar, use_cftime=None): "`date_range_like` was unable to generate a range as the source frequency was not inferable." 
) - # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd - # frequencies, but older versions of pandas do not support these as - # frequency strings. Until xarray's minimum pandas version is 2.2 or above, - # we add logic to continue using the deprecated "M" and "Q" frequency - # strings in these circumstances. - if Version(pd.__version__) < Version("2.2"): - freq_as_offset = to_offset(freq) - if isinstance(freq_as_offset, MonthEnd) and "ME" in freq: - freq = freq.replace("ME", "M") - elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq: - freq = freq.replace("QE", "Q") - elif isinstance(freq_as_offset, YearBegin) and "YS" in freq: - freq = freq.replace("YS", "AS") - elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq: - # Check for and replace "Y-" instead of just "Y" to prevent - # corrupting anchored offsets that contain "Y" in the month - # abbreviation, e.g. "Y-MAY" -> "A-MAY". - freq = freq.replace("Y-", "A-") + # TODO remove once requiring pandas >= 2.2 + freq = _legacy_to_new_freq(freq) use_cftime = _should_cftime_be_used(source, calendar, use_cftime) diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py index ebb488d42c9..fcc4dcb68fb 100644 --- a/xarray/core/groupby.py +++ b/xarray/core/groupby.py @@ -17,6 +17,7 @@ import numpy as np import pandas as pd +from xarray.coding.cftime_offsets import _new_to_legacy_freq from xarray.core import dtypes, duck_array_ops, nputils, ops from xarray.core._aggregations import ( DataArrayGroupByAggregations, @@ -532,7 +533,8 @@ def __post_init__(self) -> None: ) else: index_grouper = pd.Grouper( - freq=grouper.freq, + # TODO remove once requiring pandas >= 2.2 + freq=_new_to_legacy_freq(grouper.freq), closed=grouper.closed, label=grouper.label, origin=grouper.origin, diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py index 0ffcb5e8ab9..2aaae9ce11c 100644 --- a/xarray/tests/test_cftime_offsets.py +++ b/xarray/tests/test_cftime_offsets.py @@ 
-1313,7 +1313,7 @@ def test_calendar_year_length( assert len(result) == expected_number_of_days -@pytest.mark.parametrize("freq", ["Y", "M", "D"]) +@pytest.mark.parametrize("freq", ["Y", "ME", "D"]) def test_dayofweek_after_cftime_range(freq: str) -> None: result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek @@ -1392,29 +1392,24 @@ def test_date_range_errors() -> None: ) def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd): expected_xarray_freq = freq + expected_pandas_freq = freq # pandas changed what is returned for infer_freq in version 2.2. The # development version of xarray follows this, but we need to adapt this test # to still handle older versions of pandas. if Version(pd.__version__) < Version("2.2"): if "ME" in freq: - freq = freq.replace("ME", "M") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("ME", "M") elif "QE" in freq: - freq = freq.replace("QE", "Q") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("QE", "Q") elif "YS" in freq: - freq = freq.replace("YS", "AS") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("YS", "AS") elif "Y-" in freq: - freq = freq.replace("Y-", "A-") - expected_pandas_freq = freq + expected_pandas_freq = expected_pandas_freq.replace("Y-", "A-") elif "h" in freq: expected_pandas_freq = freq.replace("h", "H") else: raise ValueError(f"Test not implemented for freq {freq!r}") - else: - expected_pandas_freq = freq source = date_range(start, periods=12, freq=freq, calendar=cal_src) diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py index 664d108b89c..bd3e79d0ebe 100644 --- a/xarray/tests/test_dataset.py +++ b/xarray/tests/test_dataset.py @@ -6945,7 +6945,7 @@ def test_differentiate_datetime(dask) -> None: @pytest.mark.parametrize("dask", [True, False]) def test_differentiate_cftime(dask) -> 
None: rs = np.random.RandomState(42) - coord = xr.cftime_range("2000", periods=8, freq="2M") + coord = xr.cftime_range("2000", periods=8, freq="2ME") da = xr.DataArray( rs.randn(8, 6), diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py index e45d8ed0bef..72b64f3caae 100644 --- a/xarray/tests/test_groupby.py +++ b/xarray/tests/test_groupby.py @@ -1754,19 +1754,19 @@ def test_resample_doctest(self, use_cftime: bool) -> None: time=( "time", xr.date_range( - "2001-01-01", freq="M", periods=6, use_cftime=use_cftime + "2001-01-01", freq="ME", periods=6, use_cftime=use_cftime ), ), labels=("time", np.array(["a", "b", "c", "c", "b", "a"])), ), ) - actual = da.resample(time="3M").count() + actual = da.resample(time="3ME").count() expected = DataArray( [1, 3, 1], dims="time", coords={ "time": xr.date_range( - "2001-01-01", freq="3M", periods=3, use_cftime=use_cftime + "2001-01-01", freq="3ME", periods=3, use_cftime=use_cftime ) }, ) diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py index 45a649605f3..5dba4d91025 100644 --- a/xarray/tests/test_missing.py +++ b/xarray/tests/test_missing.py @@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar): @requires_cftime @pytest.mark.parametrize( - ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"]) + ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1ME", "1Y"]) ) def test_get_clean_interp_index_dt(cf_da, calendar, freq): """In the gregorian case, the index should be proportional to normal datetimes.""" diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py index 697db9c5e80..22aad173918 100644 --- a/xarray/tests/test_plot.py +++ b/xarray/tests/test_plot.py @@ -2955,7 +2955,7 @@ def setUp(self) -> None: """ # case for 1d array data = np.random.rand(4, 12) - time = xr.cftime_range(start="2017", periods=12, freq="1M", calendar="noleap") + time = xr.cftime_range(start="2017", periods=12, freq="1ME", 
calendar="noleap") darray = DataArray(data, dims=["x", "time"]) darray.coords["time"] = time