From 4a3ad2269b9d6b33ec028acd0b3cdb358663878c Mon Sep 17 00:00:00 2001
From: Mathias Hauser <mathias.hauser@env.ethz.ch>
Date: Fri, 19 Jan 2024 11:55:42 +0100
Subject: [PATCH] unify freq strings (independent of pd version)

---
 xarray/coding/cftime_offsets.py     | 75 +++++++++++++++++++++--------
 xarray/core/groupby.py              |  4 +-
 xarray/tests/test_cftime_offsets.py | 17 +++----
 xarray/tests/test_dataset.py        |  2 +-
 xarray/tests/test_groupby.py        |  6 +--
 xarray/tests/test_missing.py        |  2 +-
 xarray/tests/test_plot.py           |  2 +-
 7 files changed, 69 insertions(+), 39 deletions(-)

diff --git a/xarray/coding/cftime_offsets.py b/xarray/coding/cftime_offsets.py
index 100f3b249d2..77094f4f2c7 100644
--- a/xarray/coding/cftime_offsets.py
+++ b/xarray/coding/cftime_offsets.py
@@ -746,7 +746,7 @@ def _emit_freq_deprecation_warning(deprecated_freq):
     emit_user_level_warning(message, FutureWarning)
 
 
-def to_offset(freq):
+def to_offset(freq, warn=True):
     """Convert a frequency string to the appropriate subclass of
     BaseCFTimeOffset."""
     if isinstance(freq, BaseCFTimeOffset):
@@ -758,7 +758,7 @@ def to_offset(freq):
             raise ValueError("Invalid frequency string provided")
 
     freq = freq_data["freq"]
-    if freq in _DEPRECATED_FREQUENICES:
+    if warn and freq in _DEPRECATED_FREQUENICES:
         _emit_freq_deprecation_warning(freq)
     multiples = freq_data["multiple"]
     multiples = 1 if multiples is None else int(multiples)
@@ -1226,7 +1226,8 @@ def date_range(
                 start=start,
                 end=end,
                 periods=periods,
-                freq=freq,
+                # TODO remove translation once requiring pandas >= 2.2
+                freq=_new_to_legacy_freq(freq),
                 tz=tz,
                 normalize=normalize,
                 name=name,
@@ -1254,6 +1255,54 @@ def date_range(
     )
 
 
+def _new_to_legacy_freq(freq):
+    # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
+    # frequencies, but older versions of pandas do not support these as
+    # frequency strings.  Until xarray's minimum pandas version is 2.2 or above,
+    # we add logic to continue using the deprecated "M" and "Q" frequency
+    # strings in these circumstances.
+
+    # TODO: remove once requiring pandas >= 2.2
+
+    if Version(pd.__version__) < Version("2.2"):
+        freq_as_offset = to_offset(freq)
+        if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
+            freq = freq.replace("ME", "M")
+        elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
+            freq = freq.replace("QE", "Q")
+        elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
+            freq = freq.replace("YS", "AS")
+        elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq:
+            # Check for and replace "Y-" instead of just "Y" to prevent
+            # corrupting anchored offsets that contain "Y" in the month
+            # abbreviation, e.g. "Y-MAY" -> "A-MAY".
+            freq = freq.replace("Y-", "A-")
+
+    return freq
+
+
+def _legacy_to_new_freq(freq):
+    # to avoid internal deprecation warnings when freq is determined using pandas < 2.2
+
+    # TODO: remove once requiring pandas >= 2.2
+
+    if Version(pd.__version__) < Version("2.2"):
+        freq_as_offset = to_offset(freq, warn=False)
+        if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq:
+            freq = freq.replace("M", "ME")
+        elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq:
+            freq = freq.replace("Q", "QE")
+        elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq:
+            freq = freq.replace("AS", "YS")
+        elif isinstance(freq_as_offset, YearEnd) and "Y-" not in freq:
+            # Check for and replace "A-" instead of just "A" to prevent
+            # corrupting anchored offsets that contain "A" in the month
+            # abbreviation, e.g. "A-MAY" -> "Y-MAY".
+            freq = freq.replace("A-", "Y-")
+
+    return freq
+
+
 def date_range_like(source, calendar, use_cftime=None):
     """Generate a datetime array with the same frequency, start and end as
     another one, but in a different calendar.
@@ -1298,24 +1347,8 @@ def date_range_like(source, calendar, use_cftime=None):
             "`date_range_like` was unable to generate a range as the source frequency was not inferable."
         )
 
-    # xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
-    # frequencies, but older versions of pandas do not support these as
-    # frequency strings.  Until xarray's minimum pandas version is 2.2 or above,
-    # we add logic to continue using the deprecated "M" and "Q" frequency
-    # strings in these circumstances.
-    if Version(pd.__version__) < Version("2.2"):
-        freq_as_offset = to_offset(freq)
-        if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
-            freq = freq.replace("ME", "M")
-        elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
-            freq = freq.replace("QE", "Q")
-        elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
-            freq = freq.replace("YS", "AS")
-        elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq:
-            # Check for and replace "Y-" instead of just "Y" to prevent
-            # corrupting anchored offsets that contain "Y" in the month
-            # abbreviation, e.g. "Y-MAY" -> "A-MAY".
-            freq = freq.replace("Y-", "A-")
+    # TODO remove once requiring pandas >= 2.2
+    freq = _legacy_to_new_freq(freq)
 
     use_cftime = _should_cftime_be_used(source, calendar, use_cftime)
 
diff --git a/xarray/core/groupby.py b/xarray/core/groupby.py
index ebb488d42c9..fcc4dcb68fb 100644
--- a/xarray/core/groupby.py
+++ b/xarray/core/groupby.py
@@ -17,6 +17,7 @@
 import numpy as np
 import pandas as pd
 
+from xarray.coding.cftime_offsets import _new_to_legacy_freq
 from xarray.core import dtypes, duck_array_ops, nputils, ops
 from xarray.core._aggregations import (
     DataArrayGroupByAggregations,
@@ -532,7 +533,8 @@ def __post_init__(self) -> None:
             )
         else:
             index_grouper = pd.Grouper(
-                freq=grouper.freq,
+                # TODO remove once requiring pandas >= 2.2
+                freq=_new_to_legacy_freq(grouper.freq),
                 closed=grouper.closed,
                 label=grouper.label,
                 origin=grouper.origin,
diff --git a/xarray/tests/test_cftime_offsets.py b/xarray/tests/test_cftime_offsets.py
index 0ffcb5e8ab9..2aaae9ce11c 100644
--- a/xarray/tests/test_cftime_offsets.py
+++ b/xarray/tests/test_cftime_offsets.py
@@ -1313,7 +1313,7 @@ def test_calendar_year_length(
     assert len(result) == expected_number_of_days
 
 
-@pytest.mark.parametrize("freq", ["Y", "M", "D"])
+@pytest.mark.parametrize("freq", ["Y", "ME", "D"])
 def test_dayofweek_after_cftime_range(freq: str) -> None:
     result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek
     expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek
@@ -1392,29 +1392,24 @@ def test_date_range_errors() -> None:
 )
 def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd):
     expected_xarray_freq = freq
+    expected_pandas_freq = freq
 
     # pandas changed what is returned for infer_freq in version 2.2.  The
     # development version of xarray follows this, but we need to adapt this test
     # to still handle older versions of pandas.
     if Version(pd.__version__) < Version("2.2"):
         if "ME" in freq:
-            freq = freq.replace("ME", "M")
-            expected_pandas_freq = freq
+            expected_pandas_freq = expected_pandas_freq.replace("ME", "M")
         elif "QE" in freq:
-            freq = freq.replace("QE", "Q")
-            expected_pandas_freq = freq
+            expected_pandas_freq = expected_pandas_freq.replace("QE", "Q")
         elif "YS" in freq:
-            freq = freq.replace("YS", "AS")
-            expected_pandas_freq = freq
+            expected_pandas_freq = expected_pandas_freq.replace("YS", "AS")
         elif "Y-" in freq:
-            freq = freq.replace("Y-", "A-")
-            expected_pandas_freq = freq
+            expected_pandas_freq = expected_pandas_freq.replace("Y-", "A-")
         elif "h" in freq:
             expected_pandas_freq = freq.replace("h", "H")
         else:
             raise ValueError(f"Test not implemented for freq {freq!r}")
-    else:
-        expected_pandas_freq = freq
 
     source = date_range(start, periods=12, freq=freq, calendar=cal_src)
 
diff --git a/xarray/tests/test_dataset.py b/xarray/tests/test_dataset.py
index 664d108b89c..bd3e79d0ebe 100644
--- a/xarray/tests/test_dataset.py
+++ b/xarray/tests/test_dataset.py
@@ -6945,7 +6945,7 @@ def test_differentiate_datetime(dask) -> None:
 @pytest.mark.parametrize("dask", [True, False])
 def test_differentiate_cftime(dask) -> None:
     rs = np.random.RandomState(42)
-    coord = xr.cftime_range("2000", periods=8, freq="2M")
+    coord = xr.cftime_range("2000", periods=8, freq="2ME")
 
     da = xr.DataArray(
         rs.randn(8, 6),
diff --git a/xarray/tests/test_groupby.py b/xarray/tests/test_groupby.py
index e45d8ed0bef..72b64f3caae 100644
--- a/xarray/tests/test_groupby.py
+++ b/xarray/tests/test_groupby.py
@@ -1754,19 +1754,19 @@ def test_resample_doctest(self, use_cftime: bool) -> None:
                 time=(
                     "time",
                     xr.date_range(
-                        "2001-01-01", freq="M", periods=6, use_cftime=use_cftime
+                        "2001-01-01", freq="ME", periods=6, use_cftime=use_cftime
                     ),
                 ),
                 labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
             ),
         )
-        actual = da.resample(time="3M").count()
+        actual = da.resample(time="3ME").count()
         expected = DataArray(
             [1, 3, 1],
             dims="time",
             coords={
                 "time": xr.date_range(
-                    "2001-01-01", freq="3M", periods=3, use_cftime=use_cftime
+                    "2001-01-01", freq="3ME", periods=3, use_cftime=use_cftime
                 )
             },
         )
diff --git a/xarray/tests/test_missing.py b/xarray/tests/test_missing.py
index 45a649605f3..5dba4d91025 100644
--- a/xarray/tests/test_missing.py
+++ b/xarray/tests/test_missing.py
@@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar):
 
 @requires_cftime
 @pytest.mark.parametrize(
-    ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"])
+    ("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1ME", "1Y"])
 )
 def test_get_clean_interp_index_dt(cf_da, calendar, freq):
     """In the gregorian case, the index should be proportional to normal datetimes."""
diff --git a/xarray/tests/test_plot.py b/xarray/tests/test_plot.py
index 697db9c5e80..22aad173918 100644
--- a/xarray/tests/test_plot.py
+++ b/xarray/tests/test_plot.py
@@ -2955,7 +2955,7 @@ def setUp(self) -> None:
         """
         # case for 1d array
         data = np.random.rand(4, 12)
-        time = xr.cftime_range(start="2017", periods=12, freq="1M", calendar="noleap")
+        time = xr.cftime_range(start="2017", periods=12, freq="1ME", calendar="noleap")
         darray = DataArray(data, dims=["x", "time"])
         darray.coords["time"] = time