Skip to content

Commit

Permalink
unify freq strings (independent of pd version)
Browse files Browse the repository at this point in the history
  • Loading branch information
mathause committed Jan 19, 2024
1 parent 357a444 commit 4a3ad22
Show file tree
Hide file tree
Showing 7 changed files with 69 additions and 39 deletions.
75 changes: 54 additions & 21 deletions xarray/coding/cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -746,7 +746,7 @@ def _emit_freq_deprecation_warning(deprecated_freq):
emit_user_level_warning(message, FutureWarning)


def to_offset(freq):
def to_offset(freq, warn=True):
"""Convert a frequency string to the appropriate subclass of
BaseCFTimeOffset."""
if isinstance(freq, BaseCFTimeOffset):
Expand All @@ -758,7 +758,7 @@ def to_offset(freq):
raise ValueError("Invalid frequency string provided")

freq = freq_data["freq"]
if freq in _DEPRECATED_FREQUENICES:
if warn and freq in _DEPRECATED_FREQUENICES:
_emit_freq_deprecation_warning(freq)
multiples = freq_data["multiple"]
multiples = 1 if multiples is None else int(multiples)
Expand Down Expand Up @@ -1226,7 +1226,8 @@ def date_range(
start=start,
end=end,
periods=periods,
freq=freq,
# TODO remove translation once requiring pandas >= 2.2
freq=_new_to_legacy_freq(freq),
tz=tz,
normalize=normalize,
name=name,
Expand Down Expand Up @@ -1254,6 +1255,54 @@ def date_range(
)


def _new_to_legacy_freq(freq):
    """Translate a new-style frequency string into its pandas < 2.2 spelling.

    xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
    frequencies, but older versions of pandas do not support these as
    frequency strings. Until xarray's minimum pandas version is 2.2 or above,
    we add logic to continue using the deprecated "M" and "Q" frequency
    strings in these circumstances.

    Parameters
    ----------
    freq : str or other
        Frequency specification about to be handed to pandas.

    Returns
    -------
    The translated frequency string, or ``freq`` unchanged when no
    translation applies (pandas >= 2.2, a non-string or empty ``freq``, or a
    string that ``to_offset`` cannot parse).
    """
    # TODO: remove once requiring pandas >= 2.2

    # Only non-empty strings can carry an alias that needs rewriting; anything
    # else is left for pandas to interpret (or reject) on its own.
    if not isinstance(freq, str) or not freq:
        return freq

    if Version(pd.__version__) >= Version("2.2"):
        return freq

    try:
        freq_as_offset = to_offset(freq)
    except ValueError:
        # The string may be valid for pandas even though the cftime offset
        # parser does not know it; do not turn that into an error here.
        return freq

    if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
        freq = freq.replace("ME", "M")
    elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
        freq = freq.replace("QE", "Q")
    elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
        freq = freq.replace("YS", "AS")
    elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq:
        # Check for and replace "Y-" instead of just "Y" to prevent
        # corrupting anchored offsets that contain "Y" in the month
        # abbreviation, e.g. "Y-MAY" -> "A-MAY".
        freq = freq.replace("Y-", "A-")

    return freq


def _legacy_to_new_freq(freq):
    """Translate a pandas < 2.2 frequency string into the new-style spelling.

    Used to avoid internal deprecation warnings when ``freq`` was determined
    using pandas < 2.2, which still reports the legacy aliases ("M", "Q",
    "AS", "A-").

    Parameters
    ----------
    freq : str or other
        Frequency specification, typically as produced by pandas.

    Returns
    -------
    The translated frequency string, or ``freq`` unchanged when no
    translation applies (pandas >= 2.2, a non-string or empty ``freq``, or a
    string that ``to_offset`` cannot parse).
    """
    # TODO: remove once requiring pandas >= 2.2

    # Only non-empty strings can carry an alias that needs rewriting.
    if not isinstance(freq, str) or not freq:
        return freq

    if Version(pd.__version__) >= Version("2.2"):
        return freq

    try:
        # warn=False: the legacy spelling came from pandas, not from the user,
        # so a deprecation warning would be spurious.
        freq_as_offset = to_offset(freq, warn=False)
    except ValueError:
        # The string may be valid for pandas even though the cftime offset
        # parser does not know it; leave it untouched.
        return freq

    if isinstance(freq_as_offset, MonthEnd) and "ME" not in freq:
        freq = freq.replace("M", "ME")
    elif isinstance(freq_as_offset, QuarterEnd) and "QE" not in freq:
        freq = freq.replace("Q", "QE")
    elif isinstance(freq_as_offset, YearBegin) and "YS" not in freq:
        freq = freq.replace("AS", "YS")
    elif isinstance(freq_as_offset, YearEnd) and "Y-" not in freq:
        # Replace the anchored prefix "A-" rather than a bare "A" to prevent
        # corrupting anchored offsets that contain "A" in the month
        # abbreviation, e.g. "A-MAY" -> "Y-MAY" (not "Y-MYY").
        freq = freq.replace("A-", "Y-")

    return freq


def date_range_like(source, calendar, use_cftime=None):
"""Generate a datetime array with the same frequency, start and end as
another one, but in a different calendar.
Expand Down Expand Up @@ -1298,24 +1347,8 @@ def date_range_like(source, calendar, use_cftime=None):
"`date_range_like` was unable to generate a range as the source frequency was not inferable."
)

# xarray will now always return "ME" and "QE" for MonthEnd and QuarterEnd
# frequencies, but older versions of pandas do not support these as
# frequency strings. Until xarray's minimum pandas version is 2.2 or above,
# we add logic to continue using the deprecated "M" and "Q" frequency
# strings in these circumstances.
if Version(pd.__version__) < Version("2.2"):
freq_as_offset = to_offset(freq)
if isinstance(freq_as_offset, MonthEnd) and "ME" in freq:
freq = freq.replace("ME", "M")
elif isinstance(freq_as_offset, QuarterEnd) and "QE" in freq:
freq = freq.replace("QE", "Q")
elif isinstance(freq_as_offset, YearBegin) and "YS" in freq:
freq = freq.replace("YS", "AS")
elif isinstance(freq_as_offset, YearEnd) and "Y-" in freq:
# Check for and replace "Y-" instead of just "Y" to prevent
# corrupting anchored offsets that contain "Y" in the month
# abbreviation, e.g. "Y-MAY" -> "A-MAY".
freq = freq.replace("Y-", "A-")
# TODO remove once requiring pandas >= 2.2
freq = _legacy_to_new_freq(freq)

use_cftime = _should_cftime_be_used(source, calendar, use_cftime)

Expand Down
4 changes: 3 additions & 1 deletion xarray/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
import numpy as np
import pandas as pd

from xarray.coding.cftime_offsets import _new_to_legacy_freq
from xarray.core import dtypes, duck_array_ops, nputils, ops
from xarray.core._aggregations import (
DataArrayGroupByAggregations,
Expand Down Expand Up @@ -532,7 +533,8 @@ def __post_init__(self) -> None:
)
else:
index_grouper = pd.Grouper(
freq=grouper.freq,
# TODO remove once requiring pandas >= 2.2
freq=_new_to_legacy_freq(grouper.freq),
closed=grouper.closed,
label=grouper.label,
origin=grouper.origin,
Expand Down
17 changes: 6 additions & 11 deletions xarray/tests/test_cftime_offsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -1313,7 +1313,7 @@ def test_calendar_year_length(
assert len(result) == expected_number_of_days


@pytest.mark.parametrize("freq", ["Y", "M", "D"])
@pytest.mark.parametrize("freq", ["Y", "ME", "D"])
def test_dayofweek_after_cftime_range(freq: str) -> None:
result = cftime_range("2000-02-01", periods=3, freq=freq).dayofweek
expected = pd.date_range("2000-02-01", periods=3, freq=freq).dayofweek

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 all-but-dask

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9 min-all-deps

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.9

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.10 flaky

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.12

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / ubuntu-latest py3.11

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.9

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.12

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME

Check failure on line 1319 in xarray/tests/test_cftime_offsets.py

View workflow job for this annotation

GitHub Actions / macos-latest py3.11

test_dayofweek_after_cftime_range[ME] ValueError: Invalid frequency: ME
Expand Down Expand Up @@ -1392,29 +1392,24 @@ def test_date_range_errors() -> None:
)
def test_date_range_like(start, freq, cal_src, cal_tgt, use_cftime, exp0, exp_pd):
expected_xarray_freq = freq
expected_pandas_freq = freq

# pandas changed what is returned for infer_freq in version 2.2. The
# development version of xarray follows this, but we need to adapt this test
# to still handle older versions of pandas.
if Version(pd.__version__) < Version("2.2"):
if "ME" in freq:
freq = freq.replace("ME", "M")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("ME", "M")
elif "QE" in freq:
freq = freq.replace("QE", "Q")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("QE", "Q")
elif "YS" in freq:
freq = freq.replace("YS", "AS")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("YS", "AS")
elif "Y-" in freq:
freq = freq.replace("Y-", "A-")
expected_pandas_freq = freq
expected_pandas_freq = expected_pandas_freq.replace("Y-", "A-")
elif "h" in freq:
expected_pandas_freq = freq.replace("h", "H")
else:
raise ValueError(f"Test not implemented for freq {freq!r}")
else:
expected_pandas_freq = freq

source = date_range(start, periods=12, freq=freq, calendar=cal_src)

Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -6945,7 +6945,7 @@ def test_differentiate_datetime(dask) -> None:
@pytest.mark.parametrize("dask", [True, False])
def test_differentiate_cftime(dask) -> None:
rs = np.random.RandomState(42)
coord = xr.cftime_range("2000", periods=8, freq="2M")
coord = xr.cftime_range("2000", periods=8, freq="2ME")

da = xr.DataArray(
rs.randn(8, 6),
Expand Down
6 changes: 3 additions & 3 deletions xarray/tests/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1754,19 +1754,19 @@ def test_resample_doctest(self, use_cftime: bool) -> None:
time=(
"time",
xr.date_range(
"2001-01-01", freq="M", periods=6, use_cftime=use_cftime
"2001-01-01", freq="ME", periods=6, use_cftime=use_cftime
),
),
labels=("time", np.array(["a", "b", "c", "c", "b", "a"])),
),
)
actual = da.resample(time="3M").count()
actual = da.resample(time="3ME").count()
expected = DataArray(
[1, 3, 1],
dims="time",
coords={
"time": xr.date_range(
"2001-01-01", freq="3M", periods=3, use_cftime=use_cftime
"2001-01-01", freq="3ME", periods=3, use_cftime=use_cftime
)
},
)
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ def test_get_clean_interp_index_cf_calendar(cf_da, calendar):

@requires_cftime
@pytest.mark.parametrize(
("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1M", "1Y"])
("calendar", "freq"), zip(["gregorian", "proleptic_gregorian"], ["1D", "1ME", "1Y"])
)
def test_get_clean_interp_index_dt(cf_da, calendar, freq):
"""In the gregorian case, the index should be proportional to normal datetimes."""
Expand Down
2 changes: 1 addition & 1 deletion xarray/tests/test_plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -2955,7 +2955,7 @@ def setUp(self) -> None:
"""
# case for 1d array
data = np.random.rand(4, 12)
time = xr.cftime_range(start="2017", periods=12, freq="1M", calendar="noleap")
time = xr.cftime_range(start="2017", periods=12, freq="1ME", calendar="noleap")
darray = DataArray(data, dims=["x", "time"])
darray.coords["time"] = time

Expand Down

0 comments on commit 4a3ad22

Please sign in to comment.