Skip to content

Commit

Permalink
fix and test empty CFTimeIndex (#8600)
Browse files Browse the repository at this point in the history
* fix empty cftimeindex repr

* switch to None

* require cftime

* fix empty cftimeindex

* add tests

* none not a string in repr

* make it explicit

* explicitly test dtype of date fields

* set date_field dtype

* use repr fstring to avoid conditional

* whats new entry

* Apply suggestions from code review

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>

---------

Co-authored-by: Spencer Clark <spencerkclark@gmail.com>
  • Loading branch information
mathause and spencerkclark authored Jan 15, 2024
1 parent 357a444 commit 53fdfca
Show file tree
Hide file tree
Showing 4 changed files with 115 additions and 8 deletions.
3 changes: 2 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ Bug fixes
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Vendor `SerializableLock` from dask and use as default lock for netcdf4 backends (:issue:`8442`, :pull:`8571`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.

- Add tests and fixes for empty :py:class:`CFTimeIndex`, including broken html repr (:issue:`7298`, :pull:`8600`).
By `Mathias Hauser <https://github.com/mathause>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
23 changes: 20 additions & 3 deletions xarray/coding/cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ def _parsed_string_to_bounds(date_type, resolution, parsed):

def get_date_field(datetimes, field):
    """Adapted from pandas.tslib.get_date_field

    Collect the attribute named ``field`` from every element of
    ``datetimes`` into an ``np.int64`` array.

    Parameters
    ----------
    datetimes : iterable
        Objects exposing ``field`` as an attribute.
    field : str
        Name of the attribute to read from each element.

    Returns
    -------
    np.ndarray
        Array of dtype ``np.int64`` with one entry per element of
        ``datetimes``.
    """
    # Pin the dtype: without it an empty input produces numpy's default
    # float64 array instead of an empty integer array.
    return np.array([getattr(date, field) for date in datetimes], dtype=np.int64)


def _field_accessor(name, docstring=None, min_cftime_version="0.0"):
Expand Down Expand Up @@ -272,8 +272,8 @@ def format_attrs(index, separator=", "):
attrs = {
"dtype": f"'{index.dtype}'",
"length": f"{len(index)}",
"calendar": f"'{index.calendar}'",
"freq": f"'{index.freq}'" if len(index) >= 3 else None,
"calendar": f"{index.calendar!r}",
"freq": f"{index.freq!r}",
}

attrs_str = [f"{k}={v}" for k, v in attrs.items()]
Expand Down Expand Up @@ -630,6 +630,10 @@ def to_datetimeindex(self, unsafe=False):
>>> times.to_datetimeindex()
DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None)
"""

if not self._data.size:
return pd.DatetimeIndex([])

nptimes = cftime_to_nptime(self)
calendar = infer_calendar_name(self)
if calendar not in _STANDARD_CALENDARS and not unsafe:
Expand Down Expand Up @@ -679,6 +683,9 @@ def asi8(self):
"""Convert to integers with units of microseconds since 1970-01-01."""
from xarray.core.resample_cftime import exact_cftime_datetime_difference

if not self._data.size:
return np.array([], dtype=np.int64)

epoch = self.date_type(1970, 1, 1)
return np.array(
[
Expand All @@ -693,19 +700,29 @@ def calendar(self):
"""The calendar used by the datetimes in the index."""
from xarray.coding.times import infer_calendar_name

if not self._data.size:
return None

return infer_calendar_name(self)

@property
def freq(self):
    """Frequency inferred from the dates in the index, or None.

    Indexes holding fewer than three dates return None without attempting
    inference.
    """
    from xarray.coding.frequencies import infer_freq

    # at least 3 elements are required to determine a frequency
    has_enough_dates = self._data.size >= 3
    return infer_freq(self) if has_enough_dates else None

def _round_via_method(self, freq, method):
"""Round dates using a specified method."""
from xarray.coding.cftime_offsets import CFTIME_TICKS, to_offset

if not self._data.size:
return CFTimeIndex(np.array(self))

offset = to_offset(freq)
if not isinstance(offset, CFTIME_TICKS):
raise ValueError(f"{offset} is a non-fixed frequency")
Expand Down
80 changes: 77 additions & 3 deletions xarray/tests/test_cftimeindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,28 +238,57 @@ def test_assert_all_valid_date_type(date_type, index):
)
def test_cftimeindex_field_accessors(index, field, expected):
result = getattr(index, field)
expected = np.array(expected, dtype=np.int64)
assert_array_equal(result, expected)
assert result.dtype == expected.dtype


@requires_cftime
@pytest.mark.parametrize(
    "field",
    [
        "year",
        "month",
        "day",
        "hour",
        "minute",
        "second",
        "microsecond",
        "dayofyear",
        "dayofweek",
        "days_in_month",
    ],
)
def test_empty_cftimeindex_field_accessors(field):
    """Each date-field accessor of an empty index equals an empty int64 array."""
    expected = np.array([], dtype=np.int64)
    empty_index = CFTimeIndex([])
    actual = getattr(empty_index, field)
    assert_array_equal(actual, expected)
    assert actual.dtype == expected.dtype


@requires_cftime
def test_cftimeindex_dayofyear_accessor(index):
    """Check ``dayofyear`` against each date's ``dayofyr``, values and dtype."""
    result = index.dayofyear
    # Build the expectation directly as int64 so the dtype can be compared too.
    expected = np.array([date.dayofyr for date in index], dtype=np.int64)
    assert_array_equal(result, expected)
    assert result.dtype == expected.dtype


@requires_cftime
def test_cftimeindex_dayofweek_accessor(index):
    """Check ``dayofweek`` against each date's ``dayofwk``, values and dtype."""
    result = index.dayofweek
    # Build the expectation directly as int64 so the dtype can be compared too.
    expected = np.array([date.dayofwk for date in index], dtype=np.int64)
    assert_array_equal(result, expected)
    assert result.dtype == expected.dtype


@requires_cftime
def test_cftimeindex_days_in_month_accessor(index):
    """Check ``days_in_month`` against each date's ``daysinmonth``, values and dtype."""
    result = index.days_in_month
    # Build the expectation directly as int64 so the dtype can be compared too.
    expected = np.array([date.daysinmonth for date in index], dtype=np.int64)
    assert_array_equal(result, expected)
    assert result.dtype == expected.dtype


@requires_cftime
Expand Down Expand Up @@ -959,6 +988,31 @@ def test_cftimeindex_calendar_property(calendar, expected):
assert index.calendar == expected


@requires_cftime
def test_empty_cftimeindex_calendar_property():
    """The ``calendar`` property of an empty index is None."""
    empty_index = CFTimeIndex([])
    calendar = empty_index.calendar
    assert calendar is None


@requires_cftime
@pytest.mark.parametrize(
    "calendar",
    [
        "noleap",
        "365_day",
        "360_day",
        "julian",
        "gregorian",
        "standard",
        "proleptic_gregorian",
    ],
)
def test_cftimeindex_freq_property_none_size_lt_3(calendar):
    """``freq`` is None for indexes built with zero, one, or two periods."""
    for n_periods in (0, 1, 2):
        short_index = xr.cftime_range(
            start="2000", periods=n_periods, calendar=calendar
        )
        assert short_index.freq is None


@requires_cftime
@pytest.mark.parametrize(
("calendar", "expected"),
Expand Down Expand Up @@ -1152,6 +1206,18 @@ def test_rounding_methods_against_datetimeindex(freq, method):
assert result.equals(expected)


@requires_cftime
@pytest.mark.parametrize("method", ["floor", "ceil", "round"])
def test_rounding_methods_empty_cftimindex(method):
    """Rounding an empty index gives a distinct object equal to an empty index."""
    empty_index = CFTimeIndex([])
    rounder = getattr(empty_index, method)
    rounded = rounder("2s")

    assert rounded.equals(CFTimeIndex([]))
    # The result must be a separate object, not the input itself.
    assert rounded is not empty_index


@requires_cftime
@pytest.mark.parametrize("method", ["floor", "ceil", "round"])
def test_rounding_methods_invalid_freq(method):
Expand Down Expand Up @@ -1230,6 +1296,14 @@ def test_asi8_distant_date():
np.testing.assert_array_equal(result, expected)


@requires_cftime
def test_asi8_empty_cftimeindex():
    """``asi8`` of an empty index compares equal to an empty int64 array."""
    actual = xr.CFTimeIndex([]).asi8
    np.testing.assert_array_equal(actual, np.array([], dtype=np.int64))


@requires_cftime
def test_infer_freq_valid_types():
cf_indx = xr.cftime_range("2000-01-01", periods=3, freq="D")
Expand Down
17 changes: 16 additions & 1 deletion xarray/tests/test_formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import xarray as xr
from xarray.core import formatting
from xarray.tests import requires_dask, requires_netCDF4
from xarray.tests import requires_cftime, requires_dask, requires_netCDF4


class TestFormatting:
Expand Down Expand Up @@ -803,3 +803,18 @@ def test_format_xindexes(as_dataset: bool) -> None:

actual = repr(obj.xindexes)
assert actual == expected


# Compares the repr of the indexes of a DataArray whose "time" coordinate is an
# empty CFTimeIndex against a fixed expected string.
# NOTE(review): this text comes from a flattened diff view; leading indentation
# and the column spacing inside the expected literal appear to have been
# collapsed. Verify the exact whitespace against the real file before relying on it.
@requires_cftime
def test_empty_cftimeindex_repr() -> None:
index = xr.coding.cftimeindex.CFTimeIndex([])

expected = """\
Indexes:
time CFTimeIndex([], dtype='object', length=0, calendar=None, freq=None)"""
expected = dedent(expected)

da = xr.DataArray([], coords={"time": index})

actual = repr(da.indexes)
assert actual == expected

0 comments on commit 53fdfca

Please sign in to comment.