From 53fdfcac1dbd0d882f27cecc51af5a26b264dd34 Mon Sep 17 00:00:00 2001 From: Mathias Hauser Date: Mon, 15 Jan 2024 22:49:34 +0100 Subject: [PATCH] fix and test empty CFTimeIndex (#8600) * fix empty cftimeindex repr * switch to None * require cftime * fix empty cftimeindex * add tests * none not a string in repr * make it explicit * explicitly test dtype of date fields * set date_field dtype * use repr fstring to avoid conditional * whats new entry * Apply suggestions from code review Co-authored-by: Spencer Clark --------- Co-authored-by: Spencer Clark --- doc/whats-new.rst | 3 +- xarray/coding/cftimeindex.py | 23 +++++++-- xarray/tests/test_cftimeindex.py | 80 ++++++++++++++++++++++++++++++-- xarray/tests/test_formatting.py | 17 ++++++- 4 files changed, 115 insertions(+), 8 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index f9d308171a9..db32de3c9cd 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -69,7 +69,8 @@ Bug fixes By `Kai Mühlbauer `_. - Vendor `SerializableLock` from dask and use as default lock for netcdf4 backends (:issue:`8442`, :pull:`8571`). By `Kai Mühlbauer `_. - +- Add tests and fixes for empty :py:class:`CFTimeIndex`, including broken html repr (:issue:`7298`, :pull:`8600`). + By `Mathias Hauser `_. 
Documentation ~~~~~~~~~~~~~ diff --git a/xarray/coding/cftimeindex.py b/xarray/coding/cftimeindex.py index b38d815187d..bddcea97787 100644 --- a/xarray/coding/cftimeindex.py +++ b/xarray/coding/cftimeindex.py @@ -187,7 +187,7 @@ def _parsed_string_to_bounds(date_type, resolution, parsed): def get_date_field(datetimes, field): """Adapted from pandas.tslib.get_date_field""" - return np.array([getattr(date, field) for date in datetimes]) + return np.array([getattr(date, field) for date in datetimes], dtype=np.int64) def _field_accessor(name, docstring=None, min_cftime_version="0.0"): @@ -272,8 +272,8 @@ def format_attrs(index, separator=", "): attrs = { "dtype": f"'{index.dtype}'", "length": f"{len(index)}", - "calendar": f"'{index.calendar}'", - "freq": f"'{index.freq}'" if len(index) >= 3 else None, + "calendar": f"{index.calendar!r}", + "freq": f"{index.freq!r}", } attrs_str = [f"{k}={v}" for k, v in attrs.items()] @@ -630,6 +630,10 @@ def to_datetimeindex(self, unsafe=False): >>> times.to_datetimeindex() DatetimeIndex(['2000-01-01', '2000-01-02'], dtype='datetime64[ns]', freq=None) """ + + if not self._data.size: + return pd.DatetimeIndex([]) + nptimes = cftime_to_nptime(self) calendar = infer_calendar_name(self) if calendar not in _STANDARD_CALENDARS and not unsafe: @@ -679,6 +683,9 @@ def asi8(self): """Convert to integers with units of microseconds since 1970-01-01.""" from xarray.core.resample_cftime import exact_cftime_datetime_difference + if not self._data.size: + return np.array([], dtype=np.int64) + epoch = self.date_type(1970, 1, 1) return np.array( [ @@ -693,6 +700,9 @@ def calendar(self): """The calendar used by the datetimes in the index.""" from xarray.coding.times import infer_calendar_name + if not self._data.size: + return None + return infer_calendar_name(self) @property @@ -700,12 +710,19 @@ def freq(self): """The frequency used by the dates in the index.""" from xarray.coding.frequencies import infer_freq + # min 3 elements required to 
determine freq + if self._data.size < 3: + return None + return infer_freq(self) def _round_via_method(self, freq, method): """Round dates using a specified method.""" from xarray.coding.cftime_offsets import CFTIME_TICKS, to_offset + if not self._data.size: + return CFTimeIndex(np.array(self)) + offset = to_offset(freq) if not isinstance(offset, CFTIME_TICKS): raise ValueError(f"{offset} is a non-fixed frequency") diff --git a/xarray/tests/test_cftimeindex.py b/xarray/tests/test_cftimeindex.py index e09fe2461ce..062756e614b 100644 --- a/xarray/tests/test_cftimeindex.py +++ b/xarray/tests/test_cftimeindex.py @@ -238,28 +238,57 @@ def test_assert_all_valid_date_type(date_type, index): ) def test_cftimeindex_field_accessors(index, field, expected): result = getattr(index, field) + expected = np.array(expected, dtype=np.int64) assert_array_equal(result, expected) + assert result.dtype == expected.dtype + + +@requires_cftime +@pytest.mark.parametrize( + ("field"), + [ + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "dayofyear", + "dayofweek", + "days_in_month", + ], +) +def test_empty_cftimeindex_field_accessors(field): + index = CFTimeIndex([]) + result = getattr(index, field) + expected = np.array([], dtype=np.int64) + assert_array_equal(result, expected) + assert result.dtype == expected.dtype @requires_cftime def test_cftimeindex_dayofyear_accessor(index): result = index.dayofyear - expected = [date.dayofyr for date in index] + expected = np.array([date.dayofyr for date in index], dtype=np.int64) assert_array_equal(result, expected) + assert result.dtype == expected.dtype @requires_cftime def test_cftimeindex_dayofweek_accessor(index): result = index.dayofweek - expected = [date.dayofwk for date in index] + expected = np.array([date.dayofwk for date in index], dtype=np.int64) assert_array_equal(result, expected) + assert result.dtype == expected.dtype @requires_cftime def test_cftimeindex_days_in_month_accessor(index): result = 
index.days_in_month - expected = [date.daysinmonth for date in index] + expected = np.array([date.daysinmonth for date in index], dtype=np.int64) assert_array_equal(result, expected) + assert result.dtype == expected.dtype @requires_cftime @@ -959,6 +988,31 @@ def test_cftimeindex_calendar_property(calendar, expected): assert index.calendar == expected +@requires_cftime +def test_empty_cftimeindex_calendar_property(): + index = CFTimeIndex([]) + assert index.calendar is None + + +@requires_cftime +@pytest.mark.parametrize( + "calendar", + [ + "noleap", + "365_day", + "360_day", + "julian", + "gregorian", + "standard", + "proleptic_gregorian", + ], +) +def test_cftimeindex_freq_property_none_size_lt_3(calendar): + for periods in range(3): + index = xr.cftime_range(start="2000", periods=periods, calendar=calendar) + assert index.freq is None + + @requires_cftime @pytest.mark.parametrize( ("calendar", "expected"), @@ -1152,6 +1206,18 @@ def test_rounding_methods_against_datetimeindex(freq, method): assert result.equals(expected) +@requires_cftime +@pytest.mark.parametrize("method", ["floor", "ceil", "round"]) +def test_rounding_methods_empty_cftimindex(method): + index = CFTimeIndex([]) + result = getattr(index, method)("2s") + + expected = CFTimeIndex([]) + + assert result.equals(expected) + assert result is not index + + @requires_cftime @pytest.mark.parametrize("method", ["floor", "ceil", "round"]) def test_rounding_methods_invalid_freq(method): @@ -1230,6 +1296,14 @@ def test_asi8_distant_date(): np.testing.assert_array_equal(result, expected) +@requires_cftime +def test_asi8_empty_cftimeindex(): + index = xr.CFTimeIndex([]) + result = index.asi8 + expected = np.array([], dtype=np.int64) + np.testing.assert_array_equal(result, expected) + + @requires_cftime def test_infer_freq_valid_types(): cf_indx = xr.cftime_range("2000-01-01", periods=3, freq="D") diff --git a/xarray/tests/test_formatting.py b/xarray/tests/test_formatting.py index 181b0205352..6ed4103aef7 
100644 --- a/xarray/tests/test_formatting.py +++ b/xarray/tests/test_formatting.py @@ -10,7 +10,7 @@ import xarray as xr from xarray.core import formatting -from xarray.tests import requires_dask, requires_netCDF4 +from xarray.tests import requires_cftime, requires_dask, requires_netCDF4 class TestFormatting: @@ -803,3 +803,18 @@ def test_format_xindexes(as_dataset: bool) -> None: actual = repr(obj.xindexes) assert actual == expected + + +@requires_cftime +def test_empty_cftimeindex_repr() -> None: + index = xr.coding.cftimeindex.CFTimeIndex([]) + + expected = """\ + Indexes: + time CFTimeIndex([], dtype='object', length=0, calendar=None, freq=None)""" + expected = dedent(expected) + + da = xr.DataArray([], coords={"time": index}) + + actual = repr(da.indexes) + assert actual == expected