Skip to content

Commit

Permalink
TST (string dtype): resolve xfails in pandas/tests/series (pandas-dev/pandas#60233)
Browse files Browse the repository at this point in the history

* TST (string dtype): resolve xfails in pandas/tests/series

* a few more

* link TODO to issue

* fix for non-future mode
  • Loading branch information
jorisvandenbossche authored Nov 8, 2024
1 parent f9d2e50 commit 3f7bc81
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 56 deletions.
4 changes: 0 additions & 4 deletions pandas/tests/series/accessors/test_dt_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas._libs.tslibs.timezones import maybe_get_tz

from pandas.core.dtypes.common import (
Expand Down Expand Up @@ -556,7 +554,6 @@ def test_strftime(self):
)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_strftime_dt64_days(self):
ser = Series(date_range("20130101", periods=5))
ser.iloc[0] = pd.NaT
Expand All @@ -571,7 +568,6 @@ def test_strftime_dt64_days(self):

expected = Index(
["2015/03/01", "2015/03/02", "2015/03/03", "2015/03/04", "2015/03/05"],
dtype=np.object_,
)
# dtype may be S10 or U10 depending on python version
tm.assert_index_equal(result, expected)
Expand Down
21 changes: 15 additions & 6 deletions pandas/tests/series/indexing/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.errors import IndexingError

from pandas import (
Expand Down Expand Up @@ -251,18 +249,29 @@ def test_slice(string_series, object_series):
tm.assert_series_equal(string_series, original)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_timedelta_assignment():
# GH 8209
s = Series([], dtype=object)
s.loc["B"] = timedelta(1)
tm.assert_series_equal(s, Series(Timedelta("1 days"), index=["B"]))
expected = Series(
Timedelta("1 days"), dtype="timedelta64[ns]", index=Index(["B"], dtype=object)
)
tm.assert_series_equal(s, expected)

s = s.reindex(s.index.insert(0, "A"))
tm.assert_series_equal(s, Series([np.nan, Timedelta("1 days")], index=["A", "B"]))
expected = Series(
[np.nan, Timedelta("1 days")],
dtype="timedelta64[ns]",
index=Index(["A", "B"], dtype=object),
)
tm.assert_series_equal(s, expected)

s.loc["A"] = timedelta(1)
expected = Series(Timedelta("1 days"), index=["A", "B"])
expected = Series(
Timedelta("1 days"),
dtype="timedelta64[ns]",
index=Index(["A", "B"], dtype=object),
)
tm.assert_series_equal(s, expected)


Expand Down
47 changes: 28 additions & 19 deletions pandas/tests/series/indexing/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,7 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
HAS_PYARROW,
WASM,
)
from pandas.compat import WASM
from pandas.compat.numpy import np_version_gte1p24
from pandas.errors import IndexingError

Expand All @@ -32,6 +27,7 @@
NaT,
Period,
Series,
StringDtype,
Timedelta,
Timestamp,
array,
Expand Down Expand Up @@ -535,14 +531,16 @@ def test_append_timedelta_does_not_cast(self, td, using_infer_string, request):
tm.assert_series_equal(ser, expected)
assert isinstance(ser["td"], Timedelta)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_setitem_with_expansion_type_promotion(self):
# GH#12599
ser = Series(dtype=object)
ser["a"] = Timestamp("2016-01-01")
ser["b"] = 3.0
ser["c"] = "foo"
expected = Series([Timestamp("2016-01-01"), 3.0, "foo"], index=["a", "b", "c"])
expected = Series(
[Timestamp("2016-01-01"), 3.0, "foo"],
index=Index(["a", "b", "c"], dtype=object),
)
tm.assert_series_equal(ser, expected)

def test_setitem_not_contained(self, string_series):
Expand Down Expand Up @@ -826,11 +824,6 @@ def test_mask_key(self, obj, key, expected, raises, val, indexer_sli):
else:
indexer_sli(obj)[mask] = val

@pytest.mark.xfail(
using_string_dtype() and not HAS_PYARROW,
reason="TODO(infer_string)",
strict=False,
)
def test_series_where(self, obj, key, expected, raises, val, is_inplace):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True
Expand All @@ -846,6 +839,11 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace):
obj = obj.copy()
arr = obj._values

if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
obj.where(~mask, val)
return

res = obj.where(~mask, val)

if val is NA and res.dtype == object:
Expand All @@ -858,25 +856,23 @@ def test_series_where(self, obj, key, expected, raises, val, is_inplace):

self._check_inplace(is_inplace, orig, arr, obj)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_index_where(self, obj, key, expected, raises, val, using_infer_string):
def test_index_where(self, obj, key, expected, raises, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if using_infer_string and obj.dtype == object:
if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).where(~mask, val)
else:
res = Index(obj).where(~mask, val)
expected_idx = Index(expected, dtype=expected.dtype)
tm.assert_index_equal(res, expected_idx)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
def test_index_putmask(self, obj, key, expected, raises, val, using_infer_string):
def test_index_putmask(self, obj, key, expected, raises, val):
mask = np.zeros(obj.shape, dtype=bool)
mask[key] = True

if using_infer_string and obj.dtype == object:
if raises and obj.dtype == "string":
with pytest.raises(TypeError, match="Invalid value"):
Index(obj).putmask(mask, val)
else:
Expand Down Expand Up @@ -1372,6 +1368,19 @@ def raises(self):
return False


@pytest.mark.parametrize(
"val,exp_dtype,raises",
[
(1, object, True),
("e", StringDtype(na_value=np.nan), False),
],
)
class TestCoercionString(CoercionTest):
@pytest.fixture
def obj(self):
return Series(["a", "b", "c", "d"], dtype=StringDtype(na_value=np.nan))


@pytest.mark.parametrize(
"val,exp_dtype,raises",
[
Expand Down
17 changes: 7 additions & 10 deletions pandas/tests/series/indexing/test_where.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.core.dtypes.common import is_integer

import pandas as pd
Expand Down Expand Up @@ -231,7 +229,6 @@ def test_where_ndframe_align():
tm.assert_series_equal(out, expected)


@pytest.mark.xfail(using_string_dtype(), reason="can't set ints into string")
def test_where_setitem_invalid():
# GH 2702
# make sure correct exceptions are raised on invalid list assignment
Expand All @@ -241,7 +238,7 @@ def test_where_setitem_invalid():
"different length than the value"
)
# slice
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[0:3] = list(range(27))
Expand All @@ -251,18 +248,18 @@ def test_where_setitem_invalid():
tm.assert_series_equal(s.astype(np.int64), expected)

# slice with step
s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[0:4:2] = list(range(27))

s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)
s[0:4:2] = list(range(2))
expected = Series([0, "b", 1, "d", "e", "f"])
tm.assert_series_equal(s, expected)

# neg slices
s = Series(list("abcdef"))
s = Series(list("abcdef"), dtype=object)

with pytest.raises(ValueError, match=msg("slice")):
s[:-1] = list(range(27))
Expand All @@ -272,18 +269,18 @@ def test_where_setitem_invalid():
tm.assert_series_equal(s, expected)

# list
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("list-like")):
s[[0, 1, 2]] = list(range(27))

s = Series(list("abc"))
s = Series(list("abc"), dtype=object)

with pytest.raises(ValueError, match=msg("list-like")):
s[[0, 1, 2]] = list(range(2))

# scalar
s = Series(list("abc"))
s = Series(list("abc"), dtype=object)
s[0] = list(range(10))
expected = Series([list(range(10)), "b", "c"])
tm.assert_series_equal(s, expected)
Expand Down
29 changes: 16 additions & 13 deletions pandas/tests/series/methods/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
import pandas._testing as tm
from pandas.core.arrays import IntervalArray
Expand Down Expand Up @@ -628,15 +626,23 @@ def test_replace_nullable_numeric(self):
with pytest.raises(TypeError, match="Invalid value"):
ints.replace(1, 9.5)

@pytest.mark.xfail(using_string_dtype(), reason="can't fill 1 in string")
@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series(self, regex):
# GH-48644
series = pd.Series(["0"])
series = pd.Series(["0"], dtype=object)
expected = pd.Series([1], dtype=object)
result = series.replace(to_replace="0", value=1, regex=regex)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("regex", [False, True])
def test_replace_regex_dtype_series_string(self, regex, using_infer_string):
if not using_infer_string:
# then this is object dtype which is already tested above
return
series = pd.Series(["0"], dtype="str")
with pytest.raises(TypeError, match="Invalid value"):
series.replace(to_replace="0", value=1, regex=regex)

def test_replace_different_int_types(self, any_int_numpy_dtype):
# GH#45311
labs = pd.Series([1, 1, 1, 0, 0, 2, 2, 2], dtype=any_int_numpy_dtype)
Expand All @@ -656,21 +662,18 @@ def test_replace_value_none_dtype_numeric(self, val):
expected = pd.Series([1, None], dtype=object)
tm.assert_series_equal(result, expected)

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_replace_change_dtype_series(self, using_infer_string):
def test_replace_change_dtype_series(self):
# GH#25797
df = pd.DataFrame.from_dict({"Test": ["0.5", True, "0.6"]})
warn = FutureWarning if using_infer_string else None
with tm.assert_produces_warning(warn, match="Downcasting"):
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame.from_dict({"Test": ["0.5", np.nan, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", True, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([True], [np.nan])
expected = pd.DataFrame({"Test": ["0.5", np.nan, "0.6"]}, dtype=object)
tm.assert_frame_equal(df, expected)

df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].replace([None], [np.nan])
tm.assert_frame_equal(df, expected)

df = pd.DataFrame.from_dict({"Test": ["0.5", None, "0.6"]})
df = pd.DataFrame({"Test": ["0.5", None, "0.6"]}, dtype=object)
df["Test"] = df["Test"].fillna(np.nan)
tm.assert_frame_equal(df, expected)

Expand Down
5 changes: 1 addition & 4 deletions pandas/tests/series/methods/test_unstack.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

import pandas as pd
from pandas import (
DataFrame,
Expand Down Expand Up @@ -136,11 +134,10 @@ def test_unstack_mixed_type_name_in_multiindex(
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_unstack_multi_index_categorical_values():
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
columns=Index(list("ABCD")),
index=date_range("2000-01-01", periods=10, freq="B"),
)
mi = df.stack().index.rename(["major", "minor"])
Expand Down
1 change: 1 addition & 0 deletions pandas/tests/series/test_logical_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def test_logical_ops_label_based(self, using_infer_string):
for e in [Series(["z"])]:
if using_infer_string:
# TODO(infer_string) should this behave differently?
# -> https://github.com/pandas-dev/pandas/issues/60234
with pytest.raises(
TypeError, match="not supported for dtype|unsupported operand type"
):
Expand Down

0 comments on commit 3f7bc81

Please sign in to comment.