From fd15c0bf60db82e69e9becd2d65c6cf5e6937031 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Thu, 26 Sep 2024 17:25:34 +0200 Subject: [PATCH] refactor: Another set of new-stream test skip/fixes --- .../src/plans/conversion/expr_expansion.rs | 2 +- .../src/physical_plan/lower_expr.rs | 2 +- .../operations/namespaces/test_categorical.py | 20 +++++-- .../tests/unit/operations/test_transpose.py | 9 +++ .../tests/unit/operations/test_unpivot.py | 56 +++++++++---------- .../tests/unit/sql/test_wildcard_opts.py | 2 +- py-polars/tests/unit/test_datatypes.py | 1 + 7 files changed, 53 insertions(+), 39 deletions(-) diff --git a/crates/polars-plan/src/plans/conversion/expr_expansion.rs b/crates/polars-plan/src/plans/conversion/expr_expansion.rs index b17db4c728d0..4d1aa76caff5 100644 --- a/crates/polars-plan/src/plans/conversion/expr_expansion.rs +++ b/crates/polars-plan/src/plans/conversion/expr_expansion.rs @@ -643,7 +643,7 @@ fn find_flags(expr: &Expr) -> PolarsResult { #[cfg(feature = "dtype-struct")] fn toggle_cse(opt_flags: &mut OptFlags) { - if opt_flags.contains(OptFlags::EAGER) { + if opt_flags.contains(OptFlags::EAGER) && !opt_flags.contains(OptFlags::NEW_STREAMING) { #[cfg(debug_assertions)] { use polars_core::config::verbose; diff --git a/crates/polars-stream/src/physical_plan/lower_expr.rs b/crates/polars-stream/src/physical_plan/lower_expr.rs index 919694e8c538..39493af054c2 100644 --- a/crates/polars-stream/src/physical_plan/lower_expr.rs +++ b/crates/polars-stream/src/physical_plan/lower_expr.rs @@ -348,7 +348,7 @@ fn build_fallback_node_with_ctx( expr, Context::Default, ctx.expr_arena, - None, + Some(&ctx.phys_sm[input_node].output_schema), &mut conv_state, ) }) diff --git a/py-polars/tests/unit/operations/namespaces/test_categorical.py b/py-polars/tests/unit/operations/namespaces/test_categorical.py index 708abf7eed4d..3e491894c18e 100644 --- a/py-polars/tests/unit/operations/namespaces/test_categorical.py +++ b/py-polars/tests/unit/operations/namespaces/test_categorical.py @@ -1,3 +1,5 @@ +import pytest + import polars as pl from polars.testing import assert_frame_equal @@ -58,20 +60,26 @@ def test_categorical_lexical_ordering_after_concat() -> None: } -def test_sort_categoricals_6014() -> None: +@pytest.mark.may_fail_auto_streaming +def test_sort_categoricals_6014_internal() -> None: with pl.StringCache(): # create basic categorical - df1 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( + df = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( pl.col("key").cast(pl.Categorical) ) + + out = df.sort("key") + assert out.to_dict(as_series=False) == {"key": ["bbb", "aaa", "ccc"]} + + +def test_sort_categoricals_6014_lexical() -> None: + with pl.StringCache(): # create lexically-ordered categorical - df2 = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( + df = pl.DataFrame({"key": ["bbb", "aaa", "ccc"]}).with_columns( pl.col("key").cast(pl.Categorical("lexical")) ) - out = df1.sort("key") - assert out.to_dict(as_series=False) == {"key": ["bbb", "aaa", "ccc"]} - out = df2.sort("key") + out = df.sort("key") assert out.to_dict(as_series=False) == {"key": ["aaa", "bbb", "ccc"]} diff --git a/py-polars/tests/unit/operations/test_transpose.py b/py-polars/tests/unit/operations/test_transpose.py index ebb58ac4f2bf..a43a6e7f629e 100644 --- a/py-polars/tests/unit/operations/test_transpose.py +++ b/py-polars/tests/unit/operations/test_transpose.py @@ -13,6 +13,7 @@ from polars.testing import assert_frame_equal, assert_series_equal +@pytest.mark.may_fail_auto_streaming def test_transpose_supertype() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": ["foo", "bar", "ham"]}) result = df.transpose() @@ -26,6 +27,7 @@ def test_transpose_supertype() -> None: assert_frame_equal(result, expected) +@pytest.mark.may_fail_auto_streaming def test_transpose_tz_naive_and_tz_aware() -> None: df = pl.DataFrame( { @@ -41,6 +43,7 @@ def test_transpose_tz_naive_and_tz_aware() -> None: df.transpose() +@pytest.mark.may_fail_auto_streaming def test_transpose_struct() -> None: df = pl.DataFrame( { @@ -82,6 +85,7 @@ def test_transpose_struct() -> None: assert_frame_equal(result, expected) +@pytest.mark.may_fail_auto_streaming def test_transpose_arguments() -> None: df = pl.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}) expected = pl.DataFrame( @@ -136,6 +140,7 @@ def name_generator() -> Iterator[str]: assert_frame_equal(expected, out) +@pytest.mark.may_fail_auto_streaming def test_transpose_categorical_data() -> None: with pl.StringCache(): df = pl.DataFrame( @@ -174,6 +179,7 @@ def test_transpose_categorical_data() -> None: ).transpose() +@pytest.mark.may_fail_auto_streaming def test_transpose_logical_data() -> None: df = pl.DataFrame( { @@ -192,6 +198,7 @@ def test_transpose_logical_data() -> None: assert_frame_equal(result, expected) +@pytest.mark.may_fail_auto_streaming def test_err_transpose_object() -> None: class CustomObject: pass @@ -200,12 +207,14 @@ class CustomObject: pl.DataFrame([CustomObject()]).transpose() +@pytest.mark.may_fail_auto_streaming def test_transpose_name_from_column_13777() -> None: csv_file = io.BytesIO(b"id,kc\nhi,3") df = pl.read_csv(csv_file).transpose(column_names="id") assert_series_equal(df.to_series(0), pl.Series("hi", [3])) +@pytest.mark.may_fail_auto_streaming def test_transpose_multiple_chunks() -> None: df = pl.DataFrame({"a": ["1"]}) expected = pl.DataFrame({"column_0": ["1"], "column_1": ["1"]}) diff --git a/py-polars/tests/unit/operations/test_unpivot.py b/py-polars/tests/unit/operations/test_unpivot.py index 7b51d91122dc..ada642c294ae 100644 --- a/py-polars/tests/unit/operations/test_unpivot.py +++ b/py-polars/tests/unit/operations/test_unpivot.py @@ -7,48 +7,44 @@ def test_unpivot() -> None: df = pl.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) + expected = { + ("a", "B", 1), + ("b", "B", 3), + ("c", "B", 5), + ("a", "C", 2), + ("b", "C", 4), + ("c", "C", 6), + } for _idv, _vv in (("A", ("B", "C")), (cs.string(), cs.integer())): unpivoted_eager = df.unpivot(index="A", on=["B", "C"]) - assert all(unpivoted_eager["value"] == [1, 3, 5, 2, 4, 6]) + assert set(unpivoted_eager.iter_rows()) == expected - unpivoted_lazy = df.lazy().unpivot(index="A", on=["B", "C"]) - assert all(unpivoted_lazy.collect()["value"] == [1, 3, 5, 2, 4, 6]) + unpivoted_lazy = df.lazy().unpivot(index="A", on=["B", "C"]).collect() + assert set(unpivoted_lazy.iter_rows()) == expected unpivoted = df.unpivot(index="A", on="B") - assert all(unpivoted["value"] == [1, 3, 5]) - n = 3 - + assert set(unpivoted["value"]) == {1, 3, 5} + + expected_full = { + ("A", "a"), + ("A", "b"), + ("A", "c"), + ("B", "1"), + ("B", "3"), + ("B", "5"), + ("C", "2"), + ("C", "4"), + ("C", "6"), + } for unpivoted in [df.unpivot(), df.lazy().unpivot().collect()]: - assert unpivoted["variable"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert unpivoted["value"].to_list() == [ - "a", - "b", - "c", - "1", - "3", - "5", - "2", - "4", - "6", - ] + assert set(unpivoted.iter_rows()) == expected_full with pytest.deprecated_call(match="unpivot"): for unpivoted in [ df.melt(value_name="foo", variable_name="bar"), df.lazy().melt(value_name="foo", variable_name="bar").collect(), ]: - assert unpivoted["bar"].to_list() == ["A"] * n + ["B"] * n + ["C"] * n - assert unpivoted["foo"].to_list() == [ - "a", - "b", - "c", - "1", - "3", - "5", - "2", - "4", - "6", - ] + assert set(unpivoted.iter_rows()) == expected_full def test_unpivot_projection_pd_7747() -> None: diff --git a/py-polars/tests/unit/sql/test_wildcard_opts.py b/py-polars/tests/unit/sql/test_wildcard_opts.py index e27ce9ac14b3..c31a55d61829 100644 --- a/py-polars/tests/unit/sql/test_wildcard_opts.py +++ b/py-polars/tests/unit/sql/test_wildcard_opts.py @@ -180,6 +180,6 @@ def test_select_wildcard_errors(df: pl.DataFrame) -> None: # note: missing "()" around the exclude option results in dupe col with pytest.raises( DuplicateError, - match="the name 'City' is duplicate", + match="City", ): assert df.sql("SELECT * EXCLUDE Address, City FROM self") diff --git a/py-polars/tests/unit/test_datatypes.py b/py-polars/tests/unit/test_datatypes.py index 9bd545125f64..4d604f2964e9 100644 --- a/py-polars/tests/unit/test_datatypes.py +++ b/py-polars/tests/unit/test_datatypes.py @@ -138,6 +138,7 @@ def test_repr(dtype: PolarsDataType, representation: str) -> None: assert repr(dtype) == representation +@pytest.mark.may_fail_auto_streaming def test_conversion_dtype() -> None: df = ( pl.DataFrame(