From d63da250b3d2e765dfc0809c2c88bb24b0a6ddc9 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 3 Feb 2025 23:00:29 +0100 Subject: [PATCH] [pre-commit.ci] pre-commit autoupdate (#1926) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/astral-sh/ruff-pre-commit: v0.8.6 → v0.9.4](https://github.com/astral-sh/ruff-pre-commit/compare/v0.8.6...v0.9.4) - [github.com/codespell-project/codespell: v2.3.0 → v2.4.1](https://github.com/codespell-project/codespell/compare/v2.3.0...v2.4.1) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix mkdocs * fix precommit --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Edoardo Abati <29585319+EdAbati@users.noreply.github.com> --- .pre-commit-config.yaml | 4 +-- mkdocs.yml | 5 ++- narwhals/_arrow/expr.py | 6 +--- narwhals/_arrow/selectors.py | 6 +--- narwhals/_dask/expr_dt.py | 2 +- narwhals/_dask/expr_str.py | 2 +- narwhals/_dask/selectors.py | 6 +--- narwhals/_duckdb/selectors.py | 2 +- narwhals/_pandas_like/expr.py | 5 +-- narwhals/_pandas_like/selectors.py | 4 +-- narwhals/_spark_like/selectors.py | 2 +- narwhals/dataframe.py | 51 +++++++++++++++++++++++------- narwhals/dtypes.py | 4 ++- narwhals/expr.py | 20 +++++++++--- narwhals/expr_dt.py | 8 +++-- narwhals/expr_name.py | 4 ++- narwhals/expr_str.py | 8 +++-- narwhals/functions.py | 18 ++++++++--- narwhals/group_by.py | 4 ++- narwhals/series.py | 16 +++++++--- narwhals/utils.py | 16 +++++----- pyproject.toml | 1 + tests/utils.py | 10 +++--- tpch/tests/queries_test.py | 6 ++-- 24 files changed, 132 insertions(+), 78 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5e6929f16..84d5a6df0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -3,7 +3,7 @@ ci: repos: - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: 'v0.8.6' + rev: 'v0.9.4' hooks: # Run the formatter. - id: ruff-format @@ -20,7 +20,7 @@ repos: additional_dependencies: ['polars==1.4.1', 'pytest==8.3.2'] files: ^(narwhals|tests)/ - repo: https://github.com/codespell-project/codespell - rev: 'v2.3.0' + rev: 'v2.4.1' hooks: - id: codespell files: \.(py|rst|md)$ diff --git a/mkdocs.yml b/mkdocs.yml index e31b98ab6..129c5d973 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -105,11 +105,10 @@ plugins: - mkdocstrings: handlers: python: - import: + inventories: - https://installer.readthedocs.io/en/stable/objects.inv - rendering: - show_signature_annotations: true options: + show_signature_annotations: true members_order: alphabetical enable_inventory: true diff --git a/narwhals/_arrow/expr.py b/narwhals/_arrow/expr.py index 28c54e573..b0b613494 100644 --- a/narwhals/_arrow/expr.py +++ b/narwhals/_arrow/expr.py @@ -56,11 +56,7 @@ def __init__( self._kwargs = kwargs def __repr__(self: Self) -> str: # pragma: no cover - return ( - f"ArrowExpr(" - f"depth={self._depth}, " - f"function_name={self._function_name}, " - ) + return f"ArrowExpr(depth={self._depth}, function_name={self._function_name}, " def __call__(self: Self, df: ArrowDataFrame) -> Sequence[ArrowSeries]: return self._call(df) diff --git a/narwhals/_arrow/selectors.py b/narwhals/_arrow/selectors.py index c48caeab7..15ce43395 100644 --- a/narwhals/_arrow/selectors.py +++ b/narwhals/_arrow/selectors.py @@ -111,11 +111,7 @@ def func(df: ArrowDataFrame) -> list[ArrowSeries]: class ArrowSelector(ArrowExpr): def __repr__(self: Self) -> str: # pragma: no cover - return ( - f"ArrowSelector(" - f"depth={self._depth}, " - f"function_name={self._function_name})" - ) + return f"ArrowSelector(depth={self._depth}, function_name={self._function_name})" def _to_expr(self: Self) -> ArrowExpr: return ArrowExpr( diff --git a/narwhals/_dask/expr_dt.py b/narwhals/_dask/expr_dt.py index e597805cc..e58e355e7 100644 --- a/narwhals/_dask/expr_dt.py +++ b/narwhals/_dask/expr_dt.py @@ -110,7 +110,7 @@ def weekday(self: Self) -> DaskExpr: def to_string(self: Self, format: str) -> DaskExpr: # noqa: A002 return self._compliant_expr._from_call( - lambda _input, format: _input.dt.strftime(format.replace("%.f", ".%f")), + lambda _input, format: _input.dt.strftime(format.replace("%.f", ".%f")), # noqa: A006 "strftime", format=format, returns_scalar=self._compliant_expr._returns_scalar, diff --git a/narwhals/_dask/expr_str.py b/narwhals/_dask/expr_str.py index 26f2f3ccc..1b4ad1b56 100644 --- a/narwhals/_dask/expr_str.py +++ b/narwhals/_dask/expr_str.py @@ -96,7 +96,7 @@ def slice(self: Self, offset: int, length: int | None) -> DaskExpr: def to_datetime(self: Self, format: str | None) -> DaskExpr: # noqa: A002 return self._compliant_expr._from_call( - lambda _input, format: dd.to_datetime(_input, format=format), + lambda _input, format: dd.to_datetime(_input, format=format), # noqa: A006 "to_datetime", format=format, returns_scalar=self._compliant_expr._returns_scalar, diff --git a/narwhals/_dask/selectors.py b/narwhals/_dask/selectors.py index 8c11bd890..f411d0a98 100644 --- a/narwhals/_dask/selectors.py +++ b/narwhals/_dask/selectors.py @@ -121,11 +121,7 @@ def func(df: DaskLazyFrame) -> list[dx.Series]: class DaskSelector(DaskExpr): def __repr__(self: Self) -> str: # pragma: no cover - return ( - f"DaskSelector(" - f"depth={self._depth}, " - f"function_name={self._function_name})" - ) + return f"DaskSelector(depth={self._depth}, function_name={self._function_name})" def _to_expr(self: Self) -> DaskExpr: return DaskExpr( diff --git a/narwhals/_duckdb/selectors.py b/narwhals/_duckdb/selectors.py index 4f1bcab13..28802c031 100644 --- a/narwhals/_duckdb/selectors.py +++ b/narwhals/_duckdb/selectors.py @@ -114,7 +114,7 @@ def func(df: DuckDBLazyFrame) -> list[duckdb.Expression]: class DuckDBSelector(DuckDBExpr): def __repr__(self: Self) -> str: # pragma: no cover - return f"DuckDBSelector(" f"function_name={self._function_name})" + return f"DuckDBSelector(function_name={self._function_name})" def _to_expr(self: Self) -> DuckDBExpr: return DuckDBExpr( diff --git a/narwhals/_pandas_like/expr.py b/narwhals/_pandas_like/expr.py index 2fb135dab..18398ce06 100644 --- a/narwhals/_pandas_like/expr.py +++ b/narwhals/_pandas_like/expr.py @@ -75,10 +75,7 @@ def __call__(self: Self, df: PandasLikeDataFrame) -> Sequence[PandasLikeSeries]: def __repr__(self) -> str: # pragma: no cover return ( - f"PandasLikeExpr(" - f"depth={self._depth}, " - f"function_name={self._function_name}, " - ")" + f"PandasLikeExpr(depth={self._depth}, function_name={self._function_name}, )" ) def __narwhals_namespace__(self: Self) -> PandasLikeNamespace: diff --git a/narwhals/_pandas_like/selectors.py b/narwhals/_pandas_like/selectors.py index bb9b9aee0..5074425ef 100644 --- a/narwhals/_pandas_like/selectors.py +++ b/narwhals/_pandas_like/selectors.py @@ -120,9 +120,7 @@ def func(df: PandasLikeDataFrame) -> list[PandasLikeSeries]: class PandasSelector(PandasLikeExpr): def __repr__(self) -> str: # pragma: no cover return ( - f"PandasSelector(" - f"depth={self._depth}, " - f"function_name={self._function_name}, " + f"PandasSelector(depth={self._depth}, function_name={self._function_name}, " ) def _to_expr(self: Self) -> PandasLikeExpr: diff --git a/narwhals/_spark_like/selectors.py b/narwhals/_spark_like/selectors.py index 244138516..f2425de5c 100644 --- a/narwhals/_spark_like/selectors.py +++ b/narwhals/_spark_like/selectors.py @@ -117,7 +117,7 @@ def func(df: SparkLikeLazyFrame) -> list[Column]: class SparkLikeSelector(SparkLikeExpr): def __repr__(self: Self) -> str: # pragma: no cover - return f"SparkLikeSelector(" f"function_name={self._function_name})" + return f"SparkLikeSelector(function_name={self._function_name})" def _to_expr(self: Self) -> SparkLikeExpr: return SparkLikeExpr( diff --git a/narwhals/dataframe.py b/narwhals/dataframe.py index c608ff32d..d55d53624 100644 --- a/narwhals/dataframe.py +++ b/narwhals/dataframe.py @@ -1699,7 +1699,9 @@ def iter_rows( We define a library agnostic function: >>> def agnostic_iter_rows(df_native: IntoDataFrame, *, named: bool): - ... return nw.from_native(df_native, eager_only=True).iter_rows(named=named) + ... return nw.from_native(df_native, eager_only=True).iter_rows( + ... named=named + ... ) We can then pass any supported library such as Pandas, Polars, or PyArrow to `agnostic_iter_rows`: @@ -2376,7 +2378,9 @@ def filter( >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... return df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).to_native() + ... return df.filter( + ... (nw.col("foo") < 3) & (nw.col("ham") == "a") + ... ).to_native() >>> agnostic_filter(df_pd) foo bar ham 0 1 6 a @@ -2555,7 +2559,12 @@ def group_by( >>> def agnostic_group_by_agg(df_native: IntoDataFrameT) -> IntoDataFrameT: ... df = nw.from_native(df_native, eager_only=True) - ... return df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b").to_native() + ... return ( + ... df.group_by(["a", "b"]) + ... .agg(nw.max("c")) + ... .sort("a", "b") + ... .to_native() + ... ) >>> agnostic_group_by_agg(df_pd) a b c @@ -2850,7 +2859,9 @@ def join_asof( ... ) -> IntoFrameT: ... df = nw.from_native(df_native) ... other = nw.from_native(other_native) - ... return df.join_asof(other, on="datetime", strategy=strategy).to_native() + ... return df.join_asof( + ... other, on="datetime", strategy=strategy + ... ).to_native() We can then pass any supported library such as Pandas or Polars to `agnostic_join_asof_datetime`: @@ -3055,14 +3066,22 @@ def is_empty(self: Self) -> bool: >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) >>> df_pa = pa.table(data) - >>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa) + >>> ( + ... agnostic_is_empty(df_pd), + ... agnostic_is_empty(df_pl), + ... agnostic_is_empty(df_pa), + ... ) (True, True, True) >>> data = {"foo": [100, 2, 3], "bar": [4, 5, 6]} >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) >>> df_pa = pa.table(data) - >>> agnostic_is_empty(df_pd), agnostic_is_empty(df_pl), agnostic_is_empty(df_pa) + >>> ( + ... agnostic_is_empty(df_pd), + ... agnostic_is_empty(df_pl), + ... agnostic_is_empty(df_pa), + ... ) (False, False, False) """ return self._compliant_frame.is_empty() # type: ignore[no-any-return] @@ -3395,7 +3414,9 @@ def pivot( >>> def agnostic_pivot(df_native: IntoDataFrameT) -> IntoDataFrameT: ... df = nw.from_native(df_native, eager_only=True) - ... return df.pivot("col", index="ix", aggregate_function="sum").to_native() + ... return df.pivot( + ... "col", index="ix", aggregate_function="sum" + ... ).to_native() We can then pass any supported library such as Pandas or Polars to `agnostic_pivot`: @@ -4306,7 +4327,9 @@ def with_columns( >>> def agnostic_with_columns(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) ... return ( - ... df.with_columns((nw.col("a") * 2).alias("2a")).collect().to_native() + ... df.with_columns((nw.col("a") * 2).alias("2a")) + ... .collect() + ... .to_native() ... ) We can then pass any supported library such as Polars or Dask to `agnostic_with_columns`: @@ -4422,7 +4445,9 @@ def select( >>> def agnostic_select(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo"), nw.col("bar") + 1).collect().to_native() + ... return ( + ... df.select(nw.col("foo"), nw.col("bar") + 1).collect().to_native() + ... ) >>> agnostic_select(lf_pl) shape: (3, 2) @@ -5047,7 +5072,9 @@ def sort( >>> def agnostic_sort(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... return df.sort("c", "a", descending=[False, True]).collect().to_native() + ... return ( + ... df.sort("c", "a", descending=[False, True]).collect().to_native() + ... ) We can then pass any supported library such as Polars or Dask to `agnostic_sort`: @@ -5262,7 +5289,9 @@ def join_asof( │ 2018-08-01 00:00:00 ┆ 82.66 ┆ 4566 │ │ 2019-01-01 00:00:00 ┆ 83.12 ┆ 4696 │ └─────────────────────┴────────────┴──────┘ - >>> agnostic_join_asof_datetime(population_dask, gdp_dask, strategy="backward") + >>> agnostic_join_asof_datetime( + ... population_dask, gdp_dask, strategy="backward" + ... ) datetime population gdp 0 2016-03-01 82.19 4164 1 2018-08-01 82.66 4566 diff --git a/narwhals/dtypes.py b/narwhals/dtypes.py index 8c75bc8ac..00e81ac9f 100644 --- a/narwhals/dtypes.py +++ b/narwhals/dtypes.py @@ -472,7 +472,9 @@ class Datetime(TemporalType): ... .astype("datetime64[ms, Africa/Accra]") ... ) >>> ser_pl = ( - ... pl.Series(data).cast(pl.Datetime("ms")).dt.replace_time_zone("Africa/Accra") + ... pl.Series(data) + ... .cast(pl.Datetime("ms")) + ... .dt.replace_time_zone("Africa/Accra") ... ) >>> ser_pa = pc.assume_timezone( ... pa.chunked_array([data], type=pa.timestamp("ms")), "Africa/Accra" diff --git a/narwhals/expr.py b/narwhals/expr.py index 1dd5654a2..71e921e1a 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -1437,7 +1437,9 @@ def is_nan(self: Self) -> Self: ... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)" ... ) >>> df = nw.from_native(df_native) - >>> df.with_columns(a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()) + >>> df.with_columns( + ... a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan() + ... ) ┌────────────────────────────────────────┐ | Narwhals LazyFrame | |----------------------------------------| @@ -1507,7 +1509,9 @@ def fill_null( ... } ... ) >>> df = nw.from_native(df_native) - >>> df.with_columns(nw.col("a", "b").fill_null(0).name.suffix("_nulls_filled")) + >>> df.with_columns( + ... nw.col("a", "b").fill_null(0).name.suffix("_nulls_filled") + ... ) ┌────────────────────────────────────────────────┐ | Narwhals DataFrame | |------------------------------------------------| @@ -1771,7 +1775,9 @@ def null_count(self: Self) -> Self: Examples: >>> import pandas as pd >>> import narwhals as nw - >>> df_native = pd.DataFrame({"a": [1, 2, None, 1], "b": ["a", None, "b", None]}) + >>> df_native = pd.DataFrame( + ... {"a": [1, 2, None, 1], "b": ["a", None, "b", None]} + ... ) >>> df = nw.from_native(df_native) >>> df.select(nw.all().null_count()) ┌──────────────────┐ @@ -1830,7 +1836,9 @@ def is_last_distinct(self: Self) -> Self: >>> import narwhals as nw >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}) >>> df = nw.from_native(df_native) - >>> df.with_columns(nw.all().is_last_distinct().name.suffix("_is_last_distinct")) + >>> df.with_columns( + ... nw.all().is_last_distinct().name.suffix("_is_last_distinct") + ... ) ┌───────────────────────────────────────────────┐ | Narwhals DataFrame | |-----------------------------------------------| @@ -1872,7 +1880,9 @@ def quantile( Examples: >>> import pandas as pd >>> import narwhals as nw - >>> df_native = pd.DataFrame({"a": list(range(50)), "b": list(range(50, 100))}) + >>> df_native = pd.DataFrame( + ... {"a": list(range(50)), "b": list(range(50, 100))} + ... ) >>> df = nw.from_native(df_native) >>> df.select(nw.col("a", "b").quantile(0.5, interpolation="linear")) ┌──────────────────┐ diff --git a/narwhals/expr_dt.py b/narwhals/expr_dt.py index 6ea1fbbdd..9495bc4d4 100644 --- a/narwhals/expr_dt.py +++ b/narwhals/expr_dt.py @@ -34,7 +34,9 @@ def date(self: Self) -> ExprT: >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)]} + >>> data = { + ... "a": [datetime(2012, 1, 7, 10, 20), datetime(2023, 3, 10, 11, 32)] + ... } >>> df_pd = pd.DataFrame(data).convert_dtypes(dtype_backend="pyarrow") >>> df_pl = pl.DataFrame(data) >>> df_pa = pa.table(data) @@ -913,7 +915,9 @@ def total_seconds(self: Self) -> ExprT: >>> import narwhals as nw >>> from narwhals.typing import IntoFrameT >>> - >>> data = {"a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)]} + >>> data = { + ... "a": [timedelta(seconds=10), timedelta(seconds=20, milliseconds=40)] + ... } >>> df_pd = pd.DataFrame(data) >>> df_pl = pl.DataFrame(data) >>> df_pa = pa.table(data) diff --git a/narwhals/expr_name.py b/narwhals/expr_name.py index 706f9427d..60a1dd4be 100644 --- a/narwhals/expr_name.py +++ b/narwhals/expr_name.py @@ -44,7 +44,9 @@ def keep(self: Self) -> ExprT: >>> def agnostic_name_keep(df_native: IntoFrame) -> list[str]: ... df = nw.from_native(df_native) - ... return df.select(nw.col("foo").alias("alias_for_foo").name.keep()).columns + ... return df.select( + ... nw.col("foo").alias("alias_for_foo").name.keep() + ... ).columns We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_name_keep`: diff --git a/narwhals/expr_str.py b/narwhals/expr_str.py index 67de3b131..d82b2ec73 100644 --- a/narwhals/expr_str.py +++ b/narwhals/expr_str.py @@ -180,7 +180,9 @@ def replace_all( >>> def agnostic_str_replace_all(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... df = df.with_columns(replaced=nw.col("foo").str.replace_all("abc", "")) + ... df = df.with_columns( + ... replaced=nw.col("foo").str.replace_all("abc", "") + ... ) ... return df.to_native() We can then pass any supported library such as pandas, Polars, or @@ -429,7 +431,9 @@ def contains(self: Self, pattern: str, *, literal: bool = False) -> ExprT: ... df = nw.from_native(df_native) ... return df.with_columns( ... default_match=nw.col("pets").str.contains("parrot|Dove"), - ... case_insensitive_match=nw.col("pets").str.contains("(?i)parrot|Dove"), + ... case_insensitive_match=nw.col("pets").str.contains( + ... "(?i)parrot|Dove" + ... ), ... literal_match=nw.col("pets").str.contains( ... "parrot|Dove", literal=True ... ), diff --git a/narwhals/functions.py b/narwhals/functions.py index 7fd648dc1..b13bf51f0 100644 --- a/narwhals/functions.py +++ b/narwhals/functions.py @@ -357,7 +357,7 @@ def _new_series_impl( native_series = native_namespace.chunked_array([values], type=dtype) elif implementation is Implementation.DASK: # pragma: no cover - msg = "Dask support in Narwhals is lazy-only, so `new_series` is " "not supported" + msg = "Dask support in Narwhals is lazy-only, so `new_series` is not supported" raise NotImplementedError(msg) else: # pragma: no cover try: @@ -407,7 +407,9 @@ def from_dict( >>> def agnostic_from_dict(df_native: IntoFrameT) -> IntoFrameT: ... new_data = {"c": [5, 2], "d": [1, 4]} ... native_namespace = nw.get_native_namespace(df_native) - ... return nw.from_dict(new_data, native_namespace=native_namespace).to_native() + ... return nw.from_dict( + ... new_data, native_namespace=native_namespace + ... ).to_native() Let's see what happens when passing pandas, Polars or PyArrow input: @@ -583,7 +585,9 @@ def from_numpy( ... new_data = np.array([[5, 2, 1], [1, 4, 3]]) ... df = nw.from_native(df_native) ... native_namespace = nw.get_native_namespace(df) - ... return nw.from_numpy(new_data, native_namespace=native_namespace).to_native() + ... return nw.from_numpy( + ... new_data, native_namespace=native_namespace + ... ).to_native() Let's see what happens when passing pandas, Polars or PyArrow input: @@ -1029,7 +1033,9 @@ def read_csv( Let's create an agnostic function that reads a csv file with a specified native namespace: >>> def agnostic_read_csv(native_namespace: ModuleType) -> IntoDataFrame: - ... return nw.read_csv("file.csv", native_namespace=native_namespace).to_native() + ... return nw.read_csv( + ... "file.csv", native_namespace=native_namespace + ... ).to_native() Then we can read the file by passing pandas, Polars or PyArrow namespaces: @@ -1115,7 +1121,9 @@ def scan_csv( Let's create an agnostic function that lazily reads a csv file with a specified native namespace: >>> def agnostic_scan_csv(native_namespace: ModuleType) -> IntoFrame: - ... return nw.scan_csv("file.csv", native_namespace=native_namespace).to_native() + ... return nw.scan_csv( + ... "file.csv", native_namespace=native_namespace + ... ).to_native() Then we can read the file by passing, for example, Polars or Dask namespaces: diff --git a/narwhals/group_by.py b/narwhals/group_by.py index ae8185631..359eeb4c4 100644 --- a/narwhals/group_by.py +++ b/narwhals/group_by.py @@ -185,7 +185,9 @@ def agg(self: Self, *aggs: Expr | Iterable[Expr], **named_aggs: Expr) -> LazyFra >>> def agnostic_func_mult_col(lf_native: IntoFrameT) -> IntoFrameT: ... lf = nw.from_native(lf_native) - ... return nw.to_native(lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b")) + ... return nw.to_native( + ... lf.group_by("a", "b").agg(nw.sum("c")).sort("a", "b") + ... ) We can then pass a lazy frame and materialise it with `collect`: diff --git a/narwhals/series.py b/narwhals/series.py index f76944ed6..12ec95f12 100644 --- a/narwhals/series.py +++ b/narwhals/series.py @@ -326,7 +326,9 @@ def scatter(self: Self, indices: int | Sequence[int], values: Any) -> Self: >>> def agnostic_scatter(df_native: IntoFrameT) -> IntoFrameT: ... df = nw.from_native(df_native) - ... return df.with_columns(df["a"].scatter([0, 1], [999, 888])).to_native() + ... return df.with_columns( + ... df["a"].scatter([0, 1], [999, 888]) + ... ).to_native() We can then pass any supported library such as pandas, Polars, or PyArrow to `agnostic_scatter`: @@ -2248,7 +2250,9 @@ def replace_strict( >>> def agnostic_replace_strict(s_native: IntoSeriesT) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.replace_strict( - ... [0, 1, 2, 3], ["zero", "one", "two", "three"], return_dtype=nw.String + ... [0, 1, 2, 3], + ... ["zero", "one", "two", "three"], + ... return_dtype=nw.String, ... ).to_native() We can then pass any supported library such as pandas, Polars, or @@ -2570,7 +2574,9 @@ def fill_null( Using a strategy: - >>> def agnostic_fill_null_with_strategy(s_native: IntoSeriesT) -> IntoSeriesT: + >>> def agnostic_fill_null_with_strategy( + ... s_native: IntoSeriesT, + ... ) -> IntoSeriesT: ... s = nw.from_native(s_native, series_only=True) ... return s.fill_null(strategy="forward", limit=1).to_native() @@ -3576,7 +3582,9 @@ def zip_with(self: Self, mask: Self, other: Self) -> Self: Let's define a dataframe-agnostic function: >>> def agnostic_zip_with( - ... s1_native: IntoSeriesT, mask_native: IntoSeriesT, s2_native: IntoSeriesT + ... s1_native: IntoSeriesT, + ... mask_native: IntoSeriesT, + ... s2_native: IntoSeriesT, ... ) -> IntoSeriesT: ... s1 = nw.from_native(s1_native, series_only=True) ... mask = nw.from_native(mask_native, series_only=True) diff --git a/narwhals/utils.py b/narwhals/utils.py index ebdd66389..7f1229793 100644 --- a/narwhals/utils.py +++ b/narwhals/utils.py @@ -777,7 +777,9 @@ def maybe_convert_dtypes( ... } ... ) >>> df = nw.from_native(df_pd) - >>> nw.to_native(nw.maybe_convert_dtypes(df)).dtypes # doctest: +NORMALIZE_WHITESPACE + >>> nw.to_native( + ... nw.maybe_convert_dtypes(df) + ... ).dtypes # doctest: +NORMALIZE_WHITESPACE a Int32 b boolean dtype: object @@ -1089,23 +1091,21 @@ def generate_repr(header: str, native_repr: str) -> str: if max_native_width + 2 <= terminal_width: length = max(max_native_width, len(header)) - output = f"┌{'─'*length}┐\n" + output = f"┌{'─' * length}┐\n" header_extra = length - len(header) - output += ( - f"|{' '*(header_extra//2)}{header}{' '*(header_extra//2 + header_extra%2)}|\n" - ) - output += f"|{'-'*(length)}|\n" + output += f"|{' ' * (header_extra // 2)}{header}{' ' * (header_extra // 2 + header_extra % 2)}|\n" + output += f"|{'-' * (length)}|\n" start_extra = (length - max_native_width) // 2 end_extra = (length - max_native_width) // 2 + (length - max_native_width) % 2 for line in native_lines: - output += f"|{' '*(start_extra)}{line}{' '*(end_extra + max_native_width - len(line))}|\n" + output += f"|{' ' * (start_extra)}{line}{' ' * (end_extra + max_native_width - len(line))}|\n" output += f"└{'─' * length}┘" return output diff = 39 - len(header) return ( f"┌{'─' * (39)}┐\n" - f"|{' '*(diff//2)}{header}{' '*(diff//2+diff%2)}|\n" + f"|{' ' * (diff // 2)}{header}{' ' * (diff // 2 + diff % 2)}|\n" "| Use `.to_native` to see native output |\n└" f"{'─' * 39}┘" ) diff --git a/pyproject.toml b/pyproject.toml index c2a88bbe8..d5dcd6870 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -102,6 +102,7 @@ lint.select = [ lint.ignore = [ "A001", "A004", + "A005", "ARG002", "ANN401", "C901", diff --git a/tests/utils.py b/tests/utils.py index 31cd386d5..6aac5e1b8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -103,9 +103,9 @@ def assert_equal_data(result: Any, expected: dict[str, Any]) -> None: sort_key = next(iter(expected.keys())) expected = _sort_dict_by_key(expected, sort_key) result = _sort_dict_by_key(result, sort_key) - assert list(result.keys()) == list( - expected.keys() - ), f"Result keys {result.keys()}, expected keys: {expected.keys()}" + assert list(result.keys()) == list(expected.keys()), ( + f"Result keys {result.keys()}, expected keys: {expected.keys()}" + ) for key, expected_value in expected.items(): result_value = result[key] @@ -124,7 +124,9 @@ def assert_equal_data(result: Any, expected: dict[str, Any]) -> None: are_equivalent_values = pd.isna(rhs) else: are_equivalent_values = lhs == rhs - assert are_equivalent_values, f"Mismatch at index {i}: {lhs} != {rhs}\nExpected: {expected}\nGot: {result}" + assert are_equivalent_values, ( + f"Mismatch at index {i}: {lhs} != {rhs}\nExpected: {expected}\nGot: {result}" + ) def maybe_get_modin_df(df_pandas: pd.DataFrame) -> Any: diff --git a/tpch/tests/queries_test.py b/tpch/tests/queries_test.py index 2dfd7cc1c..2fb90f648 100644 --- a/tpch/tests/queries_test.py +++ b/tpch/tests/queries_test.py @@ -20,6 +20,6 @@ def test_execute_scripts(query_path: Path) -> None: text=True, check=False, ) - assert ( - result.returncode == 0 - ), f"Script {query_path} failed with error: {result.stderr}" + assert result.returncode == 0, ( + f"Script {query_path} failed with error: {result.stderr}" + )