diff --git a/narwhals/expr.py b/narwhals/expr.py index ea912807a..1dd5654a2 100644 --- a/narwhals/expr.py +++ b/narwhals/expr.py @@ -83,47 +83,17 @@ def alias(self: Self, name: str) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_alias(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select((nw.col("b") + 10).alias("c")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_alias`: - - >>> agnostic_alias(df_pd) - c - 0 14 - 1 15 - - >>> agnostic_alias(df_pl) - shape: (2, 1) - ┌─────┐ - │ c │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 14 │ - │ 15 │ - └─────┘ - - >>> agnostic_alias(df_pa) - pyarrow.Table - c: int64 - ---- - c: [[14,15]] - + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 5]}) + >>> df = nw.from_native(df_native) + >>> df.select((nw.col("b") + 10).alias("c")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | c | + | 0 14 | + | 1 15 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).alias(name), @@ -150,50 +120,19 @@ def pipe( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Lets define a library-agnostic function: - - >>> def agnostic_pipe(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").pipe(lambda x: x + 1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_pipe`: - - >>> agnostic_pipe(df_pd) - a - 0 2 - 1 3 - 2 4 - 3 5 - - >>> agnostic_pipe(df_pl) - shape: (4, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 3 │ - │ 4 │ - │ 5 │ - └─────┘ - - >>> agnostic_pipe(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,3,4,5]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_piped=nw.col("a").pipe(lambda x: x + 1)) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a a_piped | + | 0 1 2 | + | 1 2 3 | + | 2 3 4 | + | 3 4 5 | + └──────────────────┘ """ return function(self, *args, **kwargs) @@ -208,50 +147,18 @@ def cast(self: Self, dtype: DType | type[DType]) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_cast(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cast`: - - >>> agnostic_cast(df_pd) - foo bar - 0 1.0 6 - 1 2.0 7 - 2 3.0 8 - >>> agnostic_cast(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ foo ┆ bar │ - │ --- ┆ --- │ - │ f32 ┆ u8 │ - ╞═════╪═════╡ - │ 1.0 ┆ 6 │ - │ 2.0 ┆ 7 │ - │ 3.0 ┆ 8 │ - └─────┴─────┘ - >>> agnostic_cast(df_pa) - pyarrow.Table - foo: float - bar: uint8 - ---- - foo: [[1,2,3]] - bar: [[6,7,8]] + >>> df_native = pd.DataFrame({"foo": [1, 2, 3], "bar": [6.0, 7.0, 8.0]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("foo").cast(nw.Float32), nw.col("bar").cast(nw.UInt8)) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | foo bar | + | 0 1.0 6 | + | 1 2.0 7 | + | 2 3.0 8 | + └──────────────────┘ """ _validate_dtype(dtype) return self.__class__( @@ -546,46 +453,16 @@ def any(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [True, False], "b": [True, True]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a dataframe-agnostic function: - - >>> def agnostic_any(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").any()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_any`: - - >>> agnostic_any(df_pd) - a b - 0 True True - - >>> agnostic_any(df_pl) - shape: (1, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞══════╪══════╡ - │ true ┆ true │ - └──────┴──────┘ - - >>> agnostic_any(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[true]] - b: [[true]] + >>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").any()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 True True | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).any(), @@ -602,46 +479,16 @@ def all(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [True, False], "b": [True, True]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_all(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").all()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_all`: - - >>> agnostic_all(df_pd) - a b - 0 False True - - >>> agnostic_all(df_pl) - shape: (1, 2) - ┌───────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪══════╡ - │ false ┆ true │ - └───────┴──────┘ - - >>> agnostic_all(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[false]] - b: [[true]] + >>> df_native = pd.DataFrame({"a": [True, False], "b": [True, True]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").all()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 False True | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).all(), @@ -762,47 +609,17 @@ def mean(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [-1, 0, 1], "b": [2, 4, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").mean()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_mean`: - - >>> agnostic_mean(df_pd) - a b - 0 0.0 4.0 - - >>> agnostic_mean(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 0.0 ┆ 4.0 │ - └─────┴─────┘ - - >>> agnostic_mean(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[0]] - b: [[4]] + >>> df_native = pd.DataFrame({"a": [-1, 0, 1], "b": [2, 4, 6]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").mean()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 0.0 4.0 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).mean(), @@ -822,46 +639,16 @@ def median(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 8, 3], "b": [4, 5, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_median(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").median()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_median`: - - >>> agnostic_median(df_pd) - a b - 0 3.0 4.0 - - >>> agnostic_median(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 3.0 ┆ 4.0 │ - └─────┴─────┘ - - >>> agnostic_median(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[3]] - b: [[4]] + >>> df_native = pd.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").median()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 3.0 4.0 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).median(), @@ -881,46 +668,17 @@ def std(self: Self, *, ddof: int = 1) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_std(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").std(ddof=0)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_std`: - - >>> agnostic_std(df_pd) - a b - 0 17.79513 1.265789 - >>> agnostic_std(df_pl) - shape: (1, 2) - ┌──────────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════════╪══════════╡ - │ 17.79513 ┆ 1.265789 │ - └──────────┴──────────┘ - >>> agnostic_std(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[17.795130420052185]] - b: [[1.2657891697365016]] - + >>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").std(ddof=0)) + ┌─────────────────────┐ + | Narwhals DataFrame | + |---------------------| + | a b| + |0 17.79513 1.265789| + └─────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).std(ddof=ddof), @@ -940,47 +698,17 @@ def var(self: Self, *, ddof: int = 1) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [20, 25, 60], "b": [1.5, 1, -1.4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_var(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").var(ddof=0)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_var`: - - >>> agnostic_var(df_pd) - a b - 0 316.666667 1.602222 - - >>> agnostic_var(df_pl) - shape: (1, 2) - ┌────────────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞════════════╪══════════╡ - │ 316.666667 ┆ 1.602222 │ - └────────────┴──────────┘ - - >>> agnostic_var(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[316.6666666666667]] - b: [[1.6022222222222222]] + >>> df_native = pd.DataFrame({"a": [20, 25, 60], "b": [1.5, 1, -1.4]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").var(ddof=0)) + ┌───────────────────────┐ + | Narwhals DataFrame | + |-----------------------| + | a b| + |0 316.666667 1.602222| + └───────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).var(ddof=ddof), @@ -1010,53 +738,23 @@ def map_batches( A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [4, 5, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_map_batches(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").map_batches( - ... lambda s: s.to_numpy() + 1, return_dtype=nw.Float64 - ... ) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_map_batches`: - - >>> agnostic_map_batches(df_pd) - a b - 0 2.0 5.0 - 1 3.0 6.0 - 2 4.0 7.0 - >>> agnostic_map_batches(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪═════╡ - │ 2.0 ┆ 5.0 │ - │ 3.0 ┆ 6.0 │ - │ 4.0 ┆ 7.0 │ - └─────┴─────┘ - >>> agnostic_map_batches(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[2,3,4]] - b: [[5,6,7]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... nw.col("a", "b") + ... .map_batches(lambda s: s.to_numpy() + 1, return_dtype=nw.Float64) + ... .name.suffix("_mapped") + ... ) + ┌───────────────────────────┐ + | Narwhals DataFrame | + |---------------------------| + | a b a_mapped b_mapped| + |0 1 4 2.0 5.0| + |1 2 5 3.0 6.0| + |2 3 6 4.0 7.0| + └───────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).map_batches( @@ -1076,46 +774,16 @@ def skew(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_skew(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").skew()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_skew`: - - >>> agnostic_skew(df_pd) - a b - 0 0.0 1.472427 - - >>> agnostic_skew(df_pl) - shape: (1, 2) - ┌─────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞═════╪══════════╡ - │ 0.0 ┆ 1.472427 │ - └─────┴──────────┘ - - >>> agnostic_skew(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[0]] - b: [[1.4724267269058975]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 2, 10, 100]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").skew()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 0.0 1.472427 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).skew(), @@ -1131,45 +799,21 @@ def sum(self: Self) -> Expr: A new expression. Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa + >>> import duckdb >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [5, 10], "b": [50, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").sum()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_sum`: - - >>> agnostic_sum(df_pd) - a b - 0 15 150 - >>> agnostic_sum(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 15 ┆ 150 │ - └─────┴─────┘ - >>> agnostic_sum(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[15]] - b: [[150]] + >>> df_native = duckdb.sql("SELECT * FROM VALUES (5, 50), (10, 100) df(a, b)") + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").sum()) + ┌───────────────────┐ + |Narwhals LazyFrame | + |-------------------| + |┌────────┬────────┐| + |│ a │ b │| + |│ int128 │ int128 │| + |├────────┼────────┤| + |│ 15 │ 150 │| + |└────────┴────────┘| + └───────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).sum(), @@ -1186,46 +830,16 @@ def min(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2], "b": [4, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.min("a", "b")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_min`: - - >>> agnostic_min(df_pd) - a b - 0 1 3 - - >>> agnostic_min(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - └─────┴─────┘ - - >>> agnostic_min(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1]] - b: [[3]] + >>> df_native = pd.DataFrame({"a": [1, 2], "b": [4, 3]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.min("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 3 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).min(), @@ -1241,47 +855,17 @@ def max(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [10, 20], "b": [50, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.max("a", "b")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_max`: - - >>> agnostic_max(df_pd) - a b - 0 20 100 - - >>> agnostic_max(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 20 ┆ 100 │ - └─────┴─────┘ - - >>> agnostic_max(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[20]] - b: [[100]] + >>> df_native = pd.DataFrame({"a": [10, 20], "b": [50, 100]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.max("a", "b")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 20 100 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).max(), @@ -1297,49 +881,17 @@ def arg_min(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [10, 20], "b": [150, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_arg_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").arg_min().name.suffix("_arg_min") - ... 
).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_arg_min`: - - >>> agnostic_arg_min(df_pd) - a_arg_min b_arg_min - 0 0 1 - - >>> agnostic_arg_min(df_pl) - shape: (1, 2) - ┌───────────┬───────────┐ - │ a_arg_min ┆ b_arg_min │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═══════════╪═══════════╡ - │ 0 ┆ 1 │ - └───────────┴───────────┘ - - >>> agnostic_arg_min(df_pa) - pyarrow.Table - a_arg_min: int64 - b_arg_min: int64 - ---- - a_arg_min: [[0]] - b_arg_min: [[1]] + >>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").arg_min().name.suffix("_arg_min")) + ┌───────────────────────┐ + | Narwhals DataFrame | + |-----------------------| + | a_arg_min b_arg_min| + |0 0 1| + └───────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).arg_min(), @@ -1355,49 +907,17 @@ def arg_max(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [10, 20], "b": [150, 100]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_arg_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").arg_max().name.suffix("_arg_max") - ... 
).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_arg_max`: - - >>> agnostic_arg_max(df_pd) - a_arg_max b_arg_max - 0 1 0 - - >>> agnostic_arg_max(df_pl) - shape: (1, 2) - ┌───────────┬───────────┐ - │ a_arg_max ┆ b_arg_max │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═══════════╪═══════════╡ - │ 1 ┆ 0 │ - └───────────┴───────────┘ - - >>> agnostic_arg_max(df_pa) - pyarrow.Table - a_arg_max: int64 - b_arg_max: int64 - ---- - a_arg_max: [[1]] - b_arg_max: [[0]] + >>> df_native = pd.DataFrame({"a": [10, 20], "b": [150, 100]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").arg_max().name.suffix("_arg_max")) + ┌───────────────────────┐ + | Narwhals DataFrame | + |-----------------------| + | a_arg_max b_arg_max| + |0 1 0| + └───────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).arg_max(), @@ -1413,47 +933,17 @@ def count(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [None, 4, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().count()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_count`: - - >>> agnostic_count(df_pd) - a b - 0 3 2 - - >>> agnostic_count(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 3 ┆ 2 │ - └─────┴─────┘ - - >>> agnostic_count(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[3]] - b: [[2]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3], "b": [None, 4, 4]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.all().count()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 3 2 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).count(), @@ -1469,45 +959,17 @@ def n_unique(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_n_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").n_unique()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_n_unique`: - - >>> agnostic_n_unique(df_pd) - a b - 0 5 3 - >>> agnostic_n_unique(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 5 ┆ 3 │ - └─────┴─────┘ - >>> agnostic_n_unique(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[5]] - b: [[3]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5], "b": [1, 1, 3, 3, 5]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").n_unique()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 5 3 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).n_unique(), @@ -1523,47 +985,17 @@ def unique(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").unique().sum()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_unique`: - - >>> agnostic_unique(df_pd) - a b - 0 9 12 - - >>> agnostic_unique(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 9 ┆ 12 │ - └─────┴─────┘ - - >>> agnostic_unique(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[9]] - b: [[12]] + >>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").unique().sum()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 9 12 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).unique(), @@ -1579,49 +1011,18 @@ def abs(self: Self) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, -2], "b": [-3, 4]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_abs(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").abs()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_abs`: - - >>> agnostic_abs(df_pd) - a b - 0 1 3 - 1 2 4 - - >>> agnostic_abs(df_pl) - shape: (2, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 3 │ - │ 2 ┆ 4 │ - └─────┴─────┘ - - >>> agnostic_abs(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2]] - b: [[3,4]] + >>> df_native = pd.DataFrame({"a": [1, -2], "b": [-3, 4]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(nw.col("a", "b").abs().name.suffix("_abs")) + ┌─────────────────────┐ + | Narwhals DataFrame | + |---------------------| + | a b a_abs b_abs| + |0 1 -3 1 3| + |1 -2 4 2 4| + └─────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).abs(), @@ -1640,53 +1041,21 @@ def cum_sum(self: Self, *, reverse: bool = False) -> Self: A new expression. Examples: - >>> import polars as pl >>> import pandas as pd - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a", "b").cum_sum()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_sum`: - - >>> agnostic_cum_sum(df_pd) - a b - 0 1 2 - 1 2 6 - 2 5 10 - 3 10 16 - 4 15 22 - >>> agnostic_cum_sum(df_pl) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 1 ┆ 2 │ - │ 2 ┆ 6 │ - │ 5 ┆ 10 │ - │ 10 ┆ 16 │ - │ 15 ┆ 22 │ - └─────┴─────┘ - >>> agnostic_cum_sum(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1,2,5,10,15]] - b: [[2,6,10,16,22]] + >>> df_native = pd.DataFrame({"a": [1, 1, 3, 5, 5], "b": [2, 4, 4, 6, 6]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_cum_sum=nw.col("a").cum_sum()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b a_cum_sum| + |0 1 2 1| + |1 1 4 2| + |2 3 4 5| + |3 5 6 10| + |4 5 6 15| + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_sum(reverse=reverse), @@ -1711,53 +1080,27 @@ def diff(self: Self) -> Self: nw.col("a").diff().fill_null(0).cast(nw.Int64) Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_diff(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(a_diff=nw.col("a").diff()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_diff`: - - >>> agnostic_diff(df_pd) - a_diff - 0 NaN - 1 0.0 - 2 2.0 - 3 2.0 - 4 0.0 - - >>> agnostic_diff(df_pl) - shape: (5, 1) - ┌────────┐ - │ a_diff │ - │ --- │ - │ i64 │ - ╞════════╡ - │ null │ - │ 0 │ - │ 2 │ - │ 2 │ - │ 0 │ - └────────┘ - - >>> agnostic_diff(df_pa) - pyarrow.Table - a_diff: int64 - ---- - a_diff: [[null,0,2,2,0]] + >>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_diff=nw.col("a").diff()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (5, 2) | + | ┌─────┬────────┐ | + | │ a ┆ a_diff │ | + | │ --- ┆ --- │ | + | │ i64 ┆ i64 │ | + | ╞═════╪════════╡ | + | │ 1 ┆ null │ | + | │ 1 ┆ 0 │ | + | │ 3 ┆ 2 │ | + | │ 5 ┆ 2 │ | + | │ 5 ┆ 0 │ | + | └─────┴────────┘ | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).diff(), @@ -1785,53 +1128,27 @@ def shift(self: Self, n: int) -> Self: nw.col("a").shift(1).fill_null(0).cast(nw.Int64) Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 1, 3, 5, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_shift(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(a_shift=nw.col("a").shift(n=1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_shift`: - - >>> agnostic_shift(df_pd) - a_shift - 0 NaN - 1 1.0 - 2 1.0 - 3 3.0 - 4 5.0 - - >>> agnostic_shift(df_pl) - shape: (5, 1) - ┌─────────┐ - │ a_shift │ - │ --- │ - │ i64 │ - ╞═════════╡ - │ null │ - │ 1 │ - │ 1 │ - │ 3 │ - │ 5 │ - └─────────┘ - - >>> agnostic_shift(df_pa) - pyarrow.Table - a_shift: int64 - ---- - a_shift: [[null,1,1,3,5]] + >>> df_native = pl.DataFrame({"a": [1, 1, 3, 5, 5]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_shift=nw.col("a").shift(n=1)) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + |shape: (5, 2) | + |┌─────┬─────────┐ | + |│ a ┆ a_shift │ | + |│ --- ┆ --- │ | + |│ i64 ┆ i64 │ | + |╞═════╪═════════╡ | + |│ 1 ┆ null │ | + |│ 1 ┆ 1 │ | + |│ 3 ┆ 1 │ | + |│ 5 ┆ 3 │ | + |│ 5 ┆ 5 │ | + |└─────┴─────────┘ | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).shift(n), @@ -1865,58 +1182,25 @@ def replace_strict( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [3, 0, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define dataframe-agnostic functions: - - >>> def agnostic_replace_strict(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").replace_strict( - ... [0, 1, 2, 3], - ... ["zero", "one", "two", "three"], - ... return_dtype=nw.String, - ... ) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_replace_strict`: - - >>> agnostic_replace_strict(df_pd) - a b - 0 3 three - 1 0 zero - 2 1 one - 3 2 two - - >>> agnostic_replace_strict(df_pl) - shape: (4, 2) - ┌─────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ str │ - ╞═════╪═══════╡ - │ 3 ┆ three │ - │ 0 ┆ zero │ - │ 1 ┆ one │ - │ 2 ┆ two │ - └─────┴───────┘ - - >>> agnostic_replace_strict(df_pa) - pyarrow.Table - a: int64 - b: string - ---- - a: [[3,0,1,2]] - b: [["three","zero","one","two"]] + >>> df_native = pd.DataFrame({"a": [3, 0, 1, 2]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... b=nw.col("a").replace_strict( + ... [0, 1, 2, 3], + ... ["zero", "one", "two", "three"], + ... return_dtype=nw.String, + ... ) + ... ) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 3 three | + | 1 0 zero | + | 2 1 one | + | 3 2 two | + └──────────────────┘ """ if new is None: if not isinstance(old, Mapping): @@ -1987,52 +1271,20 @@ def is_between( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 4, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_between(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").is_between(2, 4, "right")).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_between`: - - >>> agnostic_is_between(df_pd) - a - 0 False - 1 False - 2 True - 3 True - 4 False - - >>> agnostic_is_between(df_pl) - shape: (5, 1) - ┌───────┐ - │ a │ - │ --- │ - │ bool │ - ╞═══════╡ - │ false │ - │ false │ - │ true │ - │ true │ - │ false │ - └───────┘ - - >>> agnostic_is_between(df_pa) - pyarrow.Table - a: bool - ---- - a: [[false,false,true,true,false]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 4, 5]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(b=nw.col("a").is_between(2, 4, "right")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 False | + | 1 2 False | + | 2 3 True | + | 3 4 True | + | 4 5 False | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_between( @@ -2058,52 +1310,19 @@ def is_in(self: Self, other: Any) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 9, 10]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_in(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(b=nw.col("a").is_in([1, 2])).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_in`: - - >>> agnostic_is_in(df_pd) - a b - 0 1 True - 1 2 True - 2 9 False - 3 10 False - - >>> agnostic_is_in(df_pl) - shape: (4, 2) - ┌─────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ bool │ - ╞═════╪═══════╡ - │ 1 ┆ true │ - │ 2 ┆ true │ - │ 9 ┆ false │ - │ 10 ┆ false │ - └─────┴───────┘ - - >>> agnostic_is_in(df_pa) - pyarrow.Table - a: int64 - b: bool - ---- - a: [[1,2,9,10]] - b: [[true,true,false,false]] + >>> df_native = pd.DataFrame({"a": [1, 2, 9, 10]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(b=nw.col("a").is_in([1, 2])) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 True | + | 1 2 True | + | 2 9 False | + | 3 10 False | + └──────────────────┘ """ if isinstance(other, Iterable) and not isinstance(other, (str, bytes)): return self.__class__( @@ -2129,53 +1348,23 @@ def filter(self: Self, *predicates: Any) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_filter(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").filter(nw.col("a") > 4), - ... nw.col("b").filter(nw.col("b") < 13), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_filter`: - - >>> agnostic_filter(df_pd) - a b - 3 5 10 - 4 6 11 - 5 7 12 - - >>> agnostic_filter(df_pl) - shape: (3, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ i64 │ - ╞═════╪═════╡ - │ 5 ┆ 10 │ - │ 6 ┆ 11 │ - │ 7 ┆ 12 │ - └─────┴─────┘ - - >>> agnostic_filter(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[5,6,7]] - b: [[10,11,12]] + >>> df_native = pd.DataFrame( + ... {"a": [2, 3, 4, 5, 6, 7], "b": [10, 11, 12, 13, 14, 15]} + ... ) + >>> df = nw.from_native(df_native) + >>> df.select( + ... nw.col("a").filter(nw.col("a") > 4), + ... nw.col("b").filter(nw.col("b") < 13), + ... ) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 3 5 10 | + | 4 6 11 | + | 5 7 12 | + └──────────────────┘ """ flat_predicates = flatten(predicates) return self.__class__( @@ -2202,69 +1391,26 @@ def is_null(self: Self) -> Self: for reference. Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa + >>> import duckdb >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame( - ... { - ... "a": [2, 4, None, 3, 5], - ... "b": [2.0, 4.0, float("nan"), 3.0, 5.0], - ... } + >>> df_native = duckdb.sql( + ... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)" ... ) - >>> data = { - ... "a": [2, 4, None, 3, 5], - ... "b": [2.0, 4.0, None, 3.0, 5.0], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_null(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null() - ... 
).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_is_null`: - - >>> agnostic_is_null(df_pd) - a b a_is_null b_is_null - 0 2.0 2.0 False False - 1 4.0 4.0 False False - 2 NaN NaN True True - 3 3.0 3.0 False False - 4 5.0 5.0 False False - - >>> agnostic_is_null(df_pl) - shape: (5, 4) - ┌──────┬──────┬───────────┬───────────┐ - │ a ┆ b ┆ a_is_null ┆ b_is_null │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ bool ┆ bool │ - ╞══════╪══════╪═══════════╪═══════════╡ - │ 2 ┆ 2.0 ┆ false ┆ false │ - │ 4 ┆ 4.0 ┆ false ┆ false │ - │ null ┆ null ┆ true ┆ true │ - │ 3 ┆ 3.0 ┆ false ┆ false │ - │ 5 ┆ 5.0 ┆ false ┆ false │ - └──────┴──────┴───────────┴───────────┘ - - >>> agnostic_is_null(df_pa) - pyarrow.Table - a: int64 - b: double - a_is_null: bool - b_is_null: bool - ---- - a: [[2,4,null,3,5]] - b: [[2,4,null,3,5]] - a_is_null: [[false,false,true,false,false]] - b_is_null: [[false,false,true,false,false]] + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... a_is_null=nw.col("a").is_null(), b_is_null=nw.col("b").is_null() + ... ) + ┌──────────────────────────────────────────┐ + | Narwhals LazyFrame | + |------------------------------------------| + |┌───────┬────────┬───────────┬───────────┐| + |│ a │ b │ a_is_null │ b_is_null │| + |│ int32 │ double │ boolean │ boolean │| + |├───────┼────────┼───────────┼───────────┤| + |│ NULL │ nan │ true │ false │| + |│ 2 │ 2.0 │ false │ false │| + |└───────┴────────┴───────────┴───────────┘| + └──────────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_null(), @@ -2285,56 +1431,24 @@ def is_nan(self: Self) -> Self: for reference. 
Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa + >>> import duckdb >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"orig": [0.0, None, 2.0]} - >>> df_pd = pd.DataFrame(data).astype({"orig": "Float64"}) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_self_div_is_nan(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... divided=nw.col("orig") / nw.col("orig"), - ... divided_is_nan=(nw.col("orig") / nw.col("orig")).is_nan(), - ... ).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_self_div_is_nan`: - - >>> print(agnostic_self_div_is_nan(df_pd)) - orig divided divided_is_nan - 0 0.0 NaN True - 1 - 2 2.0 1.0 False - - >>> print(agnostic_self_div_is_nan(df_pl)) - shape: (3, 3) - ┌──────┬─────────┬────────────────┐ - │ orig ┆ divided ┆ divided_is_nan │ - │ --- ┆ --- ┆ --- │ - │ f64 ┆ f64 ┆ bool │ - ╞══════╪═════════╪════════════════╡ - │ 0.0 ┆ NaN ┆ true │ - │ null ┆ null ┆ null │ - │ 2.0 ┆ 1.0 ┆ false │ - └──────┴─────────┴────────────────┘ - - >>> print(agnostic_self_div_is_nan(df_pa)) - pyarrow.Table - orig: double - divided: double - divided_is_nan: bool - ---- - orig: [[0,null,2]] - divided: [[nan,null,1]] - divided_is_nan: [[true,null,false]] + >>> df_native = duckdb.sql( + ... "SELECT * FROM VALUES (null, CAST('NaN' AS DOUBLE)), (2, 2.) df(a, b)" + ... 
) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_is_nan=nw.col("a").is_nan(), b_is_nan=nw.col("b").is_nan()) + ┌────────────────────────────────────────┐ + | Narwhals LazyFrame | + |----------------------------------------| + |┌───────┬────────┬──────────┬──────────┐| + |│ a │ b │ a_is_nan │ b_is_nan │| + |│ int32 │ double │ boolean │ boolean │| + |├───────┼────────┼──────────┼──────────┤| + |│ NULL │ nan │ NULL │ true │| + |│ 2 │ 2.0 │ false │ false │| + |└───────┴────────┴──────────┴──────────┘| + └────────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_nan(), @@ -2384,111 +1498,54 @@ def fill_null( for reference. Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame( + >>> df_native = pl.DataFrame( ... { - ... "a": [2, 4, None, None, 3, 5], - ... "b": [2.0, 4.0, float("nan"), float("nan"), 3.0, 5.0], + ... "a": [2, None, None, 3], + ... "b": [2.0, float("nan"), float("nan"), 3.0], ... } ... ) - >>> data = { - ... "a": [2, 4, None, None, 3, 5], - ... "b": [2.0, 4.0, None, None, 3.0, 5.0], - ... } - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_fill_null(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(nw.col("a", "b").fill_null(0)).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_fill_null`: - - >>> agnostic_fill_null(df_pd) - a b - 0 2.0 2.0 - 1 4.0 4.0 - 2 0.0 0.0 - 3 0.0 0.0 - 4 3.0 3.0 - 5 5.0 5.0 - - >>> agnostic_fill_null(df_pl) - shape: (6, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ i64 ┆ f64 │ - ╞═════╪═════╡ - │ 2 ┆ 2.0 │ - │ 4 ┆ 4.0 │ - │ 0 ┆ 0.0 │ - │ 0 ┆ 0.0 │ - │ 3 ┆ 3.0 │ - │ 5 ┆ 5.0 │ - └─────┴─────┘ - - >>> agnostic_fill_null(df_pa) - pyarrow.Table - a: int64 - b: double - ---- - a: [[2,4,0,0,3,5]] - b: [[2,4,0,0,3,5]] + >>> df = nw.from_native(df_native) + >>> df.with_columns(nw.col("a", "b").fill_null(0).name.suffix("_nulls_filled")) + ┌────────────────────────────────────────────────┐ + | Narwhals DataFrame | + |------------------------------------------------| + |shape: (4, 4) | + |┌──────┬─────┬────────────────┬────────────────┐| + |│ a ┆ b ┆ a_nulls_filled ┆ b_nulls_filled │| + |│ --- ┆ --- ┆ --- ┆ --- │| + |│ i64 ┆ f64 ┆ i64 ┆ f64 │| + |╞══════╪═════╪════════════════╪════════════════╡| + |│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │| + |│ null ┆ NaN ┆ 0 ┆ NaN │| + |│ null ┆ NaN ┆ 0 ┆ NaN │| + |│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │| + |└──────┴─────┴────────────────┴────────────────┘| + └────────────────────────────────────────────────┘ Using a strategy: - >>> def agnostic_fill_null_with_strategy(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a", "b") - ... .fill_null(strategy="forward", limit=1) - ... .name.suffix("_filled") - ... 
).to_native() - - >>> agnostic_fill_null_with_strategy(df_pd) - a b a_filled b_filled - 0 2.0 2.0 2.0 2.0 - 1 4.0 4.0 4.0 4.0 - 2 NaN NaN 4.0 4.0 - 3 NaN NaN NaN NaN - 4 3.0 3.0 3.0 3.0 - 5 5.0 5.0 5.0 5.0 - - >>> agnostic_fill_null_with_strategy(df_pl) - shape: (6, 4) - ┌──────┬──────┬──────────┬──────────┐ - │ a ┆ b ┆ a_filled ┆ b_filled │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ i64 ┆ f64 │ - ╞══════╪══════╪══════════╪══════════╡ - │ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │ - │ 4 ┆ 4.0 ┆ 4 ┆ 4.0 │ - │ null ┆ null ┆ 4 ┆ 4.0 │ - │ null ┆ null ┆ null ┆ null │ - │ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │ - │ 5 ┆ 5.0 ┆ 5 ┆ 5.0 │ - └──────┴──────┴──────────┴──────────┘ - - >>> agnostic_fill_null_with_strategy(df_pa) - pyarrow.Table - a: int64 - b: double - a_filled: int64 - b_filled: double - ---- - a: [[2,4,null,null,3,5]] - b: [[2,4,null,null,3,5]] - a_filled: [[2,4,4,null,3,5]] - b_filled: [[2,4,4,null,3,5]] + >>> df.with_columns( + ... nw.col("a", "b") + ... .fill_null(strategy="forward", limit=1) + ... .name.suffix("_nulls_forward_filled") + ... ) + ┌────────────────────────────────────────────────────────────────┐ + | Narwhals DataFrame | + |----------------------------------------------------------------| + |shape: (4, 4) | + |┌──────┬─────┬────────────────────────┬────────────────────────┐| + |│ a ┆ b ┆ a_nulls_forward_filled ┆ b_nulls_forward_filled │| + |│ --- ┆ --- ┆ --- ┆ --- │| + |│ i64 ┆ f64 ┆ i64 ┆ f64 │| + |╞══════╪═════╪════════════════════════╪════════════════════════╡| + |│ 2 ┆ 2.0 ┆ 2 ┆ 2.0 │| + |│ null ┆ NaN ┆ 2 ┆ NaN │| + |│ null ┆ NaN ┆ null ┆ NaN │| + |│ 3 ┆ 3.0 ┆ 3 ┆ 3.0 │| + |└──────┴─────┴────────────────────────┴────────────────────────┘| + └────────────────────────────────────────────────────────────────┘ """ if value is not None and strategy is not None: msg = "cannot specify both `value` and `strategy`" @@ -2521,50 +1578,27 @@ def drop_nulls(self: Self) -> Self: for reference. 
Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> df_pd = pd.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) - >>> df_pl = pl.DataFrame({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) - >>> df_pa = pa.table({"a": [2.0, 4.0, None, 3.0, None, 5.0]}) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_drop_nulls(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").drop_nulls()).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_drop_nulls`: - - >>> agnostic_drop_nulls(df_pd) - a - 0 2.0 - 1 4.0 - 3 3.0 - 5 5.0 - - >>> agnostic_drop_nulls(df_pl) - shape: (4, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 2.0 │ - │ 4.0 │ - │ 3.0 │ - │ 5.0 │ - └─────┘ - - >>> agnostic_drop_nulls(df_pa) - pyarrow.Table - a: double - ---- - a: [[2,4,3,5]] + >>> df_native = pl.DataFrame({"a": [2.0, 4.0, float("nan"), 3.0, None, 5.0]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a").drop_nulls()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | shape: (5, 1) | + | ┌─────┐ | + | │ a │ | + | │ --- │ | + | │ f64 │ | + | ╞═════╡ | + | │ 2.0 │ | + | │ 4.0 │ | + | │ NaN │ | + | │ 3.0 │ | + | │ 5.0 │ | + | └─────┘ | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).drop_nulls(), @@ -2629,79 +1663,31 @@ def over(self: Self, *keys: str | Iterable[str]) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3], "b": [1, 1, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_min_over_b(df_native: IntoFrameT) -> 
IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... a_min_per_group=nw.col("a").min().over("b") - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_min_over_b`: - - >>> agnostic_min_over_b(df_pd) - a b a_min_per_group - 0 1 1 1 - 1 2 1 1 - 2 3 2 3 - - >>> agnostic_min_over_b(df_pl) - shape: (3, 3) - ┌─────┬─────┬─────────────────┐ - │ a ┆ b ┆ a_min_per_group │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════════════════╡ - │ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1 ┆ 1 │ - │ 3 ┆ 2 ┆ 3 │ - └─────┴─────┴─────────────────┘ - - >>> agnostic_min_over_b(df_pa) - pyarrow.Table - a: int64 - b: int64 - a_min_per_group: int64 - ---- - a: [[1,2,3]] - b: [[1,1,2]] - a_min_per_group: [[1,1,3]] + >>> df_native = pd.DataFrame({"a": [1, 2, 4], "b": ["x", "x", "y"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_min_per_group=nw.col("a").min().over("b")) + ┌────────────────────────┐ + | Narwhals DataFrame | + |------------------------| + | a b a_min_per_group| + |0 1 x 1| + |1 2 x 1| + |2 4 y 4| + └────────────────────────┘ Cumulative operations are also supported, but (currently) only for pandas and Polars: - >>> def agnostic_cum_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.with_columns(c=nw.col("a").cum_sum().over("b")).to_native() - - >>> agnostic_cum_sum(df_pd) - a b c - 0 1 1 1 - 1 2 1 3 - 2 3 2 3 - - >>> agnostic_cum_sum(df_pl) - shape: (3, 3) - ┌─────┬─────┬─────┐ - │ a ┆ b ┆ c │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞═════╪═════╪═════╡ - │ 1 ┆ 1 ┆ 1 │ - │ 2 ┆ 1 ┆ 3 │ - │ 3 ┆ 2 ┆ 3 │ - └─────┴─────┴─────┘ + >>> df.with_columns(a_cum_sum_per_group=nw.col("a").cum_sum().over("b")) + ┌────────────────────────────┐ + | Narwhals DataFrame | + |----------------------------| + | a b a_cum_sum_per_group| + |0 1 x 1| + |1 2 x 3| + |2 4 y 4| + └────────────────────────────┘ """ if self._changes_length: msg = "`.over()` can not be used for expressions which change length." @@ -2721,52 +1707,19 @@ def is_duplicated(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_duplicated(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().is_duplicated()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_duplicated`: - - >>> agnostic_is_duplicated(df_pd) - a b - 0 True True - 1 False True - 2 False False - 3 True False - - >>> agnostic_is_duplicated(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ true ┆ true │ - │ false ┆ true │ - │ false ┆ false │ - │ true ┆ false │ - └───────┴───────┘ - - >>> agnostic_is_duplicated(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[true,false,false,true]] - b: [[true,true,false,false]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(nw.all().is_duplicated().name.suffix("_is_duplicated")) + ┌─────────────────────────────────────────┐ + | Narwhals DataFrame | + |-----------------------------------------| + | a b a_is_duplicated b_is_duplicated| + |0 1 a True True| + |1 2 a False True| + |2 3 b False False| + |3 1 c True False| + └─────────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_duplicated(), @@ -2783,52 +1736,19 @@ def is_unique(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_unique(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().is_unique()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_unique`: - - >>> agnostic_is_unique(df_pd) - a b - 0 False False - 1 True False - 2 True True - 3 False True - - >>> agnostic_is_unique(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ false ┆ false │ - │ true ┆ false │ - │ true ┆ true │ - │ false ┆ true │ - └───────┴───────┘ - - >>> agnostic_is_unique(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[false,true,true,false]] - b: [[false,false,true,true]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(nw.all().is_unique().name.suffix("_is_unique")) + ┌─────────────────────────────────┐ + | Narwhals DataFrame | + |---------------------------------| + | a b a_is_unique b_is_unique| + |0 1 a False False| + |1 2 a True False| + |2 3 b True True| + |3 1 c False True| + └─────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_unique(), @@ -2850,46 +1770,16 @@ def null_count(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, None, 1], "b": ["a", None, "b", None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_null_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().null_count()).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_null_count`: - - >>> agnostic_null_count(df_pd) - a b - 0 1 2 - - >>> agnostic_null_count(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 1 ┆ 2 │ - └─────┴─────┘ - - >>> agnostic_null_count(df_pa) - pyarrow.Table - a: int64 - b: int64 - ---- - a: [[1]] - b: [[2]] + >>> df_native = pd.DataFrame({"a": [1, 2, None, 1], "b": ["a", None, "b", None]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.all().null_count()) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 1 2 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).null_count(), @@ -2906,52 +1796,21 @@ def is_first_distinct(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_first_distinct(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().is_first_distinct()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_first_distinct`: - - >>> agnostic_is_first_distinct(df_pd) - a b - 0 True True - 1 True False - 2 True True - 3 False True - - >>> agnostic_is_first_distinct(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ true ┆ true │ - │ true ┆ false │ - │ true ┆ true │ - │ false ┆ true │ - └───────┴───────┘ - - >>> agnostic_is_first_distinct(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[true,true,true,false]] - b: [[true,false,true,true]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... nw.all().is_first_distinct().name.suffix("_is_first_distinct") + ... ) + ┌─────────────────────────────────────────────────┐ + | Narwhals DataFrame | + |-------------------------------------------------| + | a b a_is_first_distinct b_is_first_distinct| + |0 1 a True True| + |1 2 a True False| + |2 3 b True True| + |3 1 c False True| + └─────────────────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_first_distinct(), @@ -2968,52 +1827,19 @@ def is_last_distinct(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_is_last_distinct(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.all().is_last_distinct()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_last_distinct`: - - >>> agnostic_is_last_distinct(df_pd) - a b - 0 False False - 1 True True - 2 True True - 3 True True - - >>> agnostic_is_last_distinct(df_pl) - shape: (4, 2) - ┌───────┬───────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ bool ┆ bool │ - ╞═══════╪═══════╡ - │ false ┆ false │ - │ true ┆ true │ - │ true ┆ true │ - │ true ┆ true │ - └───────┴───────┘ - - >>> agnostic_is_last_distinct(df_pa) - pyarrow.Table - a: bool - b: bool - ---- - a: [[false,true,true,true]] - b: [[false,true,true,true]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3, 1], "b": ["a", "a", "b", "c"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(nw.all().is_last_distinct().name.suffix("_is_last_distinct")) + ┌───────────────────────────────────────────────┐ + | Narwhals DataFrame | + |-----------------------------------------------| + | a b a_is_last_distinct b_is_last_distinct| + |0 1 a False False| + |1 2 a True True| + |2 3 b True True| + |3 1 c True True| + └───────────────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_last_distinct(), @@ -3045,48 +1871,16 @@ def quantile( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": list(range(50)), "b": list(range(50, 100))} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function: - - >>> def agnostic_quantile(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a", "b").quantile(0.5, interpolation="linear") - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_quantile`: - - >>> agnostic_quantile(df_pd) - a b - 0 24.5 74.5 - - >>> agnostic_quantile(df_pl) - shape: (1, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════╡ - │ 24.5 ┆ 74.5 │ - └──────┴──────┘ - - >>> agnostic_quantile(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[24.5]] - b: [[74.5]] + >>> df_native = pd.DataFrame({"a": list(range(50)), "b": list(range(50, 100))}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a", "b").quantile(0.5, interpolation="linear")) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a b | + | 0 24.5 74.5 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).quantile(quantile, interpolation), @@ -3175,48 +1969,18 @@ def round(self: Self, decimals: int = 0) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.12345, 2.56789, 3.901234]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that rounds to the first decimal: - - >>> def agnostic_round(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").round(1)).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_round`: - - >>> agnostic_round(df_pd) - a - 0 1.1 - 1 2.6 - 2 3.9 - - >>> agnostic_round(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ f64 │ - ╞═════╡ - │ 1.1 │ - │ 2.6 │ - │ 3.9 │ - └─────┘ - - >>> agnostic_round(df_pa) - pyarrow.Table - a: double - ---- - a: [[1.1,2.6,3.9]] + >>> df_native = pd.DataFrame({"a": [1.12345, 2.56789, 3.901234]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_rounded=nw.col("a").round(1)) + ┌──────────────────────┐ + | Narwhals DataFrame | + |----------------------| + | a a_rounded| + |0 1.123450 1.1| + |1 2.567890 2.6| + |2 3.901234 3.9| + └──────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).round(decimals), @@ -3235,50 +1999,19 @@ def len(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": ["x", "y", "z"], "b": [1, 2, 1]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - Let's define a dataframe-agnostic function that computes the len over - different values of "b" column: - - >>> def agnostic_len(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select( - ... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"), - ... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_len`: - - >>> agnostic_len(df_pd) - a1 a2 - 0 2 1 - - >>> agnostic_len(df_pl) - shape: (1, 2) - ┌─────┬─────┐ - │ a1 ┆ a2 │ - │ --- ┆ --- │ - │ u32 ┆ u32 │ - ╞═════╪═════╡ - │ 2 ┆ 1 │ - └─────┴─────┘ - - >>> agnostic_len(df_pa) - pyarrow.Table - a1: int64 - a2: int64 - ---- - a1: [[2]] - a2: [[1]] + >>> df_native = pd.DataFrame({"a": ["x", "y", "z"], "b": [1, 2, 1]}) + >>> df = nw.from_native(df_native) + >>> df.select( + ... nw.col("a").filter(nw.col("b") == 1).len().alias("a1"), + ... nw.col("a").filter(nw.col("b") == 2).len().alias("a2"), + ... ) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a1 a2 | + | 0 2 1 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).len(), @@ -3336,127 +2069,18 @@ def clip( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 2, 3]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_clip_lower(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").clip(2)).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_clip_lower`: - - >>> agnostic_clip_lower(df_pd) - a - 0 2 - 1 2 - 2 3 - - >>> agnostic_clip_lower(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 2 │ - │ 2 │ - │ 3 │ - └─────┘ - - >>> agnostic_clip_lower(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[2,2,3]] - - We define another library agnostic function: - - >>> def agnostic_clip_upper(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... 
return df.select(nw.col("a").clip(upper_bound=2)).to_native() - - We can then pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_clip_upper`: - - >>> agnostic_clip_upper(df_pd) - a - 0 1 - 1 2 - 2 2 - - >>> agnostic_clip_upper(df_pl) - shape: (3, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - │ 2 │ - │ 2 │ - └─────┘ - - >>> agnostic_clip_upper(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[1,2,2]] - - We can have both at the same time - - >>> data = {"a": [-1, 1, -3, 3, -5, 5]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_clip(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").clip(-1, 3)).to_native() - - We can pass any supported library such as Pandas, Polars, or - PyArrow to `agnostic_clip`: - - >>> agnostic_clip(df_pd) - a - 0 -1 - 1 1 - 2 -1 - 3 3 - 4 -1 - 5 3 - - >>> agnostic_clip(df_pl) - shape: (6, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ -1 │ - │ 1 │ - │ -1 │ - │ 3 │ - │ -1 │ - │ 3 │ - └─────┘ - - >>> agnostic_clip(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[-1,1,-1,3,-1,3]] + >>> df_native = pd.DataFrame({"a": [1, 2, 3]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_clipped=nw.col("a").clip(-1, 3)) + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a a_clipped | + | 0 1 1 | + | 1 2 2 | + | 2 3 3 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).clip( @@ -3480,47 +2104,16 @@ def mode(self: Self) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = { - ... "a": [1, 1, 2, 3], - ... "b": [1, 1, 2, 2], - ... 
} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_mode(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").mode()).sort("a").to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_mode`: - - >>> agnostic_mode(df_pd) - a - 0 1 - - >>> agnostic_mode(df_pl) - shape: (1, 1) - ┌─────┐ - │ a │ - │ --- │ - │ i64 │ - ╞═════╡ - │ 1 │ - └─────┘ - - >>> agnostic_mode(df_pa) - pyarrow.Table - a: int64 - ---- - a: [[1]] + >>> df_native = pd.DataFrame({"a": [1, 1, 2, 3], "b": [1, 1, 2, 2]}) + >>> df = nw.from_native(df_native) + >>> df.select(nw.col("a").mode()).sort("a") + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a | + | 0 1 | + └──────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).mode(), @@ -3533,59 +2126,37 @@ def is_finite(self: Self) -> Self: """Returns boolean values indicating which original values are finite. Warning: - Different backend handle null values differently. `is_finite` will return - False for NaN and Null's in the Dask and pandas non-nullable backend, while - for Polars, PyArrow and pandas nullable backends null values are kept as such. + pandas handles null values differently from Polars and PyArrow. + See [null_handling](../pandas_like_concepts/null_handling.md/) + for reference. + `is_finite` will return False for NaN and nulls in the Dask and + pandas non-nullable backends, while for Polars, PyArrow and pandas + nullable backends null values are kept as such. Returns: Expression of `Boolean` data type. 
Examples: - >>> import pandas as pd >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [float("nan"), float("inf"), 2.0, None]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_is_finite(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.select(nw.col("a").is_finite()).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_is_finite`: - - >>> agnostic_is_finite(df_pd) - a - 0 False - 1 False - 2 True - 3 False - - >>> agnostic_is_finite(df_pl) - shape: (4, 1) - ┌───────┐ - │ a │ - │ --- │ - │ bool │ - ╞═══════╡ - │ false │ - │ false │ - │ true │ - │ null │ - └───────┘ - - >>> agnostic_is_finite(df_pa) - pyarrow.Table - a: bool - ---- - a: [[false,false,true,null]] + >>> df_native = pl.DataFrame({"a": [float("nan"), float("inf"), 2.0, None]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns(a_is_finite=nw.col("a").is_finite()) + ┌──────────────────────┐ + | Narwhals DataFrame | + |----------------------| + |shape: (4, 2) | + |┌──────┬─────────────┐| + |│ a ┆ a_is_finite │| + |│ --- ┆ --- │| + |│ f64 ┆ bool │| + |╞══════╪═════════════╡| + |│ NaN ┆ false │| + |│ inf ┆ false │| + |│ 2.0 ┆ true │| + |│ null ┆ null │| + |└──────┴─────────────┘| + └──────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).is_finite(), @@ -3605,57 +2176,22 @@ def cum_count(self: Self, *, reverse: bool = False) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": ["x", "k", None, "d"]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: 
- - >>> def agnostic_cum_count(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_count().alias("cum_count"), - ... nw.col("a").cum_count(reverse=True).alias("cum_count_reverse"), - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_count`: - - >>> agnostic_cum_count(df_pd) - a cum_count cum_count_reverse - 0 x 1 3 - 1 k 2 2 - 2 None 2 1 - 3 d 3 1 - - >>> agnostic_cum_count(df_pl) - shape: (4, 3) - ┌──────┬───────────┬───────────────────┐ - │ a ┆ cum_count ┆ cum_count_reverse │ - │ --- ┆ --- ┆ --- │ - │ str ┆ u32 ┆ u32 │ - ╞══════╪═══════════╪═══════════════════╡ - │ x ┆ 1 ┆ 3 │ - │ k ┆ 2 ┆ 2 │ - │ null ┆ 2 ┆ 1 │ - │ d ┆ 3 ┆ 1 │ - └──────┴───────────┴───────────────────┘ - - >>> agnostic_cum_count(df_pa) - pyarrow.Table - a: string - cum_count: uint32 - cum_count_reverse: uint32 - ---- - a: [["x","k",null,"d"]] - cum_count: [[1,2,2,3]] - cum_count_reverse: [[3,2,1,1]] + >>> df_native = pd.DataFrame({"a": ["x", "k", None, "d"]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... nw.col("a").cum_count().alias("a_cum_count"), + ... nw.col("a").cum_count(reverse=True).alias("a_cum_count_reverse"), + ... 
) + ┌─────────────────────────────────────────┐ + | Narwhals DataFrame | + |-----------------------------------------| + | a a_cum_count a_cum_count_reverse| + |0 x 1 3| + |1 k 2 2| + |2 None 2 1| + |3 d 3 1| + └─────────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_count(reverse=reverse), @@ -3675,57 +2211,22 @@ def cum_min(self: Self, *, reverse: bool = False) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [3, 1, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_min(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_min().alias("cum_min"), - ... nw.col("a").cum_min(reverse=True).alias("cum_min_reverse"), - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_min`: - - >>> agnostic_cum_min(df_pd) - a cum_min cum_min_reverse - 0 3.0 3.0 1.0 - 1 1.0 1.0 1.0 - 2 NaN NaN NaN - 3 2.0 1.0 2.0 - - >>> agnostic_cum_min(df_pl) - shape: (4, 3) - ┌──────┬─────────┬─────────────────┐ - │ a ┆ cum_min ┆ cum_min_reverse │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════╪═════════╪═════════════════╡ - │ 3 ┆ 3 ┆ 1 │ - │ 1 ┆ 1 ┆ 1 │ - │ null ┆ null ┆ null │ - │ 2 ┆ 1 ┆ 2 │ - └──────┴─────────┴─────────────────┘ - - >>> agnostic_cum_min(df_pa) - pyarrow.Table - a: int64 - cum_min: int64 - cum_min_reverse: int64 - ---- - a: [[3,1,null,2]] - cum_min: [[3,1,null,1]] - cum_min_reverse: [[1,1,null,2]] + >>> df_native = pd.DataFrame({"a": [3, 1, None, 2]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... nw.col("a").cum_min().alias("a_cum_min"), + ... 
nw.col("a").cum_min(reverse=True).alias("a_cum_min_reverse"), + ... ) + ┌────────────────────────────────────┐ + | Narwhals DataFrame | + |------------------------------------| + | a a_cum_min a_cum_min_reverse| + |0 3.0 3.0 1.0| + |1 1.0 1.0 1.0| + |2 NaN NaN NaN| + |3 2.0 1.0 2.0| + └────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_min(reverse=reverse), @@ -3745,57 +2246,22 @@ def cum_max(self: Self, *, reverse: bool = False) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 3, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_max(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_max().alias("cum_max"), - ... nw.col("a").cum_max(reverse=True).alias("cum_max_reverse"), - ... ).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_`: - - >>> agnostic_cum_max(df_pd) - a cum_max cum_max_reverse - 0 1.0 1.0 3.0 - 1 3.0 3.0 3.0 - 2 NaN NaN NaN - 3 2.0 3.0 2.0 - - >>> agnostic_cum_max(df_pl) - shape: (4, 3) - ┌──────┬─────────┬─────────────────┐ - │ a ┆ cum_max ┆ cum_max_reverse │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════╪═════════╪═════════════════╡ - │ 1 ┆ 1 ┆ 3 │ - │ 3 ┆ 3 ┆ 3 │ - │ null ┆ null ┆ null │ - │ 2 ┆ 3 ┆ 2 │ - └──────┴─────────┴─────────────────┘ - - >>> agnostic_cum_max(df_pa) - pyarrow.Table - a: int64 - cum_max: int64 - cum_max_reverse: int64 - ---- - a: [[1,3,null,2]] - cum_max: [[1,3,null,3]] - cum_max_reverse: [[3,3,null,2]] + >>> df_native = pd.DataFrame({"a": [1, 3, None, 2]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... 
nw.col("a").cum_max().alias("a_cum_max"), + ... nw.col("a").cum_max(reverse=True).alias("a_cum_max_reverse"), + ... ) + ┌────────────────────────────────────┐ + | Narwhals DataFrame | + |------------------------------------| + | a a_cum_max a_cum_max_reverse| + |0 1.0 1.0 3.0| + |1 3.0 3.0 3.0| + |2 NaN NaN NaN| + |3 2.0 3.0 2.0| + └────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_max(reverse=reverse), @@ -3815,57 +2281,22 @@ def cum_prod(self: Self, *, reverse: bool = False) -> Self: Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1, 3, None, 2]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_cum_prod(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... nw.col("a").cum_prod().alias("cum_prod"), - ... nw.col("a").cum_prod(reverse=True).alias("cum_prod_reverse"), - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_cum_prod`: - - >>> agnostic_cum_prod(df_pd) - a cum_prod cum_prod_reverse - 0 1.0 1.0 6.0 - 1 3.0 3.0 6.0 - 2 NaN NaN NaN - 3 2.0 6.0 2.0 - - >>> agnostic_cum_prod(df_pl) - shape: (4, 3) - ┌──────┬──────────┬──────────────────┐ - │ a ┆ cum_prod ┆ cum_prod_reverse │ - │ --- ┆ --- ┆ --- │ - │ i64 ┆ i64 ┆ i64 │ - ╞══════╪══════════╪══════════════════╡ - │ 1 ┆ 1 ┆ 6 │ - │ 3 ┆ 3 ┆ 6 │ - │ null ┆ null ┆ null │ - │ 2 ┆ 6 ┆ 2 │ - └──────┴──────────┴──────────────────┘ - - >>> agnostic_cum_prod(df_pa) - pyarrow.Table - a: int64 - cum_prod: int64 - cum_prod_reverse: int64 - ---- - a: [[1,3,null,2]] - cum_prod: [[1,3,null,6]] - cum_prod_reverse: [[6,6,null,2]] + >>> df_native = pd.DataFrame({"a": [1, 3, None, 2]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... nw.col("a").cum_prod().alias("a_cum_prod"), + ... nw.col("a").cum_prod(reverse=True).alias("a_cum_prod_reverse"), + ... ) + ┌──────────────────────────────────────┐ + | Narwhals DataFrame | + |--------------------------------------| + | a a_cum_prod a_cum_prod_reverse| + |0 1.0 1.0 6.0| + |1 3.0 3.0 6.0| + |2 NaN NaN NaN| + |3 2.0 6.0 2.0| + └──────────────────────────────────────┘ """ return self.__class__( lambda plx: self._to_compliant_expr(plx).cum_prod(reverse=reverse), @@ -3907,54 +2338,21 @@ def rolling_sum( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_sum(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_sum(window_size=3, min_samples=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_sum`: - - >>> agnostic_rolling_sum(df_pd) - a b - 0 1.0 1.0 - 1 2.0 3.0 - 2 NaN 3.0 - 3 4.0 6.0 - - >>> agnostic_rolling_sum(df_pl) - shape: (4, 2) - ┌──────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪═════╡ - │ 1.0 ┆ 1.0 │ - │ 2.0 ┆ 3.0 │ - │ null ┆ 3.0 │ - │ 4.0 ┆ 6.0 │ - └──────┴─────┘ - - >>> agnostic_rolling_sum(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[1,3,3,6]] + >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... a_rolling_sum=nw.col("a").rolling_sum(window_size=3, min_samples=1) + ... ) + ┌─────────────────────┐ + | Narwhals DataFrame | + |---------------------| + | a a_rolling_sum| + |0 1.0 1.0| + |1 2.0 3.0| + |2 NaN 3.0| + |3 4.0 6.0| + └─────────────────────┘ """ window_size, min_samples = _validate_rolling_arguments( window_size=window_size, min_samples=min_samples @@ -4004,54 +2402,21 @@ def rolling_mean( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_mean(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_mean(window_size=3, min_samples=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_mean`: - - >>> agnostic_rolling_mean(df_pd) - a b - 0 1.0 1.0 - 1 2.0 1.5 - 2 NaN 1.5 - 3 4.0 3.0 - - >>> agnostic_rolling_mean(df_pl) - shape: (4, 2) - ┌──────┬─────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪═════╡ - │ 1.0 ┆ 1.0 │ - │ 2.0 ┆ 1.5 │ - │ null ┆ 1.5 │ - │ 4.0 ┆ 3.0 │ - └──────┴─────┘ - - >>> agnostic_rolling_mean(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[1,1.5,1.5,3]] + >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... a_rolling_mean=nw.col("a").rolling_mean(window_size=3, min_samples=1) + ... ) + ┌──────────────────────┐ + | Narwhals DataFrame | + |----------------------| + | a a_rolling_mean| + |0 1.0 1.0| + |1 2.0 1.5| + |2 NaN 1.5| + |3 4.0 3.0| + └──────────────────────┘ """ window_size, min_samples = _validate_rolling_arguments( window_size=window_size, min_samples=min_samples @@ -4103,54 +2468,21 @@ def rolling_var( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_var(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_var(window_size=3, min_samples=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_var`: - - >>> agnostic_rolling_var(df_pd) - a b - 0 1.0 NaN - 1 2.0 0.5 - 2 NaN 0.5 - 3 4.0 2.0 - - >>> agnostic_rolling_var(df_pl) # doctest:+SKIP - shape: (4, 2) - ┌──────┬──────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.5 │ - │ null ┆ 0.5 │ - │ 4.0 ┆ 2.0 │ - └──────┴──────┘ - - >>> agnostic_rolling_var(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[nan,0.5,0.5,2]] + >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... a_rolling_var=nw.col("a").rolling_var(window_size=3, min_samples=1) + ... ) + ┌─────────────────────┐ + | Narwhals DataFrame | + |---------------------| + | a a_rolling_var| + |0 1.0 NaN| + |1 2.0 0.5| + |2 NaN 0.5| + |3 4.0 2.0| + └─────────────────────┘ """ window_size, min_samples = _validate_rolling_arguments( window_size=window_size, min_samples=min_samples @@ -4200,54 +2532,21 @@ def rolling_std( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [1.0, 2.0, None, 4.0]} - >>> df_pd = pd.DataFrame(data) - >>> df_pl = pl.DataFrame(data) - >>> df_pa = pa.table(data) - - We define a library agnostic function: - - >>> def agnostic_rolling_std(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... return df.with_columns( - ... b=nw.col("a").rolling_std(window_size=3, min_samples=1) - ... 
).to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_rolling_std`: - - >>> agnostic_rolling_std(df_pd) - a b - 0 1.0 NaN - 1 2.0 0.707107 - 2 NaN 0.707107 - 3 4.0 1.414214 - - >>> agnostic_rolling_std(df_pl) # doctest:+SKIP - shape: (4, 2) - ┌──────┬──────────┐ - │ a ┆ b │ - │ --- ┆ --- │ - │ f64 ┆ f64 │ - ╞══════╪══════════╡ - │ 1.0 ┆ null │ - │ 2.0 ┆ 0.707107 │ - │ null ┆ 0.707107 │ - │ 4.0 ┆ 1.414214 │ - └──────┴──────────┘ - - >>> agnostic_rolling_std(df_pa) - pyarrow.Table - a: double - b: double - ---- - a: [[1,2,null,4]] - b: [[nan,0.7071067811865476,0.7071067811865476,1.4142135623730951]] + >>> df_native = pd.DataFrame({"a": [1.0, 2.0, None, 4.0]}) + >>> df = nw.from_native(df_native) + >>> df.with_columns( + ... a_rolling_std=nw.col("a").rolling_std(window_size=3, min_samples=1) + ... ) + ┌─────────────────────┐ + | Narwhals DataFrame | + |---------------------| + | a a_rolling_std| + |0 1.0 NaN| + |1 2.0 0.707107| + |2 NaN 0.707107| + |3 4.0 1.414214| + └─────────────────────┘ """ window_size, min_samples = _validate_rolling_arguments( window_size=window_size, min_samples=min_samples @@ -4300,53 +2599,21 @@ def rank( Examples: >>> import pandas as pd - >>> import polars as pl - >>> import pyarrow as pa >>> import narwhals as nw - >>> from narwhals.typing import IntoFrameT - >>> - >>> data = {"a": [3, 6, 1, 1, 6]} - - We define a dataframe-agnostic function that computes the dense rank for - the data: - - >>> def agnostic_dense_rank(df_native: IntoFrameT) -> IntoFrameT: - ... df = nw.from_native(df_native) - ... result = df.with_columns(rnk=nw.col("a").rank(method="dense")) - ... 
return result.to_native() - - We can then pass any supported library such as pandas, Polars, or - PyArrow to `agnostic_dense_rank`: - - >>> agnostic_dense_rank(pd.DataFrame(data)) - a rnk - 0 3 2.0 - 1 6 3.0 - 2 1 1.0 - 3 1 1.0 - 4 6 3.0 - - >>> agnostic_dense_rank(pl.DataFrame(data)) - shape: (5, 2) - ┌─────┬─────┐ - │ a ┆ rnk │ - │ --- ┆ --- │ - │ i64 ┆ u32 │ - ╞═════╪═════╡ - │ 3 ┆ 2 │ - │ 6 ┆ 3 │ - │ 1 ┆ 1 │ - │ 1 ┆ 1 │ - │ 6 ┆ 3 │ - └─────┴─────┘ - - >>> agnostic_dense_rank(pa.table(data)) - pyarrow.Table - a: int64 - rnk: uint64 - ---- - a: [[3,6,1,1,6]] - rnk: [[2,3,1,1,3]] + >>> df_native = pd.DataFrame({"a": [3, 6, 1, 1, 6]}) + >>> df = nw.from_native(df_native) + >>> result = df.with_columns(rank=nw.col("a").rank(method="dense")) + >>> result + ┌──────────────────┐ + |Narwhals DataFrame| + |------------------| + | a rank | + | 0 3 2.0 | + | 1 6 3.0 | + | 2 1 1.0 | + | 3 1 1.0 | + | 4 6 3.0 | + └──────────────────┘ """ supported_rank_methods = {"average", "min", "max", "dense", "ordinal"} if method not in supported_rank_methods: