From 973b499906c7c8ec8f23d440648ccbd49adb8e88 Mon Sep 17 00:00:00 2001 From: Dhanunjaya Elluri Date: Mon, 20 Jan 2025 07:42:47 +0100 Subject: [PATCH] feat: add `Expr.dt` methods to `PySpark` (#1835) --- narwhals/_spark_like/expr.py | 5 + narwhals/_spark_like/expr_dt.py | 135 ++++++++++++++++++ .../dt/datetime_attributes_test.py | 2 - 3 files changed, 140 insertions(+), 2 deletions(-) create mode 100644 narwhals/_spark_like/expr_dt.py diff --git a/narwhals/_spark_like/expr.py b/narwhals/_spark_like/expr.py index 98a1d2d5b..ba063c642 100644 --- a/narwhals/_spark_like/expr.py +++ b/narwhals/_spark_like/expr.py @@ -7,6 +7,7 @@ from typing import Sequence from narwhals._expression_parsing import infer_new_root_output_names +from narwhals._spark_like.expr_dt import SparkLikeExprDateTimeNamespace from narwhals._spark_like.expr_name import SparkLikeExprNameNamespace from narwhals._spark_like.expr_str import SparkLikeExprStringNamespace from narwhals._spark_like.utils import get_column_name @@ -541,3 +542,7 @@ def str(self: Self) -> SparkLikeExprStringNamespace: @property def name(self: Self) -> SparkLikeExprNameNamespace: return SparkLikeExprNameNamespace(self) + + @property + def dt(self: Self) -> SparkLikeExprDateTimeNamespace: + return SparkLikeExprDateTimeNamespace(self) diff --git a/narwhals/_spark_like/expr_dt.py b/narwhals/_spark_like/expr_dt.py new file mode 100644 index 000000000..4c31d6b67 --- /dev/null +++ b/narwhals/_spark_like/expr_dt.py @@ -0,0 +1,135 @@ +from __future__ import annotations + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from pyspark.sql import Column + from typing_extensions import Self + + from narwhals._spark_like.expr import SparkLikeExpr + + +class SparkLikeExprDateTimeNamespace: + def __init__(self: Self, expr: SparkLikeExpr) -> None: + self._compliant_expr = expr + + def date(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.to_date, + "date", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def year(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.year, + "year", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def month(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.month, + "month", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def day(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.dayofmonth, + "day", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def hour(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.hour, + "hour", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def minute(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.minute, + "minute", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def second(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.second, + "second", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def millisecond(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + def _millisecond(_input: Column) -> Column: + return F.floor((F.unix_micros(_input) % 1_000_000) / 1000) + + return self._compliant_expr._from_call( + _millisecond, + "millisecond", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def microsecond(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + def _microsecond(_input: Column) -> Column: + return F.unix_micros(_input) % 1_000_000 + + return self._compliant_expr._from_call( + _microsecond, + "microsecond", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def nanosecond(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + def _nanosecond(_input: Column) -> Column: + return (F.unix_micros(_input) % 1_000_000) * 1000 + + return self._compliant_expr._from_call( + _nanosecond, + "nanosecond", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def ordinal_day(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + return self._compliant_expr._from_call( + F.dayofyear, + "ordinal_day", + returns_scalar=self._compliant_expr._returns_scalar, + ) + + def weekday(self: Self) -> SparkLikeExpr: + from pyspark.sql import functions as F # noqa: N812 + + def _weekday(_input: Column) -> Column: + # PySpark's dayofweek returns 1-7 for Sunday-Saturday + return (F.dayofweek(_input) + 6) % 7 + + return self._compliant_expr._from_call( + _weekday, + "weekday", + returns_scalar=self._compliant_expr._returns_scalar, + ) diff --git a/tests/expr_and_series/dt/datetime_attributes_test.py b/tests/expr_and_series/dt/datetime_attributes_test.py index b1d09406e..3c8a16b7d 100644 --- a/tests/expr_and_series/dt/datetime_attributes_test.py +++ b/tests/expr_and_series/dt/datetime_attributes_test.py @@ -49,8 +49,6 @@ def test_datetime_attributes( request.applymarker(pytest.mark.xfail) if attribute == "date" and "cudf" in str(constructor): request.applymarker(pytest.mark.xfail) - if "pyspark" in str(constructor): - request.applymarker(pytest.mark.xfail) df = nw.from_native(constructor(data)) result = df.select(getattr(nw.col("a").dt, attribute)())