Skip to content

Commit

Permalink
feat: add pyspark str namespace to_datetime (#1826)
Browse files Browse the repository at this point in the history
feat: add pyspark str to_datetime
  • Loading branch information
FBruzzesi authored Jan 19, 2025
1 parent b4ce9d1 commit e7ca81e
Show file tree
Hide file tree
Showing 2 changed files with 59 additions and 3 deletions.
54 changes: 54 additions & 0 deletions narwhals/_spark_like/expr_str.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import overload

if TYPE_CHECKING:
from pyspark.sql import Column
Expand Down Expand Up @@ -128,3 +129,56 @@ def to_lowercase(self: Self) -> SparkLikeExpr:
"to_lowercase",
returns_scalar=self._compliant_expr._returns_scalar,
)

def to_datetime(self: Self, format: str | None) -> SparkLikeExpr:  # noqa: A002
    """Parse the string expression into a timestamp expression.

    Every "T" in the input strings is replaced with a space before parsing,
    and `format` (a Python strptime pattern, or None) is converted with
    `strptime_to_pyspark_format` before being handed to `F.to_timestamp`.
    """
    from pyspark.sql import functions as F  # noqa: N812

    def _parse_timestamp(_input: Column) -> Column:
        # The converted format has its "T" turned into a space as well, so the
        # input is normalized the same way before parsing.
        normalized = F.replace(_input, F.lit("T"), F.lit(" "))
        return F.to_timestamp(
            normalized,
            format=strptime_to_pyspark_format(format),
        )

    compliant = self._compliant_expr
    return compliant._from_call(
        _parse_timestamp,
        "to_datetime",
        returns_scalar=compliant._returns_scalar,
    )


@overload
def strptime_to_pyspark_format(format: None) -> None: ...


@overload
def strptime_to_pyspark_format(format: str) -> str: ...


def strptime_to_pyspark_format(format: str | None) -> str | None:  # noqa: A002
    """Convert a Python strptime format string to a PySpark datetime pattern.

    Each known `%`-directive is replaced by its PySpark pattern letters, and
    every remaining "T" is replaced by a space.

    Arguments:
        format: Python strptime format string, or None.

    Returns:
        The PySpark datetime pattern, or None when `format` is None.
    """
    if format is None:
        return None

    # see https://spark.apache.org/docs/latest/sql-ref-datetime-pattern.html
    # and https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior
    format_mapping = {
        "%Y": "y",  # Year with century
        # Two letters select Spark's reduced two-digit year (base 2000); a
        # single "y" would parse "24" as the year 24.
        "%y": "yy",  # Year without century
        "%m": "M",  # Month
        "%d": "d",  # Day of the month
        "%H": "H",  # Hour (24-hour clock) 0-23
        "%I": "h",  # Hour (12-hour clock) 1-12
        "%M": "m",  # Minute
        "%S": "s",  # Second
        # When parsing, Spark accepts between 1 and <number of S> fraction
        # digits, so six letters are needed to cover %f's 1-6 digits.
        "%f": "SSSSSS",  # Microseconds
        "%p": "a",  # AM/PM
        "%a": "E",  # Abbreviated weekday name
        "%A": "E",  # Full weekday name
        "%j": "D",  # Day of the year
        "%z": "Z",  # Timezone offset
        # NOTE(review): Spark documents "X" as a zone-offset letter, not epoch
        # seconds; kept as-is, confirm the intended behaviour.
        "%s": "X",  # Unix timestamp
    }

    # Replace Python format specifiers with PySpark specifiers
    pyspark_format = format
    for py_format, spark_format in format_mapping.items():
        pyspark_format = pyspark_format.replace(py_format, spark_format)
    # None of the pattern letters above is "T", so any "T" left is a literal
    # separator (ISO 8601 style) and becomes a space.
    return pyspark_format.replace("T", " ")
8 changes: 5 additions & 3 deletions tests/expr_and_series/str/to_datetime_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@


def test_to_datetime(constructor: Constructor, request: pytest.FixtureRequest) -> None:
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
if "cudf" in str(constructor):
expected = "2020-01-01T12:34:56.000000000"
Expand Down Expand Up @@ -80,7 +80,9 @@ def test_to_datetime_infer_fmt(
request.applymarker(pytest.mark.xfail)
if "cudf" in str(constructor):
expected = expected_cudf
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
if "pyspark" in str(constructor) and data["a"][0] == "20240101123456":
request.applymarker(pytest.mark.xfail)
result = (
nw.from_native(constructor(data))
Expand Down Expand Up @@ -133,7 +135,7 @@ def test_to_datetime_series_infer_fmt(
def test_to_datetime_infer_fmt_from_date(
constructor: Constructor, request: pytest.FixtureRequest
) -> None:
if ("pyspark" in str(constructor)) or "duckdb" in str(constructor):
if "duckdb" in str(constructor):
request.applymarker(pytest.mark.xfail)
data = {"z": ["2020-01-01", "2020-01-02", None]}
expected = [datetime(2020, 1, 1), datetime(2020, 1, 2), None]
Expand Down

0 comments on commit e7ca81e

Please sign in to comment.