From da6401b695807a525a2d51bab846dca272d7ec03 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sat, 16 Nov 2024 22:12:34 -0500 Subject: [PATCH 01/16] Add dt.replace --- crates/polars-plan/src/dsl/dt.rs | 30 +++ .../src/dsl/function_expr/datetime.rs | 45 ++++ .../src/dsl/function_expr/temporal.rs | 91 +++---- crates/polars-python/src/expr/datetime.rs | 27 +++ .../src/lazyframe/visitor/expr_nodes.rs | 2 + crates/polars-time/src/chunkedarray/date.rs | 28 ++- .../polars-time/src/chunkedarray/datetime.rs | 63 +++++ crates/polars-time/src/lib.rs | 4 + crates/polars-time/src/replace.rs | 227 ++++++++++++++++++ .../source/reference/expressions/temporal.rst | 1 + .../docs/source/reference/series/temporal.rst | 1 + py-polars/polars/expr/datetime.py | 34 ++- py-polars/polars/series/datetime.py | 13 + .../namespaces/temporal/test_datetime.py | 163 +++++++++++++ 14 files changed, 666 insertions(+), 63 deletions(-) create mode 100644 crates/polars-time/src/replace.rs diff --git a/crates/polars-plan/src/dsl/dt.rs b/crates/polars-plan/src/dsl/dt.rs index 6ee60ed9774d..de77370ab4b9 100644 --- a/crates/polars-plan/src/dsl/dt.rs +++ b/crates/polars-plan/src/dsl/dt.rs @@ -331,4 +331,34 @@ impl DateLikeNameSpace { TemporalFunction::TotalNanoseconds, )) } + + /// Replace the time units of a value + #[allow(clippy::too_many_arguments)] + pub fn replace( + self, + day: Expr, + month: Expr, + year: Expr, + hour: Expr, + minute: Expr, + second: Expr, + microsecond: Expr, + ambiguous: Expr, + ) -> Expr { + self.0.map_many_private( + FunctionExpr::TemporalExpr(TemporalFunction::Replace), + &[ + day, + month, + year, + hour, + minute, + second, + microsecond, + ambiguous, + ], + false, + None, + ) + } } diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index bfbe7f8994e2..a6a616b83522 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -62,6 
+62,7 @@ pub enum TemporalFunction { #[cfg(feature = "timezones")] DSTOffset, Round, + Replace, #[cfg(feature = "timezones")] ReplaceTimeZone(Option, NonExistent), Combine(TimeUnit), @@ -117,6 +118,7 @@ impl TemporalFunction { #[cfg(feature = "timezones")] DSTOffset => mapper.with_dtype(DataType::Duration(TimeUnit::Milliseconds)), Round => mapper.with_same_dtype(), + Replace => mapper.with_same_dtype(), #[cfg(feature = "timezones")] ReplaceTimeZone(tz, _non_existent) => mapper.map_datetime_dtype_timezone(tz.as_ref()), DatetimeFunction { @@ -187,6 +189,7 @@ impl Display for TemporalFunction { #[cfg(feature = "timezones")] DSTOffset => "dst_offset", Round => "round", + Replace => "replace", #[cfg(feature = "timezones")] ReplaceTimeZone(_, _) => "replace_time_zone", DatetimeFunction { .. } => return write!(f, "dt.datetime"), @@ -555,3 +558,45 @@ pub(super) fn round(s: &[Column]) -> PolarsResult { dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), }) } + +pub(super) fn replace(s: &[Column]) -> PolarsResult { + let time_series = &s[0]; + let s_year = &s[1].strict_cast(&DataType::Int32)?; + let s_month = &s[2].strict_cast(&DataType::Int8)?; + let s_day = &s[3].strict_cast(&DataType::Int8)?; + let year = s_year.i32()?; + let month = s_month.i8()?; + let day = s_day.i8()?; + + match time_series.dtype() { + DataType::Datetime(_, _) => { + let s_hour = &s[4].strict_cast(&DataType::Int8)?; + let s_minute = &s[5].strict_cast(&DataType::Int8)?; + let s_second = &s[6].strict_cast(&DataType::Int8)?; + let s_microsecond = &s[7].strict_cast(&DataType::Int32)?; + let hour = s_hour.i8()?; + let minute = s_minute.i8()?; + let second = s_second.i8()?; + let microsecond = s_microsecond.i32()?; + let s_ambiguous = &s[8].strict_cast(&DataType::String)?; + let ambiguous = s_ambiguous.str()?; + + let out = time_series.datetime().unwrap().replace( + year, + month, + day, + hour, + minute, + second, + microsecond, + ambiguous, + ); + out.map(|s| s.into_column()) + }, + 
DataType::Date => { + let out = time_series.date().unwrap().replace(year, month, day); + out.map(|s| s.into_column()) + }, + dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), + } +} diff --git a/crates/polars-plan/src/dsl/function_expr/temporal.rs b/crates/polars-plan/src/dsl/function_expr/temporal.rs index dcb5005ae4b1..23c59c6dc258 100644 --- a/crates/polars-plan/src/dsl/function_expr/temporal.rs +++ b/crates/polars-plan/src/dsl/function_expr/temporal.rs @@ -55,6 +55,7 @@ impl From for SpecialEq> { #[cfg(feature = "timezones")] DSTOffset => map!(datetime::dst_offset), Round => map_as_slice!(datetime::round), + Replace => map_as_slice!(datetime::replace), #[cfg(feature = "timezones")] ReplaceTimeZone(tz, non_existent) => { map_as_slice!(dispatch::replace_time_zone, tz.as_deref(), non_existent) @@ -70,14 +71,12 @@ impl From for SpecialEq> { } } +#[cfg(feature = "dtype-datetime")] pub(super) fn datetime( s: &[Column], time_unit: &TimeUnit, time_zone: Option<&str>, ) -> PolarsResult { - use polars_core::export::chrono::NaiveDate; - use polars_core::utils::CustomIterTools; - let year = &s[0]; let month = &s[1]; let day = &s[2]; @@ -95,91 +94,61 @@ pub(super) fn datetime( } let year = year.i32()?; - let mut month = month.cast(&DataType::UInt32)?; + let mut month = month.cast(&DataType::Int8)?; if month.len() < max_len { month = month.new_from_index(0, max_len); } - let month = month.u32()?; + let month = month.i8()?; - let mut day = day.cast(&DataType::UInt32)?; + let mut day = day.cast(&DataType::Int8)?; if day.len() < max_len { day = day.new_from_index(0, max_len); } - let day = day.u32()?; + let day = day.i8()?; - let mut hour = hour.cast(&DataType::UInt32)?; + let mut hour = hour.cast(&DataType::Int8)?; if hour.len() < max_len { hour = hour.new_from_index(0, max_len); } - let hour = hour.u32()?; + let hour = hour.i8()?; - let mut minute = minute.cast(&DataType::UInt32)?; + let mut minute = minute.cast(&DataType::Int8)?; if minute.len() < 
max_len { minute = minute.new_from_index(0, max_len); } - let minute = minute.u32()?; + let minute = minute.i8()?; - let mut second = second.cast(&DataType::UInt32)?; + let mut second = second.cast(&DataType::Int8)?; if second.len() < max_len { second = second.new_from_index(0, max_len); } - let second = second.u32()?; + let second = second.i8()?; - let mut microsecond = microsecond.cast(&DataType::UInt32)?; + let mut microsecond = microsecond.cast(&DataType::Int32)?; if microsecond.len() < max_len { microsecond = microsecond.new_from_index(0, max_len); } - let microsecond = microsecond.u32()?; + let microsecond = microsecond.i32()?; let mut _ambiguous = ambiguous.cast(&DataType::String)?; if _ambiguous.len() < max_len { _ambiguous = _ambiguous.new_from_index(0, max_len); } - let _ambiguous = _ambiguous.str()?; - - let ca: Int64Chunked = year - .into_iter() - .zip(month) - .zip(day) - .zip(hour) - .zip(minute) - .zip(second) - .zip(microsecond) - .map(|((((((y, m), d), h), mnt), s), us)| { - if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) = - (y, m, d, h, mnt, s, us) - { - NaiveDate::from_ymd_opt(y, m, d) - .and_then(|nd| nd.and_hms_micro_opt(h, mnt, s, us)) - .map(|ndt| match time_unit { - TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(), - TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(), - TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(), - }) - } else { - None - } - }) - .collect_trusted(); - - let ca = match time_zone { - #[cfg(feature = "timezones")] - Some(_) => { - let mut ca = ca.into_datetime(*time_unit, None); - ca = replace_time_zone(&ca, time_zone, _ambiguous, NonExistent::Raise)?; - ca - }, - _ => { - polars_ensure!( - time_zone.is_none(), - ComputeError: "cannot make use of the `time_zone` argument without the 'timezones' feature enabled." 
- ); - ca.into_datetime(*time_unit, None) - }, - }; - - let mut s = ca.into_column(); - s.rename(PlSmallStr::from_static("datetime")); - Ok(s) + let ambiguous = _ambiguous.str()?; + + let ca = DatetimeChunked::from_parts( + year, + month, + day, + hour, + minute, + second, + microsecond, + ambiguous, + time_unit, + time_zone, + PlSmallStr::from_static("datetime"), + ); + ca.map(|s| s.into_column()) } pub(super) fn combine(s: &[Column], tu: TimeUnit) -> PolarsResult { diff --git a/crates/polars-python/src/expr/datetime.rs b/crates/polars-python/src/expr/datetime.rs index 31052e6189d4..c7d9020b8b69 100644 --- a/crates/polars-python/src/expr/datetime.rs +++ b/crates/polars-python/src/expr/datetime.rs @@ -99,6 +99,33 @@ impl PyExpr { self.inner.clone().dt().round(every.inner).into() } + fn dt_replace( + &self, + day: Self, + month: Self, + year: Self, + hour: Self, + minute: Self, + second: Self, + microsecond: Self, + ambiguous: Self, + ) -> Self { + self.inner + .clone() + .dt() + .replace( + day.inner, + month.inner, + year.inner, + hour.inner, + minute.inner, + second.inner, + microsecond.inner, + ambiguous.inner, + ) + .into() + } + fn dt_combine(&self, time: Self, time_unit: Wrap) -> Self { self.inner .clone() diff --git a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs index 2958302811b8..99ac9a405509 100644 --- a/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs +++ b/crates/polars-python/src/lazyframe/visitor/expr_nodes.rs @@ -252,6 +252,7 @@ pub enum PyTemporalFunction { BaseUtcOffset, DSTOffset, Round, + Replace, ReplaceTimeZone, Combine, DatetimeFunction, @@ -1024,6 +1025,7 @@ pub(crate) fn into_py(py: Python<'_>, expr: &AExpr) -> PyResult { #[cfg(feature = "timezones")] TemporalFunction::DSTOffset => (PyTemporalFunction::DSTOffset,).into_py_any(py), TemporalFunction::Round => (PyTemporalFunction::Round,).into_py_any(py), + TemporalFunction::Replace => 
(PyTemporalFunction::Replace).into_py_any(py), #[cfg(feature = "timezones")] TemporalFunction::ReplaceTimeZone(time_zone, non_existent) => ( PyTemporalFunction::ReplaceTimeZone, diff --git a/crates/polars-time/src/chunkedarray/date.rs b/crates/polars-time/src/chunkedarray/date.rs index 8132f1ea2bba..7f9b5bccc035 100644 --- a/crates/polars-time/src/chunkedarray/date.rs +++ b/crates/polars-time/src/chunkedarray/date.rs @@ -1,4 +1,6 @@ -use arrow::temporal_conversions::{MILLISECONDS, SECONDS_IN_DAY}; +use arrow::temporal_conversions::{EPOCH_DAYS_FROM_CE, MILLISECONDS, SECONDS_IN_DAY}; +use polars_core::export::chrono::{Datelike, NaiveDate}; +use polars_core::utils::CustomIterTools; use super::*; @@ -74,6 +76,30 @@ pub trait DateMethods: AsDate { } fn parse_from_str_slice(name: PlSmallStr, v: &[&str], fmt: &str) -> DateChunked; + + /// Construct a date ChunkedArray from individual time components. + fn from_parts( + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + name: PlSmallStr, + ) -> PolarsResult { + let mut ca: Int32Chunked = year + .into_iter() + .zip(month) + .zip(day) + .map(|((y, m), d)| { + if let (Some(y), Some(m), Some(d)) = (y, m, d) { + NaiveDate::from_ymd_opt(y, m as u32, d as u32) + .map(|t| t.num_days_from_ce() - EPOCH_DAYS_FROM_CE) + } else { + None + } + }) + .collect_trusted(); + ca.rename(name); + Ok(ca.into_date()) + } } impl DateMethods for DateChunked { diff --git a/crates/polars-time/src/chunkedarray/datetime.rs b/crates/polars-time/src/chunkedarray/datetime.rs index 7942f97eb2f2..3a29151cf524 100644 --- a/crates/polars-time/src/chunkedarray/datetime.rs +++ b/crates/polars-time/src/chunkedarray/datetime.rs @@ -2,6 +2,8 @@ use arrow::array::{Array, PrimitiveArray}; use arrow::compute::temporal; use polars_compute::cast::{cast, CastOptionsImpl}; use polars_core::prelude::*; +#[cfg(feature = "timezones")] +use polars_ops::chunked_array::datetime::replace_time_zone; use super::*; @@ -149,6 +151,67 @@ pub trait DatetimeMethods: 
AsDatetime { ) .into_datetime(tu, None) } + + /// Construct a datetime ChunkedArray from individual time components. + #[allow(clippy::too_many_arguments)] + fn from_parts( + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + hour: &Int8Chunked, + minute: &Int8Chunked, + second: &Int8Chunked, + microsecond: &Int32Chunked, + ambiguous: &StringChunked, + time_unit: &TimeUnit, + time_zone: Option<&str>, + name: PlSmallStr, + ) -> PolarsResult { + let ca: Int64Chunked = year + .into_iter() + .zip(month) + .zip(day) + .zip(hour) + .zip(minute) + .zip(second) + .zip(microsecond) + .map(|((((((y, m), d), h), mnt), s), us)| { + if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(us)) = + (y, m, d, h, mnt, s, us) + { + NaiveDate::from_ymd_opt(y, m as u32, d as u32) + .and_then(|nd| { + nd.and_hms_micro_opt(h as u32, mnt as u32, s as u32, us as u32) + }) + .map(|ndt| match time_unit { + TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(), + TimeUnit::Microseconds => ndt.and_utc().timestamp_micros(), + TimeUnit::Nanoseconds => ndt.and_utc().timestamp_nanos_opt().unwrap(), + }) + } else { + None + } + }) + .collect_trusted(); + + let mut ca = match time_zone { + #[cfg(feature = "timezones")] + Some(_) => { + let mut ca = ca.into_datetime(*time_unit, None); + ca = replace_time_zone(&ca, time_zone, ambiguous, NonExistent::Raise)?; + ca + }, + _ => { + polars_ensure!( + time_zone.is_none(), + ComputeError: "cannot make use of the `time_zone` argument without the 'timezones' feature enabled." 
+ ); + ca.into_datetime(*time_unit, None) + }, + }; + ca.rename(name); + Ok(ca) + } } pub trait AsDatetime { diff --git a/crates/polars-time/src/lib.rs b/crates/polars-time/src/lib.rs index 81d531b8f07c..540f3a0cb253 100644 --- a/crates/polars-time/src/lib.rs +++ b/crates/polars-time/src/lib.rs @@ -13,6 +13,7 @@ mod month_start; #[cfg(feature = "offset_by")] mod offset_by; pub mod prelude; +mod replace; mod round; pub mod series; mod truncate; @@ -33,7 +34,10 @@ pub use month_end::*; pub use month_start::*; #[cfg(feature = "offset_by")] pub use offset_by::*; +#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))] +pub use replace::*; pub use round::*; +#[cfg(feature = "dtype-date")] pub use truncate::*; pub use upsample::*; pub use windows::duration::Duration; diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs new file mode 100644 index 000000000000..44d37f58fcad --- /dev/null +++ b/crates/polars-time/src/replace.rs @@ -0,0 +1,227 @@ +use polars_core::prelude::*; + +use crate::prelude::*; + +#[cfg(feature = "dtype-datetime")] +pub trait PolarsReplaceDatetime: DatetimeMethods { + #[allow(clippy::too_many_arguments)] + fn replace( + &self, + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + hour: &Int8Chunked, + minute: &Int8Chunked, + second: &Int8Chunked, + microsecond: &Int32Chunked, + ambiguous: &StringChunked, + ) -> PolarsResult + where + Self: Sized; +} + +#[cfg(feature = "dtype-date")] +pub trait PolarsReplaceDate: DateMethods { + fn replace( + &self, + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + ) -> PolarsResult + where + Self: Sized; +} + +#[cfg(feature = "dtype-datetime")] +impl PolarsReplaceDatetime for DatetimeChunked { + #[allow(clippy::too_many_arguments)] + fn replace( + &self, + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + hour: &Int8Chunked, + minute: &Int8Chunked, + second: &Int8Chunked, + microsecond: &Int32Chunked, + ambiguous: &StringChunked, 
+ ) -> PolarsResult { + let n = self.len(); + + // For each argument, we must check if: + // 1. No value was supplied (None) --> Use existing year from Series + // 2. Value was supplied and is a Scalar --> Create full Series of value + // 3. Value was supplied and is Series --> Update all elements with the non-null values + let year = if year.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { year.get_unchecked(0) } { + if n == 1 { + year + } else { + &Int32Chunked::full("".into(), value, n) + } + } else { + &self.year() + } + } else { + &year.zip_with(&year.is_not_null(), &self.year())? + }; + let month = if month.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { month.get_unchecked(0) } { + if n == 1 { + month + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.month() + } + } else { + &month.zip_with(&month.is_not_null(), &self.month())? + }; + let day = if day.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { day.get_unchecked(0) } { + if n == 1 { + day + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.day() + } + } else { + &day.zip_with(&day.is_not_null(), &self.day())? + }; + let hour = if hour.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { hour.get_unchecked(0) } { + if n == 1 { + hour + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.hour() + } + } else { + &hour.zip_with(&hour.is_not_null(), &self.hour())? + }; + let minute = if minute.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { minute.get_unchecked(0) } { + if n == 1 { + minute + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.minute() + } + } else { + &minute.zip_with(&minute.is_not_null(), &self.minute())? + }; + let second = if second.len() == 1 { + // SAFETY: array has one value. 
+ if let Some(value) = unsafe { second.get_unchecked(0) } { + if n == 1 { + second + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.second() + } + } else { + &second.zip_with(&second.is_not_null(), &self.second())? + }; + let microsecond = if microsecond.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { microsecond.get_unchecked(0) } { + if n == 1 { + microsecond + } else { + &Int32Chunked::full("".into(), value, n) + } + } else { + &(self.nanosecond() / 1000) + } + } else { + &microsecond.zip_with(&microsecond.is_not_null(), &(self.nanosecond() / 1000))? + }; + + let out = DatetimeChunked::from_parts( + year, + month, + day, + hour, + minute, + second, + microsecond, + ambiguous, + &self.time_unit(), + self.time_zone().as_deref(), + self.name().clone(), + )?; + Ok(out) + } +} + +#[cfg(feature = "dtype-date")] +impl PolarsReplaceDate for DateChunked { + fn replace( + &self, + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + ) -> PolarsResult { + let n = self.len(); + + let year = if year.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { year.get_unchecked(0) } { + if n == 1 { + year + } else { + &Int32Chunked::full("".into(), value, n) + } + } else { + &self.year() + } + } else { + &year.zip_with(&year.is_not_null(), &self.year())? + }; + let month = if month.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { month.get_unchecked(0) } { + if n == 1 { + month + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.month() + } + } else { + &month.zip_with(&month.is_not_null(), &self.month())? + }; + let day = if day.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = unsafe { day.get_unchecked(0) } { + if n == 1 { + day + } else { + &Int8Chunked::full("".into(), value, n) + } + } else { + &self.day() + } + } else { + &day.zip_with(&day.is_not_null(), &self.day())?
+ }; + + let out = DateChunked::from_parts(year, month, day, self.name().clone())?; + Ok(out) + } +} diff --git a/py-polars/docs/source/reference/expressions/temporal.rst b/py-polars/docs/source/reference/expressions/temporal.rst index a82303ba737b..79db2f54ad88 100644 --- a/py-polars/docs/source/reference/expressions/temporal.rst +++ b/py-polars/docs/source/reference/expressions/temporal.rst @@ -34,6 +34,7 @@ The following methods are available under the `expr.dt` attribute. Expr.dt.offset_by Expr.dt.ordinal_day Expr.dt.quarter + Expr.dt.replace Expr.dt.replace_time_zone Expr.dt.round Expr.dt.second diff --git a/py-polars/docs/source/reference/series/temporal.rst b/py-polars/docs/source/reference/series/temporal.rst index 78f62c2edbd7..2811ae9e3c18 100644 --- a/py-polars/docs/source/reference/series/temporal.rst +++ b/py-polars/docs/source/reference/series/temporal.rst @@ -38,6 +38,7 @@ The following methods are available under the `Series.dt` attribute. Series.dt.offset_by Series.dt.ordinal_day Series.dt.quarter + Series.dt.replace Series.dt.replace_time_zone Series.dt.round Series.dt.second diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index 4d91448f0730..e9e8006813ea 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -7,7 +7,7 @@ from polars import functions as F from polars._utils.convert import parse_as_duration_string from polars._utils.deprecation import deprecate_function, deprecate_nonkeyword_arguments -from polars._utils.parse import parse_into_expression +from polars._utils.parse import parse_into_expression, parse_into_list_of_expressions from polars._utils.unstable import unstable from polars._utils.wrap import wrap_expr from polars.datatypes import DTYPE_TEMPORAL_UNITS, Date, Int32 @@ -390,6 +390,38 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Expr: every = parse_into_expression(every, str_as_lit=True) return wrap_expr(self._pyexpr.dt_round(every)) + def 
replace( + self, + year: int | IntoExpr | None = None, + month: int | IntoExpr | None = None, + day: int | IntoExpr | None = None, + hour: int | IntoExpr | None = None, + minute: int | IntoExpr | None = None, + second: int | IntoExpr | None = None, + microsecond: int | IntoExpr | None = None, + ambiguous: Ambiguous | Expr = "raise", + ) -> Expr: + """Replace time unit.""" + day, month, year, hour, minute, second, microsecond = ( + parse_into_list_of_expressions( + day, month, year, hour, minute, second, microsecond + ) + ) + if not isinstance(ambiguous, pl.Expr): + ambiguous = F.lit(ambiguous) + return wrap_expr( + self._pyexpr.dt_replace( + year, + month, + day, + hour, + minute, + second, + microsecond, + ambiguous._pyexpr, + ) + ) + def combine(self, time: dt.time | Expr, time_unit: TimeUnit = "us") -> Expr: """ Create a naive Datetime from an existing Date/Datetime expression and a Time. diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 0056b80abdbd..20064b8d5556 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -2096,3 +2096,16 @@ def dst_offset(self) -> Series: 0ms ] """ + + def replace( + self, + year: int | IntoExpr | None = None, + month: int | IntoExpr | None = None, + day: int | IntoExpr | None = None, + hour: int | IntoExpr | None = None, + minute: int | IntoExpr | None = None, + second: int | IntoExpr | None = None, + microsecond: int | IntoExpr | None = None, + ambiguous: Ambiguous | Expr = "raise", + ) -> Series: + """Replace time component for a Series of type Date or Datetime.""" diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index 715dc1b8efb5..f3e6169c8b25 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -650,6 +650,169 @@ def 
test_epoch_matches_timestamp() -> None: ) +def test_replace_expr() -> None: + df = pl.DataFrame( + { + "dates": [ + datetime(2088, 8, 8, 8, 8, 8, 8), + datetime(2088, 8, 8, 8, 8, 8, 8), + datetime(2088, 8, 8, 8, 8, 8, 8), + datetime(2088, 8, 8, 8, 8, 8, 8), + datetime(2088, 8, 8, 8, 8, 8, 8), + datetime(2088, 8, 8, 8, 8, 8, 8), + datetime(2088, 8, 8, 8, 8, 8, 8), + None, + None, + ], + "year": [None, 2, 3, 4, 5, 6, 7, 8, 1], + "month": [1, None, 3, 4, 5, 6, 7, 8, 1], + "day": [1, 2, None, 4, 5, 6, 7, 8, 1], + "hour": [1, 2, 3, None, 5, 6, 7, 8, 1], + "minute": [1, 2, 3, 4, None, 6, 7, 8, 1], + "second": [1, 2, 3, 4, 5, None, 7, 8, 1], + "microsecond": [1, 2, 3, 4, 5, 6, None, 8, None], + } + ) + + result = df.select( + pl.col("dates").dt.replace( + year="year", + month="month", + day="day", + hour="hour", + minute="minute", + second="second", + microsecond="microsecond", + ) + ) + + expected = pl.DataFrame( + { + "dates": [ + datetime(2088, 1, 1, 1, 1, 1, 1), + datetime(2, 8, 2, 2, 2, 2, 2), + datetime(3, 3, 8, 3, 3, 3, 3), + datetime(4, 4, 4, 8, 4, 4, 4), + datetime(5, 5, 5, 5, 8, 5, 5), + datetime(6, 6, 6, 6, 6, 8, 6), + datetime(7, 7, 7, 7, 7, 7, 8), + datetime(8, 8, 8, 8, 8, 8, 8), + None, + ] + } + ) + + assert_frame_equal(result, expected) + + +def test_replace_int() -> None: + df = pl.DataFrame( + { + "a": [ + datetime(1, 1, 1, 1, 1, 1, 1), + datetime(2, 2, 2, 2, 2, 2, 2), + datetime(3, 3, 3, 3, 3, 3, 3), + ] + } + ) + result = df.select( + pl.col("a").dt.replace().alias("no_change"), + pl.col("a").dt.replace(year=9).alias("year"), + pl.col("a").dt.replace(month=9).alias("month"), + pl.col("a").dt.replace(day=9).alias("day"), + pl.col("a").dt.replace(hour=9).alias("hour"), + pl.col("a").dt.replace(minute=9).alias("minute"), + pl.col("a").dt.replace(second=9).alias("second"), + pl.col("a").dt.replace(microsecond=9).alias("microsecond"), + ) + expected = pl.DataFrame( + { + "no_change": [ + datetime(1, 1, 1, 1, 1, 1, 1), + datetime(2, 2, 2, 2, 2, 2, 2), + 
datetime(3, 3, 3, 3, 3, 3, 3), + ], + "year": [ + datetime(9, 1, 1, 1, 1, 1, 1), + datetime(9, 2, 2, 2, 2, 2, 2), + datetime(9, 3, 3, 3, 3, 3, 3), + ], + "month": [ + datetime(1, 9, 1, 1, 1, 1, 1), + datetime(2, 9, 2, 2, 2, 2, 2), + datetime(3, 9, 3, 3, 3, 3, 3), + ], + "day": [ + datetime(1, 1, 9, 1, 1, 1, 1), + datetime(2, 2, 9, 2, 2, 2, 2), + datetime(3, 3, 9, 3, 3, 3, 3), + ], + "hour": [ + datetime(1, 1, 1, 9, 1, 1, 1), + datetime(2, 2, 2, 9, 2, 2, 2), + datetime(3, 3, 3, 9, 3, 3, 3), + ], + "minute": [ + datetime(1, 1, 1, 1, 9, 1, 1), + datetime(2, 2, 2, 2, 9, 2, 2), + datetime(3, 3, 3, 3, 9, 3, 3), + ], + "second": [ + datetime(1, 1, 1, 1, 1, 9, 1), + datetime(2, 2, 2, 2, 2, 9, 2), + datetime(3, 3, 3, 3, 3, 9, 3), + ], + "microsecond": [ + datetime(1, 1, 1, 1, 1, 1, 9), + datetime(2, 2, 2, 2, 2, 2, 9), + datetime(3, 3, 3, 3, 3, 3, 9), + ], + } + ) + assert_frame_equal(result, expected) + + +def test_replace_ambiguous() -> None: + # Value to be replaced by an ambiguous hour. + value = pl.select( + pl.datetime(2020, 10, 25, 5, time_zone="Europe/London") + ).to_series() + + input = [2020, 10, 25, 1] + tz = "Europe/London" + + # earliest + expected = pl.select( + pl.datetime(*input, time_zone=tz, ambiguous="earliest") + ).to_series() + result = value.dt.replace(hour=1, ambiguous="earliest") + assert_series_equal(result, expected) + + # latest + expected = pl.select( + pl.datetime(*input, time_zone=tz, ambiguous="latest") + ).to_series() + result = value.dt.replace(hour=1, ambiguous="latest") + assert_series_equal(result, expected) + + # null + expected = pl.select( + pl.datetime(*input, time_zone=tz, ambiguous="null") + ).to_series() + result = value.dt.replace(hour=1, ambiguous="null") + assert_series_equal(result, expected) + + # raise + with pytest.raises( + ComputeError, + match=( + "datetime '2020-10-25 01:00:00' is ambiguous in time zone 'Europe/London'. " + "Please use `ambiguous` to tell how it should be localized." 
+ ), + ): + value.dt.replace(hour=1, ambiguous="raise") + + @pytest.mark.parametrize( ("tzinfo", "time_zone"), [(None, None), (ZoneInfo("Asia/Kathmandu"), "Asia/Kathmandu")], From 0ed5a2ca0bae9e80e9e44ce689658afee70bbfc3 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sat, 16 Nov 2024 23:25:13 -0500 Subject: [PATCH 02/16] Simplify --- crates/polars-time/src/replace.rs | 60 ++++++------------------------- 1 file changed, 10 insertions(+), 50 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 44d37f58fcad..e54fcdf588e8 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -55,11 +55,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let year = if year.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { year.get_unchecked(0) } { - if n == 1 { - year - } else { - &Int32Chunked::full("".into(), value, n) - } + &Int32Chunked::full("".into(), value, n) } else { &self.year() } @@ -69,11 +65,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let month = if month.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { month.get_unchecked(0) } { - if n == 1 { - month - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.month() } @@ -83,11 +75,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let day = if day.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { day.get_unchecked(0) } { - if n == 1 { - day - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.day() } @@ -97,11 +85,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let hour = if hour.len() == 1 { // SAFETY: array has one value. 
if let Some(value) = unsafe { hour.get_unchecked(0) } { - if n == 1 { - hour - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.hour() } @@ -111,11 +95,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let minute = if minute.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { minute.get_unchecked(0) } { - if n == 1 { - minute - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.minute() } @@ -125,11 +105,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let second = if second.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { second.get_unchecked(0) } { - if n == 1 { - second - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.second() } @@ -139,11 +115,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { let microsecond = if microsecond.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { microsecond.get_unchecked(0) } { - if n == 1 { - microsecond - } else { - &Int32Chunked::full("".into(), value, n) - } + &Int32Chunked::full("".into(), value, n) } else { &(self.nanosecond() / 1000) } @@ -181,11 +153,7 @@ impl PolarsReplaceDate for DateChunked { let year = if year.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { year.get_unchecked(0) } { - if n == 1 { - year - } else { - &Int32Chunked::full("".into(), value, n) - } + &Int32Chunked::full("".into(), value, n) } else { &self.year() } @@ -195,11 +163,7 @@ impl PolarsReplaceDate for DateChunked { let month = if month.len() == 1 { // SAFETY: array has one value. 
if let Some(value) = unsafe { month.get_unchecked(0) } { - if n == 1 { - month - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.month() } @@ -209,11 +173,7 @@ impl PolarsReplaceDate for DateChunked { let day = if day.len() == 1 { // SAFETY: array has one value. if let Some(value) = unsafe { day.get_unchecked(0) } { - if n == 1 { - day - } else { - &Int8Chunked::full("".into(), value, n) - } + &Int8Chunked::full("".into(), value, n) } else { &self.day() } From cfff994f06c70ba74f0086adb5f6984b80365bf3 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sun, 17 Nov 2024 09:26:39 -0500 Subject: [PATCH 03/16] Rename create fn --- crates/polars-plan/src/dsl/function_expr/temporal.rs | 2 +- crates/polars-time/src/chunkedarray/date.rs | 2 +- crates/polars-time/src/chunkedarray/datetime.rs | 2 +- crates/polars-time/src/replace.rs | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/temporal.rs b/crates/polars-plan/src/dsl/function_expr/temporal.rs index 23c59c6dc258..d5b53ab4f0b9 100644 --- a/crates/polars-plan/src/dsl/function_expr/temporal.rs +++ b/crates/polars-plan/src/dsl/function_expr/temporal.rs @@ -135,7 +135,7 @@ pub(super) fn datetime( } let ambiguous = _ambiguous.str()?; - let ca = DatetimeChunked::from_parts( + let ca = DatetimeChunked::new_from_parts( year, month, day, diff --git a/crates/polars-time/src/chunkedarray/date.rs b/crates/polars-time/src/chunkedarray/date.rs index 7f9b5bccc035..205382e08d47 100644 --- a/crates/polars-time/src/chunkedarray/date.rs +++ b/crates/polars-time/src/chunkedarray/date.rs @@ -78,7 +78,7 @@ pub trait DateMethods: AsDate { fn parse_from_str_slice(name: PlSmallStr, v: &[&str], fmt: &str) -> DateChunked; /// Construct a date ChunkedArray from individual time components. 
- fn from_parts( + fn new_from_parts( year: &Int32Chunked, month: &Int8Chunked, day: &Int8Chunked, diff --git a/crates/polars-time/src/chunkedarray/datetime.rs b/crates/polars-time/src/chunkedarray/datetime.rs index 3a29151cf524..64b8d931a2e3 100644 --- a/crates/polars-time/src/chunkedarray/datetime.rs +++ b/crates/polars-time/src/chunkedarray/datetime.rs @@ -154,7 +154,7 @@ pub trait DatetimeMethods: AsDatetime { /// Construct a datetime ChunkedArray from individual time components. #[allow(clippy::too_many_arguments)] - fn from_parts( + fn new_from_parts( year: &Int32Chunked, month: &Int8Chunked, day: &Int8Chunked, diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index e54fcdf588e8..14ae6059857f 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -123,7 +123,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { &microsecond.zip_with(&microsecond.is_not_null(), &(self.nanosecond() / 1000))? }; - let out = DatetimeChunked::from_parts( + let out = DatetimeChunked::new_from_parts( year, month, day, @@ -181,7 +181,7 @@ impl PolarsReplaceDate for DateChunked { &day.zip_with(&day.is_not_null(), &self.day())?
}; - let out = DateChunked::from_parts(year, month, day, self.name().clone())?; + let out = DateChunked::new_from_parts(year, month, day, self.name().clone())?; Ok(out) } } From 4293a67132e8824b2623210051b73880c39025ff Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sun, 17 Nov 2024 10:15:46 -0500 Subject: [PATCH 04/16] Make args keyword-only --- py-polars/polars/expr/datetime.py | 1 + py-polars/polars/series/datetime.py | 1 + 2 files changed, 2 insertions(+) diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index e9e8006813ea..12acb3dba8ee 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -392,6 +392,7 @@ def round(self, every: str | dt.timedelta | IntoExprColumn) -> Expr: def replace( self, + *, year: int | IntoExpr | None = None, month: int | IntoExpr | None = None, day: int | IntoExpr | None = None, diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 20064b8d5556..06bef81a0cfc 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -2099,6 +2099,7 @@ def dst_offset(self) -> Series: def replace( self, + *, year: int | IntoExpr | None = None, month: int | IntoExpr | None = None, day: int | IntoExpr | None = None, From 9466a9f899cbc2ac0f368d0309d5a69672446db0 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sun, 17 Nov 2024 12:42:52 -0500 Subject: [PATCH 05/16] Add docstrings and fix signatures --- py-polars/polars/expr/datetime.py | 67 +++++++++++++++++++++++++++-- py-polars/polars/series/datetime.py | 62 ++++++++++++++++++++++---- 2 files changed, 116 insertions(+), 13 deletions(-) diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index 12acb3dba8ee..00c929a2fc78 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -402,14 +402,73 @@ def replace( microsecond: int | IntoExpr | None = None, ambiguous: Ambiguous | Expr = "raise", ) 
-> Expr: - """Replace time unit.""" + """ + Replace time unit. + + Parameters + ---------- + year + Column or literal. + month + Column or literal, ranging from 1-12. + day + Column or literal, ranging from 1-31. + hour + Column or literal, ranging from 0-23. + minute + Column or literal, ranging from 0-59. + second + Column or literal, ranging from 0-59. + microsecond + Column or literal, ranging from 0-999999. + ambiguous + Determine how to deal with ambiguous datetimes: + + - `'raise'` (default): raise + - `'earliest'`: use the earliest datetime + - `'latest'`: use the latest datetime + - `'null'`: set to null + + Returns + ------- + Expr + Expression of data type :class:`Date` or :class:`Datetime` with the + specified time units replaced. + + Examples + -------- + >>> from datetime import datetime + >>> df = pl.DataFrame( + ... { + ... "date": [datetime(2024, 1, 1), datetime(2024, 1, 2)], + ... "year": [2022, 2016], + ... "month": [1, 2], + ... "day": [4, 5], + ... "hour": [12, 13], + ... "minute": [15, 30], + ... } + ... ) + >>> df.with_columns( + ... pl.col("date").dt.replace( + ... year="year", month="month", day="day", hour="hour", minute="minute" + ... ) + ... 
) + shape: (2, 6) + ┌─────────────────────┬──────┬───────┬─────┬──────┬────────┐ + │ date ┆ year ┆ month ┆ day ┆ hour ┆ minute │ + │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ + │ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ + ╞═════════════════════╪══════╪═══════╪═════╪══════╪════════╡ + │ 2022-01-04 12:15:00 ┆ 2022 ┆ 1 ┆ 4 ┆ 12 ┆ 15 │ + │ 2016-02-05 13:30:00 ┆ 2016 ┆ 2 ┆ 5 ┆ 13 ┆ 30 │ + └─────────────────────┴──────┴───────┴─────┴──────┴────────┘ + """ day, month, year, hour, minute, second, microsecond = ( parse_into_list_of_expressions( day, month, year, hour, minute, second, microsecond ) ) - if not isinstance(ambiguous, pl.Expr): - ambiguous = F.lit(ambiguous) + ambiguous_expr = parse_into_expression(ambiguous, str_as_lit=True) return wrap_expr( self._pyexpr.dt_replace( year, @@ -419,7 +478,7 @@ def replace( minute, second, microsecond, - ambiguous._pyexpr, + ambiguous_expr, ) ) diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 06bef81a0cfc..12372e278964 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -2100,13 +2100,57 @@ def dst_offset(self) -> Series: def replace( self, *, - year: int | IntoExpr | None = None, - month: int | IntoExpr | None = None, - day: int | IntoExpr | None = None, - hour: int | IntoExpr | None = None, - minute: int | IntoExpr | None = None, - second: int | IntoExpr | None = None, - microsecond: int | IntoExpr | None = None, - ambiguous: Ambiguous | Expr = "raise", + year: int | Series | None = None, + month: int | Series | None = None, + day: int | Series | None = None, + hour: int | Series | None = None, + minute: int | Series | None = None, + second: int | Series | None = None, + microsecond: int | Series | None = None, + ambiguous: Ambiguous | Series = "raise", ) -> Series: - """Replace time component for a Series of type Date or Datetime.""" + """ + Replace time unit. + + Parameters + ---------- + year + Literal or Series. 
+ month + Literal or Series, ranging from 1-12. + day + Literal or Series, ranging from 1-31. + hour + Literal or Series, ranging from 0-23. + minute + Literal or Series, ranging from 0-59. + second + Literal or Series, ranging from 0-59. + microsecond + Literal or Series, ranging from 0-999999. + ambiguous + Determine how to deal with ambiguous datetimes: + + - `'raise'` (default): raise + - `'earliest'`: use the earliest datetime + - `'latest'`: use the latest datetime + - `'null'`: set to null + + Returns + ------- + Series + Series of data type :class:`Date` or :class:`Datetime` with the specified + time units replaced. + + Examples + -------- + >>> from datetime import datetime + >>> s = pl.Series("datetime", [datetime(2024, 1, 1), datetime(2024, 1, 2)]) + >>> s.dt.replace(year=2022, month=1, day=4, hour=12, minute=15) + shape: (2,) + Series: 'datetime' [datetime[μs]] + [ + 2022-01-04 12:15:00 + 2022-01-04 12:15:00 + ] + """ From 4a1539c1e3b78103084c4d9d78e867cbbb6ce208 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Wed, 27 Nov 2024 09:16:59 -0500 Subject: [PATCH 06/16] Remove unsafe --- crates/polars-time/src/replace.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 14ae6059857f..0b5f8ee40286 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -54,7 +54,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { // 3. Value was supplied and is Series --> Update all elements with the non-null values let year = if year.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { year.get_unchecked(0) } { + if let Some(value) = year.get(0) { &Int32Chunked::full("".into(), value, n) } else { &self.year() @@ -64,7 +64,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { }; let month = if month.len() == 1 { // SAFETY: array has one value. 
- if let Some(value) = unsafe { month.get_unchecked(0) } { + if let Some(value) = month.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.month() @@ -74,7 +74,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { }; let day = if day.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { day.get_unchecked(0) } { + if let Some(value) = day.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.day() @@ -84,7 +84,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { }; let hour = if hour.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { hour.get_unchecked(0) } { + if let Some(value) = hour.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.hour() @@ -94,7 +94,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { }; let minute = if minute.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { minute.get_unchecked(0) } { + if let Some(value) = minute.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.minute() @@ -104,7 +104,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { }; let second = if second.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { second.get_unchecked(0) } { + if let Some(value) = second.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.second() @@ -114,7 +114,7 @@ impl PolarsReplaceDatetime for DatetimeChunked { }; let microsecond = if microsecond.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { microsecond.get_unchecked(0) } { + if let Some(value) = microsecond.get(0) { &Int32Chunked::full("".into(), value, n) } else { &(self.nanosecond() / 1000) @@ -152,7 +152,7 @@ impl PolarsReplaceDate for DateChunked { let year = if year.len() == 1 { // SAFETY: array has one value. 
- if let Some(value) = unsafe { year.get_unchecked(0) } { + if let Some(value) = year.get(0) { &Int32Chunked::full("".into(), value, n) } else { &self.year() @@ -162,7 +162,7 @@ impl PolarsReplaceDate for DateChunked { }; let month = if month.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { month.get_unchecked(0) } { + if let Some(value) = month.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.month() @@ -172,7 +172,7 @@ impl PolarsReplaceDate for DateChunked { }; let day = if day.len() == 1 { // SAFETY: array has one value. - if let Some(value) = unsafe { day.get_unchecked(0) } { + if let Some(value) = day.get(0) { &Int8Chunked::full("".into(), value, n) } else { &self.day() From 88a4ca250f6da1c3f68919795b397cdfd3b83c49 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Wed, 27 Nov 2024 10:05:42 -0500 Subject: [PATCH 07/16] Make function --- .../src/dsl/function_expr/datetime.rs | 7 +- crates/polars-time/src/lib.rs | 3 +- crates/polars-time/src/replace.rs | 307 ++++++++---------- 3 files changed, 150 insertions(+), 167 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index a6a616b83522..7fddf4c3b673 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -8,6 +8,8 @@ use polars_time::base_utc_offset as base_utc_offset_fn; use polars_time::dst_offset as dst_offset_fn; #[cfg(feature = "offset_by")] use polars_time::impl_offset_by; +#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))] +use polars_time::replace::{replace_date, replace_datetime}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -581,7 +583,8 @@ pub(super) fn replace(s: &[Column]) -> PolarsResult { let s_ambiguous = &s[8].strict_cast(&DataType::String)?; let ambiguous = s_ambiguous.str()?; - let out = time_series.datetime().unwrap().replace( + let out = replace_datetime( + 
time_series.datetime().unwrap(), year, month, day, @@ -594,7 +597,7 @@ pub(super) fn replace(s: &[Column]) -> PolarsResult { out.map(|s| s.into_column()) }, DataType::Date => { - let out = time_series.date().unwrap().replace(year, month, day); + let out = replace_date(time_series.date().unwrap(), year, month, day); out.map(|s| s.into_column()) }, dt => polars_bail!(opq = round, got = dt, expected = "date/datetime"), diff --git a/crates/polars-time/src/lib.rs b/crates/polars-time/src/lib.rs index 540f3a0cb253..7b35591edd5f 100644 --- a/crates/polars-time/src/lib.rs +++ b/crates/polars-time/src/lib.rs @@ -13,7 +13,8 @@ mod month_start; #[cfg(feature = "offset_by")] mod offset_by; pub mod prelude; -mod replace; +#[cfg(any(feature = "dtype-date", feature = "dtype-datetime"))] +pub mod replace; mod round; pub mod series; mod truncate; diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 0b5f8ee40286..9ac7342c830e 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -2,186 +2,165 @@ use polars_core::prelude::*; use crate::prelude::*; +/// Replace specific time component of a `ChunkedArray` with a specified value. 
+/// # Example +/// ```rust +/// use polars_arrow::array::BooleanArray; +/// use polars_arrow::compute::boolean::or_scalar; +/// use polars_arrow::scalar::BooleanScalar; +/// # fn main() { +/// let array = BooleanArray::from_slice(&[false, false, true, true]); +/// let scalar = BooleanScalar::new(Some(true)); +/// let result = or_scalar(&array, &scalar); +/// assert_eq!(result, BooleanArray::from_slice(&[true, true, true, true])); +/// # } +/// ``` #[cfg(feature = "dtype-datetime")] -pub trait PolarsReplaceDatetime: DatetimeMethods { - #[allow(clippy::too_many_arguments)] - fn replace( - &self, - year: &Int32Chunked, - month: &Int8Chunked, - day: &Int8Chunked, - hour: &Int8Chunked, - minute: &Int8Chunked, - second: &Int8Chunked, - microsecond: &Int32Chunked, - ambiguous: &StringChunked, - ) -> PolarsResult - where - Self: Sized; -} - -#[cfg(feature = "dtype-date")] -pub trait PolarsReplaceDate: DateMethods { - fn replace( - &self, - year: &Int32Chunked, - month: &Int8Chunked, - day: &Int8Chunked, - ) -> PolarsResult - where - Self: Sized; -} - -#[cfg(feature = "dtype-datetime")] -impl PolarsReplaceDatetime for DatetimeChunked { - #[allow(clippy::too_many_arguments)] - fn replace( - &self, - year: &Int32Chunked, - month: &Int8Chunked, - day: &Int8Chunked, - hour: &Int8Chunked, - minute: &Int8Chunked, - second: &Int8Chunked, - microsecond: &Int32Chunked, - ambiguous: &StringChunked, - ) -> PolarsResult { - let n = self.len(); +#[allow(clippy::too_many_arguments)] +pub fn replace_datetime( + ca: &DatetimeChunked, + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, + hour: &Int8Chunked, + minute: &Int8Chunked, + second: &Int8Chunked, + microsecond: &Int32Chunked, + ambiguous: &StringChunked, +) -> PolarsResult { + let n = ca.len(); - // For each argument, we must check if: - // 1. No value was supplied (None) --> Use existing year from Series - // 2. Value was supplied and is a Scalar --> Create full Series of value - // 3. 
Value was supplied and is Series --> Update all elements with the non-null values - let year = if year.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = year.get(0) { - &Int32Chunked::full("".into(), value, n) - } else { - &self.year() - } + // For each argument, we must check if: + // 1. No value was supplied (None) --> Use existing year from Series + // 2. Value was supplied and is a Scalar --> Create full Series of value + // 3. Value was supplied and is Series --> Update all elements with the non-null values + let year = if year.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = year.get(0) { + &Int32Chunked::full("".into(), value, n) } else { - &year.zip_with(&year.is_not_null(), &self.year())? - }; - let month = if month.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = month.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.month() - } + &ca.year() + } + } else { + &year.zip_with(&year.is_not_null(), &ca.year())? + }; + let month = if month.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = month.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &month.zip_with(&month.is_not_null(), &self.month())? - }; - let day = if day.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = day.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.day() - } + &ca.month() + } + } else { + &month.zip_with(&month.is_not_null(), &ca.month())? + }; + let day = if day.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = day.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &day.zip_with(&day.is_not_null(), &self.day())? - }; - let hour = if hour.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = hour.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.hour() - } + &ca.day() + } + } else { + &day.zip_with(&day.is_not_null(), &ca.day())? 
+ }; + let hour = if hour.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = hour.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &hour.zip_with(&hour.is_not_null(), &self.hour())? - }; - let minute = if minute.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = minute.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.minute() - } + &ca.hour() + } + } else { + &hour.zip_with(&hour.is_not_null(), &ca.hour())? + }; + let minute = if minute.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = minute.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &minute.zip_with(&minute.is_not_null(), &self.minute())? - }; - let second = if second.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = second.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.second() - } + &ca.minute() + } + } else { + &minute.zip_with(&minute.is_not_null(), &ca.minute())? + }; + let second = if second.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = second.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &second.zip_with(&second.is_not_null(), &self.second())? - }; - let microsecond = if microsecond.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = microsecond.get(0) { - &Int32Chunked::full("".into(), value, n) - } else { - &(self.nanosecond() / 1000) - } + &ca.second() + } + } else { + &second.zip_with(&second.is_not_null(), &ca.second())? + }; + let microsecond = if microsecond.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = microsecond.get(0) { + &Int32Chunked::full("".into(), value, n) } else { - &microsecond.zip_with(&microsecond.is_not_null(), &(self.nanosecond() / 1000))? - }; + &(ca.nanosecond() / 1000) + } + } else { + &microsecond.zip_with(&microsecond.is_not_null(), &(ca.nanosecond() / 1000))?
+ }; - let out = DatetimeChunked::new_from_parts( - year, - month, - day, - hour, - minute, - second, - microsecond, - ambiguous, - &self.time_unit(), - self.time_zone().as_deref(), - self.name().clone(), - )?; - Ok(out) - } + let out = DatetimeChunked::new_from_parts( + year, + month, + day, + hour, + minute, + second, + microsecond, + ambiguous, + &ca.time_unit(), + ca.time_zone().as_deref(), + ca.name().clone(), + )?; + Ok(out) } #[cfg(feature = "dtype-date")] -impl PolarsReplaceDate for DateChunked { - fn replace( - &self, - year: &Int32Chunked, - month: &Int8Chunked, - day: &Int8Chunked, - ) -> PolarsResult { - let n = self.len(); +pub fn replace_date( + ca: &DateChunked, + year: &Int32Chunked, + month: &Int8Chunked, + day: &Int8Chunked, +) -> PolarsResult { + let n = ca.len(); - let year = if year.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = year.get(0) { - &Int32Chunked::full("".into(), value, n) - } else { - &self.year() - } + let year = if year.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = year.get(0) { + &Int32Chunked::full("".into(), value, n) } else { - &year.zip_with(&year.is_not_null(), &self.year())? - }; - let month = if month.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = month.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.month() - } + &ca.year() + } + } else { + &year.zip_with(&year.is_not_null(), &ca.year())? + }; + let month = if month.len() == 1 { + // SAFETY: array has one value. + if let Some(value) = month.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &month.zip_with(&month.is_not_null(), &self.month())? - }; - let day = if day.len() == 1 { - // SAFETY: array has one value. - if let Some(value) = day.get(0) { - &Int8Chunked::full("".into(), value, n) - } else { - &self.day() - } + &ca.month() + } + } else { + &month.zip_with(&month.is_not_null(), &ca.month())? + }; + let day = if day.len() == 1 { + // SAFETY: array has one value. 
+ if let Some(value) = day.get(0) { + &Int8Chunked::full("".into(), value, n) } else { - &day.zip_with(&day.is_not_null(), &self.day())? - }; + &ca.day() + } + } else { + &day.zip_with(&day.is_not_null(), &ca.day())? + }; - let out = DateChunked::new_from_parts(year, month, day, self.name().clone())?; - Ok(out) - } + let out = DateChunked::new_from_parts(year, month, day, ca.name().clone())?; + Ok(out) } From 2188102e282fe7cb5b55436587d30a41b625d18f Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Wed, 27 Nov 2024 10:09:58 -0500 Subject: [PATCH 08/16] Fix docstring --- crates/polars-time/src/replace.rs | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 9ac7342c830e..f093c4269ee0 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -2,19 +2,7 @@ use polars_core::prelude::*; use crate::prelude::*; -/// Replace specific time component of a `ChunkedArray` with a specified value. -/// # Example -/// ```rust -/// use polars_arrow::array::BooleanArray; -/// use polars_arrow::compute::boolean::or_scalar; -/// use polars_arrow::scalar::BooleanScalar; -/// # fn main() { -/// let array = BooleanArray::from_slice(&[false, false, true, true]); -/// let scalar = BooleanScalar::new(Some(true)); -/// let result = or_scalar(&array, &scalar); -/// assert_eq!(result, BooleanArray::from_slice(&[true, true, true, true])); -/// # } -/// ``` +/// Replace specific time component of a `DatetimeChunked` with a specified value. #[cfg(feature = "dtype-datetime")] #[allow(clippy::too_many_arguments)] pub fn replace_datetime( @@ -121,6 +109,7 @@ pub fn replace_datetime( Ok(out) } +/// Replace specific time component of a `DateChunked` with a specified value. 
#[cfg(feature = "dtype-date")] pub fn replace_date( ca: &DateChunked, From e80b06d76b70caed9dc4afb43bc1569bc54c2a5c Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Fri, 29 Nov 2024 11:24:04 -0500 Subject: [PATCH 09/16] Remove safety comment --- crates/polars-time/src/replace.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index f093c4269ee0..0e25f393acc9 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -23,7 +23,6 @@ pub fn replace_datetime( // 2. Value was supplied and is a Scalar --> Create full Series of value // 3. Value was supplied and is Series --> Update all elements with the non-null values let year = if year.len() == 1 { - // SAFETY: array has one value. if let Some(value) = year.get(0) { &Int32Chunked::full("".into(), value, n) } else { @@ -33,7 +32,6 @@ pub fn replace_datetime( &year.zip_with(&year.is_not_null(), &ca.year())? }; let month = if month.len() == 1 { - // SAFETY: array has one value. if let Some(value) = month.get(0) { &Int8Chunked::full("".into(), value, n) } else { @@ -43,7 +41,6 @@ pub fn replace_datetime( &month.zip_with(&month.is_not_null(), &ca.month())? }; let day = if day.len() == 1 { - // SAFETY: array has one value. if let Some(value) = day.get(0) { &Int8Chunked::full("".into(), value, n) } else { @@ -53,7 +50,6 @@ pub fn replace_datetime( &day.zip_with(&day.is_not_null(), &ca.day())? }; let hour = if hour.len() == 1 { - // SAFETY: array has one value. if let Some(value) = hour.get(0) { &Int8Chunked::full("".into(), value, n) } else { @@ -63,7 +59,6 @@ pub fn replace_datetime( &hour.zip_with(&hour.is_not_null(), &ca.hour())? }; let minute = if minute.len() == 1 { - // SAFETY: array has one value. if let Some(value) = minute.get(0) { &Int8Chunked::full("".into(), value, n) } else { @@ -73,7 +68,6 @@ pub fn replace_datetime( &minute.zip_with(&minute.is_not_null(), &ca.minute())? 
}; let second = if second.len() == 1 { - // SAFETY: array has one value. if let Some(value) = second.get(0) { &Int8Chunked::full("".into(), value, n) } else { @@ -83,7 +77,6 @@ pub fn replace_datetime( &second.zip_with(&second.is_not_null(), &ca.second())? }; let microsecond = if microsecond.len() == 1 { - // SAFETY: array has one value. if let Some(value) = microsecond.get(0) { &Int32Chunked::full("".into(), value, n) } else { @@ -120,7 +113,6 @@ pub fn replace_date( let n = ca.len(); let year = if year.len() == 1 { - // SAFETY: array has one value. if let Some(value) = year.get(0) { &Int32Chunked::full("".into(), value, n) } else { @@ -130,7 +122,6 @@ pub fn replace_date( &year.zip_with(&year.is_not_null(), &ca.year())? }; let month = if month.len() == 1 { - // SAFETY: array has one value. if let Some(value) = month.get(0) { &Int8Chunked::full("".into(), value, n) } else { @@ -140,7 +131,6 @@ pub fn replace_date( &month.zip_with(&month.is_not_null(), &ca.month())? }; let day = if day.len() == 1 { - // SAFETY: array has one value. 
if let Some(value) = day.get(0) { &Int8Chunked::full("".into(), value, n) } else { From f5c8e5d75fe1ff58811a38443a1bc22a792cd0de Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 5 Dec 2024 08:55:17 -0500 Subject: [PATCH 10/16] Reorder args --- crates/polars-plan/src/dsl/dt.rs | 8 ++++---- crates/polars-python/src/expr/datetime.rs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/polars-plan/src/dsl/dt.rs b/crates/polars-plan/src/dsl/dt.rs index de77370ab4b9..9b5e4552f1ff 100644 --- a/crates/polars-plan/src/dsl/dt.rs +++ b/crates/polars-plan/src/dsl/dt.rs @@ -336,9 +336,9 @@ impl DateLikeNameSpace { #[allow(clippy::too_many_arguments)] pub fn replace( self, - day: Expr, - month: Expr, year: Expr, + month: Expr, + day: Expr, hour: Expr, minute: Expr, second: Expr, @@ -348,9 +348,9 @@ impl DateLikeNameSpace { self.0.map_many_private( FunctionExpr::TemporalExpr(TemporalFunction::Replace), &[ - day, - month, year, + month, + day, hour, minute, second, diff --git a/crates/polars-python/src/expr/datetime.rs b/crates/polars-python/src/expr/datetime.rs index c7d9020b8b69..ce475fc70337 100644 --- a/crates/polars-python/src/expr/datetime.rs +++ b/crates/polars-python/src/expr/datetime.rs @@ -101,9 +101,9 @@ impl PyExpr { fn dt_replace( &self, - day: Self, - month: Self, year: Self, + month: Self, + day: Self, hour: Self, minute: Self, second: Self, @@ -114,9 +114,9 @@ impl PyExpr { .clone() .dt() .replace( - day.inner, - month.inner, year.inner, + month.inner, + day.inner, hour.inner, minute.inner, second.inner, From 681ee8e44558a493a307c7c4289ebc7b3c09dc7a Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 5 Dec 2024 09:30:25 -0500 Subject: [PATCH 11/16] Improve examples --- py-polars/polars/expr/datetime.py | 44 +++++++++++++++-------------- py-polars/polars/series/datetime.py | 12 ++++---- 2 files changed, 29 insertions(+), 27 deletions(-) diff --git a/py-polars/polars/expr/datetime.py 
b/py-polars/polars/expr/datetime.py index 00c929a2fc78..4c6ac86c7951 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -437,31 +437,33 @@ def replace( Examples -------- - >>> from datetime import datetime + >>> from datetime import date >>> df = pl.DataFrame( ... { - ... "date": [datetime(2024, 1, 1), datetime(2024, 1, 2)], - ... "year": [2022, 2016], - ... "month": [1, 2], - ... "day": [4, 5], - ... "hour": [12, 13], - ... "minute": [15, 30], + ... "date": [date(2024, 4, 1), date(2025, 3, 16)], + ... "new_day": [10, 15], ... } ... ) - >>> df.with_columns( - ... pl.col("date").dt.replace( - ... year="year", month="month", day="day", hour="hour", minute="minute" - ... ) - ... ) - shape: (2, 6) - ┌─────────────────────┬──────┬───────┬─────┬──────┬────────┐ - │ date ┆ year ┆ month ┆ day ┆ hour ┆ minute │ - │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │ - │ datetime[μs] ┆ i64 ┆ i64 ┆ i64 ┆ i64 ┆ i64 │ - ╞═════════════════════╪══════╪═══════╪═════╪══════╪════════╡ - │ 2022-01-04 12:15:00 ┆ 2022 ┆ 1 ┆ 4 ┆ 12 ┆ 15 │ - │ 2016-02-05 13:30:00 ┆ 2016 ┆ 2 ┆ 5 ┆ 13 ┆ 30 │ - └─────────────────────┴──────┴───────┴─────┴──────┴────────┘ + >>> df.with_columns(pl.col("date").dt.replace(day="new_day").alias("replaced")) + shape: (2, 3) + ┌────────────┬─────────┬────────────┐ + │ date ┆ new_day ┆ replaced │ + │ --- ┆ --- ┆ --- │ + │ date ┆ i64 ┆ date │ + ╞════════════╪═════════╪════════════╡ + │ 2024-04-01 ┆ 10 ┆ 2024-04-10 │ + │ 2025-03-16 ┆ 15 ┆ 2025-03-15 │ + └────────────┴─────────┴────────────┘ + >>> df.with_columns(pl.col("date").dt.replace(year=1800).alias("replaced")) + shape: (2, 3) + ┌────────────┬─────────┬────────────┐ + │ date ┆ new_day ┆ replaced │ + │ --- ┆ --- ┆ --- │ + │ date ┆ i64 ┆ date │ + ╞════════════╪═════════╪════════════╡ + │ 2024-04-01 ┆ 10 ┆ 1800-04-01 │ + │ 2025-03-16 ┆ 15 ┆ 1800-03-16 │ + └────────────┴─────────┴────────────┘ """ day, month, year, hour, minute, second, microsecond = ( parse_into_list_of_expressions( diff 
--git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 12372e278964..1ea8af605485 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -2144,13 +2144,13 @@ def replace( Examples -------- - >>> from datetime import datetime - >>> s = pl.Series("datetime", [datetime(2024, 1, 1), datetime(2024, 1, 2)]) - >>> s.dt.replace(year=2022, month=1, day=4, hour=12, minute=15) + >>> from datetime import date + >>> s = pl.Series("date", [date(2013, 1, 1), date(2024, 1, 2)]) + >>> s.dt.replace(year=1800) shape: (2,) - Series: 'datetime' [datetime[μs]] + Series: 'date' [date] [ - 2022-01-04 12:15:00 - 2022-01-04 12:15:00 + 1800-01-01 + 1800-01-02 ] """ From 3e3959542f5b7d7e622844940dfc34a58e0d190e Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Thu, 5 Dec 2024 22:32:34 -0500 Subject: [PATCH 12/16] Ensure propagation of nulls --- crates/polars-time/src/replace.rs | 18 +++- .../namespaces/temporal/test_datetime.py | 98 ++++++++++++++++--- 2 files changed, 103 insertions(+), 13 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 0e25f393acc9..89da471f1284 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -86,7 +86,7 @@ pub fn replace_datetime( &microsecond.zip_with(&microsecond.is_not_null(), &(ca.nanosecond() / 1000))? }; - let out = DatetimeChunked::new_from_parts( + let new_dt = DatetimeChunked::new_from_parts( year, month, day, @@ -99,6 +99,14 @@ pub fn replace_datetime( ca.time_zone().as_deref(), ca.name().clone(), )?; + + // Ensure nulls are propagated. + let out = new_dt.physical(); + let mask = &ca.is_not_null(); + let null = &Int64Chunked::full_null(PlSmallStr::EMPTY, n); + let out = out + .zip_with(mask, null)? + .into_datetime(ca.time_unit(), ca.time_zone().clone()); Ok(out) } @@ -139,7 +147,13 @@ pub fn replace_date( } else { &day.zip_with(&day.is_not_null(), &ca.day())?
}; + let new_dt = DateChunked::new_from_parts(year, month, day, ca.name().clone())?; + + // Ensure nulls are propagated. + let out = new_dt.physical(); + let mask = &ca.is_not_null(); + let null = &Int32Chunked::full_null(PlSmallStr::EMPTY, n); + let out = out.zip_with(mask, null)?.into_date(); - let out = DateChunked::new_from_parts(year, month, day, ca.name().clone())?; Ok(out) } diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index f3e6169c8b25..12994ea40fcf 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -650,7 +650,7 @@ def test_epoch_matches_timestamp() -> None: ) -def test_replace_expr() -> None: +def test_replace_expr_datetime() -> None: df = pl.DataFrame( { "dates": [ @@ -662,15 +662,14 @@ def test_replace_expr() -> None: datetime(2088, 8, 8, 8, 8, 8, 8), datetime(2088, 8, 8, 8, 8, 8, 8), None, - None, ], - "year": [None, 2, 3, 4, 5, 6, 7, 8, 1], - "month": [1, None, 3, 4, 5, 6, 7, 8, 1], - "day": [1, 2, None, 4, 5, 6, 7, 8, 1], - "hour": [1, 2, 3, None, 5, 6, 7, 8, 1], - "minute": [1, 2, 3, 4, None, 6, 7, 8, 1], - "second": [1, 2, 3, 4, 5, None, 7, 8, 1], - "microsecond": [1, 2, 3, 4, 5, 6, None, 8, None], + "year": [None, 2, 3, 4, 5, 6, 7, 8], + "month": [1, None, 3, 4, 5, 6, 7, 8], + "day": [1, 2, None, 4, 5, 6, 7, 8], + "hour": [1, 2, 3, None, 5, 6, 7, 8], + "minute": [1, 2, 3, 4, None, 6, 7, 8], + "second": [1, 2, 3, 4, 5, None, 7, 8], + "microsecond": [1, 2, 3, 4, 5, 6, None, 8], } ) @@ -696,7 +695,6 @@ def test_replace_expr() -> None: datetime(5, 5, 5, 5, 8, 5, 5), datetime(6, 6, 6, 6, 6, 8, 6), datetime(7, 7, 7, 7, 7, 7, 8), - datetime(8, 8, 8, 8, 8, 8, 8), None, ] } @@ -705,13 +703,35 @@ def test_replace_expr() -> None: assert_frame_equal(result, expected) -def test_replace_int() -> None: +def test_replace_expr_date() -> None: + df = 
pl.DataFrame( + { + "dates": [date(2088, 8, 8), date(2088, 8, 8), date(2088, 8, 8), None], + "year": [None, 2, 3, 4], + "month": [1, None, 3, 4], + "day": [1, 2, None, 4], + } + ) + + result = df.select( + pl.col("dates").dt.replace(year="year", month="month", day="day") + ) + + expected = pl.DataFrame( + {"dates": [date(2088, 1, 1), date(2, 8, 2), date(3, 3, 8), None]} + ) + + assert_frame_equal(result, expected) + + +def test_replace_int_datetime() -> None: df = pl.DataFrame( { "a": [ datetime(1, 1, 1, 1, 1, 1, 1), datetime(2, 2, 2, 2, 2, 2, 2), datetime(3, 3, 3, 3, 3, 3, 3), + None, ] } ) @@ -731,41 +751,97 @@ def test_replace_int() -> None: datetime(1, 1, 1, 1, 1, 1, 1), datetime(2, 2, 2, 2, 2, 2, 2), datetime(3, 3, 3, 3, 3, 3, 3), + None, ], "year": [ datetime(9, 1, 1, 1, 1, 1, 1), datetime(9, 2, 2, 2, 2, 2, 2), datetime(9, 3, 3, 3, 3, 3, 3), + None, ], "month": [ datetime(1, 9, 1, 1, 1, 1, 1), datetime(2, 9, 2, 2, 2, 2, 2), datetime(3, 9, 3, 3, 3, 3, 3), + None, ], "day": [ datetime(1, 1, 9, 1, 1, 1, 1), datetime(2, 2, 9, 2, 2, 2, 2), datetime(3, 3, 9, 3, 3, 3, 3), + None, ], "hour": [ datetime(1, 1, 1, 9, 1, 1, 1), datetime(2, 2, 2, 9, 2, 2, 2), datetime(3, 3, 3, 9, 3, 3, 3), + None, ], "minute": [ datetime(1, 1, 1, 1, 9, 1, 1), datetime(2, 2, 2, 2, 9, 2, 2), datetime(3, 3, 3, 3, 9, 3, 3), + None, ], "second": [ datetime(1, 1, 1, 1, 1, 9, 1), datetime(2, 2, 2, 2, 2, 9, 2), datetime(3, 3, 3, 3, 3, 9, 3), + None, ], "microsecond": [ datetime(1, 1, 1, 1, 1, 1, 9), datetime(2, 2, 2, 2, 2, 2, 9), datetime(3, 3, 3, 3, 3, 3, 9), + None, + ], + } + ) + assert_frame_equal(result, expected) + + +def test_replace_int_date() -> None: + df = pl.DataFrame( + { + "a": [ + date(1, 1, 1), + date(2, 2, 2), + date(3, 3, 3), + None, + ] + } + ) + result = df.select( + pl.col("a").dt.replace().alias("no_change"), + pl.col("a").dt.replace(year=9).alias("year"), + pl.col("a").dt.replace(month=9).alias("month"), + pl.col("a").dt.replace(day=9).alias("day"), + ) + expected = 
pl.DataFrame( + { + "no_change": [ + date(1, 1, 1), + date(2, 2, 2), + date(3, 3, 3), + None, + ], + "year": [ + date(9, 1, 1), + date(9, 2, 2), + date(9, 3, 3), + None, + ], + "month": [ + date(1, 9, 1), + date(2, 9, 2), + date(3, 9, 3), + None, + ], + "day": [ + date(1, 1, 9), + date(2, 2, 9), + date(3, 3, 9), + None, ], } ) From 6ed521c6b2de7ac9cc0aa97144c8769e43727072 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Fri, 6 Dec 2024 09:35:14 -0500 Subject: [PATCH 13/16] Minor perf: only propagate if nulls present --- crates/polars-time/src/replace.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 89da471f1284..5200acde1cba 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -86,7 +86,7 @@ pub fn replace_datetime( &microsecond.zip_with(&microsecond.is_not_null(), &(ca.nanosecond() / 1000))? }; - let new_dt = DatetimeChunked::new_from_parts( + let mut out = DatetimeChunked::new_from_parts( year, month, day, @@ -101,12 +101,15 @@ pub fn replace_datetime( )?; // Ensure nulls are propagated. - let out = new_dt.physical(); - let mask = &ca.is_not_null(); - let null = &Int64Chunked::full_null(PlSmallStr::EMPTY, n); - let out = out - .zip_with(mask, null)? - .into_datetime(ca.time_unit(), ca.time_zone().clone()); + if ca.has_nulls() { + let mask = &ca.is_not_null(); + let null = &Int64Chunked::full_null(PlSmallStr::EMPTY, n); + out = out + .physical() + .zip_with(mask, null)? + .into_datetime(ca.time_unit(), ca.time_zone().clone()); + } + Ok(out) } @@ -147,13 +150,14 @@ pub fn replace_date( } else { &day.zip_with(&day.is_not_null(), &ca.day())? }; - let new_dt = DateChunked::new_from_parts(year, month, day, ca.name().clone())?; + let mut out = DateChunked::new_from_parts(year, month, day, ca.name().clone())?; // Ensure nulls are propagated. 
- let out = new_dt.physical(); - let mask = &ca.is_not_null(); - let null = &Int32Chunked::full_null(PlSmallStr::EMPTY, n); - let out = out.zip_with(mask, null)?.into_date(); + if ca.has_nulls() { + let mask = &ca.is_not_null(); + let null = &Int32Chunked::full_null(PlSmallStr::EMPTY, n); + out = out.physical().zip_with(mask, null)?.into_date(); + } Ok(out) } From b651f5ad9581322dd012c8d6825cda77b9386c10 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sat, 7 Dec 2024 15:10:18 -0500 Subject: [PATCH 14/16] Use faster validity zip --- crates/polars-time/src/replace.rs | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 5200acde1cba..e9c49f757af3 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -102,12 +102,7 @@ pub fn replace_datetime( // Ensure nulls are propagated. if ca.has_nulls() { - let mask = &ca.is_not_null(); - let null = &Int64Chunked::full_null(PlSmallStr::EMPTY, n); - out = out - .physical() - .zip_with(mask, null)? - .into_datetime(ca.time_unit(), ca.time_zone().clone()); + out.merge_validities(ca.chunks()); } Ok(out) @@ -154,9 +149,7 @@ pub fn replace_date( // Ensure nulls are propagated. 
if ca.has_nulls() { - let mask = &ca.is_not_null(); - let null = &Int32Chunked::full_null(PlSmallStr::EMPTY, n); - out = out.physical().zip_with(mask, null)?.into_date(); + out.merge_validities(ca.chunks()); } Ok(out) From 7bc79a00ec497c7fcd29405c4426d24b90754e34 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sun, 8 Dec 2024 20:00:49 -0500 Subject: [PATCH 15/16] Fix ns --- .../src/dsl/function_expr/datetime.rs | 4 +- .../src/dsl/function_expr/temporal.rs | 11 +++-- .../polars-time/src/chunkedarray/datetime.rs | 12 ++--- crates/polars-time/src/replace.rs | 27 ++++++----- .../namespaces/temporal/test_datetime.py | 48 +++++++++++++++++++ 5 files changed, 76 insertions(+), 26 deletions(-) diff --git a/crates/polars-plan/src/dsl/function_expr/datetime.rs b/crates/polars-plan/src/dsl/function_expr/datetime.rs index 7fddf4c3b673..30db9d0ffa04 100644 --- a/crates/polars-plan/src/dsl/function_expr/datetime.rs +++ b/crates/polars-plan/src/dsl/function_expr/datetime.rs @@ -579,7 +579,7 @@ pub(super) fn replace(s: &[Column]) -> PolarsResult<Column> { let hour = s_hour.i8()?; let minute = s_minute.i8()?; let second = s_second.i8()?; - let microsecond = s_microsecond.i32()?; + let nanosecond = &(s_microsecond.i32()? 
* 1_000); let s_ambiguous = &s[8].strict_cast(&DataType::String)?; let ambiguous = s_ambiguous.str()?; @@ -591,7 +591,7 @@ pub(super) fn replace(s: &[Column]) -> PolarsResult<Column> { hour, minute, second, - microsecond, + nanosecond, ambiguous, ); out.map(|s| s.into_column()) diff --git a/crates/polars-plan/src/dsl/function_expr/temporal.rs b/crates/polars-plan/src/dsl/function_expr/temporal.rs index d5b53ab4f0b9..e0be041e9a40 100644 --- a/crates/polars-plan/src/dsl/function_expr/temporal.rs +++ b/crates/polars-plan/src/dsl/function_expr/temporal.rs @@ -124,11 +124,12 @@ pub(super) fn datetime( } let second = second.i8()?; - let mut microsecond = microsecond.cast(&DataType::Int32)?; - if microsecond.len() < max_len { - microsecond = microsecond.new_from_index(0, max_len); + let mut nanosecond = microsecond.cast(&DataType::Int32)? * 1_000; + if nanosecond.len() < max_len { + nanosecond = nanosecond.new_from_index(0, max_len); } - let microsecond = microsecond.i32()?; + let nanosecond = nanosecond.i32()?; + let mut _ambiguous = ambiguous.cast(&DataType::String)?; if _ambiguous.len() < max_len { _ambiguous = _ambiguous.new_from_index(0, max_len); @@ -142,7 +143,7 @@ pub(super) fn datetime( hour, minute, second, - microsecond, + nanosecond, ambiguous, time_unit, time_zone, diff --git a/crates/polars-time/src/chunkedarray/datetime.rs b/crates/polars-time/src/chunkedarray/datetime.rs index 64b8d931a2e3..ca4691cc6280 100644 --- a/crates/polars-time/src/chunkedarray/datetime.rs +++ b/crates/polars-time/src/chunkedarray/datetime.rs @@ -161,7 +161,7 @@ pub trait DatetimeMethods: AsDatetime { hour: &Int8Chunked, minute: &Int8Chunked, second: &Int8Chunked, - microsecond: &Int32Chunked, + nanosecond: &Int32Chunked, ambiguous: &StringChunked, time_unit: &TimeUnit, time_zone: Option<&str>, @@ -174,14 +174,14 @@ pub trait DatetimeMethods: AsDatetime { .zip(hour) .zip(minute) .zip(second) - .zip(microsecond) - .map(|((((((y, m), d), h), mnt), s), us)| { - if let (Some(y), 
Some(m), 
Some(d), Some(h), Some(mnt), Some(s), Some(us)) = - (y, m, d, h, mnt, s, us) + .zip(nanosecond) + .map(|((((((y, m), d), h), mnt), s), ns)| { + if let (Some(y), Some(m), Some(d), Some(h), Some(mnt), Some(s), Some(ns)) = + (y, m, d, h, mnt, s, ns) { NaiveDate::from_ymd_opt(y, m as u32, d as u32) .and_then(|nd| { - nd.and_hms_micro_opt(h as u32, mnt as u32, s as u32, us as u32) + nd.and_hms_nano_opt(h as u32, mnt as u32, s as u32, ns as u32) }) .map(|ndt| match time_unit { TimeUnit::Milliseconds => ndt.and_utc().timestamp_millis(), diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index e9c49f757af3..3c03c142164c 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -13,7 +13,7 @@ pub fn replace_datetime( hour: &Int8Chunked, minute: &Int8Chunked, second: &Int8Chunked, - microsecond: &Int32Chunked, + nanosecond: &Int32Chunked, ambiguous: &StringChunked, ) -> PolarsResult<DatetimeChunked> { let n = ca.len(); @@ -24,7 +24,7 @@ pub fn replace_datetime( // 3. 
Value was supplied and is Series --> Update all elements with the non-null values let year = if year.len() == 1 { if let Some(value) = year.get(0) { - &Int32Chunked::full("".into(), value, n) + &Int32Chunked::full(PlSmallStr::EMPTY, value, n) } else { &ca.year() } @@ -33,7 +33,7 @@ pub fn replace_datetime( }; let month = if month.len() == 1 { if let Some(value) = month.get(0) { - &Int8Chunked::full("".into(), value, n) + &Int8Chunked::full(PlSmallStr::EMPTY, value, n) } else { &ca.month() } @@ -42,7 +42,7 @@ pub fn replace_datetime( }; let day = if day.len() == 1 { if let Some(value) = day.get(0) { - &Int8Chunked::full("".into(), value, n) + &Int8Chunked::full(PlSmallStr::EMPTY, value, n) } else { &ca.day() } @@ -51,7 +51,7 @@ pub fn replace_datetime( }; let hour = if hour.len() == 1 { if let Some(value) = hour.get(0) { - &Int8Chunked::full("".into(), value, n) + &Int8Chunked::full(PlSmallStr::EMPTY, value, n) } else { &ca.hour() } @@ -60,7 +60,7 @@ pub fn replace_datetime( }; let minute = if minute.len() == 1 { if let Some(value) = minute.get(0) { - &Int8Chunked::full("".into(), value, n) + &Int8Chunked::full(PlSmallStr::EMPTY, value, n) } else { &ca.minute() } @@ -69,23 +69,24 @@ pub fn replace_datetime( }; let second = if second.len() == 1 { if let Some(value) = second.get(0) { - &Int8Chunked::full("".into(), value, n) + &Int8Chunked::full(PlSmallStr::EMPTY, value, n) } else { &ca.second() } } else { &second.zip_with(&second.is_not_null(), &ca.second())? }; - let microsecond = if microsecond.len() == 1 { - if let Some(value) = microsecond.get(0) { - &Int32Chunked::full("".into(), value, n) + let nanosecond = if nanosecond.len() == 1 { + if let Some(value) = nanosecond.get(0) { + &Int32Chunked::full(PlSmallStr::EMPTY, value, n) } else { - &(ca.nanosecond() / 1000) + &ca.nanosecond() } } else { - &microsecond.zip_with(&microsecond.is_not_null(), &(ca.nanosecond() / 1000))? + &nanosecond.zip_with(&nanosecond.is_not_null(), &ca.nanosecond())? 
}; + println!("nanosecond: {:?}", nanosecond); let mut out = DatetimeChunked::new_from_parts( year, month, @@ -93,7 +94,7 @@ pub fn replace_datetime( hour, minute, second, - microsecond, + nanosecond, ambiguous, &ca.time_unit(), ca.time_zone().as_deref(), diff --git a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py index 12994ea40fcf..bac4626b5cfd 100644 --- a/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py +++ b/py-polars/tests/unit/operations/namespaces/temporal/test_datetime.py @@ -889,6 +889,54 @@ def test_replace_ambiguous() -> None: value.dt.replace(hour=1, ambiguous="raise") +def test_replace_datetime_preserve_ns() -> None: + df = pl.DataFrame( + { + "a": pl.Series(["2020-01-01T00:00:00.123456789"] * 2).cast( + pl.Datetime("ns") + ), + "year": [2021, None], + "microsecond": [50, None], + } + ) + + result = df.select( + year=pl.col("a").dt.replace(year="year"), + us=pl.col("a").dt.replace(microsecond="microsecond"), + ) + + expected = pl.DataFrame( + { + "year": pl.Series( + [ + "2021-01-01T00:00:00.123456789", + "2020-01-01T00:00:00.123456789", + ] + ).cast(pl.Datetime("ns")), + "us": pl.Series( + [ + "2020-01-01T00:00:00.000050", + "2020-01-01T00:00:00.123456789", + ] + ).cast(pl.Datetime("ns")), + } + ) + + assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("tu", ["ms", "us", "ns"]) +@pytest.mark.parametrize("tzinfo", [None, "Africa/Nairobi", "America/New_York"]) +def test_replace_preserve_tu_and_tz(tu: TimeUnit, tzinfo: str) -> None: + s = pl.Series( + [datetime(2024, 1, 1), datetime(2024, 1, 2)], + dtype=pl.Datetime(time_unit=tu, time_zone=tzinfo), + ) + result = s.dt.replace(year=2000) + assert result.dtype.time_unit == tu # type: ignore[attr-defined] + assert result.dtype.time_zone == tzinfo # type: ignore[attr-defined] + + @pytest.mark.parametrize( ("tzinfo", "time_zone"), [(None, None), (ZoneInfo("Asia/Kathmandu"), 
"Asia/Kathmandu")], From b5893d03abe0cb1a3e51364d4f69f24f4078b1c7 Mon Sep 17 00:00:00 2001 From: Marshall Crumiller Date: Sat, 14 Dec 2024 11:30:42 -0500 Subject: [PATCH 16/16] Remove println --- crates/polars-time/src/replace.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/crates/polars-time/src/replace.rs b/crates/polars-time/src/replace.rs index 3c03c142164c..e464d2eb35df 100644 --- a/crates/polars-time/src/replace.rs +++ b/crates/polars-time/src/replace.rs @@ -86,7 +86,6 @@ pub fn replace_datetime( &nanosecond.zip_with(&nanosecond.is_not_null(), &ca.nanosecond())? }; - println!("nanosecond: {:?}", nanosecond); let mut out = DatetimeChunked::new_from_parts( year, month,