Skip to content

Commit

Permalink
fix: Warn if asof keys not sorted (#20887)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jan 24, 2025
1 parent 884feeb commit 8b5b05a
Show file tree
Hide file tree
Showing 7 changed files with 66 additions and 8 deletions.
22 changes: 21 additions & 1 deletion crates/polars-ops/src/frame/join/asof/groups.rs
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,7 @@ pub trait AsofJoinBy: IntoDf {
slice: Option<(i64, usize)>,
coalesce: bool,
allow_eq: bool,
check_sortedness: bool,
) -> PolarsResult<DataFrame> {
let (self_sliced_slot, other_sliced_slot, left_slice_s, right_slice_s); // Keeps temporaries alive.
let (self_df, other_df, left_key, right_key);
Expand Down Expand Up @@ -513,6 +514,7 @@ pub trait AsofJoinBy: IntoDf {
&right_asof,
tolerance.is_some(),
left_by.is_empty() && right_by.is_empty(),
check_sortedness,
)?;

let mut left_by = self_df.select(left_by)?;
Expand Down Expand Up @@ -579,6 +581,7 @@ pub trait AsofJoinBy: IntoDf {
strategy: AsofStrategy,
tolerance: Option<AnyValue<'static>>,
allow_eq: bool,
check_sortedness: bool,
) -> PolarsResult<DataFrame>
where
I: IntoIterator<Item = S>,
Expand All @@ -590,8 +593,18 @@ pub trait AsofJoinBy: IntoDf {
let left_key = self_df.column(left_on)?.as_materialized_series();
let right_key = other.column(right_on)?.as_materialized_series();
self_df._join_asof_by(
other, left_key, right_key, left_by, right_by, strategy, tolerance, None, None, true,
other,
left_key,
right_key,
left_by,
right_by,
strategy,
tolerance,
None,
None,
true,
allow_eq,
check_sortedness,
)
}
}
Expand Down Expand Up @@ -624,6 +637,7 @@ mod test {
AsofStrategy::Backward,
None,
true,
true,
)?;
assert_eq!(out.get_column_names(), &["a", "b", "right_vals"]);
let out = out.column("right_vals").unwrap();
Expand Down Expand Up @@ -668,6 +682,7 @@ mod test {
AsofStrategy::Backward,
None,
true,
true,
)?;
let a = out.column("bid_right").unwrap();
let a = a.f64().unwrap();
Expand All @@ -684,6 +699,7 @@ mod test {
AsofStrategy::Backward,
None,
true,
true,
)?;
let a = out.column("bid_right").unwrap();
let a = a.f64().unwrap();
Expand Down Expand Up @@ -715,6 +731,7 @@ mod test {
AsofStrategy::Forward,
None,
true,
true,
)?;
assert_eq!(out.get_column_names(), &["a", "b", "right_vals"]);
let out = out.column("right_vals").unwrap();
Expand All @@ -733,6 +750,7 @@ mod test {
AsofStrategy::Forward,
Some(AnyValue::Int32(1)),
true,
true,
)?;
assert_eq!(out.get_column_names(), &["a", "b", "right_vals"]);
let out = out.column("right_vals").unwrap();
Expand Down Expand Up @@ -802,6 +820,7 @@ mod test {
AsofStrategy::Forward,
None,
true,
true,
)?;
let a = out.column("bid_right").unwrap();
let a = a.f64().unwrap();
Expand All @@ -824,6 +843,7 @@ mod test {
AsofStrategy::Forward,
None,
true,
true,
)?;
let a = out.column("bid_right").unwrap();
let a = a.f64().unwrap();
Expand Down
31 changes: 26 additions & 5 deletions crates/polars-ops/src/frame/join/asof/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,13 +200,15 @@ pub struct AsOfOptions {
pub right_by: Option<Vec<PlSmallStr>>,
/// Allow equal matches
pub allow_eq: bool,
pub check_sortedness: bool,
}

fn check_asof_columns(
a: &Series,
b: &Series,
has_tolerance: bool,
check_sorted: bool,
sorted_err: bool,
check_sortedness: bool,
) -> PolarsResult<()> {
let dtype_a = a.dtype();
let dtype_b = b.dtype();
Expand All @@ -228,9 +230,21 @@ fn check_asof_columns(
ComputeError: "mismatching key dtypes in asof-join: `{}` and `{}`",
a.dtype(), b.dtype()
);
if check_sorted {
a.ensure_sorted_arg("asof_join")?;
b.ensure_sorted_arg("asof_join")?;
if check_sortedness {
if sorted_err {
a.ensure_sorted_arg("asof_join")?;
b.ensure_sorted_arg("asof_join")?;
} else {
let msg = |side| {
format!("{side} key of asof join is not sorted.\n\nThis can lead to invalid results. Ensure the asof key is sorted")
};
if a.ensure_sorted_arg("asof_join").is_err() {
polars_warn!(msg("left"))
}
if b.ensure_sorted_arg("asof_join").is_err() {
polars_warn!(msg("right"))
}
}
}
Ok(())
}
Expand Down Expand Up @@ -261,10 +275,17 @@ pub trait AsofJoin: IntoDf {
slice: Option<(i64, usize)>,
coalesce: bool,
allow_eq: bool,
check_sortedness: bool,
) -> PolarsResult<DataFrame> {
let self_df = self.to_df();

check_asof_columns(left_key, right_key, tolerance.is_some(), true)?;
check_asof_columns(
left_key,
right_key,
tolerance.is_some(),
true,
check_sortedness,
)?;
let left_key = left_key.to_physical_repr();
let right_key = right_key.to_physical_repr();

Expand Down
2 changes: 2 additions & 0 deletions crates/polars-ops/src/frame/join/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,7 @@ pub trait DataFrameJoinOps: IntoDf {
args.slice,
should_coalesce,
options.allow_eq,
options.check_sortedness,
),
(None, None) => left_df._join_asof(
other,
Expand All @@ -320,6 +321,7 @@ pub trait DataFrameJoinOps: IntoDf {
args.slice,
should_coalesce,
options.allow_eq,
options.check_sortedness,
),
_ => {
panic!("expected by arguments on both sides")
Expand Down
4 changes: 3 additions & 1 deletion crates/polars-python/src/lazyframe/general.rs
Original file line number Diff line number Diff line change
Expand Up @@ -990,7 +990,7 @@ impl PyLazyFrame {
}

#[cfg(feature = "asof_join")]
#[pyo3(signature = (other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str, coalesce, allow_eq))]
#[pyo3(signature = (other, left_on, right_on, left_by, right_by, allow_parallel, force_parallel, suffix, strategy, tolerance, tolerance_str, coalesce, allow_eq, check_sortedness))]
fn join_asof(
&self,
other: Self,
Expand All @@ -1006,6 +1006,7 @@ impl PyLazyFrame {
tolerance_str: Option<String>,
coalesce: bool,
allow_eq: bool,
check_sortedness: bool,
) -> PyResult<Self> {
let coalesce = if coalesce {
JoinCoalesce::CoalesceColumns
Expand All @@ -1031,6 +1032,7 @@ impl PyLazyFrame {
tolerance: tolerance.map(|t| t.0.into_static()),
tolerance_str: tolerance_str.map(|s| s.into()),
allow_eq,
check_sortedness,
}))
.suffix(suffix)
.finish()
Expand Down
2 changes: 2 additions & 0 deletions docs/source/src/rust/user-guide/transformations/joins.rs
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
AsofStrategy::Backward,
None,
true,
true,
)?;
println!("{}", result);
// --8<-- [end:asof]
Expand All @@ -267,6 +268,7 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
AsofStrategy::Backward,
Some(AnyValue::Duration(60000, TimeUnit::Milliseconds)),
true,
true,
)?;
println!("{}", result);
// --8<-- [end:asof-tolerance]
Expand Down
6 changes: 6 additions & 0 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -6981,6 +6981,7 @@ def join_asof(
force_parallel: bool = False,
coalesce: bool = True,
allow_exact_matches: bool = True,
check_sortedness: bool = True,
) -> DataFrame:
"""
Perform an asof join.
Expand Down Expand Up @@ -7073,6 +7074,10 @@ def join_asof(
(i.e. less-than-or-equal-to / greater-than-or-equal-to)
- If False, don't match the same ``on`` value
(i.e., strictly less-than / strictly greater-than).
check_sortedness
Check the sortedness of the asof keys. If the keys are not sorted Polars
will error, or in case of 'by' argument raise a warning. This might become
a hard error in the future.
Examples
--------
Expand Down Expand Up @@ -7306,6 +7311,7 @@ def join_asof(
force_parallel=force_parallel,
coalesce=coalesce,
allow_exact_matches=allow_exact_matches,
check_sortedness=check_sortedness,
)
.collect(_eager=True)
)
Expand Down
7 changes: 6 additions & 1 deletion py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4267,6 +4267,7 @@ def join_asof(
force_parallel: bool = False,
coalesce: bool = True,
allow_exact_matches: bool = True,
check_sortedness: bool = True,
) -> LazyFrame:
"""
Perform an asof join.
Expand Down Expand Up @@ -4359,7 +4360,10 @@ def join_asof(
(i.e. less-than-or-equal-to / greater-than-or-equal-to)
- If False, don't match the same ``on`` value
(i.e., strictly less-than / strictly greater-than).
check_sortedness
Check the sortedness of the asof keys. If the keys are not sorted Polars
will error, or in case of 'by' argument raise a warning. This might become
a hard error in the future.
Examples
Expand Down Expand Up @@ -4616,6 +4620,7 @@ def join_asof(
tolerance_str,
coalesce=coalesce,
allow_eq=allow_exact_matches,
check_sortedness=check_sortedness,
)
)

Expand Down

0 comments on commit 8b5b05a

Please sign in to comment.