Skip to content

Commit c20f034

Browse files
authored
feat: Add cum(_min/_max) for pl.Boolean (#19061)
1 parent be5a4b4 commit c20f034

File tree

3 files changed

+97
-4
lines changed

3 files changed

+97
-4
lines changed

crates/polars-core/src/chunked_array/ops/chunkops.rs

+21
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use arrow::bitmap::{Bitmap, MutableBitmap};
12
use arrow::legacy::kernels::concatenate::concatenate_owned_unchecked;
23
use polars_error::constants::LENGTH_LIMIT_MSG;
34

@@ -173,6 +174,26 @@ impl<T: PolarsDataType> ChunkedArray<T> {
173174
}
174175
}
175176

177+
/// Returns the validity bitmap of the whole array as one contiguous [`Bitmap`].
///
/// Yields `None` when the single chunk carries no validity bitmap, when the
/// array reports no nulls, or when the array is empty. Otherwise the per-chunk
/// validity bitmaps are concatenated in chunk order.
pub fn rechunk_validity(&self) -> Option<Bitmap> {
178+
// Single chunk: its validity (if any) already spans the whole array.
if self.chunks.len() == 1 {
179+
return self.chunks[0].validity().cloned();
180+
}
181+
182+
// Nothing to build when there are no nulls or no elements.
if !self.has_nulls() || self.is_empty() {
183+
return None;
184+
}
185+
186+
let mut bm = MutableBitmap::with_capacity(self.len());
187+
for arr in self.downcast_iter() {
188+
if let Some(v) = arr.validity() {
189+
bm.extend_from_bitmap(v);
190+
} else {
191+
// A chunk without a validity bitmap contributes `arr.len()` set bits.
bm.extend_constant(arr.len(), true);
192+
}
193+
}
194+
Some(bm.into())
195+
}
196+
176197
/// Split the array. The chunks are reallocated; the underlying data slices are zero copy.
177198
///
178199
/// When offset is negative it will be counted from the end of the array.

crates/polars-ops/src/series/ops/cum_agg.rs

+64-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use std::ops::{Add, AddAssign, Mul};
22

33
use arity::unary_elementwise_values;
4+
use arrow::array::BooleanArray;
5+
use arrow::bitmap::MutableBitmap;
46
use num_traits::{Bounded, One, Zero};
57
use polars_core::prelude::*;
68
use polars_core::series::IsSorted;
@@ -90,6 +92,62 @@ where
9092
out.with_name(ca.name().clone())
9193
}
9294

95+
/// Cumulative maximum of a boolean array.
///
/// The value bits are `false` up to the first `Some(true)` and `true` from
/// there on. With `reverse` set, they are `true` up to and including the last
/// `Some(true)` and `false` after it. The input's validity is re-applied to
/// the result, so the output has the same validity bitmap as `ca`.
///
/// The input is returned as a clone when every element is null or when it
/// contains no `Some(true)`.
fn cum_max_bool(ca: &BooleanChunked, reverse: bool) -> BooleanChunked {
96+
// All-null input: return it unchanged.
if ca.len() == ca.null_count() {
97+
return ca.clone();
98+
}
99+
100+
let mut out;
101+
if !reverse {
102+
// TODO: efficient bitscan.
103+
let Some(first_true_idx) = ca.iter().position(|x| x == Some(true)) else {
104+
// No `Some(true)` anywhere: return the input unchanged.
return ca.clone();
105+
};
106+
out = MutableBitmap::with_capacity(ca.len());
107+
// `false` before the first true, `true` from it to the end.
out.extend_constant(first_true_idx, false);
108+
out.extend_constant(ca.len() - first_true_idx, true);
109+
} else {
110+
// TODO: efficient bitscan.
111+
let Some(last_true_idx) = ca.iter().rposition(|x| x == Some(true)) else {
112+
return ca.clone();
113+
};
114+
out = MutableBitmap::with_capacity(ca.len());
115+
// `true` up to and including the last true, `false` for the remaining tail.
out.extend_constant(last_true_idx + 1, true);
116+
out.extend_constant(ca.len() - 1 - last_true_idx, false);
117+
}
118+
119+
let arr: BooleanArray = out.freeze().into();
120+
// Attach the input's combined validity to the freshly built values.
BooleanChunked::with_chunk_like(ca, arr.with_validity(ca.rechunk_validity()))
121+
}
122+
123+
/// Cumulative minimum of a boolean array.
///
/// The value bits are `true` up to the first `Some(false)` and `false` from
/// there on. With `reverse` set, they are `false` up to and including the last
/// `Some(false)` and `true` after it. The input's validity is re-applied to
/// the result, so the output has the same validity bitmap as `ca`.
///
/// The input is returned as a clone when every element is null or when it
/// contains no `Some(false)`.
fn cum_min_bool(ca: &BooleanChunked, reverse: bool) -> BooleanChunked {
124+
// All-null input: return it unchanged.
if ca.len() == ca.null_count() {
125+
return ca.clone();
126+
}
127+
128+
let mut out;
129+
if !reverse {
130+
// TODO: efficient bitscan.
131+
let Some(first_false_idx) = ca.iter().position(|x| x == Some(false)) else {
132+
// No `Some(false)` anywhere: return the input unchanged.
return ca.clone();
133+
};
134+
out = MutableBitmap::with_capacity(ca.len());
135+
// `true` before the first false, `false` from it to the end.
out.extend_constant(first_false_idx, true);
136+
out.extend_constant(ca.len() - first_false_idx, false);
137+
} else {
138+
// TODO: efficient bitscan.
139+
let Some(last_false_idx) = ca.iter().rposition(|x| x == Some(false)) else {
140+
return ca.clone();
141+
};
142+
out = MutableBitmap::with_capacity(ca.len());
143+
// `false` up to and including the last false, `true` for the remaining tail.
out.extend_constant(last_false_idx + 1, false);
144+
out.extend_constant(ca.len() - 1 - last_false_idx, true);
145+
}
146+
147+
let arr: BooleanArray = out.freeze().into();
148+
// Attach the input's combined validity to the freshly built values.
BooleanChunked::with_chunk_like(ca, arr.with_validity(ca.rechunk_validity()))
149+
}
150+
93151
fn cum_sum_numeric<T>(ca: &ChunkedArray<T>, reverse: bool) -> ChunkedArray<T>
94152
where
95153
T: PolarsNumericType,
@@ -173,13 +231,14 @@ pub fn cum_min(s: &Series, reverse: bool) -> PolarsResult<Series> {
173231
let original_type = s.dtype();
174232
let s = s.to_physical_repr();
175233
match s.dtype() {
234+
DataType::Boolean => Ok(cum_min_bool(s.bool()?, reverse).into_series()),
176235
dt if dt.is_numeric() => {
177236
with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
178237
let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
179238
let out = cum_min_numeric(ca, reverse).into_series();
180-
if original_type.is_logical(){
239+
if original_type.is_logical() {
181240
out.cast(original_type)
182-
}else{
241+
} else {
183242
Ok(out)
184243
}
185244
})
@@ -193,13 +252,14 @@ pub fn cum_max(s: &Series, reverse: bool) -> PolarsResult<Series> {
193252
let original_type = s.dtype();
194253
let s = s.to_physical_repr();
195254
match s.dtype() {
255+
DataType::Boolean => Ok(cum_max_bool(s.bool()?, reverse).into_series()),
196256
dt if dt.is_numeric() => {
197257
with_match_physical_numeric_polars_type!(s.dtype(), |$T| {
198258
let ca: &ChunkedArray<$T> = s.as_ref().as_ref().as_ref();
199259
let out = cum_max_numeric(ca, reverse).into_series();
200-
if original_type.is_logical(){
260+
if original_type.is_logical() {
201261
out.cast(original_type)
202-
}else{
262+
} else {
203263
Ok(out)
204264
}
205265
})

py-polars/tests/unit/series/test_series.py

+12
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,18 @@ def test_cum_agg_with_nulls() -> None:
5757
assert_series_equal(s.cum_prod(), pl.Series("a", [None, 2, None, 14, 112, None]))
5858

5959

60+
# Boolean cum_min/cum_max, forward and reverse, must agree with the same
# aggregation computed on the Int32 cast of the series.
def test_cum_min_max_bool() -> None:
61+
# Input mixes nulls with both True and False values.
s = pl.Series("a", [None, True, True, None, False, None, True, False, False, None])
62+
assert_series_equal(s.cum_min().cast(pl.Int32), s.cast(pl.Int32).cum_min())
63+
assert_series_equal(s.cum_max().cast(pl.Int32), s.cast(pl.Int32).cum_max())
64+
assert_series_equal(
65+
s.cum_min(reverse=True).cast(pl.Int32), s.cast(pl.Int32).cum_min(reverse=True)
66+
)
67+
assert_series_equal(
68+
s.cum_max(reverse=True).cast(pl.Int32), s.cast(pl.Int32).cum_max(reverse=True)
69+
)
70+
71+
6072
def test_init_inputs(monkeypatch: Any) -> None:
6173
nan = float("nan")
6274
# Good inputs

0 commit comments

Comments
 (0)