Skip to content

Commit

Permalink
chore: move filter to polars-compute (pola-rs#13897)
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 authored Jan 22, 2024
1 parent 6e7062c commit ccc30b7
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 40 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ features = [
"compute_boolean_kleene",
"compute_cast",
"compute_comparison",
"compute_filter",
"compute_if_then_else",
]

Expand Down
2 changes: 0 additions & 2 deletions crates/polars-arrow/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ compute_boolean = []
compute_boolean_kleene = []
compute_cast = ["compute_take", "ryu", "atoi_simd", "itoa", "fast-float"]
compute_comparison = ["compute_take", "compute_boolean"]
compute_filter = []
compute_hash = ["multiversion"]
compute_if_then_else = []
compute_take = []
Expand All @@ -152,7 +151,6 @@ compute = [
"compute_boolean_kleene",
"compute_cast",
"compute_comparison",
"compute_filter",
"compute_hash",
"compute_if_then_else",
"compute_take",
Expand Down
10 changes: 4 additions & 6 deletions crates/polars-arrow/src/array/growable/binview.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,12 +116,10 @@ impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> {
}

#[inline]
pub(crate) unsafe fn extend_unchecked_no_buffers(
&mut self,
index: usize,
start: usize,
len: usize,
) {
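/// Extends from `arrays[index]` while ignoring the buffers and without updating the views.
/// This is only correct in a filter.
/// # Safety
/// Performs no bounds checks: `index` must be a valid position in `self.arrays`
/// (and, presumably, `start + len` must not exceed that array's length — TODO confirm).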
pub unsafe fn extend_unchecked_no_buffers(&mut self, index: usize, start: usize, len: usize) {
let array = *self.arrays.get_unchecked(index);

extend_validity(&mut self.validity, array, start, len);
Expand Down
3 changes: 0 additions & 3 deletions crates/polars-arrow/src/compute/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,6 @@ pub mod boolean_kleene;
#[cfg_attr(docsrs, doc(cfg(feature = "compute_cast")))]
pub mod cast;
pub mod concatenate;
#[cfg(feature = "compute_filter")]
#[cfg_attr(docsrs, doc(cfg(feature = "compute_filter")))]
pub mod filter;
#[cfg(feature = "compute_if_then_else")]
#[cfg_attr(docsrs, doc(cfg(feature = "compute_if_then_else")))]
pub mod if_then_else;
Expand Down
3 changes: 1 addition & 2 deletions crates/polars-arrow/src/types/bit_chunk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,8 +129,7 @@ impl<T: BitChunk> BitChunkOnes<T> {
}

#[inline]
#[cfg(feature = "compute_filter")]
pub(crate) fn from_known_count(value: T, remaining: usize) -> Self {
pub fn from_known_count(value: T, remaining: usize) -> Self {
Self { value, remaining }
}
}
Expand Down
1 change: 1 addition & 0 deletions crates/polars-compute/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ description = "Private compute kernels for the Polars DataFrame library"
arrow = { workspace = true }
bytemuck = { workspace = true }
num-traits = { workspace = true }
polars-error = { workspace = true }
polars-utils = { workspace = true }

[build-dependencies]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
//! Contains operators to filter arrays such as [`filter`].
use polars_error::PolarsResult;

use crate::array::growable::{make_growable, Growable};
use crate::array::*;
use crate::bitmap::utils::{BitChunkIterExact, BitChunksExact, SlicesIterator};
use crate::bitmap::{Bitmap, MutableBitmap};
use crate::chunk::Chunk;
use crate::datatypes::ArrowDataType;
use crate::types::simd::Simd;
use crate::types::{BitChunkOnes, NativeType};
use crate::with_match_primitive_type_full;
use arrow::array::growable::{make_growable, Growable};
use arrow::array::*;
use arrow::bitmap::utils::{BitChunkIterExact, BitChunksExact, SlicesIterator};
use arrow::bitmap::{Bitmap, MutableBitmap};
use arrow::chunk::Chunk;
use arrow::datatypes::ArrowDataType;
use arrow::types::simd::Simd;
use arrow::types::{BitChunkOnes, NativeType};
use arrow::with_match_primitive_type_full;
use polars_error::*;

/// Function that can filter arbitrary arrays
pub type Filter<'a> = Box<dyn Fn(&dyn Array) -> Box<dyn Array> + 'a + Send + Sync>;
Expand Down Expand Up @@ -217,7 +216,7 @@ pub fn build_filter(filter: &BooleanArray) -> PolarsResult<Filter> {
let filter_count = iter.slots();
let chunks = iter.collect::<Vec<_>>();

use crate::datatypes::PhysicalType::*;
use arrow::datatypes::PhysicalType::*;
Ok(Box::new(move |array: &dyn Array| {
match array.data_type().to_physical_type() {
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
Expand Down Expand Up @@ -246,34 +245,34 @@ pub fn build_filter(filter: &BooleanArray) -> PolarsResult<Filter> {
}))
}

pub fn filter(array: &dyn Array, filter: &BooleanArray) -> PolarsResult<Box<dyn Array>> {
pub fn filter(array: &dyn Array, mask: &BooleanArray) -> PolarsResult<Box<dyn Array>> {
// A null mask entry may still hold a `true` bit in the values bitmap, so filtering
// on the raw values alone would be incorrect; AND the values with the validity first.
if let Some(validities) = filter.validity() {
let values = filter.values();
if let Some(validities) = mask.validity() {
let values = mask.values();
let new_values = values & validities;
let filter = BooleanArray::new(ArrowDataType::Boolean, new_values, None);
return crate::compute::filter::filter(array, &filter);
let mask = BooleanArray::new(ArrowDataType::Boolean, new_values, None);
return filter(array, &mask);
}

let false_count = filter.values().unset_bits();
if false_count == filter.len() {
assert_eq!(array.len(), filter.len());
let false_count = mask.values().unset_bits();
if false_count == mask.len() {
assert_eq!(array.len(), mask.len());
return Ok(new_empty_array(array.data_type().clone()));
}
if false_count == 0 {
assert_eq!(array.len(), filter.len());
assert_eq!(array.len(), mask.len());
return Ok(array.to_boxed());
}

use crate::datatypes::PhysicalType::*;
use arrow::datatypes::PhysicalType::*;
match array.data_type().to_physical_type() {
Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| {
let array = array.as_any().downcast_ref().unwrap();
Ok(Box::new(filter_primitive::<$T>(array, filter)))
Ok(Box::new(filter_primitive::<$T>(array, mask)))
}),
BinaryView => {
let iter = SlicesIterator::new(filter.values());
let iter = SlicesIterator::new(mask.values());
let array = array.as_any().downcast_ref::<BinaryViewArray>().unwrap();
let mut mutable =
growable::GrowableBinaryViewArray::new(vec![array], false, iter.slots());
Expand All @@ -289,7 +288,7 @@ pub fn filter(array: &dyn Array, filter: &BooleanArray) -> PolarsResult<Box<dyn
unreachable!()
},
_ => {
let iter = SlicesIterator::new(filter.values());
let iter = SlicesIterator::new(mask.values());
let mut mutable = make_growable(&[array], false, iter.slots());
iter.for_each(|(start, len)| mutable.extend(0, start, len));
Ok(mutable.as_box())
Expand Down
1 change: 1 addition & 0 deletions crates/polars-compute/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#![cfg_attr(feature = "simd", feature(portable_simd))]

pub mod comparisons;
pub mod filter;
pub mod min_max;
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/filter.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#[cfg(feature = "object")]
use arrow::array::Array;
use arrow::compute::filter::filter as filter_fn;
use polars_compute::filter::filter as filter_fn;

#[cfg(feature = "object")]
use crate::chunked_array::object::builder::ObjectChunkedBuilder;
Expand Down

0 comments on commit ccc30b7

Please sign in to comment.