diff --git a/crates/polars-arrow/src/array/builder.rs b/crates/polars-arrow/src/array/builder.rs
index c7cfc3952e9a..d18ca2a42614 100644
--- a/crates/polars-arrow/src/array/builder.rs
+++ b/crates/polars-arrow/src/array/builder.rs
@@ -44,6 +44,25 @@ pub trait StaticArrayBuilder {
         share: ShareStrategy,
     );
 
+    /// The same as [`Self::subslice_extend`], but repeats the extension
+    /// `repeats` times: the same `start`/`length` sub-slice of `other` is
+    /// appended `repeats` times in a row, and nothing is appended when
+    /// `repeats` is zero.
+    ///
+    /// The default implementation simply calls `subslice_extend` in a loop.
+    fn subslice_extend_repeated(
+        &mut self,
+        other: &Self::Array,
+        start: usize,
+        length: usize,
+        repeats: usize,
+        share: ShareStrategy,
+    ) {
+        for _ in 0..repeats {
+            self.subslice_extend(other, start, length, share);
+        }
+    }
+
     /// Extends this builder with the contents of the given array at the given
     /// indices. That is, other[idxs[i]] is appended to this array in order,
     /// for each i=0..idxs.len(). May panic if other does not match the
@@ -82,6 +101,20 @@ impl ArrayBuilder for T {
         StaticArrayBuilder::subslice_extend(self, other, start, length, share);
     }
 
+    #[inline(always)]
+    fn subslice_extend_repeated(
+        &mut self,
+        other: &dyn Array,
+        start: usize,
+        length: usize,
+        repeats: usize,
+        share: ShareStrategy,
+    ) {
+        // Panics if `other` is not a `T::Array`, like the other downcasting wrappers here.
+        let other: &T::Array = other.as_any().downcast_ref().unwrap();
+        StaticArrayBuilder::subslice_extend_repeated(self, other, start, length, repeats, share);
+    }
+
     #[inline(always)]
     unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
         let other: &T::Array = other.as_any().downcast_ref().unwrap();
@@ -113,6 +146,17 @@ pub trait ArrayBuilder: ArrayBuilderBoxedHelper {
         share: ShareStrategy,
     );
 
+    /// The same as subslice_extend, but repeats the extension `repeats` times.
+    /// May panic if other does not match the array type this builder expects.
+    fn subslice_extend_repeated(
+        &mut self,
+        other: &dyn Array,
+        start: usize,
+        length: usize,
+        repeats: usize,
+        share: ShareStrategy,
+    );
+
     /// Extends this builder with the contents of the given array at the given
     /// indices. That is, other[idxs[i]] is appended to this array in order,
     /// for each i=0..idxs.len(). May panic if other does not match the
@@ -157,6 +201,17 @@ impl ArrayBuilder for Box {
         (**self).subslice_extend(other, start, length, share);
     }
 
+    fn subslice_extend_repeated(
+        &mut self,
+        other: &dyn Array,
+        start: usize,
+        length: usize,
+        repeats: usize,
+        share: ShareStrategy,
+    ) {
+        (**self).subslice_extend_repeated(other, start, length, repeats, share);
+    }
+
     unsafe fn gather_extend(&mut self, other: &dyn Array, idxs: &[IdxSize], share: ShareStrategy) {
         (**self).gather_extend(other, idxs, share);
     }
diff --git a/crates/polars-arrow/src/array/growable/binary.rs b/crates/polars-arrow/src/array/growable/binary.rs
deleted file mode 100644
index b0063bbb1d84..000000000000
--- a/crates/polars-arrow/src/array/growable/binary.rs
+++ /dev/null
@@ -1,103 +0,0 @@
-use std::sync::Arc;
-
-use super::utils::extend_offset_values;
-use super::Growable;
-use crate::array::growable::utils::{extend_validity, prepare_validity};
-use crate::array::{Array, BinaryArray};
-use crate::bitmap::BitmapBuilder;
-use crate::datatypes::ArrowDataType;
-use crate::offset::{Offset, Offsets};
-
-/// Concrete [`Growable`] for the [`BinaryArray`].
-pub struct GrowableBinary<'a, O: Offset> {
-    arrays: Vec<&'a BinaryArray>,
-    dtype: ArrowDataType,
-    validity: Option,
-    values: Vec,
-    offsets: Offsets,
-}
-
-impl<'a, O: Offset> GrowableBinary<'a, O> {
-    /// Creates a new [`GrowableBinary`] bound to `arrays` with a pre-allocated `capacity`.
-    /// # Panics
-    /// If `arrays` is empty.
-    pub fn new(arrays: Vec<&'a BinaryArray>, mut use_validity: bool, capacity: usize) -> Self {
-        let dtype = arrays[0].dtype().clone();
-
-        // if any of the arrays has nulls, insertions from any array requires setting bits
-        // as there is at least one array with nulls.
- if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - Self { - arrays, - dtype, - values: Vec::with_capacity(0), - offsets: Offsets::with_capacity(capacity), - validity: prepare_validity(use_validity, capacity), - } - } - - fn to(&mut self) -> BinaryArray { - let dtype = self.dtype.clone(); - let validity = std::mem::take(&mut self.validity); - let offsets = std::mem::take(&mut self.offsets); - let values = std::mem::take(&mut self.values); - - BinaryArray::::new( - dtype, - offsets.into(), - values.into(), - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a, O: Offset> Growable<'a> for GrowableBinary<'a, O> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - let offsets = array.offsets(); - let values = array.values(); - - self.offsets - .try_extend_from_slice(offsets, start, len) - .unwrap(); - - // values - extend_offset_values::(&mut self.values, offsets.buffer(), values, start, len); - } - - fn extend_validity(&mut self, additional: usize) { - self.offsets.extend_constant(additional); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn len(&self) -> usize { - self.offsets.len() - 1 - } - - fn as_arc(&mut self) -> Arc { - self.to().arced() - } - - fn as_box(&mut self) -> Box { - self.to().boxed() - } -} - -impl<'a, O: Offset> From> for BinaryArray { - fn from(val: GrowableBinary<'a, O>) -> Self { - BinaryArray::::new( - val.dtype, - val.offsets.into(), - val.values.into(), - val.validity.map(|v| v.freeze()), - ) - } -} diff --git a/crates/polars-arrow/src/array/growable/binview.rs b/crates/polars-arrow/src/array/growable/binview.rs deleted file mode 100644 index c2f6ed6f5676..000000000000 --- a/crates/polars-arrow/src/array/growable/binview.rs +++ /dev/null @@ -1,169 +0,0 @@ -use std::ops::Deref; -use 
std::sync::Arc; - -use polars_utils::aliases::{InitHashMaps, PlHashSet}; -use polars_utils::itertools::Itertools; - -use super::Growable; -use crate::array::binview::{BinaryViewArrayGeneric, ViewType}; -use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity}; -use crate::array::{Array, MutableBinaryViewArray, View}; -use crate::bitmap::BitmapBuilder; -use crate::buffer::Buffer; -use crate::datatypes::ArrowDataType; - -/// Concrete [`Growable`] for the [`BinaryArray`]. -pub struct GrowableBinaryViewArray<'a, T: ViewType + ?Sized> { - arrays: Vec<&'a BinaryViewArrayGeneric>, - dtype: ArrowDataType, - validity: Option, - inner: MutableBinaryViewArray, - same_buffers: Option<&'a Arc<[Buffer]>>, - total_same_buffers_len: usize, // Only valid if same_buffers is Some. - has_duplicate_buffers: bool, -} - -impl<'a, T: ViewType + ?Sized> GrowableBinaryViewArray<'a, T> { - /// Creates a new [`GrowableBinaryViewArray`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new( - arrays: Vec<&'a BinaryViewArrayGeneric>, - mut use_validity: bool, - capacity: usize, - ) -> Self { - let dtype = arrays[0].dtype().clone(); - - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. - if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - // Fast case. 
- // This happens in group-by's - // And prevents us to push `M` buffers insert in the buffers - // #15615 - let all_same_buffer = arrays - .iter() - .map(|array| array.data_buffers().as_ptr()) - .all_equal() - && !arrays.is_empty(); - let same_buffers = all_same_buffer.then(|| arrays[0].data_buffers()); - let total_same_buffers_len = all_same_buffer - .then(|| arrays[0].total_buffer_len()) - .unwrap_or_default(); - - let mut duplicates = PlHashSet::new(); - let mut has_duplicate_buffers = false; - for arr in arrays.iter() { - if !duplicates.insert(arr.data_buffers().as_ptr()) { - has_duplicate_buffers = true; - break; - } - } - Self { - arrays, - dtype, - validity: prepare_validity(use_validity, capacity), - inner: MutableBinaryViewArray::::with_capacity(capacity), - same_buffers, - total_same_buffers_len, - has_duplicate_buffers, - } - } - - fn to(&mut self) -> BinaryViewArrayGeneric { - let arr = std::mem::take(&mut self.inner); - if let Some(buffers) = self.same_buffers { - unsafe { - BinaryViewArrayGeneric::::new_unchecked( - self.dtype.clone(), - arr.views.into(), - buffers.clone(), - self.validity.take().map(BitmapBuilder::freeze), - arr.total_bytes_len, - self.total_same_buffers_len, - ) - } - } else { - arr.freeze_with_dtype(self.dtype.clone()) - .with_validity(self.validity.take().map(BitmapBuilder::freeze)) - } - } -} - -impl<'a, T: ViewType + ?Sized> Growable<'a> for GrowableBinaryViewArray<'a, T> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - let local_buffers = array.data_buffers(); - - extend_validity(&mut self.validity, array, start, len); - - let range = start..start + len; - - let views_iter = array.views().get_unchecked(range).iter().cloned(); - - if self.same_buffers.is_some() { - let mut total_len = 0; - self.inner - .views - .extend(views_iter.inspect(|v| total_len += v.length as usize)); - self.inner.total_bytes_len += total_len; - } else if 
self.has_duplicate_buffers { - self.inner - .extend_non_null_views_unchecked_dedupe(views_iter, local_buffers.deref()); - } else { - self.inner - .extend_non_null_views_unchecked(views_iter, local_buffers.deref()); - } - } - - unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) { - let orig_view_start = self.inner.views.len(); - let orig_total_bytes_len = self.inner.total_bytes_len; - if copies > 0 { - self.extend(index, start, len); - } - if copies > 1 { - let array = *self.arrays.get_unchecked(index); - extend_validity_copies(&mut self.validity, array, start, len, copies - 1); - let extended_view_end = self.inner.views.len(); - let total_bytes_len_end = self.inner.total_bytes_len; - for _ in 0..copies - 1 { - self.inner - .views - .extend_from_within(orig_view_start..extended_view_end); - self.inner.total_bytes_len += total_bytes_len_end - orig_total_bytes_len; - } - } - } - - fn extend_validity(&mut self, additional: usize) { - self.inner - .views - .extend(std::iter::repeat(View::default()).take(additional)); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn len(&self) -> usize { - self.inner.len() - } - - fn as_arc(&mut self) -> Arc { - self.to().arced() - } - - fn as_box(&mut self) -> Box { - self.to().boxed() - } -} - -impl<'a, T: ViewType + ?Sized> From> for BinaryViewArrayGeneric { - fn from(mut val: GrowableBinaryViewArray<'a, T>) -> Self { - val.to() - } -} diff --git a/crates/polars-arrow/src/array/growable/boolean.rs b/crates/polars-arrow/src/array/growable/boolean.rs deleted file mode 100644 index 20aa0740a298..000000000000 --- a/crates/polars-arrow/src/array/growable/boolean.rs +++ /dev/null @@ -1,90 +0,0 @@ -use std::sync::Arc; - -use super::Growable; -use crate::array::growable::utils::{extend_validity, prepare_validity}; -use crate::array::{Array, BooleanArray}; -use crate::bitmap::BitmapBuilder; -use crate::datatypes::ArrowDataType; - -/// 
Concrete [`Growable`] for the [`BooleanArray`]. -pub struct GrowableBoolean<'a> { - arrays: Vec<&'a BooleanArray>, - dtype: ArrowDataType, - validity: Option, - values: BitmapBuilder, -} - -impl<'a> GrowableBoolean<'a> { - /// Creates a new [`GrowableBoolean`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new(arrays: Vec<&'a BooleanArray>, mut use_validity: bool, capacity: usize) -> Self { - let dtype = arrays[0].dtype().clone(); - - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. - if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - Self { - arrays, - dtype, - values: BitmapBuilder::with_capacity(capacity), - validity: prepare_validity(use_validity, capacity), - } - } - - fn to(&mut self) -> BooleanArray { - let validity = self.validity.take(); - let values = std::mem::take(&mut self.values); - - BooleanArray::new( - self.dtype.clone(), - values.freeze(), - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a> Growable<'a> for GrowableBoolean<'a> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - let values = array.values(); - - let (slice, offset, _) = values.as_slice(); - self.values.extend_from_slice(slice, start + offset, len); - } - - fn extend_validity(&mut self, additional: usize) { - self.values.extend_constant(additional, false); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn len(&self) -> usize { - self.values.len() - } - - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a> From> for BooleanArray { - fn from(val: GrowableBoolean<'a>) -> Self { - BooleanArray::new( - val.dtype, - 
val.values.freeze(), - val.validity.map(|v| v.freeze()), - ) - } -} diff --git a/crates/polars-arrow/src/array/growable/dictionary.rs b/crates/polars-arrow/src/array/growable/dictionary.rs deleted file mode 100644 index 2d7f7fe24794..000000000000 --- a/crates/polars-arrow/src/array/growable/dictionary.rs +++ /dev/null @@ -1,147 +0,0 @@ -use std::sync::Arc; - -use super::{make_growable, Growable}; -use crate::array::growable::utils::{extend_validity, prepare_validity}; -use crate::array::{Array, DictionaryArray, DictionaryKey, PrimitiveArray}; -use crate::bitmap::BitmapBuilder; -use crate::datatypes::ArrowDataType; - -/// Concrete [`Growable`] for the [`DictionaryArray`]. -/// # Implementation -/// This growable does not perform collision checks and instead concatenates -/// the values of each [`DictionaryArray`] one after the other. -pub struct GrowableDictionary<'a, K: DictionaryKey> { - dtype: ArrowDataType, - keys: Vec<&'a PrimitiveArray>, - key_values: Vec, - validity: Option, - offsets: Vec, - values: Box, -} - -fn concatenate_values( - arrays_keys: &[&PrimitiveArray], - arrays_values: &[&dyn Array], - capacity: usize, -) -> (Box, Vec) { - let mut mutable = make_growable(arrays_values, false, capacity); - let mut offsets = Vec::with_capacity(arrays_keys.len() + 1); - offsets.push(0); - for (i, values) in arrays_values.iter().enumerate() { - unsafe { mutable.extend(i, 0, values.len()) }; - offsets.push(offsets[i] + values.len()); - } - (mutable.as_box(), offsets) -} - -impl<'a, T: DictionaryKey> GrowableDictionary<'a, T> { - /// Creates a new [`GrowableDictionary`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new(arrays: &[&'a DictionaryArray], mut use_validity: bool, capacity: usize) -> Self { - let dtype = arrays[0].dtype().clone(); - - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. 
- if arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - let arrays_keys = arrays.iter().map(|array| array.keys()).collect::>(); - let arrays_values = arrays - .iter() - .map(|array| array.values().as_ref()) - .collect::>(); - - let (values, offsets) = concatenate_values(&arrays_keys, &arrays_values, capacity); - - Self { - dtype, - offsets, - values, - keys: arrays_keys, - key_values: Vec::with_capacity(capacity), - validity: prepare_validity(use_validity, capacity), - } - } - - #[inline] - fn to(&mut self) -> DictionaryArray { - let validity = self.validity.take(); - let key_values = std::mem::take(&mut self.key_values); - - #[cfg(debug_assertions)] - { - crate::array::specification::check_indexes(&key_values, self.values.len()).unwrap(); - } - let keys = PrimitiveArray::::new( - T::PRIMITIVE.into(), - key_values.into(), - validity.map(|v| v.freeze()), - ); - - // SAFETY: the invariant of this struct ensures that this is up-held - unsafe { - DictionaryArray::::try_new_unchecked(self.dtype.clone(), keys, self.values.clone()) - .unwrap() - } - } -} - -impl<'a, T: DictionaryKey> Growable<'a> for GrowableDictionary<'a, T> { - #[inline] - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let keys_array = *self.keys.get_unchecked(index); - extend_validity(&mut self.validity, keys_array, start, len); - - let values = &keys_array.values().get_unchecked(start..start + len); - let offset = self.offsets.get_unchecked(index); - self.key_values.extend( - values - .iter() - // `.unwrap_or(0)` because this operation does not check for null values, which may contain any key. - .map(|x| { - let x: usize = offset + (*x).try_into().unwrap_or(0); - let x: T = match x.try_into() { - Ok(key) => key, - // todo: convert this to an error. 
- Err(_) => { - panic!("The maximum key is too small") - }, - }; - x - }), - ); - } - - #[inline] - fn len(&self) -> usize { - self.key_values.len() - } - - #[inline] - fn extend_validity(&mut self, additional: usize) { - self.key_values - .resize(self.key_values.len() + additional, T::default()); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - #[inline] - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a, T: DictionaryKey> From> for DictionaryArray { - #[inline] - fn from(mut val: GrowableDictionary<'a, T>) -> Self { - val.to() - } -} diff --git a/crates/polars-arrow/src/array/growable/fixed_binary.rs b/crates/polars-arrow/src/array/growable/fixed_binary.rs deleted file mode 100644 index da1e598d4be3..000000000000 --- a/crates/polars-arrow/src/array/growable/fixed_binary.rs +++ /dev/null @@ -1,94 +0,0 @@ -use std::sync::Arc; - -use super::Growable; -use crate::array::growable::utils::{extend_validity, prepare_validity}; -use crate::array::{Array, FixedSizeBinaryArray}; -use crate::bitmap::BitmapBuilder; - -/// Concrete [`Growable`] for the [`FixedSizeBinaryArray`]. -pub struct GrowableFixedSizeBinary<'a> { - arrays: Vec<&'a FixedSizeBinaryArray>, - validity: Option, - values: Vec, - size: usize, // just a cache -} - -impl<'a> GrowableFixedSizeBinary<'a> { - /// Creates a new [`GrowableFixedSizeBinary`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new( - arrays: Vec<&'a FixedSizeBinaryArray>, - mut use_validity: bool, - capacity: usize, - ) -> Self { - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. 
- if arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - let size = FixedSizeBinaryArray::get_size(arrays[0].dtype()); - Self { - arrays, - values: Vec::with_capacity(0), - validity: prepare_validity(use_validity, capacity), - size, - } - } - - fn to(&mut self) -> FixedSizeBinaryArray { - let validity = std::mem::take(&mut self.validity); - let values = std::mem::take(&mut self.values); - - FixedSizeBinaryArray::new( - self.arrays[0].dtype().clone(), - values.into(), - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a> Growable<'a> for GrowableFixedSizeBinary<'a> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - let values = array.values(); - - self.values.extend_from_slice( - values.get_unchecked(start * self.size..start * self.size + len * self.size), - ); - } - - fn extend_validity(&mut self, additional: usize) { - self.values - .extend_from_slice(&vec![0; self.size * additional]); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn len(&self) -> usize { - self.values.len() / self.size - } - - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a> From> for FixedSizeBinaryArray { - fn from(val: GrowableFixedSizeBinary<'a>) -> Self { - FixedSizeBinaryArray::new( - val.arrays[0].dtype().clone(), - val.values.into(), - val.validity.map(|v| v.freeze()), - ) - } -} diff --git a/crates/polars-arrow/src/array/growable/fixed_size_list.rs b/crates/polars-arrow/src/array/growable/fixed_size_list.rs deleted file mode 100644 index 5972e24e4ee9..000000000000 --- a/crates/polars-arrow/src/array/growable/fixed_size_list.rs +++ /dev/null @@ -1,128 +0,0 @@ -use std::sync::Arc; - -use super::{make_growable, Growable}; -use crate::array::growable::utils::{extend_validity, 
extend_validity_copies, prepare_validity}; -use crate::array::{Array, FixedSizeListArray}; -use crate::bitmap::BitmapBuilder; - -/// Concrete [`Growable`] for the [`FixedSizeListArray`]. -pub struct GrowableFixedSizeList<'a> { - arrays: Vec<&'a FixedSizeListArray>, - validity: Option, - values: Box + 'a>, - size: usize, - length: usize, -} - -impl<'a> GrowableFixedSizeList<'a> { - /// Creates a new [`GrowableFixedSizeList`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new( - arrays: Vec<&'a FixedSizeListArray>, - mut use_validity: bool, - capacity: usize, - ) -> Self { - assert!(!arrays.is_empty()); - - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. - if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - let size = arrays[0].size(); - - let inner = arrays - .iter() - .map(|array| { - debug_assert_eq!(array.size(), size); - array.values().as_ref() - }) - .collect::>(); - let values = make_growable(&inner, use_validity, 0); - - assert_eq!(values.len(), 0); - - Self { - arrays, - values, - validity: prepare_validity(use_validity, capacity), - size, - length: 0, - } - } - - pub fn to(&mut self) -> FixedSizeListArray { - let validity = std::mem::take(&mut self.validity); - let values = self.values.as_box(); - - FixedSizeListArray::new( - self.arrays[0].dtype().clone(), - self.length, - values, - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a> Growable<'a> for GrowableFixedSizeList<'a> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - self.length += len; - let start_length = self.values.len(); - self.values - .extend(index, start * self.size, len * self.size); - debug_assert!(self.size == 0 || (self.values.len() - start_length) / self.size == 
len); - } - - unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity_copies(&mut self.validity, array, start, len, copies); - - self.length += len * copies; - let start_length = self.values.len(); - self.values - .extend_copies(index, start * self.size, len * self.size, copies); - debug_assert!( - self.size == 0 || (self.values.len() - start_length) / self.size == len * copies - ); - } - - fn extend_validity(&mut self, additional: usize) { - self.values.extend_validity(additional * self.size); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - self.length += additional; - } - - #[inline] - fn len(&self) -> usize { - self.length - } - - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a> From> for FixedSizeListArray { - fn from(val: GrowableFixedSizeList<'a>) -> Self { - let mut values = val.values; - let values = values.as_box(); - - Self::new( - val.arrays[0].dtype().clone(), - val.length, - values, - val.validity.map(|v| v.freeze()), - ) - } -} diff --git a/crates/polars-arrow/src/array/growable/list.rs b/crates/polars-arrow/src/array/growable/list.rs deleted file mode 100644 index 8dd070a003e6..000000000000 --- a/crates/polars-arrow/src/array/growable/list.rs +++ /dev/null @@ -1,108 +0,0 @@ -use std::sync::Arc; - -use super::{make_growable, Growable}; -use crate::array::growable::utils::{extend_validity, prepare_validity}; -use crate::array::{Array, ListArray}; -use crate::bitmap::BitmapBuilder; -use crate::offset::{Offset, Offsets}; - -unsafe fn extend_offset_values( - growable: &mut GrowableList<'_, O>, - index: usize, - start: usize, - len: usize, -) { - let array = growable.arrays.get_unchecked(index); - let offsets = array.offsets(); - - growable - .offsets - .try_extend_from_slice(offsets, start, len) - .unwrap(); - - let end = 
offsets.buffer().get_unchecked(start + len).to_usize(); - let start = offsets.buffer().get_unchecked(start).to_usize(); - let len = end - start; - growable.values.extend(index, start, len); -} - -/// Concrete [`Growable`] for the [`ListArray`]. -pub struct GrowableList<'a, O: Offset> { - arrays: Vec<&'a ListArray>, - validity: Option, - values: Box + 'a>, - offsets: Offsets, -} - -impl<'a, O: Offset> GrowableList<'a, O> { - /// Creates a new [`GrowableList`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new(arrays: Vec<&'a ListArray>, mut use_validity: bool, capacity: usize) -> Self { - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. - if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - let inner = arrays - .iter() - .map(|array| array.values().as_ref()) - .collect::>(); - let values = make_growable(&inner, use_validity, 0); - - Self { - arrays, - offsets: Offsets::with_capacity(capacity), - values, - validity: prepare_validity(use_validity, capacity), - } - } - - pub fn to(&mut self) -> ListArray { - let validity = std::mem::take(&mut self.validity); - let offsets = std::mem::take(&mut self.offsets); - let values = self.values.as_box(); - - ListArray::::new( - self.arrays[0].dtype().clone(), - offsets.into(), - values, - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a, O: Offset> Growable<'a> for GrowableList<'a, O> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - extend_offset_values::(self, index, start, len); - } - - fn extend_validity(&mut self, additional: usize) { - self.offsets.extend_constant(additional); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn 
len(&self) -> usize { - self.offsets.len() - 1 - } - - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a, O: Offset> From> for ListArray { - fn from(mut val: GrowableList<'a, O>) -> Self { - val.to() - } -} diff --git a/crates/polars-arrow/src/array/growable/mod.rs b/crates/polars-arrow/src/array/growable/mod.rs deleted file mode 100644 index 1238b29f59a3..000000000000 --- a/crates/polars-arrow/src/array/growable/mod.rs +++ /dev/null @@ -1,166 +0,0 @@ -//! Contains the trait [`Growable`] and corresponding concreate implementations, one per concrete array, -//! that offer the ability to create a new [`Array`] out of slices of existing [`Array`]s. - -use crate::array::*; -use crate::datatypes::*; - -mod binary; -pub use binary::GrowableBinary; -mod boolean; -pub use boolean::GrowableBoolean; -mod fixed_binary; -pub use fixed_binary::GrowableFixedSizeBinary; -mod null; -pub use null::GrowableNull; -mod primitive; -pub use primitive::GrowablePrimitive; -mod list; -pub use list::GrowableList; -mod structure; -pub use structure::GrowableStruct; -mod fixed_size_list; -pub use fixed_size_list::GrowableFixedSizeList; -mod utf8; -pub use utf8::GrowableUtf8; -mod dictionary; -pub use dictionary::GrowableDictionary; - -mod binview; -pub use binview::GrowableBinaryViewArray; - -mod utils; - -/// Describes a struct that can be extended from slices of other pre-existing [`Array`]s. -/// This is used in operations where a new array is built out of other arrays, such -/// as filter and concatenation. -pub trait Growable<'a> { - /// Extends this [`Growable`] with elements from the bounded [`Array`] at index `index` from - /// a slice starting at `start` and length `len`. - /// - /// # Safety - /// Doesn't do any bound checks. - unsafe fn extend(&mut self, index: usize, start: usize, len: usize); - - /// Same as extend, except it repeats the extension `copies` times. 
- /// - /// # Safety - /// Doesn't do any bound checks. - unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) { - for _ in 0..copies { - self.extend(index, start, len) - } - } - - /// Extends this [`Growable`] with null elements, disregarding the bound arrays - /// - /// # Safety - /// Doesn't do any bound checks - fn extend_validity(&mut self, additional: usize); - - /// The current length of the [`Growable`]. - fn len(&self) -> usize; - - /// Converts this [`Growable`] to an [`Arc`], thereby finishing the mutation. - /// Self will be empty after such operation. - fn as_arc(&mut self) -> Arc { - self.as_box().into() - } - - /// Converts this [`Growable`] to an [`Box`], thereby finishing the mutation. - /// Self will be empty after such operation - fn as_box(&mut self) -> Box; -} - -macro_rules! dyn_growable { - ($ty:ty, $arrays:expr, $use_validity:expr, $capacity:expr) => {{ - let arrays = $arrays - .iter() - .map(|array| array.as_any().downcast_ref().unwrap()) - .collect::>(); - Box::new(<$ty>::new(arrays, $use_validity, $capacity)) - }}; -} - -/// Creates a new [`Growable`] from an arbitrary number of [`Array`]s. -/// # Panics -/// This function panics iff -/// * the arrays do not have the same [`ArrowDataType`]. -/// * `arrays.is_empty()`. 
-pub fn make_growable<'a>( - arrays: &[&'a dyn Array], - use_validity: bool, - capacity: usize, -) -> Box + 'a> { - assert!(!arrays.is_empty()); - let dtype = arrays[0].dtype(); - - use PhysicalType::*; - match dtype.to_physical_type() { - Null => Box::new(null::GrowableNull::new(dtype.clone())), - Boolean => dyn_growable!(boolean::GrowableBoolean, arrays, use_validity, capacity), - Primitive(primitive) => with_match_primitive_type_full!(primitive, |$T| { - dyn_growable!(primitive::GrowablePrimitive::<$T>, arrays, use_validity, capacity) - }), - Binary => dyn_growable!( - binary::GrowableBinary::, - arrays, - use_validity, - capacity - ), - LargeBinary => dyn_growable!( - binary::GrowableBinary::, - arrays, - use_validity, - capacity - ), - FixedSizeBinary => dyn_growable!( - fixed_binary::GrowableFixedSizeBinary, - arrays, - use_validity, - capacity - ), - LargeList => dyn_growable!(list::GrowableList::, arrays, use_validity, capacity), - Struct => dyn_growable!(structure::GrowableStruct, arrays, use_validity, capacity), - FixedSizeList => dyn_growable!( - fixed_size_list::GrowableFixedSizeList, - arrays, - use_validity, - capacity - ), - BinaryView => { - dyn_growable!( - binview::GrowableBinaryViewArray::<[u8]>, - arrays, - use_validity, - capacity - ) - }, - Utf8View => { - dyn_growable!( - binview::GrowableBinaryViewArray::, - arrays, - use_validity, - capacity - ) - }, - Dictionary(key_type) => { - match_integer_type!(key_type, |$T| { - let arrays = arrays - .iter() - .map(|array| { - array - .as_any() - .downcast_ref::>() - .unwrap() - }) - .collect::>(); - Box::new(dictionary::GrowableDictionary::<$T>::new( - &arrays, - use_validity, - capacity, - )) - }) - }, - Union | Map | Utf8 | LargeUtf8 | List => unimplemented!(), - } -} diff --git a/crates/polars-arrow/src/array/growable/null.rs b/crates/polars-arrow/src/array/growable/null.rs deleted file mode 100644 index e663fc31b8b4..000000000000 --- a/crates/polars-arrow/src/array/growable/null.rs +++ /dev/null 
@@ -1,53 +0,0 @@ -use std::sync::Arc; - -use super::Growable; -use crate::array::{Array, NullArray}; -use crate::datatypes::ArrowDataType; - -/// Concrete [`Growable`] for the [`NullArray`]. -pub struct GrowableNull { - dtype: ArrowDataType, - length: usize, -} - -impl Default for GrowableNull { - fn default() -> Self { - Self::new(ArrowDataType::Null) - } -} - -impl GrowableNull { - /// Creates a new [`GrowableNull`]. - pub fn new(dtype: ArrowDataType) -> Self { - Self { dtype, length: 0 } - } -} - -impl Growable<'_> for GrowableNull { - unsafe fn extend(&mut self, _: usize, _: usize, len: usize) { - self.length += len; - } - - fn extend_validity(&mut self, additional: usize) { - self.length += additional; - } - - #[inline] - fn len(&self) -> usize { - self.length - } - - fn as_arc(&mut self) -> Arc { - Arc::new(NullArray::new(self.dtype.clone(), self.length)) - } - - fn as_box(&mut self) -> Box { - Box::new(NullArray::new(self.dtype.clone(), self.length)) - } -} - -impl From for NullArray { - fn from(val: GrowableNull) -> Self { - NullArray::new(val.dtype, val.length) - } -} diff --git a/crates/polars-arrow/src/array/growable/primitive.rs b/crates/polars-arrow/src/array/growable/primitive.rs deleted file mode 100644 index c2f3188634d8..000000000000 --- a/crates/polars-arrow/src/array/growable/primitive.rs +++ /dev/null @@ -1,114 +0,0 @@ -use std::sync::Arc; - -use super::Growable; -use crate::array::growable::utils::{extend_validity, extend_validity_copies, prepare_validity}; -use crate::array::{Array, PrimitiveArray}; -use crate::bitmap::BitmapBuilder; -use crate::datatypes::ArrowDataType; -use crate::types::NativeType; - -/// Concrete [`Growable`] for the [`PrimitiveArray`]. 
-pub struct GrowablePrimitive<'a, T: NativeType> { - dtype: ArrowDataType, - arrays: Vec<&'a PrimitiveArray>, - validity: Option, - values: Vec, -} - -impl<'a, T: NativeType> GrowablePrimitive<'a, T> { - /// Creates a new [`GrowablePrimitive`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new( - arrays: Vec<&'a PrimitiveArray>, - mut use_validity: bool, - capacity: usize, - ) -> Self { - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. - if !use_validity & arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - let dtype = arrays[0].dtype().clone(); - - Self { - dtype, - arrays, - values: Vec::with_capacity(capacity), - validity: prepare_validity(use_validity, capacity), - } - } - - #[inline] - fn to(&mut self) -> PrimitiveArray { - let validity = std::mem::take(&mut self.validity); - let values = std::mem::take(&mut self.values); - - PrimitiveArray::::new( - self.dtype.clone(), - values.into(), - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a, T: NativeType> Growable<'a> for GrowablePrimitive<'a, T> { - #[inline] - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - let values = array.values().as_slice(); - self.values - .extend_from_slice(values.get_unchecked(start..start + len)); - } - - #[inline] - unsafe fn extend_copies(&mut self, index: usize, start: usize, len: usize, copies: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity_copies(&mut self.validity, array, start, len, copies); - - let values = array.values().as_slice(); - self.values.reserve(len * copies); - for _ in 0..copies { - self.values - .extend_from_slice(values.get_unchecked(start..start + len)); - } - } - - #[inline] - fn extend_validity(&mut self, additional: usize) { - 
self.values - .resize(self.values.len() + additional, T::default()); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn len(&self) -> usize { - self.values.len() - } - - #[inline] - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - #[inline] - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a, T: NativeType> From> for PrimitiveArray { - #[inline] - fn from(val: GrowablePrimitive<'a, T>) -> Self { - PrimitiveArray::::new( - val.dtype, - val.values.into(), - val.validity.map(|v| v.freeze()), - ) - } -} diff --git a/crates/polars-arrow/src/array/growable/structure.rs b/crates/polars-arrow/src/array/growable/structure.rs deleted file mode 100644 index 386c3cc0d470..000000000000 --- a/crates/polars-arrow/src/array/growable/structure.rs +++ /dev/null @@ -1,135 +0,0 @@ -use std::sync::Arc; - -use super::{make_growable, Growable}; -use crate::array::growable::utils::{extend_validity, prepare_validity}; -use crate::array::{Array, StructArray}; -use crate::bitmap::BitmapBuilder; - -/// Concrete [`Growable`] for the [`StructArray`]. -pub struct GrowableStruct<'a> { - arrays: Vec<&'a StructArray>, - length: usize, - validity: Option, - values: Vec + 'a>>, -} - -impl<'a> GrowableStruct<'a> { - /// Creates a new [`GrowableStruct`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new(arrays: Vec<&'a StructArray>, mut use_validity: bool, capacity: usize) -> Self { - assert!(!arrays.is_empty()); - - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. 
- if arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - let arrays = arrays - .iter() - .map(|array| array.as_any().downcast_ref::().unwrap()) - .collect::>(); - - // ([field1, field2], [field3, field4]) -> ([field1, field3], [field2, field3]) - let values = (0..arrays[0].values().len()) - .map(|i| { - make_growable( - &arrays - .iter() - .map(|x| x.values()[i].as_ref()) - .collect::>(), - use_validity, - capacity, - ) - }) - .collect::>>(); - - Self { - arrays, - length: 0, - values, - validity: prepare_validity(use_validity, capacity), - } - } - - fn to(&mut self) -> StructArray { - let validity = std::mem::take(&mut self.validity); - let values = std::mem::take(&mut self.values); - let values = values.into_iter().map(|mut x| x.as_box()).collect(); - - StructArray::new( - self.arrays[0].dtype().clone(), - self.length, - values, - validity.map(|v| v.freeze()), - ) - } -} - -impl<'a> Growable<'a> for GrowableStruct<'a> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - self.length += len; - - if array.null_count() == 0 { - self.values - .iter_mut() - .for_each(|child| child.extend(index, start, len)) - } else { - (start..start + len).for_each(|i| { - if array.is_valid(i) { - self.values - .iter_mut() - .for_each(|child| child.extend(index, i, 1)) - } else { - self.values - .iter_mut() - .for_each(|child| child.extend_validity(1)) - } - }) - } - } - - fn extend_validity(&mut self, additional: usize) { - self.values - .iter_mut() - .for_each(|child| child.extend_validity(additional)); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - self.length += additional; - } - - #[inline] - fn len(&self) -> usize { - if let Some(child) = self.values.first() { - child.len() - } else { - unreachable!() - } - } - - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - 
} - - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a> From> for StructArray { - fn from(val: GrowableStruct<'a>) -> Self { - let values = val.values.into_iter().map(|mut x| x.as_box()).collect(); - - StructArray::new( - val.arrays[0].dtype().clone(), - val.length, - values, - val.validity.map(|v| v.freeze()), - ) - } -} diff --git a/crates/polars-arrow/src/array/growable/utf8.rs b/crates/polars-arrow/src/array/growable/utf8.rs deleted file mode 100644 index 432744238860..000000000000 --- a/crates/polars-arrow/src/array/growable/utf8.rs +++ /dev/null @@ -1,101 +0,0 @@ -use std::sync::Arc; - -use super::utils::extend_offset_values; -use super::Growable; -use crate::array::growable::utils::{extend_validity, prepare_validity}; -use crate::array::{Array, Utf8Array}; -use crate::bitmap::BitmapBuilder; -use crate::offset::{Offset, Offsets}; - -/// Concrete [`Growable`] for the [`Utf8Array`]. -pub struct GrowableUtf8<'a, O: Offset> { - arrays: Vec<&'a Utf8Array>, - validity: Option, - values: Vec, - offsets: Offsets, -} - -impl<'a, O: Offset> GrowableUtf8<'a, O> { - /// Creates a new [`GrowableUtf8`] bound to `arrays` with a pre-allocated `capacity`. - /// # Panics - /// If `arrays` is empty. - pub fn new(arrays: Vec<&'a Utf8Array>, mut use_validity: bool, capacity: usize) -> Self { - assert!(!arrays.is_empty()); - - // if any of the arrays has nulls, insertions from any array requires setting bits - // as there is at least one array with nulls. 
- if arrays.iter().any(|array| array.null_count() > 0) { - use_validity = true; - }; - - Self { - arrays: arrays.to_vec(), - values: Vec::with_capacity(0), - offsets: Offsets::with_capacity(capacity), - validity: prepare_validity(use_validity, capacity), - } - } - - fn to(&mut self) -> Utf8Array { - let validity = std::mem::take(&mut self.validity); - let offsets = std::mem::take(&mut self.offsets); - let values = std::mem::take(&mut self.values); - - #[cfg(debug_assertions)] - { - crate::array::specification::try_check_utf8(offsets.as_slice(), &values).unwrap(); - } - - unsafe { - Utf8Array::::new_unchecked( - self.arrays[0].dtype().clone(), - offsets.into(), - values.into(), - validity.map(|v| v.freeze()), - ) - } - } -} - -impl<'a, O: Offset> Growable<'a> for GrowableUtf8<'a, O> { - unsafe fn extend(&mut self, index: usize, start: usize, len: usize) { - let array = *self.arrays.get_unchecked(index); - extend_validity(&mut self.validity, array, start, len); - - let offsets = array.offsets(); - let values = array.values(); - - self.offsets - .try_extend_from_slice(offsets, start, len) - .unwrap(); - - // values - extend_offset_values::(&mut self.values, offsets.as_slice(), values, start, len); - } - - fn extend_validity(&mut self, additional: usize) { - self.offsets.extend_constant(additional); - if let Some(validity) = &mut self.validity { - validity.extend_constant(additional, false); - } - } - - #[inline] - fn len(&self) -> usize { - self.offsets.len() - 1 - } - - fn as_arc(&mut self) -> Arc { - Arc::new(self.to()) - } - - fn as_box(&mut self) -> Box { - Box::new(self.to()) - } -} - -impl<'a, O: Offset> From> for Utf8Array { - fn from(mut val: GrowableUtf8<'a, O>) -> Self { - val.to() - } -} diff --git a/crates/polars-arrow/src/array/growable/utils.rs b/crates/polars-arrow/src/array/growable/utils.rs deleted file mode 100644 index cb7a8a47fb6f..000000000000 --- a/crates/polars-arrow/src/array/growable/utils.rs +++ /dev/null @@ -1,64 +0,0 @@ -use 
crate::array::Array; -use crate::bitmap::BitmapBuilder; -use crate::offset::Offset; - -#[inline] -pub(super) unsafe fn extend_offset_values( - buffer: &mut Vec, - offsets: &[O], - values: &[u8], - start: usize, - len: usize, -) { - let start_values = offsets.get_unchecked(start).to_usize(); - let end_values = offsets.get_unchecked(start + len).to_usize(); - let new_values = &values.get_unchecked(start_values..end_values); - buffer.extend_from_slice(new_values); -} - -pub(super) fn prepare_validity(use_validity: bool, capacity: usize) -> Option { - if use_validity { - Some(BitmapBuilder::with_capacity(capacity)) - } else { - None - } -} - -pub(super) fn extend_validity( - mutable_validity: &mut Option, - array: &dyn Array, - start: usize, - len: usize, -) { - if let Some(mutable_validity) = mutable_validity { - match array.validity() { - None => mutable_validity.extend_constant(len, true), - Some(validity) => { - debug_assert!(start + len <= validity.len()); - let (slice, offset, _) = validity.as_slice(); - mutable_validity.extend_from_slice(slice, start + offset, len); - }, - } - } -} - -pub(super) fn extend_validity_copies( - mutable_validity: &mut Option, - array: &dyn Array, - start: usize, - len: usize, - copies: usize, -) { - if let Some(mutable_validity) = mutable_validity { - match array.validity() { - None => mutable_validity.extend_constant(len * copies, true), - Some(validity) => { - debug_assert!(start + len <= validity.len()); - let (slice, offset, _) = validity.as_slice(); - for _ in 0..copies { - mutable_validity.extend_from_slice(slice, start + offset, len); - } - }, - } - } -} diff --git a/crates/polars-arrow/src/array/mod.rs b/crates/polars-arrow/src/array/mod.rs index ff73e1d32e27..8a99a97acd8a 100644 --- a/crates/polars-arrow/src/array/mod.rs +++ b/crates/polars-arrow/src/array/mod.rs @@ -677,30 +677,33 @@ pub mod indexable; pub mod iterator; mod binview; -pub mod growable; mod values; pub use binary::{BinaryArray, BinaryValueIter, 
MutableBinaryArray, MutableBinaryValuesArray}; pub use binview::{ - validate_utf8_view, BinaryViewArray, BinaryViewArrayGeneric, MutableBinaryViewArray, - MutablePlBinary, MutablePlString, Utf8ViewArray, View, ViewType, + validate_utf8_view, BinaryViewArray, BinaryViewArrayGeneric, BinaryViewArrayGenericBuilder, + MutableBinaryViewArray, MutablePlBinary, MutablePlString, Utf8ViewArray, View, ViewType, }; -pub use boolean::{BooleanArray, MutableBooleanArray}; +pub use boolean::{BooleanArray, BooleanArrayBuilder, MutableBooleanArray}; pub use dictionary::{DictionaryArray, DictionaryKey, MutableDictionaryArray}; pub use equal::equal; -pub use fixed_size_binary::{FixedSizeBinaryArray, MutableFixedSizeBinaryArray}; -pub use fixed_size_list::{FixedSizeListArray, MutableFixedSizeListArray}; +pub use fixed_size_binary::{ + FixedSizeBinaryArray, FixedSizeBinaryArrayBuilder, MutableFixedSizeBinaryArray, +}; +pub use fixed_size_list::{ + FixedSizeListArray, FixedSizeListArrayBuilder, MutableFixedSizeListArray, +}; pub use fmt::{get_display, get_value_display}; pub(crate) use iterator::ArrayAccessor; pub use iterator::ArrayValuesIter; -pub use list::{ListArray, ListValuesIter, MutableListArray}; +pub use list::{ListArray, ListArrayBuilder, ListValuesIter, MutableListArray}; pub use map::MapArray; -pub use null::{MutableNullArray, NullArray}; +pub use null::{MutableNullArray, NullArray, NullArrayBuilder}; use polars_error::PolarsResult; pub use primitive::*; pub use static_array::{ParameterFreeDtypeStaticArray, StaticArray}; pub use static_array_collect::{ArrayCollectIterExt, ArrayFromIter, ArrayFromIterDtype}; -pub use struct_::StructArray; +pub use struct_::{StructArray, StructArrayBuilder}; pub use union::UnionArray; pub use utf8::{MutableUtf8Array, MutableUtf8ValuesArray, Utf8Array, Utf8ValuesIter}; pub use values::ValueSize; diff --git a/crates/polars-arrow/src/array/null.rs b/crates/polars-arrow/src/array/null.rs index d8602e123eec..6ebe926ab1ee 100644 --- 
a/crates/polars-arrow/src/array/null.rs +++ b/crates/polars-arrow/src/array/null.rs @@ -4,7 +4,7 @@ use polars_error::{polars_bail, PolarsResult}; use polars_utils::IdxSize; use super::Splitable; -use crate::array::builder::{ArrayBuilder, ShareStrategy}; +use crate::array::builder::{ShareStrategy, StaticArrayBuilder}; use crate::array::{Array, FromFfi, MutableArray, ToFfi}; use crate::bitmap::{Bitmap, MutableBitmap}; use crate::datatypes::{ArrowDataType, PhysicalType}; @@ -227,20 +227,22 @@ impl NullArrayBuilder { } } -impl ArrayBuilder for NullArrayBuilder { +impl StaticArrayBuilder for NullArrayBuilder { + type Array = NullArray; + fn dtype(&self) -> &ArrowDataType { &self.dtype } fn reserve(&mut self, _additional: usize) {} - fn freeze(self) -> Box { - NullArray::new(self.dtype, self.length).to_boxed() + fn freeze(self) -> NullArray { + NullArray::new(self.dtype, self.length) } fn subslice_extend( &mut self, - _other: &dyn Array, + _other: &NullArray, _start: usize, length: usize, _share: ShareStrategy, @@ -248,9 +250,20 @@ impl ArrayBuilder for NullArrayBuilder { self.length += length; } + fn subslice_extend_repeated( + &mut self, + _other: &NullArray, + _start: usize, + length: usize, + repeats: usize, + _share: ShareStrategy, + ) { + self.length += length * repeats; + } + unsafe fn gather_extend( &mut self, - _other: &dyn Array, + _other: &NullArray, idxs: &[IdxSize], _share: ShareStrategy, ) { diff --git a/crates/polars-arrow/src/array/static_array.rs b/crates/polars-arrow/src/array/static_array.rs index a79b6a909fe6..a92a80be8954 100644 --- a/crates/polars-arrow/src/array/static_array.rs +++ b/crates/polars-arrow/src/array/static_array.rs @@ -1,8 +1,9 @@ use bytemuck::Zeroable; use polars_utils::no_call_const; -use super::growable::{Growable, GrowableFixedSizeList}; use crate::array::binview::BinaryViewValueIter; +use crate::array::builder::{make_builder, ShareStrategy, StaticArrayBuilder}; +use crate::array::fixed_size_list::FixedSizeListArrayBuilder; use 
crate::array::static_array_collect::ArrayFromIterDtype; use crate::array::{ Array, ArrayValuesIter, BinaryArray, BinaryValueIter, BinaryViewArray, BooleanArray, @@ -394,10 +395,11 @@ impl StaticArray for FixedSizeListArray { } fn full(length: usize, value: Self::ValueT<'_>, dtype: ArrowDataType) -> Self { - let singular_arr = FixedSizeListArray::new(dtype, 1, value, None); - let mut arr = GrowableFixedSizeList::new(vec![&singular_arr], false, length); - unsafe { arr.extend_copies(0, 0, 1, length) } - arr.into() + let singular_arr = FixedSizeListArray::new(dtype.clone(), 1, value, None); + let inner_dt = dtype.inner_dtype().unwrap(); + let mut builder = FixedSizeListArrayBuilder::new(dtype.clone(), make_builder(inner_dt)); + builder.subslice_extend_repeated(&singular_arr, 0, 1, length, ShareStrategy::Always); + builder.freeze() } } diff --git a/crates/polars-compute/src/filter/mod.rs b/crates/polars-compute/src/filter/mod.rs index 22f0de851ec0..5d89d8ec6a19 100644 --- a/crates/polars-compute/src/filter/mod.rs +++ b/crates/polars-compute/src/filter/mod.rs @@ -6,7 +6,7 @@ mod scalar; #[cfg(all(target_arch = "x86_64", feature = "simd"))] mod avx512; -use arrow::array::growable::make_growable; +use arrow::array::builder::{make_builder, ArrayBuilder, ShareStrategy}; use arrow::array::{ new_empty_array, Array, BinaryViewArray, BooleanArray, PrimitiveArray, Utf8ViewArray, }; @@ -99,11 +99,12 @@ pub fn filter_with_bitmap(array: &dyn Array, mask: &Bitmap) -> Box { }, _ => { let iter = SlicesIterator::new(mask); - let mut mutable = make_growable(&[array], false, iter.slots()); - // SAFETY: - // we are in bounds - iter.for_each(|(start, len)| unsafe { mutable.extend(0, start, len) }); - mutable.as_box() + let mut mutable = make_builder(array.dtype()); + mutable.reserve(iter.slots()); + iter.for_each(|(start, len)| { + mutable.subslice_extend(array, start, len, ShareStrategy::Always) + }); + mutable.freeze() }, } } diff --git a/crates/polars-compute/src/if_then_else/array.rs 
b/crates/polars-compute/src/if_then_else/array.rs index 67f9b450ec7c..ca33897e2da1 100644 --- a/crates/polars-compute/src/if_then_else/array.rs +++ b/crates/polars-compute/src/if_then_else/array.rs @@ -1,5 +1,5 @@ -use arrow::array::growable::{Growable, GrowableFixedSizeList}; -use arrow::array::{Array, ArrayCollectIterExt, FixedSizeListArray}; +use arrow::array::builder::{make_builder, ShareStrategy, StaticArrayBuilder}; +use arrow::array::{Array, ArrayCollectIterExt, FixedSizeListArray, FixedSizeListArrayBuilder}; use arrow::bitmap::Bitmap; use super::{if_then_else_extend, IfThenElseKernel}; @@ -8,16 +8,17 @@ impl IfThenElseKernel for FixedSizeListArray { type Scalar<'a> = Box; fn if_then_else(mask: &Bitmap, if_true: &Self, if_false: &Self) -> Self { - let mut growable = GrowableFixedSizeList::new(vec![if_true, if_false], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, off, len| g.extend(0, off, len), - |g, off, len| g.extend(1, off, len), - ) - }; - growable.to() + let inner_dt = if_true.dtype().inner_dtype().unwrap(); + let mut builder = + FixedSizeListArrayBuilder::new(if_true.dtype().clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, off, len| b.subslice_extend(if_true, off, len, ShareStrategy::Always), + |b, off, len| b.subslice_extend(if_false, off, len, ShareStrategy::Always), + ); + builder.freeze() } fn if_then_else_broadcast_true( @@ -27,17 +28,17 @@ impl IfThenElseKernel for FixedSizeListArray { ) -> Self { let if_true_list: FixedSizeListArray = std::iter::once(if_true).collect_arr_trusted_with_dtype(if_false.dtype().clone()); - let mut growable = - GrowableFixedSizeList::new(vec![&if_true_list, if_false], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, _, len| g.extend_copies(0, 0, 1, len), - |g, off, len| g.extend(1, off, len), - ) - }; - growable.to() + let inner_dt = if_false.dtype().inner_dtype().unwrap(); + 
let mut builder = + FixedSizeListArrayBuilder::new(if_false.dtype().clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, _, len| b.subslice_extend_repeated(&if_true_list, 0, 1, len, ShareStrategy::Always), + |b, off, len| b.subslice_extend(if_false, off, len, ShareStrategy::Always), + ); + builder.freeze() } fn if_then_else_broadcast_false( @@ -47,17 +48,19 @@ impl IfThenElseKernel for FixedSizeListArray { ) -> Self { let if_false_list: FixedSizeListArray = std::iter::once(if_false).collect_arr_trusted_with_dtype(if_true.dtype().clone()); - let mut growable = - GrowableFixedSizeList::new(vec![if_true, &if_false_list], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, off, len| g.extend(0, off, len), - |g, _, len| g.extend_copies(1, 0, 1, len), - ) - }; - growable.to() + let inner_dt = if_true.dtype().inner_dtype().unwrap(); + let mut builder = + FixedSizeListArrayBuilder::new(if_true.dtype().clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, off, len| b.subslice_extend(if_true, off, len, ShareStrategy::Always), + |b, _, len| { + b.subslice_extend_repeated(&if_false_list, 0, 1, len, ShareStrategy::Always) + }, + ); + builder.freeze() } fn if_then_else_broadcast_both( @@ -70,16 +73,17 @@ impl IfThenElseKernel for FixedSizeListArray { std::iter::once(if_true).collect_arr_trusted_with_dtype(dtype.clone()); let if_false_list: FixedSizeListArray = std::iter::once(if_false).collect_arr_trusted_with_dtype(dtype.clone()); - let mut growable = - GrowableFixedSizeList::new(vec![&if_true_list, &if_false_list], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, _, len| g.extend_copies(0, 0, 1, len), - |g, _, len| g.extend_copies(1, 0, 1, len), - ) - }; - growable.to() + let inner_dt = dtype.inner_dtype().unwrap(); + let mut builder = FixedSizeListArrayBuilder::new(dtype.clone(), 
make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, _, len| b.subslice_extend_repeated(&if_true_list, 0, 1, len, ShareStrategy::Always), + |b, _, len| { + b.subslice_extend_repeated(&if_false_list, 0, 1, len, ShareStrategy::Always) + }, + ); + builder.freeze() } } diff --git a/crates/polars-compute/src/if_then_else/list.rs b/crates/polars-compute/src/if_then_else/list.rs index 284d6b7f0420..8f0b4dbbceb0 100644 --- a/crates/polars-compute/src/if_then_else/list.rs +++ b/crates/polars-compute/src/if_then_else/list.rs @@ -1,5 +1,5 @@ -use arrow::array::growable::{Growable, GrowableList}; -use arrow::array::{Array, ArrayCollectIterExt, ListArray}; +use arrow::array::builder::{make_builder, ShareStrategy, StaticArrayBuilder}; +use arrow::array::{Array, ArrayCollectIterExt, ListArray, ListArrayBuilder}; use arrow::bitmap::Bitmap; use super::{if_then_else_extend, IfThenElseKernel}; @@ -8,16 +8,16 @@ impl IfThenElseKernel for ListArray { type Scalar<'a> = Box; fn if_then_else(mask: &Bitmap, if_true: &Self, if_false: &Self) -> Self { - let mut growable = GrowableList::new(vec![if_true, if_false], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, off, len| g.extend(0, off, len), - |g, off, len| g.extend(1, off, len), - ) - }; - growable.to() + let inner_dt = if_true.dtype().inner_dtype().unwrap(); + let mut builder = ListArrayBuilder::new(if_true.dtype().clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, off, len| b.subslice_extend(if_true, off, len, ShareStrategy::Always), + |b, off, len| b.subslice_extend(if_false, off, len, ShareStrategy::Always), + ); + builder.freeze() } fn if_then_else_broadcast_true( @@ -27,16 +27,16 @@ impl IfThenElseKernel for ListArray { ) -> Self { let if_true_list: ListArray = std::iter::once(if_true).collect_arr_trusted_with_dtype(if_false.dtype().clone()); - let mut growable = 
GrowableList::new(vec![&if_true_list, if_false], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, _, len| g.extend_copies(0, 0, 1, len), - |g, off, len| g.extend(1, off, len), - ) - }; - growable.to() + let inner_dt = if_false.dtype().inner_dtype().unwrap(); + let mut builder = ListArrayBuilder::new(if_false.dtype().clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, _, len| b.subslice_extend_repeated(&if_true_list, 0, 1, len, ShareStrategy::Always), + |b, off, len| b.subslice_extend(if_false, off, len, ShareStrategy::Always), + ); + builder.freeze() } fn if_then_else_broadcast_false( @@ -46,16 +46,18 @@ impl IfThenElseKernel for ListArray { ) -> Self { let if_false_list: ListArray = std::iter::once(if_false).collect_arr_trusted_with_dtype(if_true.dtype().clone()); - let mut growable = GrowableList::new(vec![if_true, &if_false_list], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, off, len| g.extend(0, off, len), - |g, _, len| g.extend_copies(1, 0, 1, len), - ) - }; - growable.to() + let inner_dt = if_true.dtype().inner_dtype().unwrap(); + let mut builder = ListArrayBuilder::new(if_true.dtype().clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, off, len| b.subslice_extend(if_true, off, len, ShareStrategy::Always), + |b, _, len| { + b.subslice_extend_repeated(&if_false_list, 0, 1, len, ShareStrategy::Always) + }, + ); + builder.freeze() } fn if_then_else_broadcast_both( @@ -68,16 +70,17 @@ impl IfThenElseKernel for ListArray { std::iter::once(if_true).collect_arr_trusted_with_dtype(dtype.clone()); let if_false_list: ListArray = std::iter::once(if_false).collect_arr_trusted_with_dtype(dtype.clone()); - let mut growable = - GrowableList::new(vec![&if_true_list, &if_false_list], false, mask.len()); - unsafe { - if_then_else_extend( - &mut growable, - mask, - |g, _, 
len| g.extend_copies(0, 0, 1, len), - |g, _, len| g.extend_copies(1, 0, 1, len), - ) - }; - growable.to() + let inner_dt = dtype.inner_dtype().unwrap(); + let mut builder = ListArrayBuilder::new(dtype.clone(), make_builder(inner_dt)); + builder.reserve(mask.len()); + if_then_else_extend( + &mut builder, + mask, + |b, _, len| b.subslice_extend_repeated(&if_true_list, 0, 1, len, ShareStrategy::Always), + |b, _, len| { + b.subslice_extend_repeated(&if_false_list, 0, 1, len, ShareStrategy::Always) + }, + ); + builder.freeze() } } diff --git a/crates/polars-compute/src/if_then_else/mod.rs b/crates/polars-compute/src/if_then_else/mod.rs index 8265422fb9de..4d3723ae426a 100644 --- a/crates/polars-compute/src/if_then_else/mod.rs +++ b/crates/polars-compute/src/if_then_else/mod.rs @@ -113,8 +113,8 @@ pub fn if_then_else_validity( } } -fn if_then_else_extend( - growable: &mut G, +fn if_then_else_extend( + builder: &mut B, mask: &Bitmap, extend_true: ET, extend_false: EF, @@ -122,13 +122,13 @@ fn if_then_else_extend::from([Some("a"), Some("bc"), None, Some("defh")]); - - let mut a = GrowableBinary::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - assert_eq!(a.len(), 2); - - let result: BinaryArray = a.into(); - - let expected = BinaryArray::::from([Some("bc"), None]); - assert_eq!(result, expected); -} - -/// tests extending from a variable-sized (strings and binary) array -/// with an offset and nulls -#[test] -fn with_offsets() { - let array = BinaryArray::::from([Some("a"), Some("bc"), None, Some("defh")]); - let array = array.sliced(1, 3); - - let mut a = GrowableBinary::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 0, 3); - } - assert_eq!(a.len(), 3); - - let result: BinaryArray = a.into(); - - let expected = BinaryArray::::from([Some("bc"), None, Some("defh")]); - assert_eq!(result, expected); -} - -#[test] -fn test_string_offsets() { - let array = BinaryArray::::from([Some("a"), Some("bc"), None, Some("defh")]); - let array = 
array.sliced(1, 3); - - let mut a = GrowableBinary::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 0, 3); - } - assert_eq!(a.len(), 3); - - let result: BinaryArray = a.into(); - - let expected = BinaryArray::::from([Some("bc"), None, Some("defh")]); - assert_eq!(result, expected); -} - -#[test] -fn test_multiple_with_validity() { - let array1 = BinaryArray::::from_slice([b"hello", b"world"]); - let array2 = BinaryArray::::from([Some("1"), None]); - - let mut a = GrowableBinary::new(vec![&array1, &array2], false, 5); - - unsafe { - a.extend(0, 0, 2); - } - unsafe { - a.extend(1, 0, 2); - } - assert_eq!(a.len(), 4); - - let result: BinaryArray = a.into(); - - let expected = BinaryArray::::from([Some("hello"), Some("world"), Some("1"), None]); - assert_eq!(result, expected); -} - -#[test] -fn test_string_null_offset_validity() { - let array = BinaryArray::::from([Some("a"), Some("bc"), None, Some("defh")]); - let array = array.sliced(1, 3); - - let mut a = GrowableBinary::new(vec![&array], true, 0); - - unsafe { - a.extend(0, 1, 2); - } - a.extend_validity(1); - assert_eq!(a.len(), 3); - - let result: BinaryArray = a.into(); - - let expected = BinaryArray::::from([None, Some("defh"), None]); - assert_eq!(result, expected); -} diff --git a/crates/polars/tests/it/arrow/array/growable/boolean.rs b/crates/polars/tests/it/arrow/array/growable/boolean.rs deleted file mode 100644 index b6721029cb81..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/boolean.rs +++ /dev/null @@ -1,19 +0,0 @@ -use arrow::array::growable::{Growable, GrowableBoolean}; -use arrow::array::BooleanArray; - -#[test] -fn test_bool() { - let array = BooleanArray::from(vec![Some(false), Some(true), None, Some(false)]); - - let mut a = GrowableBoolean::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - assert_eq!(a.len(), 2); - - let result: BooleanArray = a.into(); - - let expected = BooleanArray::from(vec![Some(true), None]); - assert_eq!(result, expected); -} 
diff --git a/crates/polars/tests/it/arrow/array/growable/dictionary.rs b/crates/polars/tests/it/arrow/array/growable/dictionary.rs deleted file mode 100644 index c84d95113d7c..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/dictionary.rs +++ /dev/null @@ -1,72 +0,0 @@ -use arrow::array::growable::{Growable, GrowableDictionary}; -use arrow::array::*; -use polars_error::PolarsResult; - -#[test] -fn test_single() -> PolarsResult<()> { - let original_data = vec![Some("a"), Some("b"), Some("a")]; - - let data = original_data.clone(); - let mut array = MutableDictionaryArray::>::new(); - array.try_extend(data)?; - let array = array.into(); - - // same values, less keys - let expected = DictionaryArray::try_from_keys( - PrimitiveArray::from_vec(vec![1, 0]), - Box::new(Utf8ViewArray::from_slice(&original_data)), - ) - .unwrap(); - - let mut growable = GrowableDictionary::new(&[&array], false, 0); - - unsafe { - growable.extend(0, 1, 2); - } - assert_eq!(growable.len(), 2); - - let result: DictionaryArray = growable.into(); - - assert_eq!(result, expected); - Ok(()) -} - -#[test] -fn test_multi() -> PolarsResult<()> { - let mut original_data1 = vec![Some("a"), Some("b"), None, Some("a")]; - let original_data2 = vec![Some("c"), Some("b"), None, Some("a")]; - - let data1 = original_data1.clone(); - let data2 = original_data2.clone(); - - let mut array1 = MutableDictionaryArray::>::new(); - array1.try_extend(data1)?; - let array1: DictionaryArray = array1.into(); - - let mut array2 = MutableDictionaryArray::>::new(); - array2.try_extend(data2)?; - let array2: DictionaryArray = array2.into(); - - // same values, less keys - original_data1.extend(original_data2.iter().cloned()); - let expected = DictionaryArray::try_from_keys( - PrimitiveArray::from(&[Some(1), None, Some(3), None]), - Utf8ViewArray::from_slice_values(["a", "b", "c", "b", "a"]).boxed(), - ) - .unwrap(); - - let mut growable = GrowableDictionary::new(&[&array1, &array2], false, 0); - - unsafe { - 
growable.extend(0, 1, 2); - } - unsafe { - growable.extend(1, 1, 2); - } - assert_eq!(growable.len(), 4); - - let result: DictionaryArray = growable.into(); - - assert_eq!(result, expected); - Ok(()) -} diff --git a/crates/polars/tests/it/arrow/array/growable/fixed_binary.rs b/crates/polars/tests/it/arrow/array/growable/fixed_binary.rs deleted file mode 100644 index 9ebb631f682c..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/fixed_binary.rs +++ /dev/null @@ -1,146 +0,0 @@ -use arrow::array::growable::{Growable, GrowableFixedSizeBinary}; -use arrow::array::FixedSizeBinaryArray; - -/// tests extending from a variable-sized (strings and binary) array w/ offset with nulls -#[test] -fn basic() { - let array = - FixedSizeBinaryArray::from_iter(vec![Some(b"ab"), Some(b"bc"), None, Some(b"de")], 2); - - let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - assert_eq!(a.len(), 2); - - let result: FixedSizeBinaryArray = a.into(); - - let expected = FixedSizeBinaryArray::from_iter(vec![Some("bc"), None], 2); - assert_eq!(result, expected); -} - -/// tests extending from a variable-sized (strings and binary) array -/// with an offset and nulls -#[test] -fn offsets() { - let array = - FixedSizeBinaryArray::from_iter(vec![Some(b"ab"), Some(b"bc"), None, Some(b"fh")], 2); - let array = array.sliced(1, 3); - - let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 0, 3); - } - assert_eq!(a.len(), 3); - - let result: FixedSizeBinaryArray = a.into(); - - let expected = FixedSizeBinaryArray::from_iter(vec![Some(b"bc"), None, Some(b"fh")], 2); - assert_eq!(result, expected); -} - -#[test] -fn multiple_with_validity() { - let array1 = FixedSizeBinaryArray::from_iter(vec![Some("hello"), Some("world")], 5); - let array2 = FixedSizeBinaryArray::from_iter(vec![Some("12345"), None], 5); - - let mut a = GrowableFixedSizeBinary::new(vec![&array1, &array2], false, 5); - - unsafe { - 
a.extend(0, 0, 2); - } - unsafe { - a.extend(1, 0, 2); - } - assert_eq!(a.len(), 4); - - let result: FixedSizeBinaryArray = a.into(); - - let expected = - FixedSizeBinaryArray::from_iter(vec![Some("hello"), Some("world"), Some("12345"), None], 5); - assert_eq!(result, expected); -} - -#[test] -fn null_offset_validity() { - let array = FixedSizeBinaryArray::from_iter(vec![Some("aa"), Some("bc"), None, Some("fh")], 2); - let array = array.sliced(1, 3); - - let mut a = GrowableFixedSizeBinary::new(vec![&array], true, 0); - - unsafe { - a.extend(0, 1, 2); - } - a.extend_validity(1); - assert_eq!(a.len(), 3); - - let result: FixedSizeBinaryArray = a.into(); - - let expected = FixedSizeBinaryArray::from_iter(vec![None, Some("fh"), None], 2); - assert_eq!(result, expected); -} - -#[test] -fn sized_offsets() { - let array = - FixedSizeBinaryArray::from_iter(vec![Some(&[0, 0]), Some(&[0, 1]), Some(&[0, 2])], 2); - let array = array.sliced(1, 2); - // = [[0, 1], [0, 2]] due to the offset = 1 - - let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 1); - } - unsafe { - a.extend(0, 0, 1); - } - assert_eq!(a.len(), 2); - - let result: FixedSizeBinaryArray = a.into(); - - let expected = FixedSizeBinaryArray::from_iter(vec![Some(&[0, 2]), Some(&[0, 1])], 2); - assert_eq!(result, expected); -} - -/// to, as_box, as_arc -#[test] -fn as_box() { - let array = - FixedSizeBinaryArray::from_iter(vec![Some(b"ab"), Some(b"bc"), None, Some(b"de")], 2); - let mut a = GrowableFixedSizeBinary::new(vec![&array], false, 0); - unsafe { - a.extend(0, 1, 2); - } - - let result = a.as_box(); - let result = result - .as_any() - .downcast_ref::() - .unwrap(); - - let expected = FixedSizeBinaryArray::from_iter(vec![Some("bc"), None], 2); - assert_eq!(&expected, result); -} - -/// as_arc -#[test] -fn as_arc() { - let array = - FixedSizeBinaryArray::from_iter(vec![Some(b"ab"), Some(b"bc"), None, Some(b"de")], 2); - let mut a = 
GrowableFixedSizeBinary::new(vec![&array], false, 0); - unsafe { - a.extend(0, 1, 2); - } - - let result = a.as_arc(); - let result = result - .as_any() - .downcast_ref::() - .unwrap(); - - let expected = FixedSizeBinaryArray::from_iter(vec![Some("bc"), None], 2); - assert_eq!(&expected, result); -} diff --git a/crates/polars/tests/it/arrow/array/growable/fixed_size_list.rs b/crates/polars/tests/it/arrow/array/growable/fixed_size_list.rs deleted file mode 100644 index dcdc25d1bda9..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/fixed_size_list.rs +++ /dev/null @@ -1,95 +0,0 @@ -use arrow::array::growable::{Growable, GrowableFixedSizeList}; -use arrow::array::{ - FixedSizeListArray, MutableFixedSizeListArray, MutablePrimitiveArray, TryExtend, -}; - -fn create_list_array(data: Vec>>>) -> FixedSizeListArray { - let mut array = MutableFixedSizeListArray::new(MutablePrimitiveArray::::new(), 3); - array.try_extend(data).unwrap(); - array.into() -} - -#[test] -fn basic() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - Some(vec![Some(4), Some(5), Some(6)]), - Some(vec![Some(7i32), Some(8), Some(9)]), - ]; - - let array = create_list_array(data); - - let mut a = GrowableFixedSizeList::new(vec![&array], false, 0); - unsafe { - a.extend(0, 0, 1); - } - assert_eq!(a.len(), 1); - - let result: FixedSizeListArray = a.into(); - - let expected = vec![Some(vec![Some(1i32), Some(2), Some(3)])]; - let expected = create_list_array(expected); - - assert_eq!(result, expected) -} - -#[test] -fn null_offset() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(6i32), Some(7), Some(8)]), - ]; - let array = create_list_array(data); - let array = array.sliced(1, 2); - - let mut a = GrowableFixedSizeList::new(vec![&array], false, 0); - unsafe { - a.extend(0, 1, 1); - } - assert_eq!(a.len(), 1); - - let result: FixedSizeListArray = a.into(); - - let expected = vec![Some(vec![Some(6i32), Some(7), Some(8)])]; - let expected 
= create_list_array(expected); - - assert_eq!(result, expected) -} - -#[test] -fn test_from_two_lists() { - let data_1 = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(6i32), None, Some(8)]), - ]; - let array_1 = create_list_array(data_1); - - let data_2 = vec![ - Some(vec![Some(8i32), Some(7), Some(6)]), - Some(vec![Some(5i32), None, Some(4)]), - Some(vec![Some(2i32), Some(1), Some(0)]), - ]; - let array_2 = create_list_array(data_2); - - let mut a = GrowableFixedSizeList::new(vec![&array_1, &array_2], false, 6); - unsafe { - a.extend(0, 0, 2); - } - unsafe { - a.extend(1, 1, 1); - } - assert_eq!(a.len(), 3); - - let result: FixedSizeListArray = a.into(); - - let expected = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(5i32), None, Some(4)]), - ]; - let expected = create_list_array(expected); - - assert_eq!(result, expected); -} diff --git a/crates/polars/tests/it/arrow/array/growable/list.rs b/crates/polars/tests/it/arrow/array/growable/list.rs deleted file mode 100644 index 6361623d2ee6..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/list.rs +++ /dev/null @@ -1,150 +0,0 @@ -use arrow::array::growable::{Growable, GrowableList}; -use arrow::array::{Array, ListArray, MutableListArray, MutablePrimitiveArray, TryExtend}; -use arrow::datatypes::{ArrowDataType, ExtensionType}; - -fn create_list_array(data: Vec>>>) -> ListArray { - let mut array = MutableListArray::>::new(); - array.try_extend(data).unwrap(); - array.into() -} - -#[test] -fn extension() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - Some(vec![Some(4), Some(5)]), - Some(vec![Some(6i32), Some(7), Some(8)]), - ]; - - let array = create_list_array(data); - - let dtype = ArrowDataType::Extension(Box::new(ExtensionType { - name: "ext".into(), - inner: array.dtype().clone(), - metadata: None, - })); - let array_ext = ListArray::new( - dtype, - array.offsets().clone(), - array.values().clone(), - 
array.validity().cloned(), - ); - - let mut a = GrowableList::new(vec![&array_ext], false, 0); - unsafe { - a.extend(0, 0, 1); - } - assert_eq!(a.len(), 1); - - let result: ListArray = a.into(); - assert_eq!(array_ext.dtype(), result.dtype()); -} - -#[test] -fn basic() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - Some(vec![Some(4), Some(5)]), - Some(vec![Some(6i32), Some(7), Some(8)]), - ]; - - let array = create_list_array(data); - - let mut a = GrowableList::new(vec![&array], false, 0); - unsafe { - a.extend(0, 0, 1); - } - assert_eq!(a.len(), 1); - - let result: ListArray = a.into(); - - let expected = vec![Some(vec![Some(1i32), Some(2), Some(3)])]; - let expected = create_list_array(expected); - - assert_eq!(result, expected) -} - -#[test] -fn null_offset() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(6i32), Some(7), Some(8)]), - ]; - let array = create_list_array(data); - let array = array.sliced(1, 2); - - let mut a = GrowableList::new(vec![&array], false, 0); - unsafe { - a.extend(0, 1, 1); - } - assert_eq!(a.len(), 1); - - let result: ListArray = a.into(); - - let expected = vec![Some(vec![Some(6i32), Some(7), Some(8)])]; - let expected = create_list_array(expected); - - assert_eq!(result, expected) -} - -#[test] -fn null_offsets() { - let data = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(6i32), None, Some(8)]), - ]; - let array = create_list_array(data); - let array = array.sliced(1, 2); - - let mut a = GrowableList::new(vec![&array], false, 0); - unsafe { - a.extend(0, 1, 1); - } - assert_eq!(a.len(), 1); - - let result: ListArray = a.into(); - - let expected = vec![Some(vec![Some(6i32), None, Some(8)])]; - let expected = create_list_array(expected); - - assert_eq!(result, expected) -} - -#[test] -fn test_from_two_lists() { - let data_1 = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(6i32), None, Some(8)]), - ]; - let array_1 = 
create_list_array(data_1); - - let data_2 = vec![ - Some(vec![Some(8i32), Some(7), Some(6)]), - Some(vec![Some(5i32), None, Some(4)]), - Some(vec![Some(2i32), Some(1), Some(0)]), - ]; - let array_2 = create_list_array(data_2); - - let mut a = GrowableList::new(vec![&array_1, &array_2], false, 6); - unsafe { - a.extend(0, 0, 2); - } - unsafe { - a.extend(1, 1, 1); - } - assert_eq!(a.len(), 3); - - let result: ListArray = a.into(); - - let expected = vec![ - Some(vec![Some(1i32), Some(2), Some(3)]), - None, - Some(vec![Some(5i32), None, Some(4)]), - ]; - let expected = create_list_array(expected); - - assert_eq!(result, expected); -} diff --git a/crates/polars/tests/it/arrow/array/growable/mod.rs b/crates/polars/tests/it/arrow/array/growable/mod.rs deleted file mode 100644 index 20e3e53954b7..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/mod.rs +++ /dev/null @@ -1,69 +0,0 @@ -mod binary; -mod boolean; -mod dictionary; -mod fixed_binary; -mod fixed_size_list; -mod list; -mod null; -mod primitive; -mod struct_; -mod utf8; - -use arrow::array::growable::make_growable; -use arrow::array::*; -use arrow::datatypes::{ArrowDataType, ExtensionType, Field}; - -#[test] -fn test_make_growable() { - let array = Int32Array::from_slice([1, 2]); - make_growable(&[&array], false, 2); - - let array = BinaryArray::::from_slice([b"a".as_ref(), b"aa".as_ref()]); - make_growable(&[&array], false, 2); - - let array = BinaryArray::::from_slice([b"a".as_ref(), b"aa".as_ref()]); - make_growable(&[&array], false, 2); - - let array = BinaryArray::::from_slice([b"a".as_ref(), b"aa".as_ref()]); - make_growable(&[&array], false, 2); - - let array = FixedSizeBinaryArray::new( - ArrowDataType::FixedSizeBinary(2), - b"abcd".to_vec().into(), - None, - ); - make_growable(&[&array], false, 2); -} - -#[test] -fn test_make_growable_extension() { - let array = DictionaryArray::try_from_keys( - Int32Array::from_slice([1, 0]), - Int32Array::from_slice([1, 2]).boxed(), - ) - .unwrap(); - 
make_growable(&[&array], false, 2); - - let dtype = ArrowDataType::Extension(Box::new(ExtensionType { - name: "ext".into(), - inner: ArrowDataType::Int32, - metadata: None, - })); - let array = Int32Array::from_slice([1, 2]).to(dtype.clone()); - let array_grown = make_growable(&[&array], false, 2).as_box(); - assert_eq!(array_grown.dtype(), &dtype); - - let dtype = ArrowDataType::Extension(Box::new(ExtensionType { - name: "ext".into(), - inner: ArrowDataType::Struct(vec![Field::new("a".into(), ArrowDataType::Int32, false)]), - metadata: None, - })); - let array = StructArray::new( - dtype.clone(), - 2, - vec![Int32Array::from_slice([1, 2]).boxed()], - None, - ); - let array_grown = make_growable(&[&array], false, 2).as_box(); - assert_eq!(array_grown.dtype(), &dtype); -} diff --git a/crates/polars/tests/it/arrow/array/growable/null.rs b/crates/polars/tests/it/arrow/array/growable/null.rs deleted file mode 100644 index 2d6a118a117c..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/null.rs +++ /dev/null @@ -1,21 +0,0 @@ -use arrow::array::growable::{Growable, GrowableNull}; -use arrow::array::NullArray; -use arrow::datatypes::ArrowDataType; - -#[test] -fn null() { - let mut mutable = GrowableNull::default(); - - unsafe { - mutable.extend(0, 1, 2); - } - unsafe { - mutable.extend(1, 0, 1); - } - assert_eq!(mutable.len(), 3); - - let result: NullArray = mutable.into(); - - let expected = NullArray::new(ArrowDataType::Null, 3); - assert_eq!(result, expected); -} diff --git a/crates/polars/tests/it/arrow/array/growable/primitive.rs b/crates/polars/tests/it/arrow/array/growable/primitive.rs deleted file mode 100644 index 37c105f2c728..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/primitive.rs +++ /dev/null @@ -1,82 +0,0 @@ -use arrow::array::growable::{Growable, GrowablePrimitive}; -use arrow::array::PrimitiveArray; - -/// tests extending from a primitive array w/ offset nor nulls -#[test] -fn basics() { - let b = 
PrimitiveArray::::from(vec![Some(1), Some(2), Some(3)]); - let mut a = GrowablePrimitive::new(vec![&b], false, 3); - unsafe { - a.extend(0, 0, 2); - } - assert_eq!(a.len(), 2); - let result: PrimitiveArray = a.into(); - let expected = PrimitiveArray::::from(vec![Some(1), Some(2)]); - assert_eq!(result, expected); -} - -/// tests extending from a primitive array with offset w/ nulls -#[test] -fn offset() { - let b = PrimitiveArray::::from(vec![Some(1), Some(2), Some(3)]); - let b = b.sliced(1, 2); - let mut a = GrowablePrimitive::new(vec![&b], false, 2); - unsafe { - a.extend(0, 0, 2); - } - assert_eq!(a.len(), 2); - let result: PrimitiveArray = a.into(); - let expected = PrimitiveArray::::from(vec![Some(2), Some(3)]); - assert_eq!(result, expected); -} - -/// tests extending from a primitive array with offset and nulls -#[test] -fn null_offset() { - let b = PrimitiveArray::::from(vec![Some(1), None, Some(3)]); - let b = b.sliced(1, 2); - let mut a = GrowablePrimitive::new(vec![&b], false, 2); - unsafe { - a.extend(0, 0, 2); - } - assert_eq!(a.len(), 2); - let result: PrimitiveArray = a.into(); - let expected = PrimitiveArray::::from(vec![None, Some(3)]); - assert_eq!(result, expected); -} - -#[test] -fn null_offset_validity() { - let b = PrimitiveArray::::from(&[Some(1), Some(2), Some(3)]); - let b = b.sliced(1, 2); - let mut a = GrowablePrimitive::new(vec![&b], true, 2); - unsafe { - a.extend(0, 0, 2); - } - a.extend_validity(3); - unsafe { - a.extend(0, 1, 1); - } - assert_eq!(a.len(), 6); - let result: PrimitiveArray = a.into(); - let expected = PrimitiveArray::::from(&[Some(2), Some(3), None, None, None, Some(3)]); - assert_eq!(result, expected); -} - -#[test] -fn joining_arrays() { - let b = PrimitiveArray::::from(&[Some(1), Some(2), Some(3)]); - let c = PrimitiveArray::::from(&[Some(4), Some(5), Some(6)]); - let mut a = GrowablePrimitive::new(vec![&b, &c], false, 4); - unsafe { - a.extend(0, 0, 2); - } - unsafe { - a.extend(1, 1, 2); - } - assert_eq!(a.len(), 
4); - let result: PrimitiveArray = a.into(); - - let expected = PrimitiveArray::::from(&[Some(1), Some(2), Some(5), Some(6)]); - assert_eq!(result, expected); -} diff --git a/crates/polars/tests/it/arrow/array/growable/struct_.rs b/crates/polars/tests/it/arrow/array/growable/struct_.rs deleted file mode 100644 index 2749fa88bb1c..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/struct_.rs +++ /dev/null @@ -1,145 +0,0 @@ -use arrow::array::growable::{Growable, GrowableStruct}; -use arrow::array::{Array, PrimitiveArray, StructArray, Utf8ViewArray}; -use arrow::bitmap::Bitmap; -use arrow::datatypes::{ArrowDataType, Field}; - -fn some_values() -> (ArrowDataType, Vec>) { - let strings: Box = Box::new(Utf8ViewArray::from_slice([ - Some("a"), - Some("aa"), - None, - Some("mark"), - Some("doe"), - ])); - let ints: Box = Box::new(PrimitiveArray::::from(&[ - Some(1), - Some(2), - Some(3), - Some(4), - Some(5), - ])); - let fields = vec![ - Field::new("f1".into(), ArrowDataType::Utf8View, true), - Field::new("f2".into(), ArrowDataType::Int32, true), - ]; - (ArrowDataType::Struct(fields), vec![strings, ints]) -} - -#[test] -fn basic() { - let (fields, values) = some_values(); - - let array = StructArray::new(fields.clone(), values[0].len(), values.clone(), None); - - let mut a = GrowableStruct::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - assert_eq!(a.len(), 2); - let result: StructArray = a.into(); - - let expected = StructArray::new( - fields, - 2, - vec![values[0].sliced(1, 2), values[1].sliced(1, 2)], - None, - ); - assert_eq!(result, expected) -} - -#[test] -fn offset() { - let (fields, values) = some_values(); - - let array = - StructArray::new(fields.clone(), values[0].len(), values.clone(), None).sliced(1, 3); - - let mut a = GrowableStruct::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - assert_eq!(a.len(), 2); - let result: StructArray = a.into(); - - let expected = StructArray::new( - fields, - 2, - 
vec![values[0].sliced(2, 2), values[1].sliced(2, 2)], - None, - ); - - assert_eq!(result, expected); -} - -#[test] -fn nulls() { - let (fields, values) = some_values(); - - let array = StructArray::new( - fields.clone(), - values[0].len(), - values.clone(), - Some(Bitmap::from_u8_slice([0b00000010], 5)), - ); - - let mut a = GrowableStruct::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - assert_eq!(a.len(), 2); - let result: StructArray = a.into(); - - let expected = StructArray::new( - fields, - 2, - vec![values[0].sliced(1, 2), values[1].sliced(1, 2)], - Some(Bitmap::from_u8_slice([0b00000010], 5).sliced(1, 2)), - ); - - assert_eq!(result, expected) -} - -#[test] -fn many() { - let (fields, values) = some_values(); - - let array = StructArray::new(fields.clone(), values[0].len(), values.clone(), None); - - let mut mutable = GrowableStruct::new(vec![&array, &array], true, 0); - - unsafe { - mutable.extend(0, 1, 2); - } - unsafe { - mutable.extend(1, 0, 2); - } - mutable.extend_validity(1); - assert_eq!(mutable.len(), 5); - let result = mutable.as_box(); - - let expected_string: Box = Box::new(Utf8ViewArray::from_slice([ - Some("aa"), - None, - Some("a"), - Some("aa"), - None, - ])); - let expected_int: Box = Box::new(PrimitiveArray::::from(vec![ - Some(2), - Some(3), - Some(1), - Some(2), - None, - ])); - - let expected = StructArray::new( - fields, - expected_string.len(), - vec![expected_string, expected_int], - Some(Bitmap::from([true, true, true, true, false])), - ); - assert_eq!(expected, result.as_ref()) -} diff --git a/crates/polars/tests/it/arrow/array/growable/utf8.rs b/crates/polars/tests/it/arrow/array/growable/utf8.rs deleted file mode 100644 index af2be2ab9867..000000000000 --- a/crates/polars/tests/it/arrow/array/growable/utf8.rs +++ /dev/null @@ -1,97 +0,0 @@ -use arrow::array::growable::{Growable, GrowableUtf8}; -use arrow::array::Utf8Array; - -/// tests extending from a variable-sized (strings and binary) array w/ offset with 
nulls -#[test] -fn validity() { - let array = Utf8Array::::from([Some("a"), Some("bc"), None, Some("defh")]); - - let mut a = GrowableUtf8::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 1, 2); - } - - let result: Utf8Array = a.into(); - - let expected = Utf8Array::::from([Some("bc"), None]); - assert_eq!(result, expected); -} - -/// tests extending from a variable-sized (strings and binary) array -/// with an offset and nulls -#[test] -fn offsets() { - let array = Utf8Array::::from([Some("a"), Some("bc"), None, Some("defh")]); - let array = array.sliced(1, 3); - - let mut a = GrowableUtf8::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 0, 3); - } - assert_eq!(a.len(), 3); - - let result: Utf8Array = a.into(); - - let expected = Utf8Array::::from([Some("bc"), None, Some("defh")]); - assert_eq!(result, expected); -} - -#[test] -fn offsets2() { - let array = Utf8Array::::from([Some("a"), Some("bc"), None, Some("defh")]); - let array = array.sliced(1, 3); - - let mut a = GrowableUtf8::new(vec![&array], false, 0); - - unsafe { - a.extend(0, 0, 3); - } - assert_eq!(a.len(), 3); - - let result: Utf8Array = a.into(); - - let expected = Utf8Array::::from([Some("bc"), None, Some("defh")]); - assert_eq!(result, expected); -} - -#[test] -fn multiple_with_validity() { - let array1 = Utf8Array::::from_slice(["hello", "world"]); - let array2 = Utf8Array::::from([Some("1"), None]); - - let mut a = GrowableUtf8::new(vec![&array1, &array2], false, 5); - - unsafe { - a.extend(0, 0, 2); - } - unsafe { - a.extend(1, 0, 2); - } - assert_eq!(a.len(), 4); - - let result: Utf8Array = a.into(); - - let expected = Utf8Array::::from([Some("hello"), Some("world"), Some("1"), None]); - assert_eq!(result, expected); -} - -#[test] -fn null_offset_validity() { - let array = Utf8Array::::from([Some("a"), Some("bc"), None, Some("defh")]); - let array = array.sliced(1, 3); - - let mut a = GrowableUtf8::new(vec![&array], true, 0); - - unsafe { - a.extend(0, 1, 2); - } - 
a.extend_validity(1); - assert_eq!(a.len(), 3); - - let result: Utf8Array = a.into(); - - let expected = Utf8Array::::from([None, Some("defh"), None]); - assert_eq!(result, expected); -} diff --git a/crates/polars/tests/it/arrow/array/mod.rs b/crates/polars/tests/it/arrow/array/mod.rs index 6c1e6f672b60..c96d9a903b5a 100644 --- a/crates/polars/tests/it/arrow/array/mod.rs +++ b/crates/polars/tests/it/arrow/array/mod.rs @@ -5,7 +5,6 @@ mod dictionary; mod equal; mod fixed_size_binary; mod fixed_size_list; -mod growable; mod list; mod map; mod primitive;