From 567ff76831a8ce666fa800c138bb3f49767b32b4 Mon Sep 17 00:00:00 2001 From: Orson Peters Date: Tue, 4 Mar 2025 15:33:03 +0100 Subject: [PATCH] refactor(rust): Add freeze_reset to the builders --- .../polars-arrow/src/array/binview/builder.rs | 28 +++++++++++++++++++ .../polars-arrow/src/array/boolean/builder.rs | 6 ++++ crates/polars-arrow/src/array/builder.rs | 16 +++++++++++ .../src/array/fixed_size_binary/builder.rs | 10 +++++++ .../src/array/fixed_size_list/builder.rs | 8 ++++++ crates/polars-arrow/src/array/list/builder.rs | 7 +++++ crates/polars-arrow/src/array/null.rs | 6 ++++ .../src/array/primitive/builder.rs | 6 ++++ .../polars-arrow/src/array/struct_/builder.rs | 12 ++++++++ crates/polars-core/src/frame/builder.rs | 19 +++++++++++++ crates/polars-core/src/series/builder.rs | 10 +++++++ 11 files changed, 128 insertions(+) diff --git a/crates/polars-arrow/src/array/binview/builder.rs b/crates/polars-arrow/src/array/binview/builder.rs index 6132534ace3e..b552a351a036 100644 --- a/crates/polars-arrow/src/array/binview/builder.rs +++ b/crates/polars-arrow/src/array/binview/builder.rs @@ -209,6 +209,34 @@ impl StaticArrayBuilder for BinaryViewArrayGenericBuilder< } } + fn freeze_reset(&mut self) -> Self::Array { + // Flush active buffer and/or remove extra placeholder buffer. + if !self.active_buffer.is_empty() { + self.buffer_set[self.active_buffer_idx as usize] = + Buffer::from(core::mem::take(&mut self.active_buffer)); + } else if self.buffer_set.last().is_some_and(|b| b.is_empty()) { + self.buffer_set.pop(); + } + + let out = unsafe { + BinaryViewArrayGeneric::new_unchecked( + self.dtype.clone(), + Buffer::from(core::mem::take(&mut self.views)), + Arc::from(core::mem::take(&mut self.buffer_set)), + core::mem::take(&mut self.validity).into_opt_validity(), + self.total_bytes_len, + self.total_buffer_len, + ) + }; + + self.total_buffer_len = 0; + self.total_bytes_len = 0; + self.active_buffer_idx = 0; + self.stolen_buffers.clear(); + self.last_buffer_set_stolen_from = None; + out + } + fn len(&self) -> usize { self.views.len() } diff --git a/crates/polars-arrow/src/array/boolean/builder.rs b/crates/polars-arrow/src/array/boolean/builder.rs index 0118bceae0df..38919020b355 100644 --- a/crates/polars-arrow/src/array/boolean/builder.rs +++ b/crates/polars-arrow/src/array/boolean/builder.rs @@ -39,6 +39,12 @@ impl StaticArrayBuilder for BooleanArrayBuilder { BooleanArray::try_new(self.dtype, values, validity).unwrap() } + fn freeze_reset(&mut self) -> Self::Array { + let values = core::mem::take(&mut self.values).freeze(); + let validity = core::mem::take(&mut self.validity).into_opt_validity(); + BooleanArray::try_new(self.dtype.clone(), values, validity).unwrap() + } + fn len(&self) -> usize { self.values.len() } diff --git a/crates/polars-arrow/src/array/builder.rs b/crates/polars-arrow/src/array/builder.rs index 2861632366da..bd27022257e3 100644 --- a/crates/polars-arrow/src/array/builder.rs +++ b/crates/polars-arrow/src/array/builder.rs @@ -28,6 +28,9 @@ pub trait StaticArrayBuilder { /// Consume this builder returning the built array. fn freeze(self) -> Self::Array; + /// Return the built array and reset to an empty state. + fn freeze_reset(&mut self) -> Self::Array; + /// Returns the length of this builder (so far). fn len(&self) -> usize; @@ -96,6 +99,11 @@ impl ArrayBuilder for T { Box::new(StaticArrayBuilder::freeze(self)) } + #[inline(always)] + fn freeze_reset(&mut self) -> Box { + Box::new(StaticArrayBuilder::freeze_reset(self)) + } + #[inline(always)] fn len(&self) -> usize { StaticArrayBuilder::len(self) @@ -152,6 +160,9 @@ pub trait ArrayBuilder: ArrayBuilderBoxedHelper { /// Consume this builder returning the built array. fn freeze(self) -> Box; + /// Return the built array and reset to an empty state. + fn freeze_reset(&mut self) -> Box; + /// Returns the length of this builder (so far). fn len(&self) -> usize; @@ -227,6 +238,11 @@ impl ArrayBuilder for Box { self.freeze_boxed() } + #[inline(always)] + fn freeze_reset(&mut self) -> Box { + (**self).freeze_reset() + } + #[inline(always)] fn len(&self) -> usize { (**self).len() diff --git a/crates/polars-arrow/src/array/fixed_size_binary/builder.rs b/crates/polars-arrow/src/array/fixed_size_binary/builder.rs index 68371201bc89..edb1b6246c7f 100644 --- a/crates/polars-arrow/src/array/fixed_size_binary/builder.rs +++ b/crates/polars-arrow/src/array/fixed_size_binary/builder.rs @@ -48,6 +48,16 @@ impl StaticArrayBuilder for FixedSizeBinaryArrayBuilder { FixedSizeBinaryArray::new(self.dtype, values, validity) } + fn freeze_reset(&mut self) -> Self::Array { + // TODO: FixedSizeBinaryArray should track its own length to be correct + // for size-0 inner. + let values = Buffer::from(core::mem::take(&mut self.values)); + let validity = core::mem::take(&mut self.validity).into_opt_validity(); + let out = FixedSizeBinaryArray::new(self.dtype.clone(), values, validity); + self.length = 0; + out + } + fn len(&self) -> usize { self.length } diff --git a/crates/polars-arrow/src/array/fixed_size_list/builder.rs b/crates/polars-arrow/src/array/fixed_size_list/builder.rs index ac10fba0f030..7f06172b9fb0 100644 --- a/crates/polars-arrow/src/array/fixed_size_list/builder.rs +++ b/crates/polars-arrow/src/array/fixed_size_list/builder.rs @@ -42,6 +42,14 @@ impl StaticArrayBuilder for FixedSizeListArrayBuilder { FixedSizeListArray::new(self.dtype, self.length, values, validity) } + fn freeze_reset(&mut self) -> Self::Array { + let values = self.inner_builder.freeze_reset(); + let validity = core::mem::take(&mut self.validity).into_opt_validity(); + let out = FixedSizeListArray::new(self.dtype.clone(), self.length, values, validity); + self.length = 0; + out + } + fn len(&self) -> usize { self.length } diff --git a/crates/polars-arrow/src/array/list/builder.rs b/crates/polars-arrow/src/array/list/builder.rs index 6098243f3446..aba3315a7a53 100644 --- a/crates/polars-arrow/src/array/list/builder.rs +++ b/crates/polars-arrow/src/array/list/builder.rs @@ -45,6 +45,13 @@ impl StaticArrayBuilder for ListArrayBuilder { ListArray::new(self.dtype, offsets, values, validity) } + fn freeze_reset(&mut self) -> Self::Array { + let offsets = OffsetsBuffer::from(core::mem::take(&mut self.offsets)); + let values = self.inner_builder.freeze_reset(); + let validity = core::mem::take(&mut self.validity).into_opt_validity(); + ListArray::new(self.dtype.clone(), offsets, values, validity) + } + fn len(&self) -> usize { self.offsets.len() } diff --git a/crates/polars-arrow/src/array/null.rs b/crates/polars-arrow/src/array/null.rs index d878c5304ef2..72ce92b77619 100644 --- a/crates/polars-arrow/src/array/null.rs +++ b/crates/polars-arrow/src/array/null.rs @@ -240,6 +240,12 @@ impl StaticArrayBuilder for NullArrayBuilder { NullArray::new(self.dtype, self.length) } + fn freeze_reset(&mut self) -> Self::Array { + let out = NullArray::new(self.dtype.clone(), self.length); + self.length = 0; + out + } + fn len(&self) -> usize { self.length } diff --git a/crates/polars-arrow/src/array/primitive/builder.rs b/crates/polars-arrow/src/array/primitive/builder.rs index 83a0000286be..82177674926f 100644 --- a/crates/polars-arrow/src/array/primitive/builder.rs +++ b/crates/polars-arrow/src/array/primitive/builder.rs @@ -42,6 +42,12 @@ impl StaticArrayBuilder for PrimitiveArrayBuilder { PrimitiveArray::new(self.dtype, values, validity) } + fn freeze_reset(&mut self) -> Self::Array { + let values = Buffer::from(core::mem::take(&mut self.values)); + let validity = core::mem::take(&mut self.validity).into_opt_validity(); + PrimitiveArray::new(self.dtype.clone(), values, validity) + } + fn len(&self) -> usize { self.values.len() } diff --git a/crates/polars-arrow/src/array/struct_/builder.rs b/crates/polars-arrow/src/array/struct_/builder.rs index 2d939deb0c0a..7acf40caef04 100644 --- a/crates/polars-arrow/src/array/struct_/builder.rs +++ b/crates/polars-arrow/src/array/struct_/builder.rs @@ -47,6 +47,18 @@ impl StaticArrayBuilder for StructArrayBuilder { StructArray::new(self.dtype, self.length, values, validity) } + fn freeze_reset(&mut self) -> Self::Array { + let values = self + .inner_builders + .iter_mut() + .map(|b| b.freeze_reset()) + .collect(); + let validity = core::mem::take(&mut self.validity).into_opt_validity(); + let out = StructArray::new(self.dtype.clone(), self.length, values, validity); + self.length = 0; + out + } + fn len(&self) -> usize { self.length } diff --git a/crates/polars-core/src/frame/builder.rs b/crates/polars-core/src/frame/builder.rs index 1a174edd87fb..43c701d74325 100644 --- a/crates/polars-core/src/frame/builder.rs +++ b/crates/polars-core/src/frame/builder.rs @@ -50,6 +50,25 @@ impl DataFrameBuilder { unsafe { DataFrame::new_no_checks(self.height, columns) } } + pub fn freeze_reset(&mut self) -> DataFrame { + let columns = self + .schema + .iter_names() + .zip(&mut self.builders) + .map(|(n, b)| { + let s = b.freeze_reset(n.clone()); + assert!(s.len() == self.height); + Column::from(s) + }) + .collect(); + + // SAFETY: we checked the lengths and the names are unique because they + // come from Schema. + let out = unsafe { DataFrame::new_no_checks(self.height, columns) }; + self.height = 0; + out + } + pub fn len(&self) -> usize { self.height } diff --git a/crates/polars-core/src/series/builder.rs b/crates/polars-core/src/series/builder.rs index 00e7d879c26d..1d1f92de2b5e 100644 --- a/crates/polars-core/src/series/builder.rs +++ b/crates/polars-core/src/series/builder.rs @@ -27,6 +27,16 @@ impl SeriesBuilder { } } + pub fn freeze_reset(&mut self, name: PlSmallStr) -> Series { + unsafe { + Series::from_chunks_and_dtype_unchecked( + name, + vec![self.builder.freeze_reset()], + &self.dtype, + ) + } + } + pub fn len(&self) -> usize { self.builder.len() }