refactor: Make with_column_unchecked take Column (#18863)
coastalwhite authored Sep 23, 2024
1 parent ea7953e commit b68e259
Showing 9 changed files with 75 additions and 36 deletions.
15 changes: 7 additions & 8 deletions crates/polars-core/src/frame/mod.rs
@@ -1256,14 +1256,13 @@ impl DataFrame {
     /// on length or duplicates.
     ///
     /// # Safety
-    /// The caller must ensure `column.len() == self.height()` .
-    pub unsafe fn with_column_unchecked(&mut self, column: Series) -> &mut Self {
-        if cfg!(debug_assertions) {
-            self.with_column(column).unwrap()
-        } else {
-            self.get_columns_mut().push(column.into_column());
-            self
-        }
+    /// The caller must ensure `self.width() == 0 || column.len() == self.height()` .
+    pub unsafe fn with_column_unchecked(&mut self, column: Column) -> &mut Self {
+        debug_assert!(self.width() == 0 || self.height() == column.len());
+        debug_assert!(self.get_column_index(column.name().as_str()).is_none());
+
+        unsafe { self.get_columns_mut() }.push(column);
+        self
     }

     fn add_column_by_schema(&mut self, c: Column, schema: &Schema) -> PolarsResult<()> {
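
To illustrate the signature change from `Series` to `Column` at a call site, here is a minimal hedged sketch (the helper name is hypothetical; `into_column()` is assumed to be reachable through the prelude):

    use polars_core::prelude::*;

    // Hypothetical helper: append a Series through the new Column-based API.
    fn push_series_unchecked(df: &mut DataFrame, s: Series) {
        // Invariant from the updated docs: empty frame (width 0) or matching length,
        // and no column with the same name; both are only debug-asserted internally.
        assert!(df.width() == 0 || s.len() == df.height());
        assert!(df.get_column_index(s.name().as_str()).is_none());
        // SAFETY: the asserts above uphold the documented contract.
        unsafe {
            df.with_column_unchecked(s.into_column());
        }
    }
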
23 changes: 23 additions & 0 deletions crates/polars-core/src/series/ops/null.rs
@@ -1,4 +1,6 @@
 use arrow::bitmap::Bitmap;
+use arrow::buffer::Buffer;
+use arrow::offset::OffsetsBuffer;

 #[cfg(feature = "object")]
 use crate::chunked_array::object::registry::get_object_builder;
@@ -64,6 +66,27 @@ impl Series {
                     ca.into_series()
                 }
             },
+            DataType::BinaryOffset => {
+                let length = size as IdxSize;
+
+                let offsets = vec![0; size + 1];
+                let array = BinaryArray::<i64>::new(
+                    dtype.to_arrow(CompatLevel::oldest()),
+                    unsafe { OffsetsBuffer::new_unchecked(Buffer::from(offsets)) },
+                    Buffer::default(),
+                    Some(Bitmap::new_zeroed(size)),
+                );
+
+                unsafe {
+                    BinaryOffsetChunked::new_with_dims(
+                        Arc::new(Field::new(name, dtype.clone())),
+                        vec![Box::new(array)],
+                        length,
+                        length,
+                    )
+                }
+                .into_series()
+            },
             DataType::Null => Series::new_null(name, size),
             DataType::Unknown(kind) => {
                 let dtype = kind.materialize().expect("expected known type");
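
The new `DataType::BinaryOffset` arm sits in the null-series constructor; assuming the surrounding function is `Series::full_null`, a small usage sketch (function and column names are illustrative):

    use polars_core::prelude::*;

    // Hypothetical check: an all-null BinaryOffset series of length n is now constructible.
    fn all_null_binary_offset(n: usize) -> Series {
        let s = Series::full_null("payload".into(), n, &DataType::BinaryOffset);
        debug_assert_eq!(s.len(), n);
        debug_assert_eq!(s.null_count(), n);
        debug_assert_eq!(s.dtype(), &DataType::BinaryOffset);
        s
    }
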
9 changes: 8 additions & 1 deletion crates/polars-io/src/ipc/ipc_file.rs
@@ -298,7 +298,14 @@ impl<R: MmapBytesReader> SerReader<R> for IpcReader<R> {

         if let Some((col, value)) = include_file_path {
             unsafe {
-                df.with_column_unchecked(StringChunked::full(col, &value, row_count).into_series())
+                df.with_column_unchecked(Column::new_scalar(
+                    col,
+                    Scalar::new(
+                        DataType::String,
+                        AnyValue::StringOwned(value.as_ref().into()),
+                    ),
+                    row_count,
+                ))
             };
         }

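Across the readers, the `include_file_paths` column is now built as a scalar-backed `Column` rather than a fully materialized `StringChunked`, presumably to avoid allocating `row_count` copies of the path. A hedged sketch of the pattern (column name, path, and import locations are assumptions):

    use polars_core::prelude::*;
    use polars_core::scalar::Scalar;

    // Hypothetical: build a constant "file_path"-style column of a given height
    // without materializing `height` copies of the string.
    fn file_path_column(height: usize) -> Column {
        Column::new_scalar(
            "file_path".into(),
            Scalar::new(
                DataType::String,
                AnyValue::StringOwned("data/part-0.ipc".into()),
            ),
            height,
        )
    }
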
6 changes: 3 additions & 3 deletions crates/polars-io/src/parquet/read/read_impl.rs
@@ -1135,7 +1135,7 @@ impl BatchedParquetReader {
                             self.metadata.num_rows
                         },
                     )
-                    .into_series(),
+                    .into_column(),
                 )
             };
         }
@@ -1155,7 +1155,7 @@

         if let Some(ca) = &self.include_file_path {
             unsafe {
-                df.with_column_unchecked(ca.clear().into_series());
+                df.with_column_unchecked(ca.clear().into_column());
             }
         };

@@ -1193,7 +1193,7 @@

         if let Some(ca) = &self.include_file_path {
             unsafe {
-                df.with_column_unchecked(ca.clear().into_series());
+                df.with_column_unchecked(ca.clear().into_column());
             }
         };

16 changes: 8 additions & 8 deletions crates/polars-io/src/parquet/read/reader.rs
@@ -245,14 +245,14 @@ impl<R: MmapBytesReader> SerReader<R> for ParquetReader<R> {

         if let Some((col, value)) = &self.include_file_path {
             unsafe {
-                df.with_column_unchecked(
-                    StringChunked::full(
-                        col.clone(),
-                        value,
-                        if df.width() > 0 { df.height() } else { n_rows },
-                    )
-                    .into_series(),
-                )
+                df.with_column_unchecked(Column::new_scalar(
+                    col.clone(),
+                    Scalar::new(
+                        DataType::String,
+                        AnyValue::StringOwned(value.as_ref().into()),
+                    ),
+                    if df.width() > 0 { df.height() } else { n_rows },
+                ))
             };
         }

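The length expression `if df.width() > 0 { df.height() } else { n_rows }` ties into the relaxed safety contract above: a frame with no columns reports a height of 0, so the scalar column falls back to the reader's row count. A tiny sketch of that choice (helper name is hypothetical):

    use polars_core::prelude::*;

    // Hypothetical helper mirroring the length choice in the readers above.
    fn include_path_len(df: &DataFrame, n_rows: usize) -> usize {
        // With zero columns there is nothing to measure, so use the scanned row count;
        // otherwise the appended column must match the existing height exactly.
        if df.width() > 0 {
            df.height()
        } else {
            n_rows
        }
    }
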
8 changes: 5 additions & 3 deletions crates/polars-mem-engine/src/executors/scan/csv.rs
@@ -79,9 +79,11 @@ impl CsvExec {
                     let name = source.to_include_path_name();

                     unsafe {
-                        df.with_column_unchecked(
-                            StringChunked::full(col.clone(), name, df.height()).into_series(),
-                        )
+                        df.with_column_unchecked(Column::new_scalar(
+                            col.clone(),
+                            Scalar::new(DataType::String, AnyValue::StringOwned(name.into())),
+                            df.height(),
+                        ))
                     };
                 }

10 changes: 6 additions & 4 deletions crates/polars-mem-engine/src/executors/scan/ndjson.rs
@@ -56,7 +56,7 @@ impl JsonExec {
         let mut df = DataFrame::empty_with_schema(schema);
         if let Some(col) = &self.file_scan_options.include_file_paths {
             unsafe {
-                df.with_column_unchecked(StringChunked::full_null(col.clone(), 0).into_series())
+                df.with_column_unchecked(Column::new_empty(col.clone(), &DataType::String))
             };
         }
         if let Some(row_index) = &self.file_scan_options.row_index {
@@ -111,9 +111,11 @@
                 if let Some(col) = &self.file_scan_options.include_file_paths {
                     let name = source.to_include_path_name();
                     unsafe {
-                        df.with_column_unchecked(
-                            StringChunked::full(col.clone(), name, df.height()).into_series(),
-                        )
+                        df.with_column_unchecked(Column::new_scalar(
+                            col.clone(),
+                            Scalar::new(DataType::String, AnyValue::StringOwned(name.into())),
+                            df.height(),
+                        ))
                     };
                 }

2 changes: 1 addition & 1 deletion crates/polars-ops/src/frame/pivot/mod.rs
@@ -235,7 +235,7 @@ fn pivot_impl(
         // @scalar-opt
         let columns_struct = StructChunked::from_columns(column.clone(), fields)
             .unwrap()
-            .into_series();
+            .into_column();
         let mut binding = pivot_df.clone();
         let pivot_df = unsafe { binding.with_column_unchecked(columns_struct) };
         pivot_impl_single_column(
22 changes: 14 additions & 8 deletions crates/polars-pipe/src/executors/sinks/sort/sink_multiple.rs
@@ -226,16 +226,22 @@ impl SortSinkMultiple {
             })
         }

-        let rows_encoded = polars_row::convert_columns(&self.sort_column, &self.sort_fields);
-        let column = unsafe {
-            Series::from_chunks_and_dtype_unchecked(
-                PlSmallStr::from_static(POLARS_SORT_COLUMN),
-                vec![Box::new(rows_encoded.into_array())],
-                &DataType::BinaryOffset,
-            )
+        let name = PlSmallStr::from_static(POLARS_SORT_COLUMN);
+        let column = if chunk.data.height() == 0 && chunk.data.width() > 0 {
+            Column::new_empty(name, &DataType::BinaryOffset)
+        } else {
+            let rows_encoded = polars_row::convert_columns(&self.sort_column, &self.sort_fields);
+            let series = unsafe {
+                Series::from_chunks_and_dtype_unchecked(
+                    name,
+                    vec![Box::new(rows_encoded.into_array())],
+                    &DataType::BinaryOffset,
+                )
+            };
+            debug_assert_eq!(series.chunks().len(), 1);
+            series.into()
         };

-        debug_assert_eq!(column.chunks().len(), 1);
         // SAFETY: length is correct
         unsafe { chunk.data.with_column_unchecked(column) };
         Ok(())
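
The sink now guards the zero-height, non-zero-width case with an empty `BinaryOffset` column instead of running row encoding. A hedged sketch of that guard in isolation (helper and column names are illustrative):

    use polars_core::prelude::*;

    // Hypothetical: pick the sort-key column for a chunk, mirroring the new guard.
    fn sort_key_placeholder(df: &DataFrame) -> Option<Column> {
        if df.height() == 0 && df.width() > 0 {
            // No rows to row-encode: an empty BinaryOffset column keeps the schema
            // consistent without calling polars_row::convert_columns on zero rows.
            Some(Column::new_empty("_sort_key".into(), &DataType::BinaryOffset))
        } else {
            // In the real sink the else-branch row-encodes the sort columns.
            None
        }
    }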
