Skip to content

Commit

Permalink
fix: Always use global registry for object
Browse files Browse the repository at this point in the history
  • Loading branch information
ritchie46 committed Mar 6, 2025
1 parent a599b9e commit 33c00fb
Show file tree
Hide file tree
Showing 37 changed files with 83 additions and 141 deletions.
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/builder/list/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ pub fn get_list_builder(

match &physical_type {
#[cfg(feature = "object")]
DataType::Object(_, _) => {
DataType::Object(_) => {
let builder = get_object_builder(PlSmallStr::EMPTY, 0).get_list_builder(
name,
value_capacity,
Expand Down
17 changes: 4 additions & 13 deletions crates/polars-core/src/chunked_array/object/builder.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use arrow::bitmap::BitmapBuilder;

use super::*;
use crate::chunked_array::object::registry::{AnonymousObjectBuilder, ObjectRegistry};
use crate::utils::get_iter_capacity;

pub struct ObjectChunkedBuilder<T> {
Expand All @@ -16,7 +15,7 @@ where
{
pub fn new(name: PlSmallStr, capacity: usize) -> Self {
ObjectChunkedBuilder {
field: Field::new(name, DataType::Object(T::type_name(), None)),
field: Field::new(name, DataType::Object(T::type_name())),
values: Vec::with_capacity(capacity),
bitmask_builder: BitmapBuilder::with_capacity(capacity),
}
Expand Down Expand Up @@ -76,15 +75,7 @@ where
/// Initialize a polars Object data type. The type has got information needed to
/// construct new objects.
pub(crate) fn get_object_type<T: PolarsObject>() -> DataType {
let object_builder = Box::new(|name: PlSmallStr, capacity: usize| {
Box::new(ObjectChunkedBuilder::<T>::new(name, capacity)) as Box<dyn AnonymousObjectBuilder>
});

let object_size = size_of::<T>();
let physical_dtype = ArrowDataType::FixedSizeBinary(object_size);

let registry = ObjectRegistry::new(object_builder, physical_dtype);
DataType::Object(T::type_name(), Some(Arc::new(registry)))
DataType::Object(T::type_name())
}

impl<T> Default for ObjectChunkedBuilder<T>
Expand Down Expand Up @@ -135,7 +126,7 @@ where
T: PolarsObject,
{
pub fn new_from_vec(name: PlSmallStr, v: Vec<T>) -> Self {
let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
let len = v.len();
let arr = Box::new(ObjectArray {
values: v.into(),
Expand All @@ -150,7 +141,7 @@ where
v: Vec<T>,
validity: Option<Bitmap>,
) -> Self {
let field = Arc::new(Field::new(name, DataType::Object(T::type_name(), None)));
let field = Arc::new(Field::new(name, DataType::Object(T::type_name())));
let len = v.len();
let null_count = validity.as_ref().map(|v| v.unset_bits()).unwrap_or(0);
let arr = Box::new(ObjectArray {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pub(crate) unsafe fn drop_list(ca: &ListChunked) {
inner = a;
}

if matches!(inner, DataType::Object(_, _)) {
if matches!(inner, DataType::Object(_)) {
if nested_count != 0 {
panic!("multiple nested objects not yet supported")
}
Expand Down
13 changes: 0 additions & 13 deletions crates/polars-core/src/chunked_array/object/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,19 +35,6 @@ impl Debug for ObjectRegistry {
}
}

impl ObjectRegistry {
pub(super) fn new(
builder_constructor: BuilderConstructor,
physical_dtype: ArrowDataType,
) -> Self {
Self {
builder_constructor,
object_converter: None,
physical_dtype,
}
}
}

static GLOBAL_OBJECT_REGISTRY: Lazy<RwLock<Option<ObjectRegistry>>> = Lazy::new(Default::default);

/// This trait can be registered, after which that global registration
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ pub(crate) unsafe fn arr_to_any_value<'a>(
AnyValue::Decimal(v, scale.unwrap_or_else(|| unreachable!()))
},
#[cfg(feature = "object")]
DataType::Object(_, _) => {
DataType::Object(_) => {
// We should almost never hit this. The only known exception is when we put objects in
// structs. Any other hit should be considered a bug.
let arr = arr.as_any().downcast_ref::<FixedSizeBinaryArray>().unwrap();
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/chunked_array/ops/chunkops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
pub fn rechunk(&self) -> Cow<'_, Self> {
match self.dtype() {
#[cfg(feature = "object")]
DataType::Object(_, _) => {
DataType::Object(_) => {
panic!("implementation error")
},
_ => {
Expand Down Expand Up @@ -256,7 +256,7 @@ impl<T: PolarsDataType> ChunkedArray<T> {
match length {
0 => match self.dtype() {
#[cfg(feature = "object")]
DataType::Object(_, _) => exec(),
DataType::Object(_) => exec(),
_ => self.clear(),
},
_ => exec(),
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/ops/row_encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ pub fn get_row_encoding_context(dtype: &DataType, ordered: bool) -> Option<RowEn
DataType::Unknown(_) => panic!("Unsupported in row encoding"),

#[cfg(feature = "object")]
DataType::Object(_, _) => panic!("Unsupported in row encoding"),
DataType::Object(_) => panic!("Unsupported in row encoding"),

#[cfg(feature = "dtype-decimal")]
DataType::Decimal(precision, _) => {
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/chunked_array/struct_/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ impl StructChunked {

match s.dtype() {
#[cfg(feature = "object")]
DataType::Object(_, _) => {
DataType::Object(_) => {
polars_bail!(InvalidOperation: "nested objects are not allowed")
},
_ => {},
Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/datatypes/_serde.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ impl From<&DataType> for SerializableDataType {
#[cfg(feature = "dtype-decimal")]
Decimal(precision, scale) => Self::Decimal(*precision, *scale),
#[cfg(feature = "object")]
Object(name, _) => Self::Object(name.to_string()),
Object(name) => Self::Object(name.to_string()),
dt => panic!("{dt:?} not supported"),
}
}
Expand Down Expand Up @@ -230,7 +230,7 @@ impl From<SerializableDataType> for DataType {
#[cfg(feature = "dtype-decimal")]
Decimal(precision, scale) => Self::Decimal(precision, scale),
#[cfg(feature = "object")]
Object(_) => Self::Object("unknown", None),
Object(_) => Self::Object("unknown"),
}
}
}
4 changes: 2 additions & 2 deletions crates/polars-core/src/datatypes/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -436,9 +436,9 @@ impl<'a> AnyValue<'a> {
#[cfg(feature = "dtype-decimal")]
Decimal(_, scale) => DataType::Decimal(None, Some(*scale)),
#[cfg(feature = "object")]
Object(o) => DataType::Object(o.type_name(), None),
Object(o) => DataType::Object(o.type_name()),
#[cfg(feature = "object")]
ObjectOwned(o) => DataType::Object(o.0.type_name(), None),
ObjectOwned(o) => DataType::Object(o.0.type_name()),
}
}

Expand Down
26 changes: 9 additions & 17 deletions crates/polars-core/src/datatypes/dtype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@ use serde::{Deserialize, Serialize};
use strum_macros::IntoStaticStr;

use super::*;
#[cfg(feature = "object")]
use crate::chunked_array::object::registry::ObjectRegistry;
use crate::chunked_array::object::registry::get_object_physical_type;
use crate::utils::materialize_dyn_int;

pub type TimeZone = PlSmallStr;
Expand Down Expand Up @@ -131,7 +130,7 @@ pub enum DataType {
/// A generic type that can be used in a `Series`
/// &'static str can be used to determine/set inner type
#[cfg(feature = "object")]
Object(&'static str, Option<Arc<ObjectRegistry>>),
Object(&'static str),
Null,
// The RevMapping has the internal state.
// This is ignored with comparisons, hashing etc.
Expand Down Expand Up @@ -190,7 +189,7 @@ impl PartialEq for DataType {
is_prec_eq && is_scale_eq
},
#[cfg(feature = "object")]
(Object(lhs, _), Object(rhs, _)) => lhs == rhs,
(Object(lhs), Object(rhs)) => lhs == rhs,
#[cfg(feature = "dtype-struct")]
(Struct(lhs), Struct(rhs)) => Vec::as_ptr(lhs) == Vec::as_ptr(rhs) || lhs == rhs,
#[cfg(feature = "dtype-array")]
Expand Down Expand Up @@ -386,9 +385,9 @@ impl DataType {
| (D::Binary, D::Categorical(_, _) | D::Enum(_, _)) => false,

#[cfg(feature = "object")]
(D::Object(_, _), D::Object(_, _)) => true,
(D::Object(_), D::Object(_)) => true,
#[cfg(feature = "object")]
(D::Object(_, _), _) | (_, D::Object(_, _)) => false,
(D::Object(_), _) | (_, D::Object(_)) => false,

(D::Boolean, dt) | (dt, D::Boolean) => match dt {
dt if dt.is_primitive_numeric() => true,
Expand Down Expand Up @@ -538,7 +537,7 @@ impl DataType {
pub fn is_object(&self) -> bool {
#[cfg(feature = "object")]
{
matches!(self, DataType::Object(_, _))
matches!(self, DataType::Object(_))
}
#[cfg(not(feature = "object"))]
{
Expand Down Expand Up @@ -585,7 +584,7 @@ impl DataType {
use DataType::*;
match self {
#[cfg(feature = "object")]
Object(_, _) => true,
Object(_) => true,
List(inner) => inner.contains_objects(),
#[cfg(feature = "dtype-array")]
Array(inner, _) => inner.contains_objects(),
Expand Down Expand Up @@ -831,14 +830,7 @@ impl DataType {
))),
Null => Ok(ArrowDataType::Null),
#[cfg(feature = "object")]
Object(_, Some(reg)) => Ok(reg.physical_dtype.clone()),
#[cfg(feature = "object")]
Object(_, None) => {
// FIXME: find out why we have Objects floating around without a
// known dtype.
// polars_bail!(InvalidOperation: "cannot convert Object dtype without registry to Arrow")
Ok(ArrowDataType::Unknown)
},
Object(_) => Ok(get_object_physical_type()),
#[cfg(feature = "dtype-categorical")]
Categorical(_, _) | Enum(_, _) => {
let values = if compat_level.0 >= 1 {
Expand Down Expand Up @@ -975,7 +967,7 @@ impl Display for DataType {
},
DataType::List(tp) => return write!(f, "list[{tp}]"),
#[cfg(feature = "object")]
DataType::Object(s, _) => s,
DataType::Object(s) => s,
#[cfg(feature = "dtype-categorical")]
DataType::Categorical(_, _) => "cat",
#[cfg(feature = "dtype-categorical")]
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/datatypes/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ impl DataType {
ArrowDataType::Extension(ext) if ext.name.as_str() == EXTENSION_NAME => {
#[cfg(feature = "object")]
{
DataType::Object("object", None)
DataType::Object("object")
}
#[cfg(not(feature = "object"))]
{
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/datatypes/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ unsafe impl<T: PolarsObject> PolarsDataType for ObjectType<T> {
type IsLogical = FalseT;

fn get_dtype() -> DataType {
DataType::Object(T::type_name(), None)
DataType::Object(T::type_name())
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/polars-core/src/fmt.rs
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ fn format_object_array(
array_type: &str,
) -> fmt::Result {
match object.dtype() {
DataType::Object(inner_type, _) => {
DataType::Object(inner_type) => {
let limit = std::cmp::min(DEFAULT_ROW_LIMIT, object.len());
write!(
f,
Expand Down Expand Up @@ -403,7 +403,7 @@ impl Debug for Series {
format_array!(f, self.list().unwrap(), &dt, self.name(), "Series")
},
#[cfg(feature = "object")]
DataType::Object(_, _) => format_object_array(f, self, self.name(), "Series"),
DataType::Object(_) => format_object_array(f, self, self.name(), "Series"),
#[cfg(feature = "dtype-categorical")]
DataType::Categorical(_, _) => {
format_array!(f, self.categorical().unwrap(), "cat", self.name(), "Series")
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/row/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl DataFrame {
for (col_i, s) in self.materialized_column_iter().enumerate() {
match s.dtype() {
#[cfg(feature = "object")]
DataType::Object(_, _) => {
DataType::Object(_) => {
for row_i in 0..s.len() {
let av = s.get(row_i).unwrap();
buf[row_i * width + col_i] = av
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/frame/row/transpose.rs
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ impl DataFrame {
DataType::Float32 => numeric_transpose::<Float32Type>(cols, names_out, &mut cols_t),
DataType::Float64 => numeric_transpose::<Float64Type>(cols, names_out, &mut cols_t),
#[cfg(feature = "object")]
DataType::Object(_, _) => {
DataType::Object(_) => {
// this requires to support `Object` in Series::iter which we don't yet
polars_bail!(InvalidOperation: "Object dtype not supported in 'transpose'")
},
Expand Down
60 changes: 18 additions & 42 deletions crates/polars-core/src/series/any_value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@ use std::fmt::Write;
use arrow::bitmap::MutableBitmap;

use crate::chunked_array::builder::{get_list_builder, AnonymousOwnedListBuilder};
#[cfg(feature = "object")]
use crate::chunked_array::object::registry::ObjectRegistry;
use crate::prelude::*;
use crate::utils::any_values_to_supertype;

Expand Down Expand Up @@ -140,7 +138,7 @@ impl Series {
#[cfg(feature = "dtype-struct")]
DataType::Struct(fields) => any_values_to_struct(values, fields, strict)?,
#[cfg(feature = "object")]
DataType::Object(_, registry) => any_values_to_object(values, registry)?,
DataType::Object(_) => any_values_to_object(values)?,
DataType::Null => Series::new_null(PlSmallStr::EMPTY, values.len()),
dt => {
polars_bail!(
Expand Down Expand Up @@ -626,7 +624,7 @@ fn any_values_to_list(
},

#[cfg(feature = "object")]
DataType::Object(_, _) => polars_bail!(nyi = "Nested object types"),
DataType::Object(_) => polars_bail!(nyi = "Nested object types"),

_ => {
let list_inner_type = match inner_type {
Expand Down Expand Up @@ -864,44 +862,22 @@ fn any_values_to_struct(
}

#[cfg(feature = "object")]
fn any_values_to_object(
values: &[AnyValue],
registry: &Option<Arc<ObjectRegistry>>,
) -> PolarsResult<Series> {
let mut builder = match registry {
None => {
use crate::chunked_array::object::registry;
let converter = registry::get_object_converter();
let mut builder = registry::get_object_builder(PlSmallStr::EMPTY, values.len());
for av in values {
match av {
AnyValue::Object(val) => builder.append_value(val.as_any()),
AnyValue::Null => builder.append_null(),
_ => {
// This is needed because in Python users can send mixed types.
// This only works if you set a global converter.
let any = converter(av.as_borrowed());
builder.append_value(&*any)
},
}
}
builder
},
Some(registry) => {
let mut builder = (*registry.builder_constructor)(PlSmallStr::EMPTY, values.len());
for av in values {
match av {
AnyValue::Object(val) => builder.append_value(val.as_any()),
AnyValue::ObjectOwned(val) => builder.append_value(val.0.as_any()),
AnyValue::Null => builder.append_null(),
_ => {
polars_bail!(SchemaMismatch: "expected object");
},
}
}
builder
},
};
fn any_values_to_object(values: &[AnyValue]) -> PolarsResult<Series> {
use crate::chunked_array::object::registry;
let converter = registry::get_object_converter();
let mut builder = registry::get_object_builder(PlSmallStr::EMPTY, values.len());
for av in values {
match av {
AnyValue::Object(val) => builder.append_value(val.as_any()),
AnyValue::Null => builder.append_null(),
_ => {
// This is needed because in Python users can send mixed types.
// This only works if you set a global converter.
let any = converter(av.as_borrowed());
builder.append_value(&*any)
},
}
}

Ok(builder.to_series())
}
Expand Down
2 changes: 1 addition & 1 deletion crates/polars-core/src/series/from.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ impl Series {
ca.into_series()
},
#[cfg(feature = "object")]
Object(_, _) => {
Object(_) => {
assert_eq!(chunks.len(), 1);
let arr = chunks[0]
.as_any()
Expand Down
Loading

0 comments on commit 33c00fb

Please sign in to comment.