diff --git a/Cargo.lock b/Cargo.lock index bc8b2943b246..4dbd63e4e86d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1472,6 +1472,15 @@ version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +[[package]] +name = "convert_case" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb402b8d4c85569410425650ce3eddc7d698ed96d39a73f941b08fb63082f1e7" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "core-foundation" version = "0.9.4" @@ -1964,6 +1973,28 @@ dependencies = [ "uuid", ] +[[package]] +name = "datafusion-excalibur" +version = "45.0.0" +dependencies = [ + "arrow", + "datafusion-common", + "datafusion-excalibur-macros", + "datafusion-expr", + "datafusion-expr-common", +] + +[[package]] +name = "datafusion-excalibur-macros" +version = "45.0.0" +dependencies = [ + "convert_case", + "proc-macro2", + "quote", + "syn 2.0.98", + "trybuild", +] + [[package]] name = "datafusion-execution" version = "45.0.0" @@ -5374,6 +5405,15 @@ dependencies = [ "syn 2.0.98", ] +[[package]] +name = "serde_spanned" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +dependencies = [ + "serde", +] + [[package]] name = "serde_tokenstream" version = "0.2.2" @@ -5848,6 +5888,12 @@ version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" +[[package]] +name = "target-triple" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42a4d50cdb458045afc8131fd91b64904da29548bcb63c7236e0844936c13078" + [[package]] name = "tempfile" version = "3.17.0" @@ -5862,6 +5908,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + [[package]] name = "termtree" version = "0.5.1" @@ -6166,11 +6221,26 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml" +version = "0.8.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" +dependencies = [ + "serde", + "serde_spanned", + "toml_datetime", + "toml_edit", +] + [[package]] name = "toml_datetime" version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" +dependencies = [ + "serde", +] [[package]] name = "toml_edit" @@ -6179,6 +6249,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" dependencies = [ "indexmap 2.7.1", + "serde", + "serde_spanned", "toml_datetime", "winnow", ] @@ -6297,6 +6369,21 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +[[package]] +name = "trybuild" +version = "1.0.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b812699e0c4f813b872b373a4471717d9eb550da14b311058a4d9cf4173cbca6" +dependencies = [ + "glob", + "serde", + "serde_derive", + "serde_json", + "target-triple", + "termcolor", + "toml", +] + [[package]] name = "tstr" version = "0.2.4" diff --git a/Cargo.toml b/Cargo.toml index 4fcc13144243..c5436101bf7d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,6 +23,8 @@ members = [ "datafusion/catalog", "datafusion/catalog-listing", "datafusion/core", + "datafusion/excalibur/lib", + "datafusion/excalibur/macros", "datafusion/expr", "datafusion/expr-common", "datafusion/execution", @@ -104,6 +106,8 @@ datafusion-common = { path = "datafusion/common", version = "45.0.0", default-fe datafusion-common-runtime = { path = "datafusion/common-runtime", version = "45.0.0" } datafusion-datasource = { path = "datafusion/datasource", version = "45.0.0", default-features = false } datafusion-doc = { path = "datafusion/doc", version = "45.0.0" } +datafusion-excalibur = { path = "datafusion/excalibur/lib", version = "45.0.0" } +datafusion-excalibur-macros = { path = "datafusion/excalibur/macros", version = "45.0.0" } datafusion-execution = { path = "datafusion/execution", version = "45.0.0" } datafusion-expr = { path = "datafusion/expr", version = "45.0.0" } datafusion-expr-common = { path = "datafusion/expr-common", version = "45.0.0" } diff --git a/datafusion/excalibur/lib/Cargo.toml b/datafusion/excalibur/lib/Cargo.toml new file mode 100644 index 000000000000..c09f2a2161c8 --- /dev/null +++ b/datafusion/excalibur/lib/Cargo.toml @@ -0,0 +1,36 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-excalibur" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } + +[lints] +workspace = true + +[dependencies] +arrow = { workspace = true } +datafusion-common = { workspace = true } +datafusion-excalibur-macros = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-expr-common = { workspace = true } diff --git a/datafusion/excalibur/lib/src/arg_type.rs b/datafusion/excalibur/lib/src/arg_type.rs new file mode 100644 index 000000000000..f0450d1d2fc9 --- /dev/null +++ b/datafusion/excalibur/lib/src/arg_type.rs @@ -0,0 +1,47 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::reader::ExArrayReaderConsumer; +use datafusion_common::types::NativeType; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; + +pub trait ExInstantiable { + type StackType<'a>; +} + +pub trait ExArgType: ExInstantiable { + fn logical_type() -> NativeType; + + fn decode( + arg: ColumnarValue, + consumer: impl for<'a> ExArrayReaderConsumer = Self::StackType<'a>>, + ) -> Result<()>; +} + +pub type FindExArgType = ::Type; + +pub trait ExFindImplementation { + type Type: ExArgType; +} + +impl ExFindImplementation for T +where + T: ExArgType, +{ + type Type = T; +} diff --git a/datafusion/excalibur/lib/src/arg_type_list.rs b/datafusion/excalibur/lib/src/arg_type_list.rs new file mode 100644 index 000000000000..e31faf894ca1 --- /dev/null +++ b/datafusion/excalibur/lib/src/arg_type_list.rs @@ -0,0 +1,43 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::arg_type::ExArgType; +use datafusion_expr::TypeSignatureClass; +use std::sync::Arc; + +pub trait ExArgTypeList { + fn type_signature() -> Vec; +} + +impl ExArgTypeList for () { + fn type_signature() -> Vec { + vec![] + } +} + +impl ExArgTypeList for (Head, Tail) +where + Head: ExArgType, + Tail: ExArgTypeList, +{ + fn type_signature() -> Vec { + let mut signature = + vec![TypeSignatureClass::Native(Arc::new(Head::logical_type()))]; + signature.extend(Tail::type_signature()); + signature + } +} diff --git a/datafusion/excalibur/lib/src/boolean.rs b/datafusion/excalibur/lib/src/boolean.rs new file mode 100644 index 000000000000..78d029f251de --- /dev/null +++ b/datafusion/excalibur/lib/src/boolean.rs @@ -0,0 +1,72 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use crate::primitive_type; +use crate::reader::ExArrayReader; +use arrow::array::{Array, ArrayRef, BooleanArray, BooleanBuilder}; +use arrow::datatypes::DataType; +use datafusion_common::cast::as_boolean_array; +use datafusion_common::Result; +use std::sync::Arc; + +primitive_type!(bool, Boolean, BooleanArray, as_boolean_array); + +impl<'a> ExArrayReader<'a> for &'a BooleanArray { + type ValueType = bool; + + fn is_valid(&self, position: usize) -> bool { + Array::is_valid(self, position) + } + + fn get(&self, position: usize) -> Self::ValueType { + self.value(position) + } +} + +impl ExFullResultType for ((), bool) { + type BuilderType = BooleanBuilder; + + fn data_type() -> DataType { + DataType::Boolean + } + + fn builder_with_capacity(number_rows: usize) -> Self::BuilderType { + Self::BuilderType::with_capacity(number_rows) + } +} + +impl ExArrayBuilder for BooleanBuilder { + type OutArg = (); + type Return = bool; + + fn get_out_arg(&mut self, _position: usize) -> Self::OutArg {} + + fn append(&mut self, fn_ret: Self::Return) -> Result<()> { + self.append_value(fn_ret); + Ok(()) + } + + fn append_null(&mut self) -> Result<()> { + self.append_null(); + Ok(()) + } + + fn build(mut self) -> Result { + Ok(Arc::new(self.finish())) + } +} diff --git a/datafusion/excalibur/lib/src/bridge.rs b/datafusion/excalibur/lib/src/bridge.rs new file mode 100644 index 000000000000..78e79f13cf12 --- /dev/null +++ b/datafusion/excalibur/lib/src/bridge.rs @@ -0,0 +1,42 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Contract between the macro and the library + +use crate::arg_type::ExInstantiable; + +pub trait ExcaliburScalarUdf { + // for example "my_function" + const SQL_NAME: &'static str; + + // for example + // - (i32, (u64, ()) for my_function(a: i32, b: u64) + // - (i32, (u64, ()) for my_function(a: i32, b: u64, out: &mut X) + // excludes the out arg + type ArgumentRustTypes: ExInstantiable; + + // T for `&mut T` passed to the function or () is there is no out argument + type OutArgRustType: ExInstantiable; + + // for example i32 for my_function(..) -> i32 + type ReturnRustType; + + fn invoke( + regular_args: ::StackType<'_>, + out_arg: &mut ::StackType<'_>, + ) -> Self::ReturnRustType; +} diff --git a/datafusion/excalibur/lib/src/builder.rs b/datafusion/excalibur/lib/src/builder.rs new file mode 100644 index 000000000000..7b752c9077a9 --- /dev/null +++ b/datafusion/excalibur/lib/src/builder.rs @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::__private::ExInstantiable; +use arrow::array::ArrayRef; +use arrow::datatypes::DataType; +use datafusion_common::Result; + +pub trait ExFullResultType { + type BuilderType: ExArrayBuilder; + + fn data_type() -> DataType; + + fn builder_with_capacity(number_rows: usize) -> Self::BuilderType; +} + +pub trait ExArrayBuilder { + type OutArg: ExInstantiable; + type Return; + + fn get_out_arg( + &mut self, + position: usize, + ) -> ::StackType<'_>; + + fn append(&mut self, fn_ret: Self::Return) -> Result<()>; + + fn append_null(&mut self) -> Result<()>; + + fn build(self) -> Result; +} + +impl ExInstantiable for () { + type StackType<'a> = (); +} diff --git a/datafusion/excalibur/lib/src/ints.rs b/datafusion/excalibur/lib/src/ints.rs new file mode 100644 index 000000000000..65bf051f7a72 --- /dev/null +++ b/datafusion/excalibur/lib/src/ints.rs @@ -0,0 +1,108 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use crate::primitive_type; +use crate::reader::ExArrayReader; +use arrow::array::{Array, ArrowPrimitiveType, PrimitiveArray}; +use arrow::array::{ArrayRef, PrimitiveBuilder}; +use arrow::datatypes::DataType; +use arrow::datatypes::{ + Int16Type, Int32Type, Int64Type, Int8Type, UInt16Type, UInt32Type, UInt64Type, + UInt8Type, +}; +use datafusion_common::cast::{ + as_int16_array, as_int32_array, as_int64_array, as_int8_array, as_uint16_array, + as_uint32_array, as_uint64_array, as_uint8_array, +}; +use datafusion_common::Result; +use std::sync::Arc; + +primitive_type!(i8, Int8, Int8Array, as_int8_array); +primitive_type!(i16, Int16, Int16Array, as_int16_array); +primitive_type!(i32, Int32, Int32Array, as_int32_array); +primitive_type!(i64, Int64, Int64Array, as_int64_array); + +primitive_type!(u8, UInt8, UInt8Array, as_uint8_array); +primitive_type!(u16, UInt16, UInt16Array, as_uint16_array); +primitive_type!(u32, UInt32, UInt32Array, as_uint32_array); +primitive_type!(u64, UInt64, UInt64Array, as_uint64_array); + +impl<'a, T> ExArrayReader<'a> for &'a PrimitiveArray +where + T: ArrowPrimitiveType, +{ + type ValueType = T::Native; + + fn is_valid(&self, position: usize) -> bool { + Array::is_valid(self, position) + } + + fn get(&self, position: usize) -> Self::ValueType { + self.value(position) + } +} + +macro_rules! primitive_result_type { + ($native_type:ty, $arrow_primitive_type:ty, $dt_option_name:ident) => { + impl ExFullResultType for ((), $native_type) { + type BuilderType = PrimitiveBuilder<$arrow_primitive_type>; + + fn data_type() -> DataType { + DataType::$dt_option_name + } + + fn builder_with_capacity(number_rows: usize) -> Self::BuilderType { + Self::BuilderType::with_capacity(number_rows) + } + } + }; +} + +primitive_result_type!(i8, Int8Type, Int8); +primitive_result_type!(i16, Int16Type, Int16); +primitive_result_type!(i32, Int32Type, Int32); +primitive_result_type!(i64, Int64Type, Int64); + +primitive_result_type!(u8, UInt8Type, UInt8); +primitive_result_type!(u16, UInt16Type, UInt16); +primitive_result_type!(u32, UInt32Type, UInt32); +primitive_result_type!(u64, UInt64Type, UInt64); + +impl ExArrayBuilder for PrimitiveBuilder +where + T: ArrowPrimitiveType, +{ + type OutArg = (); + type Return = T::Native; + + fn get_out_arg(&mut self, _position: usize) -> Self::OutArg {} + + fn append(&mut self, fn_ret: T::Native) -> Result<()> { + self.append_value(fn_ret); + Ok(()) + } + + fn append_null(&mut self) -> Result<()> { + self.append_null(); + Ok(()) + } + + fn build(mut self) -> Result { + Ok(Arc::new(self.finish())) + } +} diff --git a/datafusion/excalibur/lib/src/invoke.rs b/datafusion/excalibur/lib/src/invoke.rs new file mode 100644 index 000000000000..202aebbb38b5 --- /dev/null +++ b/datafusion/excalibur/lib/src/invoke.rs @@ -0,0 +1,222 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::arg_type::{ExArgType, ExInstantiable}; +use crate::bridge::ExcaliburScalarUdf; +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use crate::reader::{ExArrayReader, ExArrayReaderConsumer}; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use datafusion_expr::ScalarFunctionArgs; +use std::collections::VecDeque; + +pub fn excalibur_invoke(args: ScalarFunctionArgs) -> Result +where + T: ExcaliburScalarUdf, + T::ArgumentRustTypes: ApplyList, + (T::OutArgRustType, T::ReturnRustType): ExFullResultType< + BuilderType: ExArrayBuilder< + OutArg: for<'a> ExInstantiable< + StackType<'a> = ::StackType<'a>, + >, + Return = T::ReturnRustType, + >, + >, +{ + let number_rows = args.number_rows; + let args = args.args; + assert_eq!(args.len(), T::ArgumentRustTypes::ARITY); + let args = VecDeque::from(args); + + let mut builder = + <(T::OutArgRustType, T::ReturnRustType) as ExFullResultType>::builder_with_capacity( + number_rows, + ); + + T::ArgumentRustTypes::apply( + args, + number_rows, + // valid + |_position: usize| true, + // apply + |_position, args, out_arg| T::invoke(args, out_arg), + &mut builder, + )?; + + let array = builder.build()?; + Ok(ColumnarValue::Array(array)) +} + +pub trait ApplyList: ExInstantiable { + const ARITY: usize; + + fn apply( + args: VecDeque, + number_rows: usize, + valid: Valid, + invoke: Invoke, + builder: &mut Builder, + ) -> Result<()> + where + Builder: ExArrayBuilder, + Valid: Fn(usize) -> bool, + Invoke: for<'a> Fn( + usize, + Self::StackType<'a>, + &mut ::StackType<'_>, + ) -> Builder::Return; +} + +impl ExInstantiable for (Head, Tail) +where + Head: ExArgType, + Tail: ApplyList, +{ + type StackType<'a> = (Head::StackType<'a>, Tail::StackType<'a>); +} + +impl ApplyList for (Head, Tail) +where + Head: ExArgType, + Tail: ApplyList, +{ + const ARITY: usize = 1 + Tail::ARITY; + + fn apply( + mut args: VecDeque, + number_rows: usize, + valid: Valid, + invoke: Invoke, + builder: &mut Builder, + ) -> Result<()> + where + Builder: ExArrayBuilder, + Valid: Fn(usize) -> bool, + Invoke: for<'a> Fn( + usize, + Self::StackType<'a>, + &mut ::StackType<'_>, + ) -> Builder::Return, + { + let arg = args.pop_front().unwrap(); + let continuation = ApplyListHeadConsumer { + remaining_args: args, + number_rows, + valid, + invoke, + builder, + _phantom_head: std::marker::PhantomData::, + _phantom_tail: std::marker::PhantomData::, + }; + Head::decode(arg, continuation) + } +} + +struct ApplyListHeadConsumer<'a, Head, Tail, Builder, Valid, Invoke> { + remaining_args: VecDeque, + number_rows: usize, + valid: Valid, + invoke: Invoke, + builder: &'a mut Builder, + _phantom_head: std::marker::PhantomData, + _phantom_tail: std::marker::PhantomData, +} + +impl ExArrayReaderConsumer + for ApplyListHeadConsumer<'_, Head, Tail, Builder, Valid, Invoke> +where + Head: ExArgType, + Tail: ApplyList, + Builder: ExArrayBuilder, + Valid: Fn(usize) -> bool, + Invoke: for<'a> Fn( + usize, + (Head::StackType<'a>, Tail::StackType<'a>), + &mut ::StackType<'_>, + ) -> Builder::Return, +{ + type ValueType<'a> = Head::StackType<'a>; + + fn consume<'a, AR>(self, reader: AR) -> Result<()> + where + AR: ExArrayReader<'a, ValueType = Self::ValueType<'a>>, + { + let ApplyListHeadConsumer { + remaining_args: args, + number_rows, + valid, + invoke, + builder, + _phantom_head, + _phantom_tail, + } = self; + Tail::apply( + args, + number_rows, + |position| valid(position) && reader.is_valid(position), + |position, tail_args, out_arg| { + let head_arg: Head::StackType<'_> = reader.get(position); + let record = (head_arg, tail_args); + // FIXME: here we succumb to the borrow checker + // SAFETY: the Invoke is guaranteed not to capture the reference it is given + let record = unsafe { + std::mem::transmute::< + (Head::StackType<'_>, Tail::StackType<'_>), + (Head::StackType<'_>, Tail::StackType<'_>), + >(record) + }; + invoke(position, record, out_arg) + }, + builder, + ) + } +} + +impl ApplyList for () { + const ARITY: usize = 0; + + fn apply( + args: VecDeque, + number_rows: usize, + valid: Valid, + invoke: Invoke, + builder: &mut Builder, + ) -> Result<()> + where + Builder: ExArrayBuilder, + Valid: Fn(usize) -> bool, + Invoke: for<'a> Fn( + usize, + Self::StackType<'a>, + &mut ::StackType<'_>, + ) -> Builder::Return, + { + assert!(args.is_empty()); + for position in 0..number_rows { + if valid(position) { + let mut out_arg: ::StackType<'_> = + builder.get_out_arg(position); + let result = invoke(position, (), &mut out_arg); + drop(out_arg); + builder.append(result)?; + } else { + builder.append_null()?; + } + } + Ok(()) + } +} diff --git a/datafusion/excalibur/lib/src/lib.rs b/datafusion/excalibur/lib/src/lib.rs new file mode 100644 index 000000000000..44ce993ab4fa --- /dev/null +++ b/datafusion/excalibur/lib/src/lib.rs @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod arg_type; +mod arg_type_list; +mod boolean; +mod bridge; +mod builder; +mod ints; +mod invoke; +mod option_arg; +mod option_ret; +mod primitives; +mod reader; +mod result; +mod ret_type; +mod scalar_udf; +mod signature; +mod string; + +pub use ret_type::ValuePresence; + +// Not public API. +#[doc(hidden)] +pub mod __private { + // Re-exports used by the macros. + + pub use crate::arg_type::ExInstantiable; + pub use crate::arg_type::FindExArgType; + pub use crate::bridge::ExcaliburScalarUdf; + pub use crate::ret_type::FindExOutArgType; + pub use crate::scalar_udf::create_excalibur_scalar_udf; + pub use datafusion_expr::ScalarUDFImpl; +} diff --git a/datafusion/excalibur/lib/src/option_arg.rs b/datafusion/excalibur/lib/src/option_arg.rs new file mode 100644 index 000000000000..30f4734aa28c --- /dev/null +++ b/datafusion/excalibur/lib/src/option_arg.rs @@ -0,0 +1,111 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::arg_type::{ExArgType, ExInstantiable}; +use crate::reader::{ExArrayReader, ExArrayReaderConsumer}; +use datafusion_common::types::NativeType; +use datafusion_common::Result; +use datafusion_expr::ColumnarValue; +use std::marker::PhantomData; + +impl ExInstantiable for Option +where + T: ExArgType, +{ + type StackType<'a> = Option>; +} + +impl ExArgType for Option +where + T: ExArgType, +{ + fn logical_type() -> NativeType { + T::logical_type() + } + + fn decode( + arg: ColumnarValue, + consumer: impl for<'a> ExArrayReaderConsumer = Self::StackType<'a>>, + ) -> Result<()> { + let consumer = NullableConsumer { + _t: PhantomData, + delegate: consumer, + }; + T::decode(arg, consumer) + } +} + +struct NullableConsumer { + _t: PhantomData, + delegate: Delegate, +} + +impl ExArrayReaderConsumer for NullableConsumer +where + T: ExArgType, + Delegate: for<'a> ExArrayReaderConsumer = Option>>, +{ + type ValueType<'a> = T::StackType<'a>; + + fn consume<'a, AR>(self, reader: AR) -> Result<()> + where + AR: ExArrayReader<'a, ValueType = Self::ValueType<'a>>, + { + let NullableConsumer { _t: _, delegate } = self; + let reader = NullableReader { delegate: reader }; + delegate.consume(reader) + } +} + +struct NullableReader { + delegate: Delegate, +} + +impl<'a, Delegate> ExArrayReader<'a> for NullableReader +where + Delegate: ExArrayReader<'a>, +{ + type ValueType = Option; + + fn is_valid(&self, _position: usize) -> bool { + true + } + + fn get(&self, position: usize) -> Self::ValueType { + if self.delegate.is_valid(position) { + Some(self.delegate.get(position)) + } else { + None + } + } +} + +// Generic reader for scalar values +impl ExArrayReader<'_> for Option +where + T: Copy, +{ + type ValueType = T; + + fn is_valid(&self, _position: usize) -> bool { + self.is_some() + } + + fn get(&self, _position: usize) -> Self::ValueType { + self.unwrap() + } +} diff --git a/datafusion/excalibur/lib/src/option_ret.rs b/datafusion/excalibur/lib/src/option_ret.rs new file mode 100644 index 000000000000..743f77f6e5df --- /dev/null +++ b/datafusion/excalibur/lib/src/option_ret.rs @@ -0,0 +1,75 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::__private::ExInstantiable; +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use arrow::array::ArrayRef; +use arrow::datatypes::DataType; +use datafusion_common::Result; + +impl ExFullResultType for ((), Option) +where + ((), T): ExFullResultType>, +{ + type BuilderType = + ResultBuilderWithOptionSupport<<((), T) as ExFullResultType>::BuilderType>; + + fn data_type() -> DataType { + <((), T) as ExFullResultType>::data_type() + } + + fn builder_with_capacity(number_rows: usize) -> Self::BuilderType { + Self::BuilderType { + delegate: <((), T) as ExFullResultType>::builder_with_capacity(number_rows), + } + } +} + +pub struct ResultBuilderWithOptionSupport { + delegate: Delegate, +} + +impl ExArrayBuilder for ResultBuilderWithOptionSupport +where + Delegate: ExArrayBuilder, +{ + type OutArg = Delegate::OutArg; + type Return = Option; + + fn get_out_arg( + &mut self, + position: usize, + ) -> ::StackType<'_> { + self.delegate.get_out_arg(position) + } + + fn append(&mut self, fn_ret: Self::Return) -> Result<()> { + if let Some(ret) = fn_ret { + self.delegate.append(ret) + } else { + self.delegate.append_null() + } + } + + fn append_null(&mut self) -> Result<()> { + self.delegate.append_null() + } + + fn build(self) -> Result { + self.delegate.build() + } +} diff --git a/datafusion/excalibur/lib/src/primitives.rs b/datafusion/excalibur/lib/src/primitives.rs new file mode 100644 index 000000000000..4e6aca291776 --- /dev/null +++ b/datafusion/excalibur/lib/src/primitives.rs @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[macro_export] +macro_rules! primitive_type { + ($native_type:ty, $dt_option_name:ident, $array_type:ty, $as_array:ident) => { + impl $crate::arg_type::ExInstantiable for $native_type { + type StackType<'a> = $native_type; + } + + impl $crate::arg_type::ExArgType for $native_type { + fn logical_type() -> datafusion_common::types::NativeType { + arrow::datatypes::DataType::$dt_option_name.into() + } + + fn decode( + arg: datafusion_expr::ColumnarValue, + consumer: impl for<'a> $crate::reader::ExArrayReaderConsumer< + ValueType<'a> = Self::StackType<'a>, + >, + ) -> Result<()> { + use datafusion_common::ScalarValue; + use datafusion_expr::ColumnarValue::*; + match arg { + Array(array) => consumer.consume($as_array(&array)?), + Scalar(scalar) => { + if let ScalarValue::$dt_option_name(value) = scalar { + consumer.consume(value) + } else { + datafusion_common::internal_err!( + "Expected {} scalar, got: {:?}", + stringify!($native_type), + scalar + ) + } + } + } + } + } + }; +} diff --git a/datafusion/excalibur/lib/src/reader.rs b/datafusion/excalibur/lib/src/reader.rs new file mode 100644 index 000000000000..369979c47c80 --- /dev/null +++ b/datafusion/excalibur/lib/src/reader.rs @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_common::Result; + +pub trait ExArrayReader<'a> { + type ValueType; + + // TODO use this for loop unswitching + /// Returns the length L of the stride of positions guaranteed to be valid, starting + /// from the given position S. The position S + L is *not* guaranteed + /// to be invalid. + fn valid_stride(&self, start_position: usize) -> usize { + if self.is_valid(start_position) { + 1 + } else { + 0 + } + } + + /// Checks whether the position is valid or null. + /// + /// Panics if position out of bounds. + fn is_valid(&self, position: usize) -> bool; + + /// Retrieves the value at the given position. + /// + /// Panics if position is invalid or out of bounds. + fn get(&self, position: usize) -> Self::ValueType; +} + +pub trait ExArrayReaderConsumer { + type ValueType<'a>; + + fn consume<'a, AR>(self, reader: AR) -> Result<()> + where + AR: ExArrayReader<'a, ValueType = Self::ValueType<'a>>; +} diff --git a/datafusion/excalibur/lib/src/result.rs b/datafusion/excalibur/lib/src/result.rs new file mode 100644 index 000000000000..6e035b400781 --- /dev/null +++ b/datafusion/excalibur/lib/src/result.rs @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::__private::ExInstantiable; +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use arrow::array::ArrayRef; +use arrow::datatypes::DataType; +use datafusion_common::Result; + +impl ExFullResultType for ((), Result) +where + ((), T): ExFullResultType, +{ + type BuilderType = + ResultBuilderWithResultSupport<<((), T) as ExFullResultType>::BuilderType>; + + fn data_type() -> DataType { + <((), T) as ExFullResultType>::data_type() + } + + fn builder_with_capacity(number_rows: usize) -> Self::BuilderType { + Self::BuilderType { + delegate: <((), T) as ExFullResultType>::builder_with_capacity(number_rows), + } + } +} + +pub struct ResultBuilderWithResultSupport { + delegate: Delegate, +} + +impl ExArrayBuilder for ResultBuilderWithResultSupport +where + Delegate: ExArrayBuilder, +{ + type OutArg = Delegate::OutArg; + type Return = Result; + + fn get_out_arg( + &mut self, + position: usize, + ) -> ::StackType<'_> { + self.delegate.get_out_arg(position) + } + + fn append(&mut self, fn_ret: Self::Return) -> Result<()> { + self.delegate.append(fn_ret?) + } + + fn append_null(&mut self) -> Result<()> { + self.delegate.append_null() + } + + fn build(self) -> Result { + self.delegate.build() + } +} diff --git a/datafusion/excalibur/lib/src/ret_type.rs b/datafusion/excalibur/lib/src/ret_type.rs new file mode 100644 index 000000000000..b0f6ae86d2c4 --- /dev/null +++ b/datafusion/excalibur/lib/src/ret_type.rs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub type FindExOutArgType = ::Type; + +pub trait ExFindOutImplementation { + type Type; +} + +pub enum ValuePresence { + Value, + Null, +} diff --git a/datafusion/excalibur/lib/src/scalar_udf.rs b/datafusion/excalibur/lib/src/scalar_udf.rs new file mode 100644 index 000000000000..b1b23c9b55a6 --- /dev/null +++ b/datafusion/excalibur/lib/src/scalar_udf.rs @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::__private::ExInstantiable; +use crate::arg_type_list::ExArgTypeList; +use crate::bridge::ExcaliburScalarUdf; +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use crate::invoke::{excalibur_invoke, ApplyList}; +use crate::signature::{create_excalibur_signature, ExcaliburSignature}; +use arrow::datatypes::DataType; +use datafusion_common::Result; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature}; +use std::any::Any; +use std::fmt::{Debug, Formatter}; +use std::marker::PhantomData; +use std::sync::Arc; + +pub fn create_excalibur_scalar_udf() -> Arc +where + T: ExcaliburScalarUdf + Send + Sync + 'static, + T::ArgumentRustTypes: ExArgTypeList, + T::ArgumentRustTypes: ApplyList, + (T::OutArgRustType, T::ReturnRustType): ExFullResultType< + BuilderType: ExArrayBuilder< + OutArg: for<'a> ExInstantiable< + StackType<'a> = ::StackType<'a>, + >, + Return = T::ReturnRustType, + >, + >, +{ + Arc::new(ExcaliburScalarUdfImpl:: { + signature: create_excalibur_signature::(), + _phantom: Default::default(), + }) +} + +struct ExcaliburScalarUdfImpl { + signature: ExcaliburSignature, + _phantom: PhantomData, +} + +impl Debug for ExcaliburScalarUdfImpl +where + T: ExcaliburScalarUdf, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("ExcaliburScalarUdfImpl({})", T::SQL_NAME)) + } +} + +impl ScalarUDFImpl for ExcaliburScalarUdfImpl +where + T: ExcaliburScalarUdf + Send + Sync + 'static, + T::ArgumentRustTypes: ApplyList, + (T::OutArgRustType, T::ReturnRustType): ExFullResultType< + BuilderType: ExArrayBuilder< + OutArg: for<'a> ExInstantiable< + StackType<'a> = ::StackType<'a>, + >, + Return = T::ReturnRustType, + >, + >, +{ + fn as_any(&self) -> &dyn Any { + self + } + + fn name(&self) -> &str { + T::SQL_NAME + } + + fn signature(&self) -> &Signature { + self.signature.signature() + } + + fn return_type(&self, arg_types: &[DataType]) -> Result { + self.signature.return_type(arg_types) + } + + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + excalibur_invoke::(args) + } + + fn invoke_batch( + &self, + _args: &[ColumnarValue], + _number_rows: usize, + ) -> Result { + unimplemented!("invoke_batch is not implemented, it should be deprecated in https://github.com/apache/datafusion/issues/13515") + } +} diff --git a/datafusion/excalibur/lib/src/signature.rs b/datafusion/excalibur/lib/src/signature.rs new file mode 100644 index 000000000000..c80fa3cf7160 --- /dev/null +++ b/datafusion/excalibur/lib/src/signature.rs @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::arg_type_list::ExArgTypeList; +use crate::bridge::ExcaliburScalarUdf; +use crate::builder::ExFullResultType; +use arrow::datatypes::DataType; +use datafusion_common::Result; +use datafusion_expr::{Signature, Volatility}; +use datafusion_expr_common::signature::Coercion; + +pub fn create_excalibur_signature() -> ExcaliburSignature +where + T: ExcaliburScalarUdf, + T::ArgumentRustTypes: ExArgTypeList, + (T::OutArgRustType, T::ReturnRustType): ExFullResultType, +{ + ExcaliburSignature { + signature: Signature::coercible( + T::ArgumentRustTypes::type_signature() + .into_iter() + // TODO this *exact* is unintentional, see https://github.com/apache/datafusion/pull/14440#discussion_r1959483130 + .map(Coercion::new_exact) + .collect(), + Volatility::Immutable, + ), + return_type: + <(T::OutArgRustType, T::ReturnRustType) as ExFullResultType>::data_type(), + } +} + +pub struct ExcaliburSignature { + signature: Signature, + return_type: DataType, +} + +impl ExcaliburSignature { + pub fn signature(&self) -> &Signature { + &self.signature + } + + pub fn return_type(&self, _arg_types: &[DataType]) -> Result { + Ok(self.return_type.clone()) + } +} diff --git a/datafusion/excalibur/lib/src/string.rs b/datafusion/excalibur/lib/src/string.rs new file mode 100644 index 000000000000..14bf5192ff02 --- /dev/null +++ b/datafusion/excalibur/lib/src/string.rs @@ -0,0 +1,174 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::arg_type::{ExArgType, ExFindImplementation, ExInstantiable}; +use crate::builder::{ExArrayBuilder, ExFullResultType}; +use crate::reader::{ExArrayReader, ExArrayReaderConsumer}; +use crate::ret_type::ExFindOutImplementation; +use crate::ValuePresence; +use arrow::array::{Array, ArrayRef, StringArray, StringBuilder, StringViewArray}; +use arrow::datatypes::DataType; +use datafusion_common::cast::{as_string_array, as_string_view_array}; +use datafusion_common::types::NativeType; +use datafusion_common::ScalarValue; +use datafusion_common::{internal_err, Result}; +use datafusion_expr::ColumnarValue; +use std::sync::Arc; + +impl ExFindImplementation for dyn AsRef { + type Type = RefStrArgType; +} + +pub struct RefStrArgType; + +impl ExInstantiable for RefStrArgType { + type StackType<'a> = &'a str; +} + +impl ExArgType for RefStrArgType { + fn logical_type() -> NativeType { + NativeType::String + } + + fn decode( + arg: ColumnarValue, + consumer: impl for<'a> ExArrayReaderConsumer = Self::StackType<'a>>, + ) -> Result<()> { + match arg { + ColumnarValue::Array(array) => match array.data_type() { + DataType::Utf8 => consumer.consume(as_string_array(&array)?), + DataType::Utf8View => consumer.consume(as_string_view_array(&array)?), + dt => internal_err!("Expected string array, got {:?}", dt), + }, + + ColumnarValue::Scalar(ScalarValue::Utf8(value)) => { + consumer.consume(&ScalarString(value)) + } + ColumnarValue::Scalar(ScalarValue::Utf8View(value)) => { + consumer.consume(&ScalarString(value)) + } + + ColumnarValue::Scalar(scalar) => { + internal_err!("Expected string scalar, got {:?}", scalar) + } + } + } +} + +impl<'a> ExArrayReader<'a> for &'a StringArray { + type ValueType = &'a str; + + fn is_valid(&self, position: usize) -> bool { + Array::is_valid(&self, position) + } + + fn get(&self, position: usize) -> Self::ValueType { + self.value(position) + } +} + +impl<'a> ExArrayReader<'a> for &'a StringViewArray { + type ValueType = &'a str; + + fn is_valid(&self, position: usize) -> bool { + Array::is_valid(&self, position) + } + + fn get(&self, position: usize) -> Self::ValueType { + self.value(position) + } +} + +struct ScalarString(Option); + +impl<'a> ExArrayReader<'a> for &'a ScalarString { + type ValueType = &'a str; + + fn is_valid(&self, _position: usize) -> bool { + self.0.is_some() + } + + fn get(&self, _position: usize) -> Self::ValueType { + self.0.as_deref().unwrap() + } +} + +impl ExFindOutImplementation for dyn std::fmt::Write { + type Type = StringWriter; +} + +pub struct StringWriter; + +impl ExFullResultType for (StringWriter, Result) { + type BuilderType = StringBuilder; + + fn data_type() -> DataType { + DataType::Utf8 + } + + fn builder_with_capacity(number_rows: usize) -> Self::BuilderType { + StringBuilder::with_capacity(number_rows, number_rows * 10) + } +} + +impl ExInstantiable for StringWriter { + type StackType<'a> = StringBuilderWriter<'a>; +} + +pub struct StringBuilderWriter<'a> { + builder: &'a mut StringBuilder, +} + +impl std::fmt::Write for StringBuilderWriter<'_> { + fn write_str(&mut self, s: &str) -> std::fmt::Result { + self.builder.write_str(s).map_err(|_| std::fmt::Error) + } +} + +impl ExArrayBuilder for StringBuilder { + type OutArg = StringWriter; + type Return = Result; + + fn get_out_arg( + &mut self, + _position: usize, + ) -> ::StackType<'_> { + StringBuilderWriter { builder: self } + } + + fn append(&mut self, fn_ret: Self::Return) -> Result<()> { + match fn_ret? { + ValuePresence::Value => { + // Data passed via the out arg + self.append_value(""); + } + ValuePresence::Null => { + self.append_null(); + } + } + Ok(()) + } + + fn append_null(&mut self) -> Result<()> { + self.append_null(); + Ok(()) + } + + fn build(mut self) -> Result { + Ok(Arc::new(self.finish())) + } +} diff --git a/datafusion/excalibur/lib/tests/failing.rs b/datafusion/excalibur/lib/tests/failing.rs new file mode 100644 index 000000000000..52e127d9b18d --- /dev/null +++ b/datafusion/excalibur/lib/tests/failing.rs @@ -0,0 +1,182 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::BooleanArray; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::Result; +use datafusion_common::{exec_err, ScalarValue}; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn maybe_fail(fail: bool) -> Result { + if fail { + exec_err!("This test function just failed") + } else { + Ok(true) + } +} + +#[test] +fn test_function_signature() { + let udf = maybe_fail_udf(); + assert_eq!(udf.name(), "maybe_fail"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::Boolean + )))], + Volatility::Immutable + ) + ); + let return_type = udf.return_type(&[DataType::Boolean]).unwrap(); + assert_eq!(return_type, DataType::Boolean); +} + +#[test] +fn test_invoke_array() { + let udf = maybe_fail_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(BooleanArray::from(vec![ + false, false, false, + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::Boolean, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &BooleanArray::from(vec![true, true, true])); +} + +#[test] +fn test_invoke_array_fail() { + let udf = maybe_fail_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(BooleanArray::from(vec![ + false, true, false, + ])))]; + let error = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::Boolean, + }) + .err() + .unwrap(); + + assert_eq!( + error.strip_backtrace(), + "Execution error: This test function just failed" + ); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = maybe_fail_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(BooleanArray::from(vec![ + Some(false), + None, + Some(false), + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::Boolean, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &BooleanArray::from(vec![Some(true), None, Some(true)]) + ); +} + +#[test] +fn test_invoke_scalar() { + let udf = maybe_fail_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::Boolean(Some(false)))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Boolean, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &BooleanArray::from(vec![true])); +} + +#[test] +fn test_invoke_scalar_fail() { + let udf = maybe_fail_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::Boolean(Some(true)))]; + let error = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Boolean, + }) + .err() + .unwrap(); + + assert_eq!( + error.strip_backtrace(), + "Execution error: This test function just failed" + ); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = maybe_fail_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::Boolean(None))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Boolean, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &BooleanArray::from(vec![None])); +} diff --git a/datafusion/excalibur/lib/tests/null_arg.rs b/datafusion/excalibur/lib/tests/null_arg.rs new file mode 100644 index 000000000000..c8db4b3f263f --- /dev/null +++ b/datafusion/excalibur/lib/tests/null_arg.rs @@ -0,0 +1,162 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::Int32Array; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::ScalarValue; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn first_non_null(a: Option, b: Option) -> Option { + a.or(b) +} + +#[test] +fn test_function_signature() { + let udf = first_non_null_udf(); + assert_eq!(udf.name(), "first_non_null"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::Int32 + ))), + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::Int32 + ))), + ], + Volatility::Immutable + ) + ); + let return_type = udf + .return_type(&[DataType::Int32, DataType::Int32]) + .unwrap(); + assert_eq!(return_type, DataType::Int32); +} + +#[test] +fn test_invoke_array() { + let udf = first_non_null_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(Int32Array::from(vec![0, 3, 15, 0, 3, 60]))), + ColumnarValue::Array(Arc::new(Int32Array::from(vec![1, 3, 3, 0, 0, 15]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 6, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int32Array::from(vec![0, 3, 15, 0, 3, 60])); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = first_non_null_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(Int32Array::from(vec![ + Some(0), + None, + Some(15), + Some(0), + None, + Some(60), + ]))), + ColumnarValue::Array(Arc::new(Int32Array::from(vec![ + Some(1), + Some(3), + None, + Some(0), + None, + Some(15), + ]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 6, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &Int32Array::from(vec![Some(0), Some(3), Some(15), Some(0), None, Some(60)]) + ); +} + +#[test] +fn test_invoke_scalar() { + let udf = first_non_null_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Int32(Some(33))), + ColumnarValue::Scalar(ScalarValue::Int32(Some(0))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int32Array::from(vec![Some(33)])); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = first_non_null_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Int32(None)), + ColumnarValue::Scalar(ScalarValue::Int32(None)), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int32Array::from(vec![None])); +} diff --git a/datafusion/excalibur/lib/tests/null_result.rs b/datafusion/excalibur/lib/tests/null_result.rs new file mode 100644 index 000000000000..8f8c5f0860a3 --- /dev/null +++ b/datafusion/excalibur/lib/tests/null_result.rs @@ -0,0 +1,169 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::Int32Array; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::ScalarValue; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn try_div(a: i32, b: i32) -> Option { + if b == 0 { + None + } else { + Some(a / b) + } +} + +#[test] +fn test_function_signature() { + let udf = try_div_udf(); + assert_eq!(udf.name(), "try_div"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::Int32 + ))), + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::Int32 + ))), + ], + Volatility::Immutable + ) + ); + let return_type = udf + .return_type(&[DataType::Int32, DataType::Int32]) + .unwrap(); + assert_eq!(return_type, DataType::Int32); +} + +#[test] +fn test_invoke_array() { + let udf = try_div_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(Int32Array::from(vec![0, 3, 15, 0, 3, 60]))), + ColumnarValue::Array(Arc::new(Int32Array::from(vec![1, 3, 3, 0, 0, 15]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 6, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &Int32Array::from(vec![Some(0), Some(1), Some(5), None, None, Some(4)]) + ); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = try_div_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(Int32Array::from(vec![ + Some(0), + None, + Some(15), + Some(0), + Some(3), + Some(60), + ]))), + ColumnarValue::Array(Arc::new(Int32Array::from(vec![ + Some(1), + Some(3), + None, + Some(0), + Some(0), + Some(15), + ]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 6, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &Int32Array::from(vec![Some(0), None, None, None, None, Some(4)]) + ); +} + +#[test] +fn test_invoke_scalar() { + let udf = try_div_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Int32(Some(33))), + ColumnarValue::Scalar(ScalarValue::Int32(Some(0))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int32Array::from(vec![None])); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = try_div_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Int32(Some(-3))), + ColumnarValue::Scalar(ScalarValue::Int32(None)), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Int32, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int32Array::from(vec![None])); +} diff --git a/datafusion/excalibur/lib/tests/smoke_test.rs b/datafusion/excalibur/lib/tests/smoke_test.rs new file mode 100644 index 000000000000..2503927b0215 --- /dev/null +++ b/datafusion/excalibur/lib/tests/smoke_test.rs @@ -0,0 +1,140 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::UInt64Array; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::ScalarValue; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn add_one(a: u64) -> u64 { + a + 1 +} + +#[test] +fn test_function_signature() { + let udf = add_one_udf(); + assert_eq!(udf.name(), "add_one"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::UInt64 + )))], + Volatility::Immutable + ) + ); + let return_type = udf.return_type(&[DataType::UInt64]).unwrap(); + assert_eq!(return_type, DataType::UInt64); +} + +#[test] +fn test_invoke_array() { + let udf = add_one_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(UInt64Array::from(vec![ + 1000, 2000, 3000, 4000, 5000, + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 5, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &UInt64Array::from(vec![1001, 2001, 3001, 4001, 5001]) + ); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = add_one_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(UInt64Array::from(vec![ + Some(1000), + None, + Some(3000), + None, + Some(5000), + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 5, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &UInt64Array::from(vec![Some(1001), None, Some(3001), None, Some(5001)]) + ); +} + +#[test] +fn test_invoke_scalar() { + let udf = add_one_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::UInt64(Some(1000)))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![1001])); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = add_one_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::UInt64(None))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![None])); +} diff --git a/datafusion/excalibur/lib/tests/str_arg.rs b/datafusion/excalibur/lib/tests/str_arg.rs new file mode 100644 index 000000000000..f35fea98488b --- /dev/null +++ b/datafusion/excalibur/lib/tests/str_arg.rs @@ -0,0 +1,183 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{StringArray, StringViewArray, UInt64Array}; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::ScalarValue; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn character_length(s: &str) -> u64 { + s.chars().count() as u64 +} + +#[test] +fn test_function_signature() { + let udf = character_length_udf(); + assert_eq!(udf.name(), "character_length"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::String + )))], + Volatility::Immutable + ) + ); + let return_type = udf.return_type(&[DataType::Utf8]).unwrap(); + assert_eq!(return_type, DataType::UInt64); +} + +#[test] +fn test_invoke_string_array() { + let udf = character_length_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![ + "", + "abc", + "Idę piękną łąką pod Warszawą", + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![0, 3, 28])); +} + +#[test] +fn test_invoke_string_view_array() { + let udf = character_length_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(StringViewArray::from(vec![ + "", + "abc", + "Idę piękną łąką pod Warszawą", + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![0, 3, 28])); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = character_length_udf(); + + let invoke_args = vec![ColumnarValue::Array(Arc::new(StringArray::from(vec![ + Some(""), + None, + Some("Idę piękną łąką pod Warszawą"), + ])))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &UInt64Array::from(vec![Some(0), None, Some(28)]) + ); +} + +#[test] +fn test_invoke_scalar_utf8() { + let udf = character_length_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::Utf8(Some( + "Idę piękną łąką pod Warszawą".to_string(), + )))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![Some(28)])); +} + +#[test] +fn test_invoke_scalar_utf8view() { + let udf = character_length_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::Utf8View(Some( + "Idę piękną łąką pod Warszawą".to_string(), + )))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![Some(28)])); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = character_length_udf(); + + let invoke_args = vec![ColumnarValue::Scalar(ScalarValue::Utf8(None))]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::UInt64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &UInt64Array::from(vec![None])); +} diff --git a/datafusion/excalibur/lib/tests/str_ret.rs b/datafusion/excalibur/lib/tests/str_ret.rs new file mode 100644 index 000000000000..81528fa11dca --- /dev/null +++ b/datafusion/excalibur/lib/tests/str_ret.rs @@ -0,0 +1,196 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::StringArray; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::Result; +use datafusion_common::ScalarValue; +use datafusion_excalibur::ValuePresence; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn concat(a: &str, b: &str, out: &mut impl std::fmt::Write) -> Result { + if a.is_empty() && b.is_empty() { + // Be like Oracle + return Ok(ValuePresence::Null); + } + out.write_str(a)?; + out.write_str(b)?; + Ok(ValuePresence::Value) +} + +#[test] +fn test_function_signature() { + let udf = concat_udf(); + assert_eq!(udf.name(), "concat"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::String + ))), + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::String + ))), + ], + Volatility::Immutable + ) + ); + let return_type = udf.return_type(&[DataType::Utf8, DataType::Utf8]).unwrap(); + assert_eq!(return_type, DataType::Utf8); +} + +#[test] +fn test_invoke_array() { + let udf = concat_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(StringArray::from(vec![ + "hello, ", + "", + "Idę piękną łąką", + ]))), + ColumnarValue::Array(Arc::new(StringArray::from(vec![ + "world!", + "", + " pod Warszawą", + ]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 3, + return_type: &DataType::Utf8, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &StringArray::from(vec![ + Some("hello, world!"), + None, + Some("Idę piękną łąką pod Warszawą"), + ]) + ); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = concat_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(StringArray::from(vec![ + Some(""), + Some(""), + None, + None, + Some("Idę piękną łąką"), + Some("Idę piękną łąką"), + None, + ]))), + ColumnarValue::Array(Arc::new(StringArray::from(vec![ + Some(""), + None, + Some(""), + None, + Some("Idę piękną łąką"), + None, + Some("Idę piękną łąką"), + ]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 7, + return_type: &DataType::Utf8, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &StringArray::from(vec![ + None, + None, + None, + None, + Some("Idę piękną łąkąIdę piękną łąką"), + None, + None, + ]) + ); +} + +#[test] +fn test_invoke_scalar() { + let udf = concat_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(Some("Idę piękną łąką".to_string()))), + ColumnarValue::Scalar(ScalarValue::Utf8(Some(" pod Warszawą".to_string()))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Utf8, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &StringArray::from(vec![("Idę piękną łąką pod Warszawą"),]) + ); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = concat_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Utf8(None)), + ColumnarValue::Scalar(ScalarValue::Utf8(None)), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Utf8, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &StringArray::from(vec![None::<&str>])); +} diff --git a/datafusion/excalibur/lib/tests/two_args.rs b/datafusion/excalibur/lib/tests/two_args.rs new file mode 100644 index 000000000000..d512dbdf12c2 --- /dev/null +++ b/datafusion/excalibur/lib/tests/two_args.rs @@ -0,0 +1,163 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use arrow::array::{Int32Array, Int64Array, UInt32Array}; +use arrow::datatypes::DataType; +use datafusion_common::types::NativeType; +use datafusion_common::ScalarValue; +use datafusion_excalibur_macros::excalibur_function; +use datafusion_expr::{ + ColumnarValue, ScalarFunctionArgs, Signature, TypeSignatureClass, Volatility, +}; +use datafusion_expr_common::signature::Coercion; +use std::sync::Arc; + +#[excalibur_function] +fn add(a: i32, b: u32) -> i64 { + a as i64 + b as i64 +} + +#[test] +fn test_function_signature() { + let udf = add_udf(); + assert_eq!(udf.name(), "add"); + + assert_eq!( + udf.signature(), + &Signature::coercible( + vec![ + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::Int32 + ))), + Coercion::new_exact(TypeSignatureClass::Native(Arc::new( + NativeType::UInt32 + ))), + ], + Volatility::Immutable + ) + ); + let return_type = udf + .return_type(&[DataType::Int32, DataType::UInt32]) + .unwrap(); + assert_eq!(return_type, DataType::Int64); +} + +#[test] +fn test_invoke_array() { + let udf = add_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(Int32Array::from(vec![1000, 2, 3000, -4, 5000]))), + ColumnarValue::Array(Arc::new(UInt32Array::from(vec![5, 111, 3000, 0, 13]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 5, + return_type: &DataType::Int64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &Int64Array::from(vec![1005, 113, 6000, -4, 5013]) + ); +} + +#[test] +fn test_invoke_array_with_nulls() { + let udf = add_udf(); + + let invoke_args = vec![ + ColumnarValue::Array(Arc::new(Int32Array::from(vec![ + None, + Some(2), + Some(3000), + Some(-4), + Some(5000), + ]))), + ColumnarValue::Array(Arc::new(UInt32Array::from(vec![ + Some(5), + Some(111), + None, + Some(0), + Some(13), + ]))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 5, + return_type: &DataType::Int64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!( + &*result_array, + &Int64Array::from(vec![None, Some(113), None, Some(-4), Some(5013)]) + ); +} + +#[test] +fn test_invoke_scalar() { + let udf = add_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Int32(Some(-3))), + ColumnarValue::Scalar(ScalarValue::UInt32(Some(55))), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Int64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int64Array::from(vec![52])); +} + +#[test] +fn test_invoke_scalar_null() { + let udf = add_udf(); + + let invoke_args = vec![ + ColumnarValue::Scalar(ScalarValue::Int32(Some(-3))), + ColumnarValue::Scalar(ScalarValue::UInt32(None)), + ]; + let ColumnarValue::Array(result_array) = udf + .invoke_with_args(ScalarFunctionArgs { + args: invoke_args, + number_rows: 1, + return_type: &DataType::Int64, + }) + .unwrap() + else { + panic!("Expected array result"); + }; + + assert_eq!(&*result_array, &Int64Array::from(vec![None])); +} diff --git a/datafusion/excalibur/macros/Cargo.toml b/datafusion/excalibur/macros/Cargo.toml new file mode 100644 index 000000000000..d53ab8ff39e8 --- /dev/null +++ b/datafusion/excalibur/macros/Cargo.toml @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "datafusion-excalibur-macros" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +repository = { workspace = true } +license = { workspace = true } +authors = { workspace = true } +rust-version = { workspace = true } +publish = false + +[lints] +workspace = true + +[lib] +proc-macro = true + +[dependencies] +convert_case = "0.7.1" +proc-macro2 = "1.0.93" +quote = "1.0.38" +syn = { version = "2.0.98", features = ["full"] } + +[dev-dependencies] +trybuild = "1.0.103" diff --git a/datafusion/excalibur/macros/src/attr.rs b/datafusion/excalibur/macros/src/attr.rs new file mode 100644 index 000000000000..4bedf13e9e9e --- /dev/null +++ b/datafusion/excalibur/macros/src/attr.rs @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use syn::meta::ParseNestedMeta; +use syn::spanned::Spanned; +use syn::{Error, LitStr}; + +#[derive(Default)] +pub struct EFAttributes { + pub name: Option, +} + +impl EFAttributes { + pub fn parse(&mut self, meta: ParseNestedMeta) -> syn::Result<()> { + match meta.path.get_ident().map(syn::Ident::to_string).as_deref() { + Some("name") => { + let value: LitStr = meta.value()?.parse()?; + self.name = Some(value.value()); + Ok(()) + } + _ => Err(Error::new(meta.path.span(), "Unknown attribute")), + } + } +} diff --git a/datafusion/excalibur/macros/src/derive.rs b/datafusion/excalibur/macros/src/derive.rs new file mode 100644 index 000000000000..5c182f239f9e --- /dev/null +++ b/datafusion/excalibur/macros/src/derive.rs @@ -0,0 +1,200 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use crate::attr::EFAttributes; +use crate::input::{InputFnInfo, NameType}; +use crate::strings::to_camel_case; +use proc_macro2::{Ident, TokenStream}; +use quote::{format_ident, quote}; +use syn::spanned::Spanned; +use syn::{parse_quote, Error, Result, TraitBoundModifier, Type, TypeParamBound}; + +pub fn derive(attributes: EFAttributes, input: InputFnInfo) -> Result { + let orig_rust_function_name = &input.name; + let sql_function_name = sql_function_name(&attributes, &input); + let udf_factory_function_name = format_ident!("{}_udf", sql_function_name); + + let imports = common_imports(); + + let (impl_struct_name, struct_definition) = + struct_definition(&input, &sql_function_name)?; + + let function_doc = format!("Factory method for a ScalarUDFImpl based on the function [`{orig_rust_function_name}`]"); + let factory_function = quote! { + #[doc = #function_doc] + #[allow(unused_qualifications)] + #[automatically_derived] + pub fn #udf_factory_function_name() -> ::std::sync::Arc { + #imports + #struct_definition + static INSTANCE: LazyLock> = + LazyLock::new(|| create_excalibur_scalar_udf::<#impl_struct_name>()); + Arc::clone(INSTANCE.deref()) + } + }; + Ok(factory_function) +} + +fn common_imports() -> TokenStream { + // Imports for everything but the outermost function signature. Keep them sorted. + let imports = quote! { + use ::datafusion_excalibur::__private::ExInstantiable; + use ::datafusion_excalibur::__private::ExcaliburScalarUdf; + use ::datafusion_excalibur::__private::FindExArgType; + use ::datafusion_excalibur::__private::FindExOutArgType; + use ::datafusion_excalibur::__private::ScalarUDFImpl; + use ::datafusion_excalibur::__private::create_excalibur_scalar_udf; + use ::std::ops::Deref; + use ::std::sync::Arc; + use ::std::sync::LazyLock; + }; + imports +} + +fn struct_definition( + input: &InputFnInfo, + sql_function_name: &str, +) -> Result<(Ident, TokenStream)> { + let orig_rust_function_name = &input.name; + let impl_struct_name = format_ident!("{}", to_camel_case(sql_function_name)); + + let (out_arg_type, out_arg_name, out_arg_invoke_expr) = + if let Some(out_arg) = &input.out_arg { + let impl_type = implement_out_arg_type(&out_arg.ty)?; + let out_arg_name = out_arg.name.clone(); + let invoke_arg = quote! { #out_arg_name }; + (impl_type, out_arg_name, invoke_arg) + } else { + let unit_type = force_type::(parse_quote! { () }); + (unit_type, format_ident!("_"), quote! {}) + }; + + let (rust_arg_type_list, destruct_args, invoke_args) = input.args.iter().try_rfold( + ( + force_type::(parse_quote! { () }), + quote! { () }, + quote! { #out_arg_invoke_expr }, + ), + |(type_list, destruct_args, invoke_args), arg| -> Result<_> { + let ArgImplementation { + impl_type, + destruct_expr, + invoke_expr, + } = implement_arg(arg)?; + Ok(( + force_type::(parse_quote! { (#impl_type, #type_list) }), + quote! { (#destruct_expr, #destruct_args) }, + quote! { #invoke_expr, #invoke_args }, + )) + }, + )?; + let rust_return_type = &input.return_ty; + + let struct_definition = quote! { + struct #impl_struct_name {} + + impl ExcaliburScalarUdf for #impl_struct_name { + const SQL_NAME: &'static str = #sql_function_name; + type ArgumentRustTypes = #rust_arg_type_list; + type OutArgRustType = #out_arg_type; + type ReturnRustType = #rust_return_type; + + fn invoke( + regular_args: ::StackType<'_>, + #out_arg_name: &mut ::StackType<'_>, + ) -> Self::ReturnRustType { + // TODO real invoke body + let #destruct_args = regular_args; + #orig_rust_function_name(#invoke_args) + } + } + }; + Ok((impl_struct_name, struct_definition)) +} + +fn sql_function_name(attributes: &EFAttributes, input: &InputFnInfo) -> String { + if let Some(name) = &attributes.name { + name.to_owned() + } else { + input.name.to_string() + } +} + +fn implement_arg(arg: &NameType) -> Result { + let impl_type = implement_arg_type(&arg.ty)?; + let arg_name = &arg.name; + Ok(ArgImplementation { + impl_type, + destruct_expr: quote! { #arg_name }, + invoke_expr: quote! { #arg_name }, + }) +} + +struct ArgImplementation { + impl_type: Type, + destruct_expr: TokenStream, + invoke_expr: TokenStream, +} + +fn implement_arg_type(ty: &Type) -> Result { + match ty { + Type::Reference(type_reference) => { + if type_reference.mutability.is_none() && type_reference.lifetime.is_none() { + let referred = &type_reference.elem; + return Ok(force_type::( + parse_quote! { FindExArgType> }, + )); + } + } + + Type::Path(_) => { + return Ok(force_type::(parse_quote! { FindExArgType<#ty> })); + } + _ => {} + } + Err(Error::new( + ty.span(), + "Function argument has unsupported type for use with Excalibur", + )) +} + +fn implement_out_arg_type(ty: &Type) -> Result { + if let Type::Reference(type_reference) = ty { + if type_reference.mutability.is_some() && type_reference.lifetime.is_none() { + let referred = &type_reference.elem; + if let Type::ImplTrait(impl_trait) = &**referred { + if impl_trait.bounds.len() == 1 { + if let TypeParamBound::Trait(tr) = &impl_trait.bounds[0] { + if let TraitBoundModifier::None = tr.modifier { + return Ok(force_type::( + parse_quote! { FindExOutArgType }, + )); + } + } + } + } + } + } + Err(Error::new( + ty.span(), + "Function argument has unsupported type for use with Excalibur", + )) +} + +fn force_type(val: T) -> T { + val +} diff --git a/datafusion/excalibur/macros/src/input.rs b/datafusion/excalibur/macros/src/input.rs new file mode 100644 index 000000000000..4d70a4c5c50c --- /dev/null +++ b/datafusion/excalibur/macros/src/input.rs @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use proc_macro2::Ident; +use syn::spanned::Spanned; +use syn::{Error, FnArg, ItemFn, Pat, Result, ReturnType, Type}; + +pub struct InputFnInfo { + pub name: Ident, + pub args: Vec, + pub out_arg: Option, + pub return_ty: Type, +} + +pub struct NameType { + pub name: Ident, + pub ty: Type, +} + +impl InputFnInfo { + /// Validate the input and capture the necessary information + pub fn try_from(input_fn: ItemFn) -> Result { + let sig = input_fn.sig; + + if sig.asyncness.is_some() { + return Err(Error::new( + sig.span(), + "Function cannot be async for use with Excalibur", + )); + } + if sig.variadic.is_some() { + return Err(Error::new( + sig.span(), + "Function cannot be variadic for use with Excalibur", + )); + } + + let mut inputs: Vec<_> = sig.inputs.iter().collect(); + let mut out_arg = None; + if let Some(FnArg::Typed(typed)) = inputs.last() { + if let Pat::Ident(ident) = &*typed.pat { + if let Type::Reference(type_reference) = &*typed.ty { + if type_reference.mutability.is_some() { + out_arg = Some(NameType { + name: ident.ident.clone(), + ty: (*typed.ty).clone(), + }); + inputs.pop(); + } + } + } + } + + let args = inputs + .into_iter() + .map(|arg| { + if let FnArg::Typed(typed) = arg { + if let Pat::Ident(ident) = &*typed.pat { + if typed.attrs.is_empty() { + return Ok(NameType { + name: ident.ident.clone(), + ty: (*typed.ty).clone(), + }); + } + } + } + Err(Error::new( + arg.span(), + "Unsupported function argument (name, type or attributes) for use with Excalibur", + )) + }) + .collect::>>()?; + + let ReturnType::Type(_, return_ty) = sig.output else { + return Err(Error::new( + sig.output.span(), + "Function needs a return type for use with Excalibur", + )); + }; + + Ok(InputFnInfo { + name: sig.ident, + args, + out_arg, + return_ty: (*return_ty).to_owned(), + }) + } +} diff --git a/datafusion/excalibur/macros/src/lib.rs b/datafusion/excalibur/macros/src/lib.rs new file mode 100644 index 000000000000..ae22f67db142 --- /dev/null +++ b/datafusion/excalibur/macros/src/lib.rs @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +mod attr; +mod derive; +mod input; +mod strings; + +use attr::EFAttributes; +use input::InputFnInfo; +use proc_macro::TokenStream; +use syn::{parse_macro_input, Error, ItemFn}; + +#[proc_macro_attribute] +pub fn excalibur_function(attributes: TokenStream, input: TokenStream) -> TokenStream { + // parse attributes + let mut parsed_attributes = EFAttributes::default(); + let attribute_parser = syn::meta::parser(|meta| parsed_attributes.parse(meta)); + parse_macro_input!(attributes with attribute_parser); + // the original input should be output unchanged + let original_input = input.clone(); + // derive + let input_fn = parse_macro_input!(input as ItemFn); + let input_fn_info = InputFnInfo::try_from(input_fn).unwrap(); + TokenStream::from_iter([ + derive::derive(parsed_attributes, input_fn_info) + .unwrap_or_else(Error::into_compile_error) + .into(), + original_input, + ]) +} diff --git a/datafusion/excalibur/macros/src/strings.rs b/datafusion/excalibur/macros/src/strings.rs new file mode 100644 index 000000000000..9bb2e4f0c868 --- /dev/null +++ b/datafusion/excalibur/macros/src/strings.rs @@ -0,0 +1,21 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +pub fn to_camel_case(s: impl AsRef) -> String { + use convert_case::{Case, Casing}; + s.as_ref().to_case(Case::UpperCamel) +} diff --git a/datafusion/excalibur/macros/tests/tests.rs b/datafusion/excalibur/macros/tests/tests.rs new file mode 100644 index 000000000000..0575a39eb560 --- /dev/null +++ b/datafusion/excalibur/macros/tests/tests.rs @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#[test] +fn test_macro_errors() { + let test_cases = trybuild::TestCases::new(); + test_cases.compile_fail("tests/ui/errs/*.rs"); +} diff --git a/datafusion/excalibur/macros/tests/ui/errs/invalid_attribute.rs b/datafusion/excalibur/macros/tests/ui/errs/invalid_attribute.rs new file mode 100644 index 000000000000..735e6763e2c0 --- /dev/null +++ b/datafusion/excalibur/macros/tests/ui/errs/invalid_attribute.rs @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_excalibur_macros::excalibur_function; + +#[excalibur_function(hallucinated_attribute = "my_function")] +fn add_one(a: u64) -> u64 { + a + 1 +} + +// expected by trybuild +fn main() {} \ No newline at end of file diff --git a/datafusion/excalibur/macros/tests/ui/errs/invalid_attribute.stderr b/datafusion/excalibur/macros/tests/ui/errs/invalid_attribute.stderr new file mode 100644 index 000000000000..cdbb4abaa942 --- /dev/null +++ b/datafusion/excalibur/macros/tests/ui/errs/invalid_attribute.stderr @@ -0,0 +1,5 @@ +error: Unknown attribute + --> tests/ui/errs/invalid_attribute.rs:20:22 + | +20 | #[excalibur_function(hallucinated_attribute = "my_function")] + | ^^^^^^^^^^^^^^^^^^^^^^ diff --git a/datafusion/excalibur/macros/tests/ui/errs/invalid_name.rs b/datafusion/excalibur/macros/tests/ui/errs/invalid_name.rs new file mode 100644 index 000000000000..cc522e934c6e --- /dev/null +++ b/datafusion/excalibur/macros/tests/ui/errs/invalid_name.rs @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use datafusion_excalibur_macros::excalibur_function; + +#[excalibur_function(name = unquoted_name)] +fn add_one(a: u64) -> u64 { + a + 1 +} + +// not a string +#[excalibur_function(name = 123)] +fn add_two(a: u64) -> u64 { + a + 2 +} + +// expected by trybuild +fn main() {} \ No newline at end of file diff --git a/datafusion/excalibur/macros/tests/ui/errs/invalid_name.stderr b/datafusion/excalibur/macros/tests/ui/errs/invalid_name.stderr new file mode 100644 index 000000000000..e875a52fece1 --- /dev/null +++ b/datafusion/excalibur/macros/tests/ui/errs/invalid_name.stderr @@ -0,0 +1,11 @@ +error: expected string literal + --> tests/ui/errs/invalid_name.rs:20:29 + | +20 | #[excalibur_function(name = unquoted_name)] + | ^^^^^^^^^^^^^ + +error: expected string literal + --> tests/ui/errs/invalid_name.rs:26:29 + | +26 | #[excalibur_function(name = 123)] + | ^^^ diff --git a/taplo.toml b/taplo.toml index b7089c501680..47b33161c37e 100644 --- a/taplo.toml +++ b/taplo.toml @@ -18,6 +18,7 @@ ## https://taplo.tamasfe.dev/configuration/file.html include = ["**/Cargo.toml"] +exclude = ["target/*"] [formatting] # Align consecutive entries vertically.