From 6a954b2bd5c75a9cc2b14723a02ac601b559d286 Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Thu, 6 Mar 2025 13:31:36 +0100 Subject: [PATCH 1/7] fix: mark ScalarUDFImpl::invoke_batch as deprecated should use invoke_with_args instead See https://github.com/apache/datafusion/issues/14123#issuecomment-2703526303 --- datafusion/expr/src/udf.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index 8215b671a379..d724209d9dac 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -225,11 +225,13 @@ impl ScalarUDF { self.inner.is_nullable(args, schema) } + #[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")] pub fn invoke_batch( &self, args: &[ColumnarValue], number_rows: usize, ) -> Result { + #[allow(deprecated)] self.inner.invoke_batch(args, number_rows) } @@ -244,7 +246,7 @@ impl ScalarUDF { /// /// Note: This method is deprecated and will be removed in future releases. /// User defined functions should implement [`Self::invoke_with_args`] instead. - #[deprecated(since = "42.1.0", note = "Use `invoke_batch` instead")] + #[deprecated(since = "42.1.0", note = "Use `invoke_with_args` instead")] pub fn invoke_no_args(&self, number_rows: usize) -> Result { #[allow(deprecated)] self.inner.invoke_no_args(number_rows) @@ -252,7 +254,7 @@ impl ScalarUDF { /// Returns a `ScalarFunctionImplementation` that can invoke the function /// during execution - #[deprecated(since = "42.0.0", note = "Use `invoke_batch` instead")] + #[deprecated(since = "42.0.0", note = "Use `invoke_with_args` instead")] pub fn fun(&self) -> ScalarFunctionImplementation { let captured = Arc::clone(&self.inner); #[allow(deprecated)] @@ -613,6 +615,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// User defined functions should implement [`Self::invoke_with_args`] instead. /// /// See for more details. + #[deprecated(since = "46.0.0", note = "Use `invoke_with_args` instead")] fn invoke_batch( &self, args: &[ColumnarValue], @@ -643,6 +646,7 @@ pub trait ScalarUDFImpl: Debug + Send + Sync { /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments /// to arrays, which will likely be simpler code, but be slower. fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + #[allow(deprecated)] self.invoke_batch(&args.args, args.number_rows) } From ce73e6c9b060e7154b83bee30e969b5104c19798 Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Thu, 6 Mar 2025 14:33:48 +0100 Subject: [PATCH 2/7] fix deprecated usage that clippy warns about --- datafusion/functions-nested/benches/map.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/datafusion/functions-nested/benches/map.rs b/datafusion/functions-nested/benches/map.rs index 3726cac0752e..2774b24b902a 100644 --- a/datafusion/functions-nested/benches/map.rs +++ b/datafusion/functions-nested/benches/map.rs @@ -28,7 +28,7 @@ use std::sync::Arc; use datafusion_common::ScalarValue; use datafusion_expr::planner::ExprPlanner; -use datafusion_expr::{ColumnarValue, Expr}; +use datafusion_expr::{ColumnarValue, Expr, ScalarFunctionArgs}; use datafusion_functions_nested::map::map_udf; use datafusion_functions_nested::planner::NestedFunctionPlanner; @@ -94,11 +94,18 @@ fn criterion_benchmark(c: &mut Criterion) { let keys = ColumnarValue::Scalar(ScalarValue::List(Arc::new(key_list))); let values = ColumnarValue::Scalar(ScalarValue::List(Arc::new(value_list))); + let return_type = &map_udf() + .return_type(&[DataType::Utf8, DataType::Int32]) + .expect("should get return type"); + b.iter(|| { black_box( - // TODO use invoke_with_args map_udf() - .invoke_batch(&[keys.clone(), values.clone()], 1) + .invoke_with_args(ScalarFunctionArgs { + args: vec![keys.clone(), values.clone()], + number_rows: 1, + return_type, + }) .expect("map should work on valid values"), ); }); From 5eb52df02e58a0b955e13c714c7b4b9ece34b8c4 Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Thu, 6 Mar 2025 18:42:26 +0100 Subject: [PATCH 3/7] fix another deprecated usage that clippy warns about --- datafusion/functions/benches/strpos.rs | 31 ++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/datafusion/functions/benches/strpos.rs b/datafusion/functions/benches/strpos.rs index f4962380dfbf..df57c229e0ad 100644 --- a/datafusion/functions/benches/strpos.rs +++ b/datafusion/functions/benches/strpos.rs @@ -18,8 +18,9 @@ extern crate criterion; use arrow::array::{StringArray, StringViewArray}; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use rand::distributions::Alphanumeric; use rand::prelude::StdRng; use rand::{Rng, SeedableRng}; @@ -114,8 +115,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(strpos.invoke_batch(&args_string_ascii, n_rows)) + black_box(strpos.invoke_with_args(ScalarFunctionArgs { + args: args_string_ascii.clone(), + number_rows: n_rows, + return_type: &DataType::Int32, + })) }) }, ); @@ -126,8 +130,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(strpos.invoke_batch(&args_string_utf8, n_rows)) + black_box(strpos.invoke_with_args(ScalarFunctionArgs { + args: args_string_utf8.clone(), + number_rows: n_rows, + return_type: &DataType::Int32, + })) }) }, ); @@ -138,8 +145,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(strpos.invoke_batch(&args_string_view_ascii, n_rows)) + black_box(strpos.invoke_with_args(ScalarFunctionArgs { + args: args_string_view_ascii.clone(), + number_rows: n_rows, + return_type: &DataType::Int32, + })) }) }, ); @@ -150,8 +160,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("strpos_StringViewArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(strpos.invoke_batch(&args_string_view_utf8, n_rows)) + black_box(strpos.invoke_with_args(ScalarFunctionArgs { + args: args_string_view_utf8.clone(), + number_rows: n_rows, + return_type: &DataType::Int32, + })) }) }, ); From a981309f0c03f5108336228e95bcc365f3e6e493 Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Fri, 7 Mar 2025 15:33:23 +0100 Subject: [PATCH 4/7] fix the rest of benches --- .../functions/benches/character_length.rs | 36 ++++++---- datafusion/functions/benches/chr.rs | 15 +++- datafusion/functions/benches/cot.rs | 25 +++++-- datafusion/functions/benches/date_bin.rs | 14 ++-- datafusion/functions/benches/date_trunc.rs | 13 ++-- datafusion/functions/benches/encoding.rs | 45 ++++++++---- datafusion/functions/benches/isnan.rs | 25 +++++-- datafusion/functions/benches/iszero.rs | 24 +++++-- datafusion/functions/benches/make_date.rs | 37 ++++++---- datafusion/functions/benches/nullif.rs | 14 +++- datafusion/functions/benches/pad.rs | 70 +++++++++++++++---- datafusion/functions/benches/random.rs | 25 +++++-- datafusion/functions/benches/reverse.rs | 30 +++++--- datafusion/functions/benches/signum.rs | 25 +++++-- datafusion/functions/benches/substr.rs | 57 ++++++++++++--- datafusion/functions/benches/substr_index.rs | 12 ++-- datafusion/functions/benches/to_char.rs | 24 +++++-- datafusion/functions/benches/to_timestamp.rs | 53 +++++++++----- datafusion/functions/benches/trunc.rs | 25 +++++-- 19 files changed, 425 insertions(+), 144 deletions(-) diff --git a/datafusion/functions/benches/character_length.rs b/datafusion/functions/benches/character_length.rs index 3655d8409807..bbcfed021064 100644 --- a/datafusion/functions/benches/character_length.rs +++ b/datafusion/functions/benches/character_length.rs @@ -17,7 +17,9 @@ extern crate criterion; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; mod helper; @@ -26,6 +28,8 @@ fn criterion_benchmark(c: &mut Criterion) { // All benches are single batch run with 8192 rows let character_length = datafusion_functions::unicode::character_length(); + let return_type = DataType::Utf8; + let n_rows = 8192; for str_len in [8, 32, 128, 4096] { // StringArray ASCII only @@ -34,8 +38,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(character_length.invoke_batch(&args_string_ascii, n_rows)) + black_box(character_length.invoke_with_args(ScalarFunctionArgs { + args: args_string_ascii.clone(), + number_rows: n_rows, + return_type: &return_type, + })) }) }, ); @@ -46,8 +53,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(character_length.invoke_batch(&args_string_utf8, n_rows)) + black_box(character_length.invoke_with_args(ScalarFunctionArgs { + args: args_string_utf8.clone(), + number_rows: n_rows, + return_type: &return_type, + })) }) }, ); @@ -58,10 +68,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box( - character_length.invoke_batch(&args_string_view_ascii, n_rows), - ) + black_box(character_length.invoke_with_args(ScalarFunctionArgs { + args: args_string_view_ascii.clone(), + number_rows: n_rows, + return_type: &return_type, + })) }) }, ); @@ -72,10 +83,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("character_length_StringViewArray_utf8_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box( - character_length.invoke_batch(&args_string_view_utf8, n_rows), - ) + black_box(character_length.invoke_with_args(ScalarFunctionArgs { + args: args_string_view_utf8.clone(), + number_rows: n_rows, + return_type: &return_type, + })) }) }, ); diff --git a/datafusion/functions/benches/chr.rs b/datafusion/functions/benches/chr.rs index 58c5ee3d68f6..4750fb466653 100644 --- a/datafusion/functions/benches/chr.rs +++ b/datafusion/functions/benches/chr.rs @@ -19,10 +19,11 @@ extern crate criterion; use arrow::{array::PrimitiveArray, datatypes::Int64Type, util::test_util::seedable_rng}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::string::chr; use rand::Rng; +use arrow::datatypes::DataType; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { @@ -44,7 +45,17 @@ fn criterion_benchmark(c: &mut Criterion) { let input = Arc::new(input); let args = vec![ColumnarValue::Array(input)]; c.bench_function("chr", |b| { - b.iter(|| black_box(cot_fn.invoke_batch(&args, size).unwrap())) + b.iter(|| { + black_box( + cot_fn + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) + }) }); } diff --git a/datafusion/functions/benches/cot.rs b/datafusion/functions/benches/cot.rs index bb0585a2de9b..b2a9ca0b9f47 100644 --- a/datafusion/functions/benches/cot.rs +++ b/datafusion/functions/benches/cot.rs @@ -22,9 +22,10 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::cot; +use arrow::datatypes::DataType; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { @@ -34,16 +35,30 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("cot f32 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(cot_fn.invoke_batch(&f32_args, size).unwrap()) + black_box( + cot_fn + .invoke_with_args(ScalarFunctionArgs { + args: f32_args.clone(), + number_rows: size, + return_type: &DataType::Float32, + }) + .unwrap(), + ) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("cot f64 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(cot_fn.invoke_batch(&f64_args, size).unwrap()) + black_box( + cot_fn + .invoke_with_args(ScalarFunctionArgs { + args: f64_args.clone(), + number_rows: size, + return_type: &DataType::Float64, + }) + .unwrap(), + ) }) }); } diff --git a/datafusion/functions/benches/date_bin.rs b/datafusion/functions/benches/date_bin.rs index aa7c7710617d..7ea5fdcb2be2 100644 --- a/datafusion/functions/benches/date_bin.rs +++ b/datafusion/functions/benches/date_bin.rs @@ -25,7 +25,7 @@ use datafusion_common::ScalarValue; use rand::rngs::ThreadRng; use rand::Rng; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::date_bin; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { @@ -45,12 +45,18 @@ fn criterion_benchmark(c: &mut Criterion) { let interval = ColumnarValue::Scalar(ScalarValue::new_interval_dt(0, 1_000_000)); let timestamps = ColumnarValue::Array(timestamps_array); let udf = date_bin(); + let return_type = udf + .return_type(&[interval.data_type(), timestamps.data_type()]) + .unwrap(); b.iter(|| { - // TODO use invoke_with_args black_box( - udf.invoke_batch(&[interval.clone(), timestamps.clone()], batch_len) - .expect("date_bin should work on valid values"), + udf.invoke_with_args(ScalarFunctionArgs { + args: vec![interval.clone(), timestamps.clone()], + number_rows: batch_len, + return_type: &return_type, + }) + .expect("date_bin should work on valid values"), ) }) }); diff --git a/datafusion/functions/benches/date_trunc.rs b/datafusion/functions/benches/date_trunc.rs index d420b8f6ac70..0503064de011 100644 --- a/datafusion/functions/benches/date_trunc.rs +++ b/datafusion/functions/benches/date_trunc.rs @@ -20,12 +20,13 @@ extern crate criterion; use std::sync::Arc; use arrow::array::{Array, ArrayRef, TimestampSecondArray}; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use rand::rngs::ThreadRng; use rand::Rng; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::date_trunc; fn timestamps(rng: &mut ThreadRng) -> TimestampSecondArray { @@ -46,11 +47,15 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Scalar(ScalarValue::Utf8(Some("minute".to_string()))); let timestamps = ColumnarValue::Array(timestamps_array); let udf = date_trunc(); - + let return_type = &udf.return_type(&[timestamps.data_type()]).unwrap(); b.iter(|| { black_box( - udf.invoke_batch(&[precision.clone(), timestamps.clone()], batch_len) - .expect("date_trunc should work on valid values"), + udf.invoke_with_args(ScalarFunctionArgs { + args: vec![precision.clone(), timestamps.clone()], + number_rows: batch_len, + return_type, + }) + .expect("date_trunc should work on valid values"), ) }) }); diff --git a/datafusion/functions/benches/encoding.rs b/datafusion/functions/benches/encoding.rs index e37842a62b4a..cf8f8d2fd62c 100644 --- a/datafusion/functions/benches/encoding.rs +++ b/datafusion/functions/benches/encoding.rs @@ -17,9 +17,10 @@ extern crate criterion; +use arrow::datatypes::DataType; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::encoding; use std::sync::Arc; @@ -29,35 +30,49 @@ fn criterion_benchmark(c: &mut Criterion) { let str_array = Arc::new(create_string_array_with_len::(size, 0.2, 32)); c.bench_function(&format!("base64_decode/{size}"), |b| { let method = ColumnarValue::Scalar("base64".into()); - // TODO: use invoke_with_args let encoded = encoding::encode() - .invoke_batch( - &[ColumnarValue::Array(str_array.clone()), method.clone()], - size, - ) + .invoke_with_args(ScalarFunctionArgs { + args: vec![ColumnarValue::Array(str_array.clone()), method.clone()], + number_rows: size, + return_type: &DataType::Utf8, + }) .unwrap(); let args = vec![encoded, method]; b.iter(|| { - // TODO use invoke_with_args - black_box(decode.invoke_batch(&args, size).unwrap()) + black_box( + decode + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); c.bench_function(&format!("hex_decode/{size}"), |b| { let method = ColumnarValue::Scalar("hex".into()); - // TODO use invoke_with_args let encoded = encoding::encode() - .invoke_batch( - &[ColumnarValue::Array(str_array.clone()), method.clone()], - size, - ) + .invoke_with_args(ScalarFunctionArgs { + args: vec![ColumnarValue::Array(str_array.clone()), method.clone()], + number_rows: size, + return_type: &DataType::Utf8, + }) .unwrap(); let args = vec![encoded, method]; b.iter(|| { - // TODO use invoke_with_args - black_box(decode.invoke_batch(&args, size).unwrap()) + black_box( + decode + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); } diff --git a/datafusion/functions/benches/isnan.rs b/datafusion/functions/benches/isnan.rs index 605a520715f4..42004cc24f69 100644 --- a/datafusion/functions/benches/isnan.rs +++ b/datafusion/functions/benches/isnan.rs @@ -17,12 +17,13 @@ extern crate criterion; +use arrow::datatypes::DataType; use arrow::{ datatypes::{Float32Type, Float64Type}, util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::isnan; use std::sync::Arc; @@ -33,16 +34,30 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("isnan f32 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(isnan.invoke_batch(&f32_args, size).unwrap()) + black_box( + isnan + .invoke_with_args(ScalarFunctionArgs { + args: f32_args.clone(), + number_rows: size, + return_type: &DataType::Boolean, + }) + .unwrap(), + ) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("isnan f64 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(isnan.invoke_batch(&f64_args, size).unwrap()) + black_box( + isnan + .invoke_with_args(ScalarFunctionArgs { + args: f64_args.clone(), + number_rows: size, + return_type: &DataType::Boolean, + }) + .unwrap(), + ) }) }); } diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 48fb6fbed9c3..1df2026ea4b5 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -17,12 +17,13 @@ extern crate criterion; +use arrow::datatypes::DataType; use arrow::{ datatypes::{Float32Type, Float64Type}, util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::iszero; use std::sync::Arc; @@ -34,8 +35,15 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("iszero f32 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(iszero.invoke_batch(&f32_args, batch_len).unwrap()) + black_box( + iszero + .invoke_with_args(ScalarFunctionArgs { + args: f32_args.clone(), + number_rows: batch_len, + return_type: &DataType::Boolean, + }) + .unwrap(), + ) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); @@ -44,7 +52,15 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function(&format!("iszero f64 array: {}", size), |b| { b.iter(|| { // TODO use invoke_with_args - black_box(iszero.invoke_batch(&f64_args, batch_len).unwrap()) + black_box( + iszero + .invoke_with_args(ScalarFunctionArgs { + args: f64_args.clone(), + number_rows: batch_len, + return_type: &DataType::Boolean, + }) + .unwrap(), + ) }) }); } diff --git a/datafusion/functions/benches/make_date.rs b/datafusion/functions/benches/make_date.rs index d9309bcd3db2..8dd7a7a59773 100644 --- a/datafusion/functions/benches/make_date.rs +++ b/datafusion/functions/benches/make_date.rs @@ -20,12 +20,13 @@ extern crate criterion; use std::sync::Arc; use arrow::array::{Array, ArrayRef, Int32Array}; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::rngs::ThreadRng; use rand::Rng; use datafusion_common::ScalarValue; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::make_date; fn years(rng: &mut ThreadRng) -> Int32Array { @@ -64,13 +65,13 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { - // TODO use invoke_with_args black_box( make_date() - .invoke_batch( - &[years.clone(), months.clone(), days.clone()], - batch_len, - ) + .invoke_with_args(ScalarFunctionArgs { + args: vec![years.clone(), months.clone(), days.clone()], + number_rows: batch_len, + return_type: &DataType::Date32, + }) .expect("make_date should work on valid values"), ) }) @@ -85,13 +86,13 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(Arc::new(days(&mut rng)) as ArrayRef); b.iter(|| { - // TODO use invoke_with_args black_box( make_date() - .invoke_batch( - &[year.clone(), months.clone(), days.clone()], - batch_len, - ) + .invoke_with_args(ScalarFunctionArgs { + args: vec![year.clone(), months.clone(), days.clone()], + number_rows: batch_len, + return_type: &DataType::Date32, + }) .expect("make_date should work on valid values"), ) }) @@ -106,10 +107,13 @@ fn criterion_benchmark(c: &mut Criterion) { let days = ColumnarValue::Array(day_arr); b.iter(|| { - // TODO use invoke_with_args black_box( make_date() - .invoke_batch(&[year.clone(), month.clone(), days.clone()], batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: vec![year.clone(), month.clone(), days.clone()], + number_rows: batch_len, + return_type: &DataType::Date32, + }) .expect("make_date should work on valid values"), ) }) @@ -121,10 +125,13 @@ fn criterion_benchmark(c: &mut Criterion) { let day = ColumnarValue::Scalar(ScalarValue::Int32(Some(26))); b.iter(|| { - // TODO use invoke_with_args black_box( make_date() - .invoke_batch(&[year.clone(), month.clone(), day.clone()], 1) + .invoke_with_args(ScalarFunctionArgs { + args: vec![year.clone(), month.clone(), day.clone()], + number_rows: 1, + return_type: &DataType::Date32, + }) .expect("make_date should work on valid values"), ) }) diff --git a/datafusion/functions/benches/nullif.rs b/datafusion/functions/benches/nullif.rs index e29fd03aa819..9096c976bf31 100644 --- a/datafusion/functions/benches/nullif.rs +++ b/datafusion/functions/benches/nullif.rs @@ -17,10 +17,11 @@ extern crate criterion; +use arrow::datatypes::DataType; use arrow::util::bench_util::create_string_array_with_len; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::core::nullif; use std::sync::Arc; @@ -34,8 +35,15 @@ fn criterion_benchmark(c: &mut Criterion) { ]; c.bench_function(&format!("nullif scalar array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(nullif.invoke_batch(&args, size).unwrap()) + black_box( + nullif + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); } diff --git a/datafusion/functions/benches/pad.rs b/datafusion/functions/benches/pad.rs index 6f267b350a35..f78a53fbee19 100644 --- a/datafusion/functions/benches/pad.rs +++ b/datafusion/functions/benches/pad.rs @@ -16,12 +16,12 @@ // under the License. use arrow::array::{ArrayRef, ArrowPrimitiveType, OffsetSizeTrait, PrimitiveArray}; -use arrow::datatypes::Int64Type; +use arrow::datatypes::{DataType, Int64Type}; use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode::{lpad, rpad}; use rand::distributions::{Distribution, Uniform}; use rand::Rng; @@ -102,24 +102,45 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { - // TODO use invoke_with_args - criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) + criterion::black_box( + lpad() + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { - // TODO use invoke_with_args - criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) + criterion::black_box( + lpad() + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::LargeUtf8, + }) + .unwrap(), + ) }) }); let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { - // TODO use invoke_with_args - criterion::black_box(lpad().invoke_batch(&args, size).unwrap()) + criterion::black_box( + lpad() + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); @@ -130,16 +151,30 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("utf8 type", size), |b| { b.iter(|| { - // TODO use invoke_with_args - criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) + criterion::black_box( + rpad() + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); let args = create_args::(size, 32, false); group.bench_function(BenchmarkId::new("largeutf8 type", size), |b| { b.iter(|| { - // TODO use invoke_with_args - criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) + criterion::black_box( + rpad() + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::LargeUtf8, + }) + .unwrap(), + ) }) }); @@ -147,8 +182,15 @@ fn criterion_benchmark(c: &mut Criterion) { let args = create_args::(size, 32, true); group.bench_function(BenchmarkId::new("stringview type", size), |b| { b.iter(|| { - // TODO use invoke_with_args - criterion::black_box(rpad().invoke_batch(&args, size).unwrap()) + criterion::black_box( + rpad() + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8, + }) + .unwrap(), + ) }) }); diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs index bc20e0ff11c1..78ebf23e02e0 100644 --- a/datafusion/functions/benches/random.rs +++ b/datafusion/functions/benches/random.rs @@ -17,8 +17,9 @@ extern crate criterion; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ScalarUDFImpl; +use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl}; use datafusion_functions::math::random::RandomFunc; fn criterion_benchmark(c: &mut Criterion) { @@ -29,8 +30,15 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("random_1M_rows_batch_8192", |b| { b.iter(|| { for _ in 0..iterations { - #[allow(deprecated)] // TODO: migrate to invoke_with_args - black_box(random_func.invoke_batch(&[], 8192).unwrap()); + black_box( + random_func + .invoke_with_args(ScalarFunctionArgs { + args: vec![], + number_rows: 8192, + return_type: &DataType::Float64, + }) + .unwrap(), + ); } }) }); @@ -40,8 +48,15 @@ fn criterion_benchmark(c: &mut Criterion) { c.bench_function("random_1M_rows_batch_128", |b| { b.iter(|| { for _ in 0..iterations_128 { - #[allow(deprecated)] // TODO: migrate to invoke_with_args - black_box(random_func.invoke_batch(&[], 128).unwrap()); + black_box( + random_func + .invoke_with_args(ScalarFunctionArgs { + args: vec![], + number_rows: 128, + return_type: &DataType::Float64, + }) + .unwrap(), + ); } }) }); diff --git a/datafusion/functions/benches/reverse.rs b/datafusion/functions/benches/reverse.rs index 889ca59e2a14..d61f8fb80517 100644 --- a/datafusion/functions/benches/reverse.rs +++ b/datafusion/functions/benches/reverse.rs @@ -18,7 +18,9 @@ extern crate criterion; mod helper; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ScalarFunctionArgs; use helper::gen_string_array; fn criterion_benchmark(c: &mut Criterion) { @@ -42,8 +44,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("reverse_StringArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(reverse.invoke_batch(&args_string_ascii, N_ROWS)) + black_box(reverse.invoke_with_args(ScalarFunctionArgs { + args: args_string_ascii.clone(), + number_rows: N_ROWS, + return_type: &DataType::Utf8, + })) }) }, ); @@ -58,8 +63,11 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(reverse.invoke_batch(&args_string_utf8, N_ROWS)) + black_box(reverse.invoke_with_args(ScalarFunctionArgs { + args: args_string_utf8.clone(), + number_rows: N_ROWS, + return_type: &DataType::Utf8, + })) }) }, ); @@ -76,8 +84,11 @@ fn criterion_benchmark(c: &mut Criterion) { &format!("reverse_StringViewArray_ascii_str_len_{}", str_len), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(reverse.invoke_batch(&args_string_view_ascii, N_ROWS)) + black_box(reverse.invoke_with_args(ScalarFunctionArgs { + args: args_string_view_ascii.clone(), + number_rows: N_ROWS, + return_type: &DataType::Utf8, + })) }) }, ); @@ -92,8 +103,11 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(reverse.invoke_batch(&args_string_view_utf8, N_ROWS)) + black_box(reverse.invoke_with_args(ScalarFunctionArgs { + args: args_string_view_utf8.clone(), + number_rows: N_ROWS, + return_type: &DataType::Utf8, + })) }) }, ); diff --git a/datafusion/functions/benches/signum.rs b/datafusion/functions/benches/signum.rs index a51b2ebe5ab7..01939fad5f34 100644 --- a/datafusion/functions/benches/signum.rs +++ b/datafusion/functions/benches/signum.rs @@ -17,12 +17,13 @@ extern crate criterion; +use arrow::datatypes::DataType; use arrow::{ datatypes::{Float32Type, Float64Type}, util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::signum; use std::sync::Arc; @@ -34,8 +35,15 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("signum f32 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(signum.invoke_batch(&f32_args, batch_len).unwrap()) + black_box( + signum + .invoke_with_args(ScalarFunctionArgs { + args: f32_args.clone(), + number_rows: batch_len, + return_type: &DataType::Float32, + }) + .unwrap(), + ) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); @@ -44,8 +52,15 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("signum f64 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(signum.invoke_batch(&f64_args, batch_len).unwrap()) + black_box( + signum + .invoke_with_args(ScalarFunctionArgs { + args: f64_args.clone(), + number_rows: batch_len, + return_type: &DataType::Float64, + }) + .unwrap(), + ) }) }); } diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 8b8e8dbc4279..8e3bff0f7e2d 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -18,11 +18,12 @@ extern crate criterion; use arrow::array::{ArrayRef, Int64Array, OffsetSizeTrait}; +use arrow::datatypes::DataType; use arrow::util::bench_util::{ create_string_array_with_len, create_string_view_array_with_len, }; use criterion::{black_box, criterion_group, criterion_main, Criterion, SamplingMode}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode; use std::sync::Arc; @@ -110,7 +111,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -121,7 +126,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -132,7 +141,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -155,7 +168,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -169,7 +186,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -183,7 +204,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -206,7 +231,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -220,7 +249,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); @@ -234,7 +267,11 @@ fn criterion_benchmark(c: &mut Criterion) { |b| { b.iter(|| { // TODO use invoke_with_args - black_box(substr.invoke_batch(&args, size)) + black_box(substr.invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: size, + return_type: &DataType::Utf8View, + })) }) }, ); diff --git a/datafusion/functions/benches/substr_index.rs b/datafusion/functions/benches/substr_index.rs index 1ea8e2606f0d..b1c1c3c34a95 100644 --- a/datafusion/functions/benches/substr_index.rs +++ b/datafusion/functions/benches/substr_index.rs @@ -20,12 +20,13 @@ extern crate criterion; use std::sync::Arc; use arrow::array::{ArrayRef, Int64Array, StringArray}; +use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use rand::distributions::{Alphanumeric, Uniform}; use rand::prelude::Distribution; use rand::Rng; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::unicode::substr_index; struct Filter { @@ -89,12 +90,15 @@ fn criterion_benchmark(c: &mut Criterion) { let delimiters = ColumnarValue::Array(Arc::new(delimiters) as ArrayRef); let counts = ColumnarValue::Array(Arc::new(counts) as ArrayRef); - let args = [strings, delimiters, counts]; + let args = vec![strings, delimiters, counts]; b.iter(|| { - #[allow(deprecated)] // TODO: invoke_with_args black_box( substr_index() - .invoke_batch(&args, batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: batch_len, + return_type: &DataType::Utf8, + }) .expect("substr_index should work on valid values"), ) }) diff --git a/datafusion/functions/benches/to_char.rs b/datafusion/functions/benches/to_char.rs index 72eae45b1e1b..6f20a20dc219 100644 --- a/datafusion/functions/benches/to_char.rs +++ b/datafusion/functions/benches/to_char.rs @@ -20,6 +20,7 @@ extern crate criterion; use std::sync::Arc; use arrow::array::{ArrayRef, Date32Array, StringArray}; +use arrow::datatypes::DataType; use chrono::prelude::*; use chrono::TimeDelta; use criterion::{black_box, criterion_group, criterion_main, Criterion}; @@ -29,7 +30,7 @@ use rand::Rng; use datafusion_common::ScalarValue; use datafusion_common::ScalarValue::TimestampNanosecond; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::to_char; fn random_date_in_range( @@ -88,10 +89,13 @@ fn criterion_benchmark(c: &mut Criterion) { let patterns = ColumnarValue::Array(Arc::new(patterns(&mut rng)) as ArrayRef); b.iter(|| { - // TODO use invoke_with_args black_box( to_char() - .invoke_batch(&[data.clone(), patterns.clone()], batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: vec![data.clone(), patterns.clone()], + number_rows: batch_len, + return_type: &DataType::Utf8, + }) .expect("to_char should work on valid values"), ) }) @@ -106,10 +110,13 @@ fn criterion_benchmark(c: &mut Criterion) { ColumnarValue::Scalar(ScalarValue::Utf8(Some("%Y-%m-%d".to_string()))); b.iter(|| { - // TODO use invoke_with_args black_box( to_char() - .invoke_batch(&[data.clone(), patterns.clone()], batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: vec![data.clone(), patterns.clone()], + number_rows: batch_len, + return_type: &DataType::Utf8, + }) .expect("to_char should work on valid values"), ) }) @@ -130,10 +137,13 @@ fn criterion_benchmark(c: &mut Criterion) { ))); b.iter(|| { - // TODO use invoke_with_args black_box( to_char() - .invoke_batch(&[data.clone(), pattern.clone()], 1) + .invoke_with_args(ScalarFunctionArgs { + args: vec![data.clone(), pattern.clone()], + number_rows: 1, + return_type: &DataType::Utf8, + }) .expect("to_char should work on valid values"), ) }) diff --git a/datafusion/functions/benches/to_timestamp.rs b/datafusion/functions/benches/to_timestamp.rs index 9f5f6661f998..aec56697691f 100644 --- a/datafusion/functions/benches/to_timestamp.rs +++ b/datafusion/functions/benches/to_timestamp.rs @@ -22,10 +22,10 @@ use std::sync::Arc; use arrow::array::builder::StringBuilder; use arrow::array::{Array, ArrayRef, StringArray}; use arrow::compute::cast; -use arrow::datatypes::DataType; +use arrow::datatypes::{DataType, TimeUnit}; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::datetime::to_timestamp; fn data() -> StringArray { @@ -109,16 +109,20 @@ fn data_with_formats() -> (StringArray, StringArray, StringArray, StringArray) { ) } fn criterion_benchmark(c: &mut Criterion) { + let return_type = &DataType::Timestamp(TimeUnit::Nanosecond, None); c.bench_function("to_timestamp_no_formats_utf8", |b| { let arr_data = data(); let batch_len = arr_data.len(); let string_array = ColumnarValue::Array(Arc::new(arr_data) as ArrayRef); b.iter(|| { - // TODO use invoke_with_args black_box( to_timestamp() - .invoke_batch(&[string_array.clone()], batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: vec![string_array.clone()], + number_rows: batch_len, + return_type, + }) .expect("to_timestamp should work on valid values"), ) }) @@ -130,10 +134,13 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { - // TODO use invoke_with_args black_box( to_timestamp() - .invoke_batch(&[string_array.clone()], batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: vec![string_array.clone()], + number_rows: batch_len, + return_type, + }) .expect("to_timestamp should work on valid values"), ) }) @@ -145,10 +152,13 @@ fn criterion_benchmark(c: &mut Criterion) { let string_array = ColumnarValue::Array(Arc::new(data) as ArrayRef); b.iter(|| { - // TODO use invoke_with_args black_box( to_timestamp() - .invoke_batch(&[string_array.clone()], batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: vec![string_array.clone()], + number_rows: batch_len, + return_type, + }) .expect("to_timestamp should work on valid values"), ) }) @@ -158,17 +168,20 @@ fn criterion_benchmark(c: &mut Criterion) { let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); - let args = [ + let args = vec![ ColumnarValue::Array(Arc::new(inputs) as ArrayRef), ColumnarValue::Array(Arc::new(format1) as ArrayRef), ColumnarValue::Array(Arc::new(format2) as ArrayRef), ColumnarValue::Array(Arc::new(format3) as ArrayRef), ]; b.iter(|| { - // TODO use invoke_with_args black_box( to_timestamp() - .invoke_batch(&args.clone(), batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: batch_len, + return_type, + }) .expect("to_timestamp should work on valid values"), ) }) @@ -178,7 +191,7 @@ fn criterion_benchmark(c: &mut Criterion) { let (inputs, format1, format2, format3) = data_with_formats(); let batch_len = inputs.len(); - let args = [ + let args = vec![ ColumnarValue::Array( Arc::new(cast(&inputs, &DataType::LargeUtf8).unwrap()) as ArrayRef ), @@ -193,10 +206,13 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { - // TODO use invoke_with_args black_box( to_timestamp() - .invoke_batch(&args.clone(), batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: batch_len, + return_type, + }) .expect("to_timestamp should work on valid values"), ) }) @@ -207,7 +223,7 @@ fn criterion_benchmark(c: &mut Criterion) { let batch_len = inputs.len(); - let args = [ + let args = vec![ ColumnarValue::Array( Arc::new(cast(&inputs, &DataType::Utf8View).unwrap()) as ArrayRef ), @@ -222,10 +238,13 @@ fn criterion_benchmark(c: &mut Criterion) { ), ]; b.iter(|| { - // TODO use invoke_with_args black_box( to_timestamp() - .invoke_batch(&args.clone(), batch_len) + .invoke_with_args(ScalarFunctionArgs { + args: args.clone(), + number_rows: batch_len, + return_type, + }) .expect("to_timestamp should work on valid values"), ) }) diff --git a/datafusion/functions/benches/trunc.rs b/datafusion/functions/benches/trunc.rs index 83d5b761e809..7fc93921d2e7 100644 --- a/datafusion/functions/benches/trunc.rs +++ b/datafusion/functions/benches/trunc.rs @@ -22,9 +22,10 @@ use arrow::{ util::bench_util::create_primitive_array, }; use criterion::{black_box, criterion_group, criterion_main, Criterion}; -use datafusion_expr::ColumnarValue; +use datafusion_expr::{ColumnarValue, ScalarFunctionArgs}; use datafusion_functions::math::trunc; +use arrow::datatypes::DataType; use std::sync::Arc; fn criterion_benchmark(c: &mut Criterion) { @@ -34,16 +35,30 @@ fn criterion_benchmark(c: &mut Criterion) { let f32_args = vec![ColumnarValue::Array(f32_array)]; c.bench_function(&format!("trunc f32 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(trunc.invoke_batch(&f32_args, size).unwrap()) + black_box( + trunc + .invoke_with_args(ScalarFunctionArgs { + args: f32_args.clone(), + number_rows: size, + return_type: &DataType::Float32, + }) + .unwrap(), + ) }) }); let f64_array = Arc::new(create_primitive_array::(size, 0.2)); let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("trunc f64 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args - black_box(trunc.invoke_batch(&f64_args, size).unwrap()) + black_box( + trunc + .invoke_with_args(ScalarFunctionArgs { + args: f64_args.clone(), + number_rows: size, + return_type: &DataType::Float64, + }) + .unwrap(), + ) }) }); } From 80e30b1512f94fd96b4ae397a3c1662416877743 Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Fri, 7 Mar 2025 15:33:52 +0100 Subject: [PATCH 5/7] fix two more implementations - now all that's left is in udf.rs --- .../user_defined/user_defined_scalar_functions.rs | 8 ++------ .../optimizer/src/eliminate_group_by_constant.rs | 10 +++------- 2 files changed, 5 insertions(+), 13 deletions(-) diff --git a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs index 43e7ec9e45e4..57bac9c6dfca 100644 --- a/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs +++ b/datafusion/core/tests/user_defined/user_defined_scalar_functions.rs @@ -1228,12 +1228,8 @@ impl ScalarUDFImpl for MyRegexUdf { } } - fn invoke_batch( - &self, - args: &[ColumnarValue], - _number_rows: usize, - ) -> Result { - match args { + fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result { + match args.args.as_slice() { [ColumnarValue::Scalar(ScalarValue::Utf8(value))] => { Ok(ColumnarValue::Scalar(ScalarValue::Boolean( self.matches(value.as_deref()), diff --git a/datafusion/optimizer/src/eliminate_group_by_constant.rs b/datafusion/optimizer/src/eliminate_group_by_constant.rs index 1213c8ffb368..7e252d6dcea0 100644 --- a/datafusion/optimizer/src/eliminate_group_by_constant.rs +++ b/datafusion/optimizer/src/eliminate_group_by_constant.rs @@ -121,8 +121,8 @@ mod tests { use datafusion_common::Result; use datafusion_expr::expr::ScalarFunction; use datafusion_expr::{ - col, lit, ColumnarValue, LogicalPlanBuilder, ScalarUDF, ScalarUDFImpl, Signature, - TypeSignature, + col, lit, ColumnarValue, LogicalPlanBuilder, ScalarFunctionArgs, ScalarUDF, + ScalarUDFImpl, Signature, TypeSignature, }; use datafusion_functions_aggregate::expr_fn::count; @@ -155,11 +155,7 @@ mod tests { fn return_type(&self, _args: &[DataType]) -> Result { Ok(DataType::Int32) } - fn invoke_batch( - &self, - _args: &[ColumnarValue], - _number_rows: usize, - ) -> Result { + fn invoke_with_args(&self, _args: ScalarFunctionArgs) -> Result { unimplemented!() } } From e1e53e75e1fad123ac520035891505bdfc1552bb Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Fri, 7 Mar 2025 15:59:08 +0100 Subject: [PATCH 6/7] fix clippy --- datafusion/functions/benches/date_trunc.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/functions/benches/date_trunc.rs b/datafusion/functions/benches/date_trunc.rs index 0503064de011..b7efe7cc8d0a 100644 --- a/datafusion/functions/benches/date_trunc.rs +++ b/datafusion/functions/benches/date_trunc.rs @@ -20,7 +20,6 @@ extern crate criterion; use std::sync::Arc; use arrow::array::{Array, ArrayRef, TimestampSecondArray}; -use arrow::datatypes::DataType; use criterion::{black_box, criterion_group, criterion_main, Criterion}; use datafusion_common::ScalarValue; use rand::rngs::ThreadRng; From a0cacae3895a5d8bdf2e710b80b9551712f9c26b Mon Sep 17 00:00:00 2001 From: Arttu Voutilainen Date: Fri, 7 Mar 2025 16:30:02 +0100 Subject: [PATCH 7/7] cleanup some leftover comments --- datafusion/functions/benches/iszero.rs | 1 - datafusion/functions/benches/substr.rs | 9 --------- 2 files changed, 10 deletions(-) diff --git a/datafusion/functions/benches/iszero.rs b/datafusion/functions/benches/iszero.rs index 1df2026ea4b5..9e5f6a84804b 100644 --- a/datafusion/functions/benches/iszero.rs +++ b/datafusion/functions/benches/iszero.rs @@ -51,7 +51,6 @@ fn criterion_benchmark(c: &mut Criterion) { let f64_args = vec![ColumnarValue::Array(f64_array)]; c.bench_function(&format!("iszero f64 array: {}", size), |b| { b.iter(|| { - // TODO use invoke_with_args black_box( iszero .invoke_with_args(ScalarFunctionArgs { diff --git a/datafusion/functions/benches/substr.rs b/datafusion/functions/benches/substr.rs index 8e3bff0f7e2d..80ab70ef71b0 100644 --- a/datafusion/functions/benches/substr.rs +++ b/datafusion/functions/benches/substr.rs @@ -110,7 +110,6 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_string_view [size={}, strlen={}]", size, len), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -125,7 +124,6 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_string [size={}, strlen={}]", size, len), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -140,7 +138,6 @@ fn criterion_benchmark(c: &mut Criterion) { format!("substr_large_string [size={}, strlen={}]", size, len), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -167,7 +164,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -185,7 +181,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -203,7 +198,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -230,7 +224,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -248,7 +241,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size, @@ -266,7 +258,6 @@ fn criterion_benchmark(c: &mut Criterion) { ), |b| { b.iter(|| { - // TODO use invoke_with_args black_box(substr.invoke_with_args(ScalarFunctionArgs { args: args.clone(), number_rows: size,