diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 2b3f80fc930b..9ef020b772f0 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -156,3 +156,8 @@ required-features = ["unicode_expressions"] harness = false name = "repeat" required-features = ["string_expressions"] + +[[bench]] +harness = false +name = "random" +required-features = ["math_expressions"] diff --git a/datafusion/functions/benches/random.rs b/datafusion/functions/benches/random.rs new file mode 100644 index 000000000000..a721836bb68c --- /dev/null +++ b/datafusion/functions/benches/random.rs @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +extern crate criterion; + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use datafusion_expr::ScalarUDFImpl; +use datafusion_functions::math::random::RandomFunc; + +fn criterion_benchmark(c: &mut Criterion) { + let random_func = RandomFunc::new(); + + // Benchmark generating approximately 1M rows in batches of 8192 + let iterations = 1_000_000 / 8192; // Number of batches of 8192 needed to reach approximately 1M rows (integer division rounds down) + c.bench_function("random_1M_rows_batch_8192", |b| { + b.iter(|| { + for _ in 0..iterations { + black_box(random_func.invoke_no_args(8192).unwrap()); + } + }) + }); + + // Benchmark generating approximately 1M rows in batches of 128 + let iterations_128 = 1_000_000 / 128; // Number of batches of 128 needed to reach approximately 1M rows (integer division rounds down) + c.bench_function("random_1M_rows_batch_128", |b| { + b.iter(|| { + for _ in 0..iterations_128 { + black_box(random_func.invoke_no_args(128).unwrap()); + } + }) + }); +} + +criterion_group!(benches, criterion_benchmark); +criterion_main!(benches); diff --git a/datafusion/functions/src/math/random.rs b/datafusion/functions/src/math/random.rs index b5eece212a3b..20591a02a930 100644 --- a/datafusion/functions/src/math/random.rs +++ b/datafusion/functions/src/math/random.rs @@ -69,8 +69,11 @@ impl ScalarUDFImpl for RandomFunc { fn invoke_no_args(&self, num_rows: usize) -> Result { let mut rng = thread_rng(); - let values = std::iter::repeat_with(|| rng.gen_range(0.0..1.0)).take(num_rows); - let array = Float64Array::from_iter_values(values); + let mut values = vec![0.0; num_rows]; + // Equivalent to setting each element with rng.gen_range(0.0..1.0), but more efficient + rng.fill(&mut values[..]); + let array = Float64Array::from(values); + Ok(ColumnarValue::Array(Arc::new(array))) } }