From f46ce1eeedd0777cf1f9bcacef16c56325e95150 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 20 Feb 2025 15:54:09 +0100 Subject: [PATCH 1/3] Synchronize criterion version across workspace Use same version everywhere. This means update for some crates. --- Cargo.toml | 1 + datafusion/core/Cargo.toml | 2 +- datafusion/functions-aggregate-common/Cargo.toml | 2 +- datafusion/functions-aggregate/Cargo.toml | 2 +- datafusion/functions-nested/Cargo.toml | 2 +- datafusion/functions/Cargo.toml | 2 +- datafusion/physical-expr/Cargo.toml | 2 +- datafusion/physical-plan/Cargo.toml | 2 +- 8 files changed, 8 insertions(+), 7 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index ccf3f02a2fde..566287df265f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -95,6 +95,7 @@ async-trait = "0.1.73" bigdecimal = "0.4.7" bytes = "1.10" chrono = { version = "0.4.38", default-features = false } +criterion = "0.5.1" ctor = "0.2.9" dashmap = "6.0.1" datafusion = { path = "datafusion/core", version = "45.0.0", default-features = false } diff --git a/datafusion/core/Cargo.toml b/datafusion/core/Cargo.toml index e968967a2e75..87a37248a31d 100644 --- a/datafusion/core/Cargo.toml +++ b/datafusion/core/Cargo.toml @@ -131,7 +131,7 @@ zstd = { version = "0.13", optional = true, default-features = false } [dev-dependencies] async-trait = { workspace = true } -criterion = { version = "0.5", features = ["async_tokio"] } +criterion = { workspace = true, features = ["async_tokio"] } ctor = { workspace = true } dashmap = "6.1.0" datafusion-doc = { workspace = true } diff --git a/datafusion/functions-aggregate-common/Cargo.toml b/datafusion/functions-aggregate-common/Cargo.toml index 72c8a58a7e45..cf065ca1cb17 100644 --- a/datafusion/functions-aggregate-common/Cargo.toml +++ b/datafusion/functions-aggregate-common/Cargo.toml @@ -44,7 +44,7 @@ datafusion-expr-common = { workspace = true } datafusion-physical-expr-common = { workspace = true } [dev-dependencies] -criterion = "0.5" +criterion = { 
workspace = true } rand = { workspace = true } [[bench]] diff --git a/datafusion/functions-aggregate/Cargo.toml b/datafusion/functions-aggregate/Cargo.toml index 38052835f197..ec6e6b633bb8 100644 --- a/datafusion/functions-aggregate/Cargo.toml +++ b/datafusion/functions-aggregate/Cargo.toml @@ -54,7 +54,7 @@ paste = "1.0.14" [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } -criterion = "0.5" +criterion = { workspace = true } rand = { workspace = true } [[bench]] diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index b33b415a868d..609cda0c16b5 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -53,7 +53,7 @@ log = { workspace = true } paste = "1.0.14" [dev-dependencies] -criterion = { version = "0.5", features = ["async_tokio"] } +criterion = { workspace = true, features = ["async_tokio"] } rand = "0.8.5" [[bench]] diff --git a/datafusion/functions/Cargo.toml b/datafusion/functions/Cargo.toml index 788bc67d970c..b44127d6a1b7 100644 --- a/datafusion/functions/Cargo.toml +++ b/datafusion/functions/Cargo.toml @@ -89,7 +89,7 @@ uuid = { version = "1.13", features = ["v4"], optional = true } [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } -criterion = "0.5" +criterion = { workspace = true } rand = { workspace = true } tokio = { workspace = true, features = ["macros", "rt", "sync"] } diff --git a/datafusion/physical-expr/Cargo.toml b/datafusion/physical-expr/Cargo.toml index a3321f493388..72baa0db00a2 100644 --- a/datafusion/physical-expr/Cargo.toml +++ b/datafusion/physical-expr/Cargo.toml @@ -55,7 +55,7 @@ petgraph = "0.7.1" [dev-dependencies] arrow = { workspace = true, features = ["test_utils"] } -criterion = "0.5" +criterion = { workspace = true } datafusion-functions = { workspace = true } rand = { workspace = true } rstest = { workspace = true } diff --git a/datafusion/physical-plan/Cargo.toml 
b/datafusion/physical-plan/Cargo.toml index 676663114702..ae0ea60e1a3e 100644 --- a/datafusion/physical-plan/Cargo.toml +++ b/datafusion/physical-plan/Cargo.toml @@ -65,7 +65,7 @@ pin-project-lite = "^0.2.7" tokio = { workspace = true } [dev-dependencies] -criterion = { version = "0.5", features = ["async_futures"] } +criterion = { workspace = true, features = ["async_futures"] } datafusion-functions-aggregate = { workspace = true } datafusion-functions-window = { workspace = true } rand = { workspace = true } From e7af47db7788a56cc014a39668cdf41a486f1500 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 20 Feb 2025 15:55:47 +0100 Subject: [PATCH 2/3] Synchronize and update rand version across workspace Use same version everywhere and update. --- Cargo.toml | 2 +- datafusion/functions-nested/Cargo.toml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 566287df265f..b6098a636954 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -145,7 +145,7 @@ pbjson-types = "0.7" # Should match arrow-flight's version of prost. prost = "0.13.1" prost-derive = "0.13.1" -rand = "0.8" +rand = "0.8.5" recursive = "0.1.1" regex = "1.8" rstest = "0.24.0" diff --git a/datafusion/functions-nested/Cargo.toml b/datafusion/functions-nested/Cargo.toml index 609cda0c16b5..9a7b1f460ef5 100644 --- a/datafusion/functions-nested/Cargo.toml +++ b/datafusion/functions-nested/Cargo.toml @@ -54,7 +54,7 @@ paste = "1.0.14" [dev-dependencies] criterion = { workspace = true, features = ["async_tokio"] } -rand = "0.8.5" +rand = { workspace = true } [[bench]] harness = false From e23a4af44d2e82ec4a0d8aefadae7c7d9069c043 Mon Sep 17 00:00:00 2001 From: Piotr Findeisen Date: Thu, 20 Feb 2025 18:57:02 +0100 Subject: [PATCH 3/3] Avoid linear search for character in bench helper Get the char in O(1) instead of O(n). 
--- datafusion/functions/benches/helper.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datafusion/functions/benches/helper.rs b/datafusion/functions/benches/helper.rs index c7c405bc4696..0dbb4b0027d4 100644 --- a/datafusion/functions/benches/helper.rs +++ b/datafusion/functions/benches/helper.rs @@ -35,7 +35,7 @@ pub fn gen_string_array( let rng_ref = &mut rng; let corpus = "DataFusionДатаФусион数据融合📊🔥"; // includes utf8 encoding with 1~4 bytes - let corpus_char_count = corpus.chars().count(); + let corpus = corpus.chars().collect::<Vec<_>>(); let mut output_string_vec: Vec<Option<String>> = Vec::with_capacity(n_rows); for _ in 0..n_rows { @@ -46,8 +46,7 @@ pub fn gen_string_array( // Generate random UTF8 string let mut generated_string = String::with_capacity(str_len_chars); for _ in 0..str_len_chars { - let idx = rng_ref.gen_range(0..corpus_char_count); - let char = corpus.chars().nth(idx).unwrap(); + let char = corpus[rng_ref.gen_range(0..corpus.len())]; generated_string.push(char); } output_string_vec.push(Some(generated_string));