Merge branch 'next' into krushimir/tests

# Conflicts: # src/merkle/smt/full/mod.rs
0xPolygonMiden · Feb 26, 2025 · 7346c3f · 7346c3f
2 parents af2babb + 1e87cd6
commit 7346c3f
Show file tree

Hide file tree

Showing 5 changed files with 77 additions and 37 deletions.
diff --git a/benches/README.md b/benches/README.md
@@ -1,4 +1,6 @@
-# Miden VM Hash Functions
+# Benchmarks
+
+## Hash Functions
 In the Miden VM, we make use of different hash functions. Some of these are "traditional" hash functions, like `BLAKE3`, which are optimized for out-of-STARK performance, while others are algebraic hash functions, like `Rescue Prime`, and are more optimized for a better performance inside the STARK. In what follows, we benchmark several such hash functions and compare against other constructions that are used by other proving systems. More precisely, we benchmark:
 
 * **BLAKE3** as specified [here](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf) and implemented [here](https://github.com/BLAKE3-team/BLAKE3) (with a wrapper exposed via this crate).
@@ -8,13 +10,10 @@ In the Miden VM, we make use of different hash functions. Some of these are "tra
 * **Rescue Prime Optimized (RPO)** as specified [here](https://eprint.iacr.org/2022/1577) and implemented in this crate.
 * **Rescue Prime Extended (RPX)** a variant of the [xHash](https://eprint.iacr.org/2023/1045) hash function as implemented in this crate.
 
-## Comparison and Instructions
-
-### Comparison
 We benchmark the above hash functions using two scenarios. The first is a 2-to-1 $(a,b)\mapsto h(a,b)$ hashing where both $a$, $b$ and $h(a,b)$ are the digests corresponding to each of the hash functions.
 The second scenario is that of sequential hashing where we take a sequence of length $100$ field elements and hash these to produce a single digest. The digests are $4$ field elements in a prime field with modulus $2^{64} - 2^{32} + 1$ (i.e., 32 bytes) for Poseidon, Rescue Prime and RPO, and an array `[u8; 32]` for SHA3 and BLAKE3.
 
-#### Scenario 1: 2-to-1 hashing `h(a,b)`
+### Scenario 1: 2-to-1 hashing `h(a,b)`
 
 | Function            | BLAKE3 | SHA3    | Poseidon  | Rp64_256  | RPO_256 | RPX_256 |
 | ------------------- | ------ | ------- | --------- | --------- | ------- | ------- |
@@ -27,7 +26,7 @@ The second scenario is that of sequential hashing where we take a sequence of le
 | Intel Core i5-8279U | 68 ns  | 536 ns  |  2.0 µs   |  13.6 µs  | 8.5 µs  | 4.4 µs  |
 | Intel Xeon 8375C    | 67 ns  |         |           |           | 8.2 µs  |         |
 
-#### Scenario 2: Sequential hashing of 100 elements `h([a_0,...,a_99])`
+### Scenario 2: Sequential hashing of 100 elements `h([a_0,...,a_99])`
 
 | Function            | BLAKE3 | SHA3    | Poseidon  | Rp64_256  | RPO_256 | RPX_256 |
 | ------------------- | -------| ------- | --------- | --------- | ------- | ------- |
@@ -44,7 +43,42 @@ Notes:
 - On Graviton 3 and 4, RPO256 and RPX256 are run with SVE acceleration enabled.
 - On AMD EPYC 9R14, RPO256 and RPX256 are run with AVX2 acceleration enabled.
 
-### Instructions
+## Sparse Merkle Tree
+We build cryptographic data structures incorporating these hash functions.
+What follows are benchmarks of operations on sparse Merkle trees (SMTs) which use the above `RPO_256` hash function.
+We perform a batched modification of 1,000 values in a tree with 1,000,000 leaves (with the `smt_hashmaps` feature to use the `hashbrown` crate).
+
+### Scenario 1: SMT Construction (1M pairs)
+
+| Hardware          | Sequential | Concurrent | Improvement |
+| ----------------- | ---------- | ---------- | ----------- |
+| AMD Ryzen 9 7950X | 196 sec    | 15 sec     |  13x        |
+| Apple M1 Air      | 352 sec    | 57 sec     | 6.2x        |
+| Apple M1 Pro      | 351 sec    | 37 sec     | 9.5x        |
+| Apple M4 Max      | 195 sec    | 15 sec     |  13x        |
+
+### Scenario 2: SMT Batched Insertion (1k pairs, 1M leaves)
+
+| Function          | Sequential | Concurrent | Improvement |
+| ----------------- | ---------- | ---------- | ----------- |
+| AMD Ryzen 9 7950X | 201 ms     | 19 ms      |  11x        |
+| Apple M1 Air      | 729 ms     | 406 ms     | 1.8x        |
+| Apple M1 Pro      | 623 ms     | 86 ms      | 7.2x        |
+| Apple M4 Max      | 212 ms     | 28 ms      | 7.6x        |
+
+### Scenario 3: SMT Batched Update (1k pairs, 1M leaves)
+
+| Function          | Sequential | Concurrent | Improvement |
+| ----------------- | ---------- | ---------- | ----------- |
+| AMD Ryzen 9 7950X | 202 ms     | 19 ms      |  11x        |
+| Apple M1 Air      | 691 ms     | 307 ms     | 2.3x        |
+| Apple M1 Pro      | 419 ms     | 56 ms      | 7.5x        |
+| Apple M4 Max      | 218 ms     | 24 ms      | 9.1x        |
+
+Notes:
+- On AMD Ryzen 9 7950X, benchmarks are run with AVX2 acceleration enabled.
+
+## Instructions
 Before you can run the benchmarks, you'll need to make sure you have Rust [installed](https://www.rust-lang.org/tools/install). After that, to run the benchmarks for RPO and BLAKE3, clone the current repository, and from the root directory of the repo run the following:
 
  ```
@@ -56,3 +90,18 @@ To run the benchmarks for Rescue Prime, Poseidon and SHA3, clone the following [
 ```
 cargo bench hash
 ```
+
+To run the benchmarks for SMT operations, run the binary target with the `executable` feature:
+
+```
+cargo run --features=executable
+```
+
+The `concurrent` feature enables the concurrent benchmark, and is enabled by default. To run a sequential benchmark,
+disable the crate's default features:
+
+```
+cargo run --no-default-features --features=executable,smt_hashmaps
+```
+
+The benchmark parameters may also be customized with the `-s`/`--size`, `-i`/`--insertions`, and `-u`/`--updates` options.
diff --git a/src/merkle/smt/full/concurrent/mod.rs b/src/merkle/smt/full/concurrent/mod.rs
@@ -261,6 +261,26 @@ impl Smt {
         })
     }
 
+    /// Constructs a single leaf from an arbitrary amount of key-value pairs.
+    /// Those pairs must all have the same leaf index.
+    fn pairs_to_leaf(mut pairs: Vec<(RpoDigest, Word)>) -> SmtLeaf {
+        assert!(!pairs.is_empty());
+
+        if pairs.len() > 1 {
+            pairs.sort_by(|(key_1, _), (key_2, _)| leaf::cmp_keys(*key_1, *key_2));
+            SmtLeaf::new_multiple(pairs).unwrap()
+        } else {
+            let (key, value) = pairs.pop().unwrap();
+            // TODO: should we ever be constructing empty leaves from pairs?
+            if value == Self::EMPTY_VALUE {
+                let index = Self::key_to_leaf_index(&key);
+                SmtLeaf::new_empty(index)
+            } else {
+                SmtLeaf::new_single(key, value)
+            }
+        }
+    }
+
     /// Computes leaves from a set of key-value pairs and current leaf values.
     /// Derived from `sorted_pairs_to_leaves`
     fn sorted_pairs_to_mutated_subtree_leaves(

diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs
@@ -430,24 +430,6 @@ impl SparseMerkleTree<SMT_DEPTH> for Smt {
     fn path_and_leaf_to_opening(path: MerklePath, leaf: SmtLeaf) -> SmtProof {
         SmtProof::new_unchecked(path, leaf)
     }
-
-    fn pairs_to_leaf(mut pairs: Vec<(RpoDigest, Word)>) -> SmtLeaf {
-        assert!(!pairs.is_empty());
-
-        if pairs.len() > 1 {
-            pairs.sort_by(|(key_1, _), (key_2, _)| leaf::cmp_keys(*key_1, *key_2));
-            SmtLeaf::new_multiple(pairs).unwrap()
-        } else {
-            let (key, value) = pairs.pop().unwrap();
-            // TODO: should we ever be constructing empty leaves from pairs?
-            if value == Self::EMPTY_VALUE {
-                let index = Self::key_to_leaf_index(&key);
-                SmtLeaf::new_empty(index)
-            } else {
-                SmtLeaf::new_single(key, value)
-            }
-        }
-    }
 }
 
 impl Default for Smt {

diff --git a/src/merkle/smt/mod.rs b/src/merkle/smt/mod.rs
@@ -451,10 +451,6 @@ pub(crate) trait SparseMerkleTree<const DEPTH: u8> {
     /// Maps a key to a leaf index
     fn key_to_leaf_index(key: &Self::Key) -> LeafIndex<DEPTH>;
 
-    /// Constructs a single leaf from an arbitrary amount of key-value pairs.
-    /// Those pairs must all have the same leaf index.
-    fn pairs_to_leaf(pairs: Vec<(Self::Key, Self::Value)>) -> Self::Leaf;
-
     /// Maps a (MerklePath, Self::Leaf) to an opening.
     ///
     /// The length `path` is guaranteed to be equal to `DEPTH`

diff --git a/src/merkle/smt/simple/mod.rs b/src/merkle/smt/simple/mod.rs
@@ -1,4 +1,4 @@
-use alloc::{collections::BTreeSet, vec::Vec};
+use alloc::collections::BTreeSet;
 
 use super::{
     super::ValuePath, EmptySubtreeRoots, InnerNode, InnerNodeInfo, InnerNodes, LeafIndex,
@@ -415,11 +415,4 @@ impl<const DEPTH: u8> SparseMerkleTree<DEPTH> for SimpleSmt<DEPTH> {
     fn path_and_leaf_to_opening(path: MerklePath, leaf: Word) -> ValuePath {
         (path, leaf).into()
     }
-
-    fn pairs_to_leaf(mut pairs: Vec<(LeafIndex<DEPTH>, Word)>) -> Word {
-        // SimpleSmt can't have more than one value per key.
-        assert_eq!(pairs.len(), 1);
-        let (_key, value) = pairs.pop().unwrap();
-        value
-    }
 }