feat: implement concurrent Smt construction (#341)

* merkle: add parent() helper function on NodeIndex
* smt: add pairs_to_leaf() to trait
* smt: add sorted_pairs_to_leaves() and test for it
* smt: implement single subtree-8 hashing, w/ benchmarks & tests

This will be composed into depth-8-subtree-based computation of entire
sparse Merkle trees.
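
As a rough illustration of the idea only (not the crate's internal implementation), hashing one level of a depth-8 subtree from its sorted, deduplicated bottom row might look like the sketch below. It reuses the public `SubtreeLeaf { col, hash }` type and `SMT_DEPTH` constant exercised by the benchmarks, plus the `EmptySubtreeRoots` helper for missing siblings; the helper name `hash_one_level` is hypothetical.

```rust
use miden_crypto::{
    hash::rpo::{Rpo256, RpoDigest},
    merkle::{EmptySubtreeRoots, SubtreeLeaf, SMT_DEPTH},
};

/// Merges the sorted nodes at `depth` into their parents at `depth - 1`,
/// substituting the precomputed empty-subtree hash for any missing sibling.
/// Repeating this eight times collapses one depth-8 subtree to its root.
fn hash_one_level(row: &[SubtreeLeaf], depth: u8) -> Vec<SubtreeLeaf> {
    let empty = *EmptySubtreeRoots::entry(SMT_DEPTH, depth);
    let mut parents = Vec::with_capacity(row.len());
    let mut i = 0;
    while i < row.len() {
        let node = &row[i];
        let (pair, step) = if node.col % 2 == 0 {
            // `node` is a left child; its right sibling, if present, is next in the row.
            match row.get(i + 1) {
                Some(sib) if sib.col == node.col + 1 => ([node.hash, sib.hash], 2),
                _ => ([node.hash, empty], 1),
            }
        } else {
            // `node` is a right child whose left sibling is empty.
            ([empty, node.hash], 1)
        };
        parents.push(SubtreeLeaf { col: node.col / 2, hash: Rpo256::merge(&pair) });
        i += step;
    }
    parents
}
```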

* merkle: add a benchmark for constructing 256-balanced trees

This is intended for comparison with the benchmarks from the previous
commit. This benchmark represents the theoretical perfect-efficiency
performance we could possibly (but impractically) get for computing
depth-8 sparse Merkle subtrees.

* smt: test that SparseMerkleTree::build_subtree() is composable

* smt: test that subtree logic can correctly construct an entire tree

This commit ensures that `SparseMerkleTree::build_subtree()` can
correctly compose into building an entire sparse Merkle tree, without
yet getting into the potential complications that concurrency introduces.
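
For intuition, the composition the test exercises can be sketched as repeatedly collapsing the sorted bottom row by eight levels until only the root remains. This is illustrative only: `build_one_subtree` is a hypothetical stand-in for the crate's `build_subtree()`, and `root_from_subtrees` is not a crate function.

```rust
use miden_crypto::{hash::rpo::RpoDigest, merkle::SubtreeLeaf};

/// Collapses a sorted, non-empty bottom row into the tree root, eight levels per
/// round. `build_one_subtree` hashes one depth-8 subtree and returns its root as
/// a leaf of the next round. Assumes `depth` is a multiple of 8 (e.g. 64).
fn root_from_subtrees<F>(
    mut leaves: Vec<SubtreeLeaf>,
    mut depth: u8,
    mut build_one_subtree: F,
) -> RpoDigest
where
    F: FnMut(Vec<SubtreeLeaf>, u8) -> SubtreeLeaf,
{
    while depth > 0 {
        let mut next_row = Vec::new();
        let mut chunk: Vec<SubtreeLeaf> = Vec::new();
        for leaf in leaves {
            // Leaves sharing the same `col / 256` prefix belong to the same depth-8
            // subtree, so flush the current chunk whenever the prefix changes.
            if chunk.first().is_some_and(|first| first.col / 256 != leaf.col / 256) {
                next_row.push(build_one_subtree(core::mem::take(&mut chunk), depth));
            }
            chunk.push(leaf);
        }
        if !chunk.is_empty() {
            next_row.push(build_one_subtree(chunk, depth));
        }
        leaves = next_row;
        depth -= 8;
    }
    leaves[0].hash
}
```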

* smt: implement test for basic parallelized subtree computation w/ rayon

Building on the previous commit, this commit implements a test proving
that `SparseMerkleTree::build_subtree()` can be composed with itself not
just concurrently, but in parallel, without issue.
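
A hedged sketch of how one round of that parallel composition might look with rayon (again `build_one_subtree` is a hypothetical stand-in, and the crate's actual data flow may differ):

```rust
use miden_crypto::merkle::SubtreeLeaf;
use rayon::prelude::*;

/// Builds every depth-8 subtree of one round in parallel. Each chunk holds the
/// sorted leaves of a single subtree; the chunks are independent, so they can be
/// hashed on separate threads and the resulting roots collected in order.
fn build_round_in_parallel(
    chunks: Vec<Vec<SubtreeLeaf>>,
    depth: u8,
    build_one_subtree: fn(Vec<SubtreeLeaf>, u8) -> SubtreeLeaf,
) -> Vec<SubtreeLeaf> {
    chunks
        .into_par_iter()
        .map(|chunk| build_one_subtree(chunk, depth))
        .collect()
}
```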

* smt: add from_raw_parts() to trait interface

This commit adds a new required method to the SparseMerkleTree trait,
to allow generic construction from pre-computed parts.

This will be used to add a generic version of `with_entries()` in a
later commit.
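
The exact signature lives in the trait itself; as a rough, hypothetical illustration of the shape such a constructor takes (the trait name and associated types below are assumptions, not the crate's definitions):

```rust
use miden_crypto::hash::rpo::RpoDigest;

/// Hypothetical shape of a "construct from pre-computed parts" method on a
/// sparse-Merkle-tree trait: the caller supplies inner nodes, leaves, and the
/// already-known root, and no hashing is performed.
trait FromRawPartsSketch {
    type InnerNodes;
    type Leaves;

    fn from_raw_parts(inner_nodes: Self::InnerNodes, leaves: Self::Leaves, root: RpoDigest) -> Self
    where
        Self: Sized;
}
```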

* smt: add parallel constructors to Smt and SimpleSmt

This is what the previous few commits have been leading up to:
SparseMerkleTree now has a function to construct the tree from existing
data in parallel. This is significantly faster than the single-threaded
equivalent. Benchmarks incoming!
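
A small usage sketch of constructing an `Smt` from key/value pairs (the entries here are arbitrary; how this call dispatches to the new parallel path is not shown in this excerpt):

```rust
use miden_crypto::{hash::rpo::RpoDigest, merkle::Smt, Felt, Word, ONE};

fn main() {
    // Arbitrary distinct keys paired with non-empty values.
    let entries: Vec<(RpoDigest, Word)> = (0..1024u64)
        .map(|i| {
            let key = RpoDigest::new([ONE, ONE, Felt::new(i), Felt::new(i)]);
            let value: Word = [ONE, ONE, ONE, Felt::new(i)];
            (key, value)
        })
        .collect();

    // Construction returns an error if the entries contain multiple values for the same key.
    let smt = Smt::with_entries(entries).expect("entries contain no duplicate keys");
    println!("root = {:?}", smt.root());
}
```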

---------

Co-authored-by: krushimir <krushimir@reilabs.co>
Co-authored-by: krushimir <kresimir.grofelnik@reilabs.io>
Author:    Qyriad
Date:      2024-12-04 11:54:41 -07:00
Committed: GitHub
Parent:    1867f842d3
Commit:    b151773b0d

14 changed files with 1194 additions and 6 deletions

benches/merkle.rs (new file, 66 lines)

@@ -0,0 +1,66 @@
//! Benchmark for building a [`miden_crypto::merkle::MerkleTree`]. This is intended to be compared
//! with the results from `benches/smt-subtree.rs`, as building a fully balanced Merkle tree with
//! 256 leaves should indicate the *absolute best* performance we could *possibly* get for building
//! a depth-8 sparse Merkle subtree, though practically speaking building a fully balanced Merkle
//! tree will perform better than the sparse version. At the time of this writing (2024/11/24), this
//! benchmark is about four times more efficient than the equivalent benchmark in
//! `benches/smt-subtree.rs`.
use std::{hint, mem, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use miden_crypto::{merkle::MerkleTree, Felt, Word, ONE};
use rand_utils::prng_array;

fn balanced_merkle_even(c: &mut Criterion) {
    c.bench_function("balanced-merkle-even", |b| {
        b.iter_batched(
            || {
                let entries: Vec<Word> =
                    (0..256).map(|i| [Felt::new(i), ONE, ONE, Felt::new(i)]).collect();
                assert_eq!(entries.len(), 256);
                entries
            },
            |leaves| {
                let tree = MerkleTree::new(hint::black_box(leaves)).unwrap();
                assert_eq!(tree.depth(), 8);
            },
            BatchSize::SmallInput,
        );
    });
}

fn balanced_merkle_rand(c: &mut Criterion) {
    let mut seed = [0u8; 32];
    c.bench_function("balanced-merkle-rand", |b| {
        b.iter_batched(
            || {
                let entries: Vec<Word> = (0..256).map(|_| generate_word(&mut seed)).collect();
                assert_eq!(entries.len(), 256);
                entries
            },
            |leaves| {
                let tree = MerkleTree::new(hint::black_box(leaves)).unwrap();
                assert_eq!(tree.depth(), 8);
            },
            BatchSize::SmallInput,
        );
    });
}

criterion_group! {
    name = smt_subtree_group;
    config = Criterion::default()
        .measurement_time(Duration::from_secs(20))
        .configure_from_args();
    targets = balanced_merkle_even, balanced_merkle_rand
}
criterion_main!(smt_subtree_group);

// HELPER FUNCTIONS
// --------------------------------------------------------------------------------------------

fn generate_word(seed: &mut [u8; 32]) -> Word {
    mem::swap(seed, &mut prng_array(*seed));
    let nums: [u64; 4] = prng_array(*seed);
    [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])]
}

benches/smt-subtree.rs (new file, 142 lines)

@@ -0,0 +1,142 @@
use std::{fmt::Debug, hint, mem, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use miden_crypto::{
    hash::rpo::RpoDigest,
    merkle::{build_subtree_for_bench, NodeIndex, SmtLeaf, SubtreeLeaf, SMT_DEPTH},
    Felt, Word, ONE,
};
use rand_utils::prng_array;
use winter_utils::Randomizable;

const PAIR_COUNTS: [u64; 5] = [1, 64, 128, 192, 256];

fn smt_subtree_even(c: &mut Criterion) {
    let mut seed = [0u8; 32];

    let mut group = c.benchmark_group("subtree8-even");

    for pair_count in PAIR_COUNTS {
        let bench_id = BenchmarkId::from_parameter(pair_count);
        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
            b.iter_batched(
                || {
                    // Setup.
                    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
                        .map(|n| {
                            // A single depth-8 subtree can have a maximum of 255 leaves.
                            let leaf_index = ((n as f64 / pair_count as f64) * 255.0) as u64;
                            let key = RpoDigest::new([
                                generate_value(&mut seed),
                                ONE,
                                Felt::new(n),
                                Felt::new(leaf_index),
                            ]);
                            let value = generate_word(&mut seed);
                            (key, value)
                        })
                        .collect();

                    let mut leaves: Vec<_> = entries
                        .iter()
                        .map(|(key, value)| {
                            let leaf = SmtLeaf::new_single(*key, *value);
                            let col = NodeIndex::from(leaf.index()).value();
                            let hash = leaf.hash();
                            SubtreeLeaf { col, hash }
                        })
                        .collect();
                    leaves.sort();
                    leaves.dedup_by_key(|leaf| leaf.col);
                    leaves
                },
                |leaves| {
                    // Benchmarked function.
                    let (subtree, _) = build_subtree_for_bench(
                        hint::black_box(leaves),
                        hint::black_box(SMT_DEPTH),
                        hint::black_box(SMT_DEPTH),
                    );
                    assert!(!subtree.is_empty());
                },
                BatchSize::SmallInput,
            );
        });
    }
}

fn smt_subtree_random(c: &mut Criterion) {
    let mut seed = [0u8; 32];

    let mut group = c.benchmark_group("subtree8-rand");

    for pair_count in PAIR_COUNTS {
        let bench_id = BenchmarkId::from_parameter(pair_count);
        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
            b.iter_batched(
                || {
                    // Setup.
                    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
                        .map(|i| {
                            let leaf_index: u8 = generate_value(&mut seed);
                            let key = RpoDigest::new([
                                ONE,
                                ONE,
                                Felt::new(i),
                                Felt::new(leaf_index as u64),
                            ]);
                            let value = generate_word(&mut seed);
                            (key, value)
                        })
                        .collect();

                    let mut leaves: Vec<_> = entries
                        .iter()
                        .map(|(key, value)| {
                            let leaf = SmtLeaf::new_single(*key, *value);
                            let col = NodeIndex::from(leaf.index()).value();
                            let hash = leaf.hash();
                            SubtreeLeaf { col, hash }
                        })
                        .collect();
                    leaves.sort();
                    leaves
                },
                |leaves| {
                    let (subtree, _) = build_subtree_for_bench(
                        hint::black_box(leaves),
                        hint::black_box(SMT_DEPTH),
                        hint::black_box(SMT_DEPTH),
                    );
                    assert!(!subtree.is_empty());
                },
                BatchSize::SmallInput,
            );
        });
    }
}

criterion_group! {
    name = smt_subtree_group;
    config = Criterion::default()
        .measurement_time(Duration::from_secs(40))
        .sample_size(60)
        .configure_from_args();
    targets = smt_subtree_even, smt_subtree_random
}
criterion_main!(smt_subtree_group);

// HELPER FUNCTIONS
// --------------------------------------------------------------------------------------------

fn generate_value<T: Copy + Debug + Randomizable>(seed: &mut [u8; 32]) -> T {
    mem::swap(seed, &mut prng_array(*seed));
    let value: [T; 1] = rand_utils::prng_array(*seed);
    value[0]
}

fn generate_word(seed: &mut [u8; 32]) -> Word {
    mem::swap(seed, &mut prng_array(*seed));
    let nums: [u64; 4] = prng_array(*seed);
    [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])]
}

benches/smt-with-entries.rs (new file, 71 lines)

@@ -0,0 +1,71 @@
use std::{fmt::Debug, hint, mem, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use miden_crypto::{hash::rpo::RpoDigest, merkle::Smt, Felt, Word, ONE};
use rand_utils::prng_array;
use winter_utils::Randomizable;

// 2^0, 2^4, 2^8, 2^12, 2^16, 2^20
const PAIR_COUNTS: [u64; 6] = [1, 16, 256, 4096, 65536, 1_048_576];

fn smt_with_entries(c: &mut Criterion) {
    let mut seed = [0u8; 32];

    let mut group = c.benchmark_group("smt-with-entries");

    for pair_count in PAIR_COUNTS {
        let bench_id = BenchmarkId::from_parameter(pair_count);
        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
            b.iter_batched(
                || {
                    // Setup.
                    prepare_entries(pair_count, &mut seed)
                },
                |entries| {
                    // Benchmarked function.
                    Smt::with_entries(hint::black_box(entries)).unwrap();
                },
                BatchSize::SmallInput,
            );
        });
    }
}

criterion_group! {
    name = smt_with_entries_group;
    config = Criterion::default()
        //.measurement_time(Duration::from_secs(960))
        .measurement_time(Duration::from_secs(60))
        .sample_size(10)
        .configure_from_args();
    targets = smt_with_entries
}
criterion_main!(smt_with_entries_group);

// HELPER FUNCTIONS
// --------------------------------------------------------------------------------------------

fn prepare_entries(pair_count: u64, seed: &mut [u8; 32]) -> Vec<(RpoDigest, [Felt; 4])> {
    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
        .map(|i| {
            let count = pair_count as f64;
            let idx = ((i as f64 / count) * (count)) as u64;
            let key = RpoDigest::new([generate_value(seed), ONE, Felt::new(i), Felt::new(idx)]);
            let value = generate_word(seed);
            (key, value)
        })
        .collect();
    entries
}

fn generate_value<T: Copy + Debug + Randomizable>(seed: &mut [u8; 32]) -> T {
    mem::swap(seed, &mut prng_array(*seed));
    let value: [T; 1] = rand_utils::prng_array(*seed);
    value[0]
}

fn generate_word(seed: &mut [u8; 32]) -> Word {
    mem::swap(seed, &mut prng_array(*seed));
    let nums: [u64; 4] = prng_array(*seed);
    [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])]
}