feat: implement concurrent Smt construction (#341)

* merkle: add parent() helper function on NodeIndex
* smt: add pairs_to_leaf() to trait
* smt: add sorted_pairs_to_leaves() and test for it
* smt: implement single subtree-8 hashing, w/ benchmarks & tests

This will be composed into depth-8-subtree-based computation of entire
sparse Merkle trees.
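
As a rough illustration of the idea only (not the crate's internal implementation), hashing one level of a depth-8 subtree from its sorted, deduplicated bottom row might look like the sketch below. It reuses the public `SubtreeLeaf { col, hash }` type and `SMT_DEPTH` constant exercised by the benchmarks, plus the `EmptySubtreeRoots` helper for missing siblings; the helper name `hash_one_level` is hypothetical.

```rust
use miden_crypto::{
    hash::rpo::{Rpo256, RpoDigest},
    merkle::{EmptySubtreeRoots, SubtreeLeaf, SMT_DEPTH},
};

/// Merges the sorted nodes at `depth` into their parents at `depth - 1`,
/// substituting the precomputed empty-subtree hash for any missing sibling.
/// Repeating this eight times collapses one depth-8 subtree to its root.
fn hash_one_level(row: &[SubtreeLeaf], depth: u8) -> Vec<SubtreeLeaf> {
    let empty = *EmptySubtreeRoots::entry(SMT_DEPTH, depth);
    let mut parents = Vec::with_capacity(row.len());
    let mut i = 0;
    while i < row.len() {
        let node = &row[i];
        let (pair, step) = if node.col % 2 == 0 {
            // `node` is a left child; its right sibling, if present, is next in the row.
            match row.get(i + 1) {
                Some(sib) if sib.col == node.col + 1 => ([node.hash, sib.hash], 2),
                _ => ([node.hash, empty], 1),
            }
        } else {
            // `node` is a right child whose left sibling is empty.
            ([empty, node.hash], 1)
        };
        parents.push(SubtreeLeaf { col: node.col / 2, hash: Rpo256::merge(&pair) });
        i += step;
    }
    parents
}
```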

* merkle: add a benchmark for constructing 256-balanced trees

This is intended for comparison with the benchmarks from the previous
commit. This benchmark represents the theoretical perfect-efficiency
performance we could possibly (but impractically) get for computing
depth-8 sparse Merkle subtrees.

* smt: test that SparseMerkleTree::build_subtree() is composable

* smt: test that subtree logic can correctly construct an entire tree

This commit ensures that `SparseMerkleTree::build_subtree()` can
correctly compose into building an entire sparse Merkle tree, without
yet getting into the potential complications that concurrency introduces.
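
For intuition, the composition the test exercises can be sketched as repeatedly collapsing the sorted bottom row by eight levels until only the root remains. This is illustrative only: `build_one_subtree` is a hypothetical stand-in for the crate's `build_subtree()`, and `root_from_subtrees` is not a crate function.

```rust
use miden_crypto::{hash::rpo::RpoDigest, merkle::SubtreeLeaf};

/// Collapses a sorted, non-empty bottom row into the tree root, eight levels per
/// round. `build_one_subtree` hashes one depth-8 subtree and returns its root as
/// a leaf of the next round. Assumes `depth` is a multiple of 8 (e.g. 64).
fn root_from_subtrees<F>(
    mut leaves: Vec<SubtreeLeaf>,
    mut depth: u8,
    mut build_one_subtree: F,
) -> RpoDigest
where
    F: FnMut(Vec<SubtreeLeaf>, u8) -> SubtreeLeaf,
{
    while depth > 0 {
        let mut next_row = Vec::new();
        let mut chunk: Vec<SubtreeLeaf> = Vec::new();
        for leaf in leaves {
            // Leaves sharing the same `col / 256` prefix belong to the same depth-8
            // subtree, so flush the current chunk whenever the prefix changes.
            if chunk.first().is_some_and(|first| first.col / 256 != leaf.col / 256) {
                next_row.push(build_one_subtree(core::mem::take(&mut chunk), depth));
            }
            chunk.push(leaf);
        }
        if !chunk.is_empty() {
            next_row.push(build_one_subtree(chunk, depth));
        }
        leaves = next_row;
        depth -= 8;
    }
    leaves[0].hash
}
```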

* smt: implement test for basic parallelized subtree computation w/ rayon

Building on the previous commit, this commit implements a test proving
that `SparseMerkleTree::build_subtree()` can be composed with itself not
just concurrently, but in parallel, without issue.
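
A hedged sketch of how one round of that parallel composition might look with rayon (again `build_one_subtree` is a hypothetical stand-in, and the crate's actual data flow may differ):

```rust
use miden_crypto::merkle::SubtreeLeaf;
use rayon::prelude::*;

/// Builds every depth-8 subtree of one round in parallel. Each chunk holds the
/// sorted leaves of a single subtree; the chunks are independent, so they can be
/// hashed on separate threads and the resulting roots collected in order.
fn build_round_in_parallel(
    chunks: Vec<Vec<SubtreeLeaf>>,
    depth: u8,
    build_one_subtree: fn(Vec<SubtreeLeaf>, u8) -> SubtreeLeaf,
) -> Vec<SubtreeLeaf> {
    chunks
        .into_par_iter()
        .map(|chunk| build_one_subtree(chunk, depth))
        .collect()
}
```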

* smt: add from_raw_parts() to trait interface

This commit adds a new required method to the SparseMerkleTree trait,
to allow generic construction from pre-computed parts.

This will be used to add a generic version of `with_entries()` in a
later commit.
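
The exact signature lives in the trait itself; as a rough, hypothetical illustration of the shape such a constructor takes (the trait name and associated types below are assumptions, not the crate's definitions):

```rust
use miden_crypto::hash::rpo::RpoDigest;

/// Hypothetical shape of a "construct from pre-computed parts" method on a
/// sparse-Merkle-tree trait: the caller supplies inner nodes, leaves, and the
/// already-known root, and no hashing is performed.
trait FromRawPartsSketch {
    type InnerNodes;
    type Leaves;

    fn from_raw_parts(inner_nodes: Self::InnerNodes, leaves: Self::Leaves, root: RpoDigest) -> Self
    where
        Self: Sized;
}
```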

* smt: add parallel constructors to Smt and SimpleSmt

This is what the previous few commits have been leading up to:
SparseMerkleTree now has a function to construct the tree from existing
data in parallel. This is significantly faster than the single-threaded
equivalent. Benchmarks incoming!
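
A small usage sketch of constructing an `Smt` from key/value pairs (the entries here are arbitrary; how this call dispatches to the new parallel path is not shown in this excerpt):

```rust
use miden_crypto::{hash::rpo::RpoDigest, merkle::Smt, Felt, Word, ONE};

fn main() {
    // Arbitrary distinct keys paired with non-empty values.
    let entries: Vec<(RpoDigest, Word)> = (0..1024u64)
        .map(|i| {
            let key = RpoDigest::new([ONE, ONE, Felt::new(i), Felt::new(i)]);
            let value: Word = [ONE, ONE, ONE, Felt::new(i)];
            (key, value)
        })
        .collect();

    // Construction returns an error if the entries contain multiple values for the same key.
    let smt = Smt::with_entries(entries).expect("entries contain no duplicate keys");
    println!("root = {:?}", smt.root());
}
```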

---------

Co-authored-by: krushimir <krushimir@reilabs.co>
Co-authored-by: krushimir <kresimir.grofelnik@reilabs.io>
Author:    Qyriad
Date:      2024-12-04 11:54:41 -07:00
Committed: GitHub
Parent:    1867f842d3
Commit:    b151773b0d

14 changed files with 1194 additions and 6 deletions

benches/merkle.rs (new file, 66 lines)

@@ -0,0 +1,66 @@
//! Benchmark for building a [`miden_crypto::merkle::MerkleTree`]. This is intended to be compared
//! with the results from `benches/smt-subtree.rs`, as building a fully balanced Merkle tree with
//! 256 leaves should indicate the *absolute best* performance we could *possibly* get for building
//! a depth-8 sparse Merkle subtree, though practically speaking building a fully balanced Merkle
//! tree will perform better than the sparse version. At the time of this writing (2024/11/24), this
//! benchmark is about four times more efficient than the equivalent benchmark in
//! `benches/smt-subtree.rs`.
use std::{hint, mem, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use miden_crypto::{merkle::MerkleTree, Felt, Word, ONE};
use rand_utils::prng_array;

fn balanced_merkle_even(c: &mut Criterion) {
    c.bench_function("balanced-merkle-even", |b| {
        b.iter_batched(
            || {
                let entries: Vec<Word> =
                    (0..256).map(|i| [Felt::new(i), ONE, ONE, Felt::new(i)]).collect();
                assert_eq!(entries.len(), 256);
                entries
            },
            |leaves| {
                let tree = MerkleTree::new(hint::black_box(leaves)).unwrap();
                assert_eq!(tree.depth(), 8);
            },
            BatchSize::SmallInput,
        );
    });
}

fn balanced_merkle_rand(c: &mut Criterion) {
    let mut seed = [0u8; 32];
    c.bench_function("balanced-merkle-rand", |b| {
        b.iter_batched(
            || {
                let entries: Vec<Word> = (0..256).map(|_| generate_word(&mut seed)).collect();
                assert_eq!(entries.len(), 256);
                entries
            },
            |leaves| {
                let tree = MerkleTree::new(hint::black_box(leaves)).unwrap();
                assert_eq!(tree.depth(), 8);
            },
            BatchSize::SmallInput,
        );
    });
}

criterion_group! {
    name = smt_subtree_group;
    config = Criterion::default()
        .measurement_time(Duration::from_secs(20))
        .configure_from_args();
    targets = balanced_merkle_even, balanced_merkle_rand
}
criterion_main!(smt_subtree_group);

// HELPER FUNCTIONS
// --------------------------------------------------------------------------------------------

fn generate_word(seed: &mut [u8; 32]) -> Word {
    mem::swap(seed, &mut prng_array(*seed));
    let nums: [u64; 4] = prng_array(*seed);
    [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])]
}

benches/smt-subtree.rs (new file, 142 lines)

@@ -0,0 +1,142 @@
use std::{fmt::Debug, hint, mem, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use miden_crypto::{
    hash::rpo::RpoDigest,
    merkle::{build_subtree_for_bench, NodeIndex, SmtLeaf, SubtreeLeaf, SMT_DEPTH},
    Felt, Word, ONE,
};
use rand_utils::prng_array;
use winter_utils::Randomizable;

const PAIR_COUNTS: [u64; 5] = [1, 64, 128, 192, 256];

fn smt_subtree_even(c: &mut Criterion) {
    let mut seed = [0u8; 32];

    let mut group = c.benchmark_group("subtree8-even");

    for pair_count in PAIR_COUNTS {
        let bench_id = BenchmarkId::from_parameter(pair_count);
        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
            b.iter_batched(
                || {
                    // Setup.
                    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
                        .map(|n| {
                            // A single depth-8 subtree can have a maximum of 255 leaves.
                            let leaf_index = ((n as f64 / pair_count as f64) * 255.0) as u64;
                            let key = RpoDigest::new([
                                generate_value(&mut seed),
                                ONE,
                                Felt::new(n),
                                Felt::new(leaf_index),
                            ]);
                            let value = generate_word(&mut seed);
                            (key, value)
                        })
                        .collect();

                    let mut leaves: Vec<_> = entries
                        .iter()
                        .map(|(key, value)| {
                            let leaf = SmtLeaf::new_single(*key, *value);
                            let col = NodeIndex::from(leaf.index()).value();
                            let hash = leaf.hash();
                            SubtreeLeaf { col, hash }
                        })
                        .collect();
                    leaves.sort();
                    leaves.dedup_by_key(|leaf| leaf.col);
                    leaves
                },
                |leaves| {
                    // Benchmarked function.
                    let (subtree, _) = build_subtree_for_bench(
                        hint::black_box(leaves),
                        hint::black_box(SMT_DEPTH),
                        hint::black_box(SMT_DEPTH),
                    );
                    assert!(!subtree.is_empty());
                },
                BatchSize::SmallInput,
            );
        });
    }
}

fn smt_subtree_random(c: &mut Criterion) {
    let mut seed = [0u8; 32];

    let mut group = c.benchmark_group("subtree8-rand");

    for pair_count in PAIR_COUNTS {
        let bench_id = BenchmarkId::from_parameter(pair_count);
        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
            b.iter_batched(
                || {
                    // Setup.
                    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
                        .map(|i| {
                            let leaf_index: u8 = generate_value(&mut seed);
                            let key = RpoDigest::new([
                                ONE,
                                ONE,
                                Felt::new(i),
                                Felt::new(leaf_index as u64),
                            ]);
                            let value = generate_word(&mut seed);
                            (key, value)
                        })
                        .collect();

                    let mut leaves: Vec<_> = entries
                        .iter()
                        .map(|(key, value)| {
                            let leaf = SmtLeaf::new_single(*key, *value);
                            let col = NodeIndex::from(leaf.index()).value();
                            let hash = leaf.hash();
                            SubtreeLeaf { col, hash }
                        })
                        .collect();
                    leaves.sort();
                    leaves
                },
                |leaves| {
                    let (subtree, _) = build_subtree_for_bench(
                        hint::black_box(leaves),
                        hint::black_box(SMT_DEPTH),
                        hint::black_box(SMT_DEPTH),
                    );
                    assert!(!subtree.is_empty());
                },
                BatchSize::SmallInput,
            );
        });
    }
}

criterion_group! {
    name = smt_subtree_group;
    config = Criterion::default()
        .measurement_time(Duration::from_secs(40))
        .sample_size(60)
        .configure_from_args();
    targets = smt_subtree_even, smt_subtree_random
}
criterion_main!(smt_subtree_group);

// HELPER FUNCTIONS
// --------------------------------------------------------------------------------------------

fn generate_value<T: Copy + Debug + Randomizable>(seed: &mut [u8; 32]) -> T {
    mem::swap(seed, &mut prng_array(*seed));
    let value: [T; 1] = rand_utils::prng_array(*seed);
    value[0]
}

fn generate_word(seed: &mut [u8; 32]) -> Word {
    mem::swap(seed, &mut prng_array(*seed));
    let nums: [u64; 4] = prng_array(*seed);
    [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])]
}

benches/smt-with-entries.rs (new file, 71 lines)

@@ -0,0 +1,71 @@
use std::{fmt::Debug, hint, mem, time::Duration};

use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
use miden_crypto::{hash::rpo::RpoDigest, merkle::Smt, Felt, Word, ONE};
use rand_utils::prng_array;
use winter_utils::Randomizable;

// 2^0, 2^4, 2^8, 2^12, 2^16, 2^20
const PAIR_COUNTS: [u64; 6] = [1, 16, 256, 4096, 65536, 1_048_576];

fn smt_with_entries(c: &mut Criterion) {
    let mut seed = [0u8; 32];

    let mut group = c.benchmark_group("smt-with-entries");

    for pair_count in PAIR_COUNTS {
        let bench_id = BenchmarkId::from_parameter(pair_count);
        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
            b.iter_batched(
                || {
                    // Setup.
                    prepare_entries(pair_count, &mut seed)
                },
                |entries| {
                    // Benchmarked function.
                    Smt::with_entries(hint::black_box(entries)).unwrap();
                },
                BatchSize::SmallInput,
            );
        });
    }
}

criterion_group! {
    name = smt_with_entries_group;
    config = Criterion::default()
        //.measurement_time(Duration::from_secs(960))
        .measurement_time(Duration::from_secs(60))
        .sample_size(10)
        .configure_from_args();
    targets = smt_with_entries
}
criterion_main!(smt_with_entries_group);

// HELPER FUNCTIONS
// --------------------------------------------------------------------------------------------

fn prepare_entries(pair_count: u64, seed: &mut [u8; 32]) -> Vec<(RpoDigest, [Felt; 4])> {
    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
        .map(|i| {
            let count = pair_count as f64;
            let idx = ((i as f64 / count) * (count)) as u64;
            let key = RpoDigest::new([generate_value(seed), ONE, Felt::new(i), Felt::new(idx)]);
            let value = generate_word(seed);
            (key, value)
        })
        .collect();
    entries
}

fn generate_value<T: Copy + Debug + Randomizable>(seed: &mut [u8; 32]) -> T {
    mem::swap(seed, &mut prng_array(*seed));
    let value: [T; 1] = rand_utils::prng_array(*seed);
    value[0]
}

fn generate_word(seed: &mut [u8; 32]) -> Word {
    mem::swap(seed, &mut prng_array(*seed));
    let nums: [u64; 4] = prng_array(*seed);
    [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])]
}