From 004a3bc7a8384b678231b004dfa53af38625f168 Mon Sep 17 00:00:00 2001
From: Bobbin Threadbare <bobbinth@protonmail.com>
Date: Fri, 5 Jan 2024 16:38:32 -0800
Subject: [PATCH] docs: update changelog and readme

---
 CHANGELOG.md                   |  2 ++
 README.md                      |  8 +++++++-
 benches/README.md              |  3 +++
 benches/hash.rs                |  6 ------
 benches/smt.rs                 |  4 ++--
 benches/store.rs               |  2 +-
 src/merkle/partial_mt/tests.rs |  4 ++--
 src/merkle/simple_smt/tests.rs | 21 ++++++++-------------
 src/merkle/store/tests.rs      | 11 ++++-------
 9 files changed, 29 insertions(+), 32 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index afa852b..4798912 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,7 +4,9 @@
 * Updated Winterfell dependency to v0.7 (#200)
 * Implemented RPX hash function (#201).
 * Added `FeltRng` and `RpoRandomCoin` (#237).
+* Accelerated RPO/RPX hash functions using AVX2 instructions (#234).
 * Added `inner_nodes()` method to `PartialMmr` (#238).
+* Improved `PartialMmr::apply_delta()` (#242).
 
 ## 0.7.1 (2023-10-10)
 
diff --git a/README.md b/README.md
index f43a742..3b04dc1 100644
--- a/README.md
+++ b/README.md
@@ -46,8 +46,14 @@ Both of these features imply the use of [alloc](https://doc.rust-lang.org/alloc/
 
 To compile with `no_std`, disable default features via `--no-default-features` flag.
 
+### AVX2 acceleration
+On platforms with [AVX2](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) support, RPO and RPX hash functions can be accelerated by using the vector processing unit. To enable AVX2 acceleration, the code needs to be compiled with the `avx2` target feature enabled. For example:
+```shell
+RUSTFLAGS="-C target-feature=+avx2" cargo build --release
+```
+
 ### SVE acceleration
-On platforms with [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE)) support, RPO hash function can be accelerated by using the vector processing unit. To enable SVE acceleration, the code needs to be compiled with the `sve` feature enabled. This feature has an effect only if the platform exposes `target-feature=sve` flag. On some platforms (e.g., Graviton 3), for this flag to be set, the compilation must be done in "native" mode. For example, to enable SVE acceleration on Graviton 3, we can execute the following:
+On platforms with [SVE](https://en.wikipedia.org/wiki/AArch64#Scalable_Vector_Extension_(SVE)) support, RPO and RPX hash functions can be accelerated by using the vector processing unit. To enable SVE acceleration, the code needs to be compiled with the `sve` feature enabled. This feature has an effect only if the platform exposes the `target-feature=sve` flag. On some platforms (e.g., Graviton 3), for this flag to be set, the compilation must be done in "native" mode. For example, to enable SVE acceleration on Graviton 3, we can execute the following:
 ```shell
 RUSTFLAGS="-C target-cpu=native" cargo build --release --features sve
 ```
diff --git a/benches/README.md b/benches/README.md
index 1ba848a..3113d62 100644
--- a/benches/README.md
+++ b/benches/README.md
@@ -22,6 +22,7 @@ The second scenario is that of sequential hashing where we take a sequence of le
 | Apple M2 Max        | 71 ns  | 233 ns  |  1.3 µs   |  7.9 µs   | 4.6 µs  | 2.4 µs  |
 | Amazon Graviton 3   | 108 ns |         |           |           | 5.3 µs  | 3.1 µs  |
 | AMD Ryzen 9 5950X   | 64 ns  | 273 ns  |  1.2 µs   |  9.1 µs   | 5.5 µs  |         |
+| AMD EPYC 9R14       | 83 ns  |         |           |           | 4.3 µs  | 2.4 µs  |
 | Intel Core i5-8279U | 68 ns  | 536 ns  |  2.0 µs   |  13.6 µs  | 8.5 µs  | 4.4 µs  |
 | Intel Xeon 8375C    | 67 ns  |         |           |           | 8.2 µs  |         |
 
@@ -33,11 +34,13 @@ The second scenario is that of sequential hashing where we take a sequence of le
 | Apple M2 Max        | 0.9 µs | 1.5 µs  |  17.4 µs  |   103 µs  | 60 µs   | 31 µs   |
 | Amazon Graviton 3   | 1.4 µs |         |           |           | 69 µs   | 41 µs   |
 | AMD Ryzen 9 5950X   | 0.8 µs | 1.7 µs  |  15.7 µs  |   120 µs  | 72 µs   |         |
+| AMD EPYC 9R14       | 0.9 µs |         |           |           | 56 µs   | 32 µs   |
 | Intel Core i5-8279U | 0.9 µs |         |           |           | 107 µs  | 56 µs   |
 | Intel Xeon 8375C    | 0.8 µs |         |           |           | 110 µs  |         |
 
 Notes:
 - On Graviton 3, RPO256 and RPX256 are run with SVE acceleration enabled.
+- On AMD EPYC 9R14, RPO256 and RPX256 are run with AVX2 acceleration enabled.
 
 ### Instructions
 Before you can run the benchmarks, you'll need to make sure you have Rust [installed](https://www.rust-lang.org/tools/install). After that, to run the benchmarks for RPO and BLAKE3, clone the current repository, and from the root directory of the repo run the following:
diff --git a/benches/hash.rs b/benches/hash.rs
index ea5e1e0..4f79eb8 100644
--- a/benches/hash.rs
+++ b/benches/hash.rs
@@ -32,7 +32,6 @@ fn rpo256_2to1(c: &mut Criterion) {
 
 fn rpo256_sequential(c: &mut Criterion) {
     let v: [Felt; 100] = (0..100)
-        .into_iter()
         .map(Felt::new)
         .collect::<Vec<Felt>>()
         .try_into()
@@ -45,7 +44,6 @@ fn rpo256_sequential(c: &mut Criterion) {
         bench.iter_batched(
             || {
                 let v: [Felt; 100] = (0..100)
-                    .into_iter()
                     .map(|_| Felt::new(rand_value()))
                     .collect::<Vec<Felt>>()
                     .try_into()
@@ -80,7 +78,6 @@ fn rpx256_2to1(c: &mut Criterion) {
 
 fn rpx256_sequential(c: &mut Criterion) {
     let v: [Felt; 100] = (0..100)
-        .into_iter()
         .map(Felt::new)
         .collect::<Vec<Felt>>()
         .try_into()
@@ -93,7 +90,6 @@ fn rpx256_sequential(c: &mut Criterion) {
         bench.iter_batched(
             || {
                 let v: [Felt; 100] = (0..100)
-                    .into_iter()
                     .map(|_| Felt::new(rand_value()))
                     .collect::<Vec<Felt>>()
                     .try_into()
@@ -129,7 +125,6 @@ fn blake3_2to1(c: &mut Criterion) {
 
 fn blake3_sequential(c: &mut Criterion) {
     let v: [Felt; 100] = (0..100)
-        .into_iter()
         .map(Felt::new)
         .collect::<Vec<Felt>>()
         .try_into()
@@ -142,7 +137,6 @@ fn blake3_sequential(c: &mut Criterion) {
         bench.iter_batched(
             || {
                 let v: [Felt; 100] = (0..100)
-                    .into_iter()
                     .map(|_| Felt::new(rand_value()))
                     .collect::<Vec<Felt>>()
                     .try_into()
diff --git a/benches/smt.rs b/benches/smt.rs
index 44e3ea5..0f9c371 100644
--- a/benches/smt.rs
+++ b/benches/smt.rs
@@ -27,7 +27,7 @@ fn smt_rpo(c: &mut Criterion) {
 
     // benchmarks
 
-    let mut insert = c.benchmark_group(format!("smt update_leaf"));
+    let mut insert = c.benchmark_group("smt update_leaf".to_string());
 
     for (tree, count) in trees.iter_mut() {
         let depth = tree.depth();
@@ -45,7 +45,7 @@ fn smt_rpo(c: &mut Criterion) {
 
     insert.finish();
 
-    let mut path = c.benchmark_group(format!("smt get_leaf_path"));
+    let mut path = c.benchmark_group("smt get_leaf_path".to_string());
 
     for (tree, count) in trees.iter_mut() {
         let depth = tree.depth();
diff --git a/benches/store.rs b/benches/store.rs
index d6da04b..0abeb98 100644
--- a/benches/store.rs
+++ b/benches/store.rs
@@ -15,7 +15,7 @@ fn random_rpo_digest() -> RpoDigest {
 
 /// Generates a random `Word`.
 fn random_word() -> Word {
-    rand_array::<Felt, 4>().into()
+    rand_array::<Felt, 4>()
 }
 
 /// Generates an index at the specified depth in `0..range`.
diff --git a/src/merkle/partial_mt/tests.rs b/src/merkle/partial_mt/tests.rs
index 4e580d2..da11fc4 100644
--- a/src/merkle/partial_mt/tests.rs
+++ b/src/merkle/partial_mt/tests.rs
@@ -209,7 +209,7 @@ fn get_paths() {
     // Which have leaf nodes 20, 22, 23, 32 and 33. Hence overall we will have 5 paths -- one path
     // for each leaf.
 
-    let leaves = vec![NODE20, NODE22, NODE23, NODE32, NODE33];
+    let leaves = [NODE20, NODE22, NODE23, NODE32, NODE33];
     let expected_paths: Vec<(NodeIndex, ValuePath)> = leaves
         .iter()
         .map(|&leaf| {
@@ -257,7 +257,7 @@ fn leaves() {
     let value32 = mt.get_node(NODE32).unwrap();
     let value33 = mt.get_node(NODE33).unwrap();
 
-    let leaves = vec![(NODE11, value11), (NODE20, value20), (NODE32, value32), (NODE33, value33)];
+    let leaves = [(NODE11, value11), (NODE20, value20), (NODE32, value32), (NODE33, value33)];
 
     let expected_leaves = leaves.iter().copied();
     assert!(expected_leaves.eq(pmt.leaves()));
diff --git a/src/merkle/simple_smt/tests.rs b/src/merkle/simple_smt/tests.rs
index 3d270be..7949646 100644
--- a/src/merkle/simple_smt/tests.rs
+++ b/src/merkle/simple_smt/tests.rs
@@ -35,7 +35,7 @@ const ZERO_VALUES8: [Word; 8] = [int_to_leaf(0); 8];
 fn build_empty_tree() {
     // tree of depth 3
     let smt = SimpleSmt::new(3).unwrap();
-    let mt = MerkleTree::new(ZERO_VALUES8.to_vec()).unwrap();
+    let mt = MerkleTree::new(ZERO_VALUES8).unwrap();
     assert_eq!(mt.root(), smt.root());
 }
 
@@ -74,14 +74,12 @@ fn build_sparse_tree() {
 /// Tests that [`SimpleSmt::with_contiguous_leaves`] works as expected
 #[test]
 fn build_contiguous_tree() {
-    let tree_with_leaves = SimpleSmt::with_leaves(
-        2,
-        [0, 1, 2, 3].into_iter().zip(digests_to_words(&VALUES4).into_iter()),
-    )
-    .unwrap();
+    let tree_with_leaves =
+        SimpleSmt::with_leaves(2, [0, 1, 2, 3].into_iter().zip(digests_to_words(&VALUES4)))
+            .unwrap();
 
     let tree_with_contiguous_leaves =
-        SimpleSmt::with_contiguous_leaves(2, digests_to_words(&VALUES4).into_iter()).unwrap();
+        SimpleSmt::with_contiguous_leaves(2, digests_to_words(&VALUES4)).unwrap();
 
     assert_eq!(tree_with_leaves, tree_with_contiguous_leaves);
 }
@@ -89,8 +87,7 @@ fn build_contiguous_tree() {
 #[test]
 fn test_depth2_tree() {
     let tree =
-        SimpleSmt::with_leaves(2, KEYS4.into_iter().zip(digests_to_words(&VALUES4).into_iter()))
-            .unwrap();
+        SimpleSmt::with_leaves(2, KEYS4.into_iter().zip(digests_to_words(&VALUES4))).unwrap();
 
     // check internal structure
     let (root, node2, node3) = compute_internal_nodes();
@@ -118,8 +115,7 @@ fn test_depth2_tree() {
 #[test]
 fn test_inner_node_iterator() -> Result<(), MerkleError> {
     let tree =
-        SimpleSmt::with_leaves(2, KEYS4.into_iter().zip(digests_to_words(&VALUES4).into_iter()))
-            .unwrap();
+        SimpleSmt::with_leaves(2, KEYS4.into_iter().zip(digests_to_words(&VALUES4))).unwrap();
 
     // check depth 2
     assert_eq!(VALUES4[0], tree.get_node(NodeIndex::make(2, 0)).unwrap());
@@ -150,8 +146,7 @@ fn test_inner_node_iterator() -> Result<(), MerkleError> {
 #[test]
 fn update_leaf() {
     let mut tree =
-        SimpleSmt::with_leaves(3, KEYS8.into_iter().zip(digests_to_words(&VALUES8).into_iter()))
-            .unwrap();
+        SimpleSmt::with_leaves(3, KEYS8.into_iter().zip(digests_to_words(&VALUES8))).unwrap();
 
     // update one value
     let key = 3;
diff --git a/src/merkle/store/tests.rs b/src/merkle/store/tests.rs
index 8553e25..004e020 100644
--- a/src/merkle/store/tests.rs
+++ b/src/merkle/store/tests.rs
@@ -210,7 +210,7 @@ fn test_get_invalid_node() {
 fn test_add_sparse_merkle_tree_one_level() -> Result<(), MerkleError> {
     let keys2: [u64; 2] = [0, 1];
     let leaves2: [Word; 2] = [int_to_leaf(1), int_to_leaf(2)];
-    let smt = SimpleSmt::with_leaves(1, keys2.into_iter().zip(leaves2.into_iter())).unwrap();
+    let smt = SimpleSmt::with_leaves(1, keys2.into_iter().zip(leaves2)).unwrap();
     let store = MerkleStore::from(&smt);
 
     let idx = NodeIndex::make(1, 0);
@@ -228,7 +228,7 @@ fn test_add_sparse_merkle_tree_one_level() -> Result<(), MerkleError> {
 fn test_sparse_merkle_tree() -> Result<(), MerkleError> {
     let smt = SimpleSmt::with_leaves(
         SimpleSmt::MAX_DEPTH,
-        KEYS4.into_iter().zip(digests_to_words(&VALUES4).into_iter()),
+        KEYS4.into_iter().zip(digests_to_words(&VALUES4)),
     )
     .unwrap();
 
@@ -553,11 +553,8 @@ fn test_constructors() -> Result<(), MerkleError> {
     }
 
     let depth = 32;
-    let smt = SimpleSmt::with_leaves(
-        depth,
-        KEYS4.into_iter().zip(digests_to_words(&VALUES4).into_iter()),
-    )
-    .unwrap();
+    let smt =
+        SimpleSmt::with_leaves(depth, KEYS4.into_iter().zip(digests_to_words(&VALUES4))).unwrap();
     let store = MerkleStore::from(&smt);
     let depth = smt.depth();