From 433b467953c15d760166fd4c9a82f2be62dd5481 Mon Sep 17 00:00:00 2001
From: Bobbin Threadbare
Date: Thu, 23 Feb 2023 17:06:19 -0800
Subject: [PATCH] feat: optimized hash_elements for blake3 hasher

---
 benches/README.md       |  2 +-
 src/hash/blake/mod.rs   | 26 +++++++++++++++++---------
 src/hash/blake/tests.rs | 27 +++++++++++++++++++++++++++
 3 files changed, 45 insertions(+), 10 deletions(-)

diff --git a/benches/README.md b/benches/README.md
index 9e14f78..385e01e 100644
--- a/benches/README.md
+++ b/benches/README.md
@@ -28,7 +28,7 @@ The second scenario is that of sequential hashing where we take a sequence of le
 
 | Function            | BLAKE3 | SHA3    | Poseidon  | Rp64_256  | RPO_256 |
 | ------------------- | -------| ------- | --------- | --------- | ------- |
-| Apple M1 Pro        | 1.1 us | 1.5 us  | 19.4 us   | 118 us    | 70 us   |
+| Apple M1 Pro        | 1.0 us | 1.5 us  | 19.4 us   | 118 us    | 70 us   |
 | Apple M2            | 1.0 us | 1.5 us  | 17.4 us   | 103 us    | 65 us   |
 | Amazon Graviton 3   | 1.4 us |         |           |           | 114 us  |
 | AMD Ryzen 9 5950X   | 0.8 us | 1.7 us  | 15.7 us   | 120 us    | 72 us   |

diff --git a/src/hash/blake/mod.rs b/src/hash/blake/mod.rs
index eb07ad6..a3bcfd0 100644
--- a/src/hash/blake/mod.rs
+++ b/src/hash/blake/mod.rs
@@ -1,7 +1,5 @@
 use super::{Digest, ElementHasher, Felt, FieldElement, Hasher, StarkField};
-use crate::utils::{
-    uninit_vector, ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable,
-};
+use crate::utils::{ByteReader, ByteWriter, Deserializable, DeserializationError, Serializable};
 use core::{
     mem::{size_of, transmute, transmute_copy},
     ops::Deref,
@@ -290,15 +288,25 @@ where
     let digest = if Felt::IS_CANONICAL {
         blake3::hash(E::elements_as_bytes(elements))
     } else {
-        let base_elements = E::slice_as_base_elements(elements);
-        let blen = base_elements.len() << 3;
+        let mut hasher = blake3::Hasher::new();
+
+        // BLAKE3 state is 64 bytes - so, we can absorb 64 bytes into the state in a single
+        // permutation. we move the elements into the hasher via the buffer to give the CPU
+        // a chance to process multiple element-to-byte conversions in parallel
+        let mut buf = [0_u8; 64];
+        let mut chunk_iter = E::slice_as_base_elements(elements).chunks_exact(8);
+        for chunk in chunk_iter.by_ref() {
+            for i in 0..8 {
+                buf[i * 8..(i + 1) * 8].copy_from_slice(&chunk[i].as_int().to_le_bytes());
+            }
+            hasher.update(&buf);
+        }
 
-        let mut bytes = unsafe { uninit_vector(blen) };
-        for (idx, element) in base_elements.iter().enumerate() {
-            bytes[idx * 8..(idx + 1) * 8].copy_from_slice(&element.as_int().to_le_bytes());
+        for element in chunk_iter.remainder() {
+            hasher.update(&element.as_int().to_le_bytes());
         }
 
-        blake3::hash(&bytes)
+        hasher.finalize()
     };
     *shrink_bytes(&digest.into())
 }

diff --git a/src/hash/blake/tests.rs b/src/hash/blake/tests.rs
index 8897611..b03b06a 100644
--- a/src/hash/blake/tests.rs
+++ b/src/hash/blake/tests.rs
@@ -1,6 +1,22 @@
 use super::*;
 use crate::utils::collections::Vec;
 use proptest::prelude::*;
+use rand_utils::rand_vector;
+
+#[test]
+fn blake3_hash_elements() {
+    // test multiple of 8
+    let elements = rand_vector::<Felt>(16);
+    let expected = compute_expected_element_hash(&elements);
+    let actual: [u8; 32] = hash_elements(&elements);
+    assert_eq!(&expected, &actual);
+
+    // test not multiple of 8
+    let elements = rand_vector::<Felt>(17);
+    let expected = compute_expected_element_hash(&elements);
+    let actual: [u8; 32] = hash_elements(&elements);
+    assert_eq!(&expected, &actual);
+}
 
 proptest! {
     #[test]
@@ -18,3 +34,14 @@ proptest! {
         Blake3_256::hash(vec);
     }
 }
+
+// HELPER FUNCTIONS
+// ================================================================================================
+
+fn compute_expected_element_hash(elements: &[Felt]) -> blake3::Hash {
+    let mut bytes = Vec::new();
+    for element in elements.iter() {
+        bytes.extend_from_slice(&element.as_int().to_le_bytes());
+    }
+    blake3::hash(&bytes)
+}
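
Note: the idea behind the new hash_elements loop above is to pack eight 8-byte field elements into a 64-byte buffer and feed that buffer to a single blake3::Hasher::update call, so each update absorbs a full BLAKE3 block and the element-to-byte conversions can overlap. The following is a minimal standalone sketch of the same pattern, assuming plain u64 values in place of Felt; the function name hash_u64s and the main driver are illustrative only and are not part of the patch.

// Minimal sketch of the buffering strategy used in the patch, assuming plain
// u64 values in place of field elements; hash_u64s is an illustrative name.
fn hash_u64s(values: &[u64]) -> blake3::Hash {
    let mut hasher = blake3::Hasher::new();

    // Pack eight 8-byte values into a 64-byte buffer so that each update()
    // call absorbs a full BLAKE3 block.
    let mut buf = [0_u8; 64];
    let mut chunks = values.chunks_exact(8);
    for chunk in chunks.by_ref() {
        for (i, value) in chunk.iter().enumerate() {
            buf[i * 8..(i + 1) * 8].copy_from_slice(&value.to_le_bytes());
        }
        hasher.update(&buf);
    }

    // Feed any leftover values (fewer than eight) one at a time.
    for value in chunks.remainder() {
        hasher.update(&value.to_le_bytes());
    }

    hasher.finalize()
}

fn main() {
    let values: Vec<u64> = (0..17).collect();
    println!("{}", hash_u64s(&values));
}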