diff --git a/Cargo.toml b/Cargo.toml
index 9bfc91a..fdc7dd2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,4 +13,8 @@ criterion = "0.5.1"
 
 [[bench]]
 name = "ntt"
+harness = false
+
+[[bench]]
+name = "operations"
 harness = false
\ No newline at end of file
diff --git a/benches/operations.rs b/benches/operations.rs
new file mode 100644
index 0000000..8b20715
--- /dev/null
+++ b/benches/operations.rs
@@ -0,0 +1,40 @@
+use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
+use math::ring::Ring;
+use math::modulus::Operations;
+
+fn add_vec_unary(c: &mut Criterion) {
+    fn runner(r: Ring<u64>) -> Box<dyn FnMut()> {
+
+        let mut p0: math::poly::Poly<u64> = r.new_poly();
+        let mut p1: math::poly::Poly<u64> = r.new_poly();
+        for i in 0..p0.n(){
+            p0.0[i] = i as u64;
+            p1.0[i] = i as u64;
+        }
+        println!("{}", r.n());
+        Box::new(move || {
+            r.modulus.add_vec_unary_assign::<8>(&p0.0, &mut p1.0);
+        })
+    }
+
+    let mut b: criterion::BenchmarkGroup<'_, criterion::measurement::WallTime> = c.benchmark_group("add_vec_unary");
+    for log_n in 11..17 {
+
+        let n: usize = 1<<log_n;
+        let r: Ring<u64> = Ring::<u64>::new(n, q_base, q_power);
+        let runners = [
+            ("prime", {
+                runner(r)
+            }),
+        ];
+        for (name, mut runner) in runners {
+            let id = BenchmarkId::new(name, n);
+            b.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
+        }
+    }
+}
+
+criterion_group!(benches, add_vec_unary);
+criterion_main!(benches);
diff --git a/src/lib.rs b/src/lib.rs
index 89beb36..1306749 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -4,4 +4,115 @@ pub mod modulus;
 
 pub mod dft;
 pub mod ring;
-pub mod poly;
\ No newline at end of file
+pub mod poly;
+
+pub mod macros{
+
+    #[macro_export]
+    macro_rules! apply_unary {
+
+        ($self:expr, $f:expr, $a:expr, $CHUNK:expr) => {
+
+            match $CHUNK{
+                8 => {
+
+                    $a.chunks_exact_mut(8).for_each(|a| {
+                        $f(&$self, &mut a[0]);
+                        $f(&$self, &mut a[1]);
+                        $f(&$self, &mut a[2]);
+                        $f(&$self, &mut a[3]);
+                        $f(&$self, &mut a[4]);
+                        $f(&$self, &mut a[5]);
+                        $f(&$self, &mut a[6]);
+                        $f(&$self, &mut a[7]);
+                    });
+
+                    let n: usize = $a.len();
+                    let m = n - (n&($CHUNK-1));
+                    $a[m..].iter_mut().for_each(|a| {
+                        $f(&$self, a);
+                    });
+                },
+                _=>{
+                    $a.iter_mut().for_each(|a| {
+                        $f(&$self, a);
+                    });
+                }
+            }
+        };
+    }
+
+    #[macro_export]
+    macro_rules! apply_binary {
+
+        ($self:expr, $f:expr, $a:expr, $b:expr, $CHUNK:expr) => {
+
+            let n: usize = $a.len();
+            debug_assert!($b.len() == n, "invalid argument b: b.len() = {} != a.len() = {}", $b.len(), n);
+            debug_assert!($CHUNK&($CHUNK-1) == 0, "invalid CHUNK const: not a power of two");
+
+            match $CHUNK{
+                8 => {
+
+                    izip!($a.chunks_exact(8), $b.chunks_exact_mut(8)).for_each(|(a, b)| {
+                        $f(&$self, &a[0], &mut b[0]);
+                        $f(&$self, &a[1], &mut b[1]);
+                        $f(&$self, &a[2], &mut b[2]);
+                        $f(&$self, &a[3], &mut b[3]);
+                        $f(&$self, &a[4], &mut b[4]);
+                        $f(&$self, &a[5], &mut b[5]);
+                        $f(&$self, &a[6], &mut b[6]);
+                        $f(&$self, &a[7], &mut b[7]);
+                    });
+
+                    let m = n - (n&($CHUNK-1));
+                    izip!($a[m..].iter(), $b[m..].iter_mut()).for_each(|(a, b)| {
+                        $f(&$self, a, b);
+                    });
+                },
+                _=>{
+                    izip!($a.iter(), $b.iter_mut()).for_each(|(a, b)| {
+                        $f(&$self, a, b);
+                    });
+                }
+            }
+        };
+    }
+
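+    /// Applies `$f(&$self, &a[i], &b[i], &mut c[i])` element-wise over three slices.
+    /// With `$CHUNK == 8` the loop body is manually unrolled in blocks of eight and
+    /// the remaining `len % 8` elements are handled by a scalar tail loop; any other
+    /// `$CHUNK` falls back to a plain element-wise loop.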
+    #[macro_export]
+    macro_rules! apply_ternary {
+
+        ($self:expr, $f:expr, $a:expr, $b:expr, $c:expr, $CHUNK:expr) => {
+
+            let n: usize = $a.len();
+            debug_assert!($b.len() == n, "invalid argument b: b.len() = {} != a.len() = {}", $b.len(), n);
+            debug_assert!($CHUNK&($CHUNK-1) == 0, "invalid CHUNK const: not a power of two");
+
+            match $CHUNK{
+                8 => {
+
+                    izip!($a.chunks_exact(8), $b.chunks_exact(8), $c.chunks_exact_mut(8)).for_each(|(a, b, c)| {
+                        $f(&$self, &a[0], &b[0], &mut c[0]);
+                        $f(&$self, &a[1], &b[1], &mut c[1]);
+                        $f(&$self, &a[2], &b[2], &mut c[2]);
+                        $f(&$self, &a[3], &b[3], &mut c[3]);
+                        $f(&$self, &a[4], &b[4], &mut c[4]);
+                        $f(&$self, &a[5], &b[5], &mut c[5]);
+                        $f(&$self, &a[6], &b[6], &mut c[6]);
+                        $f(&$self, &a[7], &b[7], &mut c[7]);
+                    });
+
+                    let m = n - (n&7);
+                    izip!($a[m..].iter(), $b[m..].iter(), $c[m..].iter_mut()).for_each(|(a, b, c)| {
+                        $f(&$self, a, b, c);
+                    });
+                },
+                _=>{
+                    izip!($a.iter(), $b.iter(), $c.iter_mut()).for_each(|(a, b, c)| {
+                        $f(&$self, a, b, c);
+                    });
+                }
+            }
+        };
+    }
+}
\ No newline at end of file
diff --git a/src/modulus.rs b/src/modulus.rs
index 2033493..3fc01c0 100644
--- a/src/modulus.rs
+++ b/src/modulus.rs
@@ -4,6 +4,8 @@ pub mod montgomery;
 pub mod shoup;
 pub mod impl_u64;
 
+
+
 pub trait WordOps<O>{
     fn log2(self) -> O;
     fn reverse_bits_msb(self, n:u32) -> O;
@@ -71,3 +73,38 @@ impl ReduceOnce for u64{
         (*self).min(self.wrapping_sub(q))
     }
 }
+
+
+pub trait Operations<O>{
+    // Assigns a + b to c.
+    fn add_binary_assign(&self, a: &O, b:&O, c: &mut O);
+
+    // Assigns a + b to b.
+    fn add_unary_assign(&self, a: &O, b: &mut O);
+
+    // Assigns a[i] + b[i] to c[i]
+    fn add_vec_binary_assign<const CHUNK: usize>(&self, a: &[O], b:&[O], c: &mut [O]);
+
+    // Assigns a[i] + b[i] to b[i]
+    fn add_vec_unary_assign<const CHUNK: usize>(&self, a: &[O], b: &mut [O]);
+
+    // Assigns a - b to c.
+    fn sub_binary_assign(&self, a: &O, b:&O, c: &mut O);
+
+    // Assigns a - b to b.
+    fn sub_unary_assign(&self, a: &O, b: &mut O);
+
+    // Assigns a[i] - b[i] to c[i]
+    fn sub_vec_binary_assign<const CHUNK: usize>(&self, a: &[O], b:&[O], c: &mut [O]);
+
+    // Assigns a[i] - b[i] to b[i]
+    fn sub_vec_unary_assign<const CHUNK: usize>(&self, a: &[O], b: &mut [O]);
+
+    // Assigns -a to a.
+    fn neg_assign(&self, a:&mut O);
+
+    // Assigns -a[i] to a[i].
+    fn neg_vec_assign<const CHUNK: usize>(&self, a: &mut [O]);
+}
+
+
diff --git a/src/modulus/impl_u64/mod.rs b/src/modulus/impl_u64/mod.rs
index 61322a4..4394599 100644
--- a/src/modulus/impl_u64/mod.rs
+++ b/src/modulus/impl_u64/mod.rs
@@ -1,4 +1,5 @@
 pub mod prime;
 pub mod barrett;
 pub mod montgomery;
-pub mod shoup;
\ No newline at end of file
+pub mod shoup;
+pub mod operations;
\ No newline at end of file
diff --git a/src/modulus/impl_u64/operations.rs b/src/modulus/impl_u64/operations.rs
new file mode 100644
index 0000000..05215cd
--- /dev/null
+++ b/src/modulus/impl_u64/operations.rs
@@ -0,0 +1,60 @@
+
+use crate::modulus::Operations;
+use crate::modulus::prime::Prime;
+use crate::modulus::ReduceOnce;
+use crate::{apply_unary, apply_binary, apply_ternary};
+use itertools::izip;
+
+impl Operations<u64> for Prime<u64>{
+
+    #[inline(always)]
+    fn add_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){
+        *c = a.wrapping_add(*b).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn add_unary_assign(&self, a: &u64, b: &mut u64){
+        *b = a.wrapping_add(*b).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn add_vec_binary_assign<const CHUNK: usize>(&self, a: &[u64], b:&[u64], c:&mut [u64]){
+        apply_ternary!(self, Self::add_binary_assign, a, b, c, CHUNK);
+    }
+
+    #[inline(always)]
+    fn add_vec_unary_assign<const CHUNK: usize>(&self, a: &[u64], b:&mut [u64]){
+        apply_binary!(self, Self::add_unary_assign, a, b, CHUNK);
+    }
+
+    #[inline(always)]
+    fn sub_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){
+        *c = a.wrapping_add(self.q.wrapping_sub(*b)).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn sub_unary_assign(&self, a: &u64, b: &mut u64){
+        *b = a.wrapping_add(self.q.wrapping_sub(*b)).reduce_once(self.q);
+    }
+
+    #[inline(always)]
+    fn sub_vec_binary_assign<const CHUNK: usize>(&self, a: &[u64], b:&[u64], c:&mut [u64]){
+        apply_ternary!(self, Self::sub_binary_assign, a, b, c, CHUNK);
+    }
+
+    #[inline(always)]
+    fn sub_vec_unary_assign<const CHUNK: usize>(&self, a: &[u64], b:&mut [u64]){
+        apply_binary!(self, Self::sub_unary_assign, a, b, CHUNK);
+    }
+
+    #[inline(always)]
+    fn neg_assign(&self, a: &mut u64){
+        *a = self.q.wrapping_sub(*a);
+    }
+
+    #[inline(always)]
+    fn neg_vec_assign<const CHUNK: usize>(&self, a: &mut [u64]){
+        apply_unary!(self, Self::neg_assign, a, CHUNK);
+    }
+}
+
diff --git a/src/poly/poly.rs b/src/poly/poly.rs
index e69de29..8b13789 100644
--- a/src/poly/poly.rs
+++ b/src/poly/poly.rs
@@ -0,0 +1 @@
+
diff --git a/src/ring/impl_u64/automorphism.rs b/src/ring/impl_u64/automorphism.rs
index c78b2ce..739cd35 100644
--- a/src/ring/impl_u64/automorphism.rs
+++ b/src/ring/impl_u64/automorphism.rs
@@ -33,11 +33,12 @@ impl Ring<u64>{
         let b_vec: &mut _ = &mut b.0;
         let a_vec: &_ = &a.0;
 
-        for i in 0..n{
+        a_vec.iter().enumerate().for_each(|(i, ai)|{
             let gal_el_i: u64 = i as u64 * gal_el;
-            let i_out: u64 = gal_el_i & mask;
             let sign: u64 = (gal_el_i>>log_n) & 1;
-            b_vec[i_out as usize] = a_vec[i] * (sign^1) | (q - a_vec[i]) * sign
-        }
+            let i_out: u64 = gal_el_i & mask;
+            b_vec[i_out as usize] = ai * (sign^1) | (q - ai) * sign
+        });
+
     }
 }
\ No newline at end of file
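
A minimal usage sketch of the new Operations trait, mirroring the call made in benches/operations.rs; the helper name and the generic bound are illustrative, only the trait, the add_vec_unary_assign method and the const-generic chunk size of 8 come from this patch:

    use math::modulus::Operations;

    // acc[i] = a[i] + acc[i] mod q, processed in unrolled chunks of 8
    // (any other chunk size falls back to a plain element-wise loop).
    fn accumulate<M: Operations<u64>>(m: &M, a: &[u64], acc: &mut [u64]) {
        m.add_vec_unary_assign::<8>(a, acc);
    }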