From 8d84727fae4fe7f0b79ab60ed6891aa5c1dbe61c Mon Sep 17 00:00:00 2001 From: Jean-Philippe Bossuat Date: Tue, 31 Dec 2024 15:30:57 +0100 Subject: [PATCH] wip --- benches/ntt.rs | 24 ++-- benches/operations.rs | 50 +++++++-- examples/main.rs | 4 +- src/dft/ntt.rs | 174 ++++++++++++++++------------- src/modulus.rs | 88 +++++++-------- src/modulus/barrett.rs | 17 +++ src/modulus/impl_u64/barrett.rs | 45 +++++++- src/modulus/impl_u64/mod.rs | 31 ++++- src/modulus/impl_u64/montgomery.rs | 49 +++----- src/modulus/impl_u64/operations.rs | 129 +++++++++++++-------- src/modulus/impl_u64/prime.rs | 16 +-- src/modulus/impl_u64/shoup.rs | 60 ---------- src/modulus/montgomery.rs | 30 +---- src/modulus/prime.rs | 6 +- src/modulus/shoup.rs | 22 ---- src/ring/impl_u64/automorphism.rs | 3 +- src/ring/impl_u64/ring.rs | 37 ++++-- 17 files changed, 422 insertions(+), 363 deletions(-) delete mode 100644 src/modulus/impl_u64/shoup.rs delete mode 100644 src/modulus/shoup.rs diff --git a/benches/ntt.rs b/benches/ntt.rs index 5cc1a65..c21328c 100644 --- a/benches/ntt.rs +++ b/benches/ntt.rs @@ -3,25 +3,25 @@ use math::{modulus::prime::Prime,dft::ntt::Table}; use math::dft::DFT; fn forward_inplace(c: &mut Criterion) { - fn runner + 'static>(prime_instance: Prime, nth_root: u64) -> Box { + fn runner(prime_instance: Prime, nth_root: u64) -> Box { let ntt_table: Table = Table::::new(prime_instance, nth_root); let mut a: Vec = vec![0; (nth_root >> 1) as usize]; for i in 0..a.len(){ a[i] = i as u64; } Box::new(move || { - ntt_table.forward_inplace(&mut a) + ntt_table.forward_inplace::(&mut a) }) } let mut b: criterion::BenchmarkGroup<'_, criterion::measurement::WallTime> = c.benchmark_group("forward_inplace"); for log_nth_root in 11..18 { - let mut prime_instance: Prime = Prime::::new(0x1fffffffffe00001, 1); + let prime_instance: Prime = Prime::::new(0x1fffffffffe00001, 1); let runners = [ ("prime", { - runner::>(prime_instance, 1< + 'static>(prime_instance: Prime, nth_root: u64) -> Box { + fn runner(prime_instance: Prime, nth_root: u64) -> Box { let ntt_table: Table = Table::::new(prime_instance, nth_root); let mut a: Vec = vec![0; (nth_root >> 1) as usize]; for i in 0..a.len(){ @@ -50,7 +50,7 @@ fn forward_inplace_lazy(c: &mut Criterion) { let runners = [ ("prime", { - runner::>(prime_instance, 1< + 'static>(prime_instance: Prime, nth_root: u64) -> Box { + fn runner(prime_instance: Prime, nth_root: u64) -> Box { let ntt_table: Table = Table::::new(prime_instance, nth_root); let mut a: Vec = vec![0; (nth_root >> 1) as usize]; for i in 0..a.len(){ a[i] = i as u64; } Box::new(move || { - ntt_table.backward_inplace(&mut a) + ntt_table.backward_inplace::(&mut a) }) } @@ -79,7 +79,7 @@ fn backward_inplace(c: &mut Criterion) { let runners = [ ("prime", { - runner::>(prime_instance, 1< + 'static>(prime_instance: Prime, nth_root: u64) -> Box { + fn runner(prime_instance: Prime, nth_root: u64) -> Box { let ntt_table: Table = Table::::new(prime_instance, nth_root); let mut a: Vec = vec![0; (nth_root >> 1) as usize]; for i in 0..a.len(){ a[i] = i as u64; } Box::new(move || { - ntt_table.backward_inplace_lazy(&mut a) + ntt_table.backward_inplace::(&mut a) }) } @@ -108,7 +108,7 @@ fn backward_inplace_lazy(c: &mut Criterion) { let runners = [ ("prime", { - runner::>(prime_instance, 1<) -> Box { let mut p0: math::poly::Poly = r.new_poly(); @@ -16,7 +16,7 @@ fn add_vec_unary(c: &mut Criterion) { p1.0[i] = i as u64; } Box::new(move || { - r.modulus.add_vec_unary_assign::(&p0.0, &mut p1.0); + r.modulus.vec_add_unary_assign::(&p0.0, 
&mut p1.0); }) } @@ -39,17 +39,17 @@ fn add_vec_unary(c: &mut Criterion) { } } -fn mul_vec_montgomery_external_unary_assign(c: &mut Criterion) { +fn vec_mul_montgomery_external_unary_assign(c: &mut Criterion) { fn runner(r: Ring) -> Box { - let mut p0: math::poly::Poly> = r.new_poly_montgomery(); + let mut p0: math::poly::Poly> = r.new_poly(); let mut p1: math::poly::Poly = r.new_poly(); for i in 0..p0.n(){ p0.0[i] = r.modulus.montgomery.prepare::(i as u64); p1.0[i] = i as u64; } Box::new(move || { - r.modulus.mul_vec_montgomery_external_unary_assign::(&p0.0, &mut p1.0); + r.modulus.vec_mul_montgomery_external_unary_assign::(&p0.0, &mut p1.0); }) } @@ -72,5 +72,39 @@ fn mul_vec_montgomery_external_unary_assign(c: &mut Criterion) { } } -criterion_group!(benches, add_vec_unary, mul_vec_montgomery_external_unary_assign); +fn vec_mul_montgomery_external_binary_assign(c: &mut Criterion) { + fn runner(r: Ring) -> Box { + + let mut p0: math::poly::Poly> = r.new_poly(); + let mut p1: math::poly::Poly = r.new_poly(); + let mut p2: math::poly::Poly = r.new_poly(); + for i in 0..p0.n(){ + p0.0[i] = r.modulus.montgomery.prepare::(i as u64); + p1.0[i] = i as u64; + } + Box::new(move || { + r.modulus.vec_mul_montgomery_external_binary_assign::(&p0.0, & p1.0, &mut p2.0); + }) + } + + let mut b: criterion::BenchmarkGroup<'_, criterion::measurement::WallTime> = c.benchmark_group("mul_vec_montgomery_external_binary_assign"); + for log_n in 11..17 { + + let n: usize = 1< = Ring::::new(n, q_base, q_power); + let runners = [ + ("prime", { + runner(r) + }), + ]; + for (name, mut runner) in runners { + let id = BenchmarkId::new(name, n); + b.bench_with_input(id, &(), |b, _| b.iter(&mut runner)); + } + } +} + +criterion_group!(benches, vec_add_unary, vec_mul_montgomery_external_unary_assign, vec_mul_montgomery_external_binary_assign); criterion_main!(benches); diff --git a/examples/main.rs b/examples/main.rs index 355d0cb..dd68ecb 100644 --- a/examples/main.rs +++ b/examples/main.rs @@ -28,11 +28,11 @@ fn main() { println!("{:?}", a); - ntt_table.forward_inplace(&mut a); + ntt_table.forward_inplace::(&mut a); println!("{:?}", a); - ntt_table.backward_inplace(&mut a); + ntt_table.backward_inplace::(&mut a); println!("{:?}", a); diff --git a/src/dft/ntt.rs b/src/dft/ntt.rs index 3b91024..730f84b 100644 --- a/src/dft/ntt.rs +++ b/src/dft/ntt.rs @@ -1,5 +1,5 @@ use crate::modulus::montgomery::Montgomery; -use crate::modulus::shoup::Shoup; +use crate::modulus::barrett::Barrett; use crate::modulus::prime::Prime; use crate::modulus::ReduceOnce; use crate::modulus::WordOps; @@ -9,8 +9,9 @@ use itertools::izip; pub struct Table{ prime:Prime, - psi_forward_rev:Vec>, - psi_backward_rev: Vec>, + psi: O, + psi_forward_rev:Vec>, + psi_backward_rev: Vec>, q:O, two_q:O, four_q:O, @@ -26,11 +27,11 @@ impl Table< u64> { let psi_mont: Montgomery = prime.montgomery.prepare::(psi); let psi_inv_mont: Montgomery = prime.montgomery.pow(psi_mont, prime.phi-1); - let mut psi_forward_rev: Vec> = vec![Shoup(0, 0); (nth_root >> 1) as usize]; - let mut psi_backward_rev: Vec> = vec![Shoup(0, 0); (nth_root >> 1) as usize]; + let mut psi_forward_rev: Vec> = vec![Barrett(0, 0); (nth_root >> 1) as usize]; + let mut psi_backward_rev: Vec> = vec![Barrett(0, 0); (nth_root >> 1) as usize]; - psi_forward_rev[0] = prime.shoup.prepare(1); - psi_backward_rev[0] = prime.shoup.prepare(1); + psi_forward_rev[0] = prime.barrett.prepare(1); + psi_backward_rev[0] = prime.barrett.prepare(1); let log_nth_root_half: u32 = (nth_root>>1).log2() as _; @@ -44,14 +45,15 
@@ impl Table< u64> { prime.montgomery.mul_external_assign::(psi_mont, &mut powers_forward); prime.montgomery.mul_external_assign::(psi_inv_mont, &mut powers_backward); - psi_forward_rev[i_rev] = prime.shoup.prepare(powers_forward); - psi_backward_rev[i_rev] = prime.shoup.prepare(powers_backward); + psi_forward_rev[i_rev] = prime.barrett.prepare(powers_forward); + psi_backward_rev[i_rev] = prime.barrett.prepare(powers_backward); } let q: u64 = prime.q(); Self{ prime: prime, + psi:psi, psi_forward_rev: psi_forward_rev, psi_backward_rev: psi_backward_rev, q:q, @@ -59,65 +61,60 @@ impl Table< u64> { four_q:q<<2, } } - - // Returns n^-1 mod q in Montgomery. - fn inv(&self, n:u64) -> Montgomery{ - self.prime.montgomery.pow(self.prime.montgomery.prepare::(n), self.prime.phi-1) - } } impl DFT for Table{ + fn forward_inplace(&self, a: &mut [u64]){ - self.forward_inplace(a) + self.forward_inplace::(a) } fn forward_inplace_lazy(&self, a: &mut [u64]){ - self.forward_inplace_lazy(a) + self.forward_inplace::(a) } fn backward_inplace(&self, a: &mut [u64]){ - self.backward_inplace(a) + self.backward_inplace::(a) } fn backward_inplace_lazy(&self, a: &mut [u64]){ - self.backward_inplace_lazy(a) + self.backward_inplace::(a) } } impl Table{ - pub fn forward_inplace_lazy(&self, a: &mut [u64]){ - self.forward_inplace_core::(a); + pub fn forward_inplace(&self, a: &mut [u64]){ + self.forward_inplace_core::(a); } - pub fn forward_inplace(&self, a: &mut [u64]){ - self.forward_inplace_core::(a); - } - - pub fn forward_inplace_core(&self, a: &mut [u64]) { + pub fn forward_inplace_core(&self, a: &mut [u64]) { let n: usize = a.len(); assert!(n & n-1 == 0, "invalid x.len()= {} must be a power of two", n); let log_n: u32 = usize::BITS - ((n as usize)-1).leading_zeros(); - for layer in 0..log_n { + let start: u32 = SKIPSTART as u32; + let end: u32 = log_n - (SKIPEND as u32); + + for layer in start..end { let (m, size) = (1 << layer, 1 << (log_n - layer - 1)); let t: usize = 2*size; if layer == log_n - 1 { if LAZY{ izip!(a.chunks_exact_mut(t), &self.psi_forward_rev[m..]).for_each(|(a, psi)| { let (a, b) = a.split_at_mut(size); - self.dit::(&mut a[0], &mut b[0], *psi); + self.dit_inplace::(&mut a[0], &mut b[0], *psi); debug_assert!(a[0] < self.two_q, "forward_inplace_core:: output {} > {} (2q-1)", a[0], self.two_q-1); debug_assert!(b[0] < self.two_q, "forward_inplace_core:: output {} > {} (2q-1)", b[0], self.two_q-1); }); }else{ izip!(a.chunks_exact_mut(t), &self.psi_forward_rev[m..]).for_each(|(a, psi)| { let (a, b) = a.split_at_mut(size); - self.dit::(&mut a[0], &mut b[0], *psi); - self.prime.shoup.reduce_assign(&mut a[0]); - self.prime.shoup.reduce_assign(&mut b[0]); + self.dit_inplace::(&mut a[0], &mut b[0], *psi); + self.prime.barrett.reduce_assign(&mut a[0]); + self.prime.barrett.reduce_assign(&mut b[0]); debug_assert!(a[0] < self.q, "forward_inplace_core:: output {} > {} (q-1)", a[0], self.q-1); debug_assert!(b[0] < self.q, "forward_inplace_core:: output {} > {} (q-1)", b[0], self.q-1); }); @@ -127,31 +124,31 @@ impl Table{ izip!(a.chunks_exact_mut(t), &self.psi_forward_rev[m..]).for_each(|(a, psi)| { let (a, b) = a.split_at_mut(size); izip!(a.chunks_exact_mut(8), b.chunks_exact_mut(8)).for_each(|(a, b)| { - self.dit::(&mut a[0], &mut b[0], *psi); - self.dit::(&mut a[1], &mut b[1], *psi); - self.dit::(&mut a[2], &mut b[2], *psi); - self.dit::(&mut a[3], &mut b[3], *psi); - self.dit::(&mut a[4], &mut b[4], *psi); - self.dit::(&mut a[5], &mut b[5], *psi); - self.dit::(&mut a[6], &mut b[6], *psi); - self.dit::(&mut 
a[7], &mut b[7], *psi); + self.dit_inplace::(&mut a[0], &mut b[0], *psi); + self.dit_inplace::(&mut a[1], &mut b[1], *psi); + self.dit_inplace::(&mut a[2], &mut b[2], *psi); + self.dit_inplace::(&mut a[3], &mut b[3], *psi); + self.dit_inplace::(&mut a[4], &mut b[4], *psi); + self.dit_inplace::(&mut a[5], &mut b[5], *psi); + self.dit_inplace::(&mut a[6], &mut b[6], *psi); + self.dit_inplace::(&mut a[7], &mut b[7], *psi); }); }); }else{ izip!(a.chunks_exact_mut(t), &self.psi_forward_rev[m..]).for_each(|(a, psi)| { let (a, b) = a.split_at_mut(size); - izip!(a, b).for_each(|(a, b)| self.dit::(a, b, *psi)); + izip!(a, b).for_each(|(a, b)| self.dit_inplace::(a, b, *psi)); }); } } } #[inline(always)] - fn dit(&self, a: &mut u64, b: &mut u64, t: Shoup) { + fn dit_inplace(&self, a: &mut u64, b: &mut u64, t: Barrett) { debug_assert!(*a < self.four_q, "a:{} q:{}", a, self.four_q); debug_assert!(*b < self.four_q, "b:{} q:{}", b, self.four_q); a.reduce_once_assign(self.two_q); - let bt: u64 = self.prime.shoup.mul_external_lazy(t, *b); + let bt: u64 = self.prime.barrett.mul_external_lazy(t, *b); *b = a.wrapping_add(self.two_q-bt); *a = a.wrapping_add(bt); if !LAZY { @@ -160,39 +157,38 @@ impl Table{ } } - pub fn backward_inplace_lazy(&self, a: &mut [u64]){ - self.backward_inplace_core::(a); + pub fn backward_inplace(&self, a: &mut [u64]){ + self.backward_inplace_core::(a); } - pub fn backward_inplace(&self, a: &mut [u64]){ - self.backward_inplace_core::(a); - } - - pub fn backward_inplace_core(&self, a: &mut [u64]) { + pub fn backward_inplace_core(&self, a: &mut [u64]) { let n: usize = a.len(); assert!(n & n-1 == 0, "invalid x.len()= {} must be a power of two", n); let log_n = usize::BITS - ((n as usize)-1).leading_zeros(); - for layer in (0..log_n).rev() { + let start: u32 = SKIPEND as u32; + let end: u32 = log_n - (SKIPSTART as u32); + + for layer in (start..end).rev() { let (m, size) = (1 << layer, 1 << (log_n - layer - 1)); let t: usize = 2*size; if layer == 0 { - let n_inv: Shoup = self.prime.shoup.prepare(self.prime.inv(n as u64)); - let psi: Shoup = self.prime.shoup.prepare(self.prime.shoup.mul_external(n_inv, self.psi_backward_rev[1].0)); + let n_inv: Barrett = self.prime.barrett.prepare(self.prime.inv(n as u64)); + let psi: Barrett = self.prime.barrett.prepare(self.prime.barrett.mul_external(n_inv, self.psi_backward_rev[1].0)); izip!(a.chunks_exact_mut(2 * size)).for_each( |a| { let (a, b) = a.split_at_mut(size); izip!(a.chunks_exact_mut(8), b.chunks_exact_mut(8)).for_each(|(a, b)| { - self.dif_last::(&mut a[0], &mut b[0], psi, n_inv); - self.dif_last::(&mut a[1], &mut b[1], psi, n_inv); - self.dif_last::(&mut a[2], &mut b[2], psi, n_inv); - self.dif_last::(&mut a[3], &mut b[3], psi, n_inv); - self.dif_last::(&mut a[4], &mut b[4], psi, n_inv); - self.dif_last::(&mut a[5], &mut b[5], psi, n_inv); - self.dif_last::(&mut a[6], &mut b[6], psi, n_inv); - self.dif_last::(&mut a[7], &mut b[7], psi, n_inv); + self.dif_last_inplace::(&mut a[0], &mut b[0], psi, n_inv); + self.dif_last_inplace::(&mut a[1], &mut b[1], psi, n_inv); + self.dif_last_inplace::(&mut a[2], &mut b[2], psi, n_inv); + self.dif_last_inplace::(&mut a[3], &mut b[3], psi, n_inv); + self.dif_last_inplace::(&mut a[4], &mut b[4], psi, n_inv); + self.dif_last_inplace::(&mut a[5], &mut b[5], psi, n_inv); + self.dif_last_inplace::(&mut a[6], &mut b[6], psi, n_inv); + self.dif_last_inplace::(&mut a[7], &mut b[7], psi, n_inv); }); }, ); @@ -202,14 +198,14 @@ impl Table{ |(a, psi)| { let (a, b) = a.split_at_mut(size); 
izip!(a.chunks_exact_mut(8), b.chunks_exact_mut(8)).for_each(|(a, b)| { - self.dif::(&mut a[0], &mut b[0], *psi); - self.dif::(&mut a[1], &mut b[1], *psi); - self.dif::(&mut a[2], &mut b[2], *psi); - self.dif::(&mut a[3], &mut b[3], *psi); - self.dif::(&mut a[4], &mut b[4], *psi); - self.dif::(&mut a[5], &mut b[5], *psi); - self.dif::(&mut a[6], &mut b[6], *psi); - self.dif::(&mut a[7], &mut b[7], *psi); + self.dif_inplace::(&mut a[0], &mut b[0], *psi); + self.dif_inplace::(&mut a[1], &mut b[1], *psi); + self.dif_inplace::(&mut a[2], &mut b[2], *psi); + self.dif_inplace::(&mut a[3], &mut b[3], *psi); + self.dif_inplace::(&mut a[4], &mut b[4], *psi); + self.dif_inplace::(&mut a[5], &mut b[5], *psi); + self.dif_inplace::(&mut a[6], &mut b[6], *psi); + self.dif_inplace::(&mut a[7], &mut b[7], *psi); }); }, ); @@ -217,7 +213,7 @@ impl Table{ izip!(a.chunks_exact_mut(2 * size), &self.psi_backward_rev[m..]).for_each( |(a, psi)| { let (a, b) = a.split_at_mut(size); - izip!(a, b).for_each(|(a, b)| self.dif::(a, b, *psi)); + izip!(a, b).for_each(|(a, b)| self.dif_inplace::(a, b, *psi)); }, ); } @@ -225,10 +221,10 @@ impl Table{ } #[inline(always)] - fn dif(&self, a: &mut u64, b: &mut u64, t: Shoup) { - debug_assert!(*a < self.two_q); - debug_assert!(*b < self.two_q); - let d: u64 = self.prime.shoup.mul_external_lazy(t, *a + self.two_q - *b); + fn dif_inplace(&self, a: &mut u64, b: &mut u64, t: Barrett) { + debug_assert!(*a < self.two_q, "a:{} q:{}", a, self.four_q); + debug_assert!(*b < self.two_q, "b:{} q:{}", b, self.four_q); + let d: u64 = self.prime.barrett.mul_external_lazy(t, *a + self.two_q - *b); *a = a.wrapping_add(*b); a.reduce_once_assign(self.two_q); *b = d; @@ -238,17 +234,41 @@ impl Table{ } } - fn dif_last(&self, a: &mut u64, b: &mut u64, psi: Shoup, n_inv: Shoup){ + fn dif_last_inplace(&self, a: &mut u64, b: &mut u64, psi: Barrett, n_inv: Barrett){ debug_assert!(*a < self.two_q); debug_assert!(*b < self.two_q); if LAZY{ - let d: u64 = self.prime.shoup.mul_external_lazy(psi, *a + self.two_q - *b); - *a = self.prime.shoup.mul_external_lazy(n_inv, *a + *b); + let d: u64 = self.prime.barrett.mul_external_lazy(psi, *a + self.two_q - *b); + *a = self.prime.barrett.mul_external_lazy(n_inv, *a + *b); *b = d; }else{ - let d: u64 = self.prime.shoup.mul_external(psi, *a + self.two_q - *b); - *a = self.prime.shoup.mul_external(n_inv, *a + *b); + let d: u64 = self.prime.barrett.mul_external(psi, *a + self.two_q - *b); + *a = self.prime.barrett.mul_external(n_inv, *a + *b); *b = d; } } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_ntt() { + let q_base: u64 = 0x800000000004001; + let q_power: usize = 1; + let prime_instance: Prime = Prime::::new(q_base, q_power); + let n: u64 = 32; + let two_nth_root: u64 = n<<1; + let ntt_table: Table = Table::::new(prime_instance, two_nth_root); + let mut a: Vec = vec![0; n as usize]; + for i in 0..a.len(){ + a[i] = i as u64; + } + + let b: Vec = a.clone(); + ntt_table.forward_inplace::(&mut a); + ntt_table.backward_inplace::(&mut a); + assert!(a == b); + } +} \ No newline at end of file diff --git a/src/modulus.rs b/src/modulus.rs index 3893656..3f37d0c 100644 --- a/src/modulus.rs +++ b/src/modulus.rs @@ -1,7 +1,6 @@ pub mod prime; pub mod barrett; pub mod montgomery; -pub mod shoup; pub mod impl_u64; pub type REDUCEMOD = u8; @@ -55,75 +54,64 @@ pub trait ReduceOnce{ fn reduce_once(&self, q:O) -> O; } -impl ReduceOnce for u64{ - #[inline(always)] - fn reduce_once_constant_time_assign(&mut self, q: u64){ - debug_assert!(q < 
0x8000000000000000, "2q >= 2^64"); - *self -= (q.wrapping_sub(*self)>>63)*q; - } - - #[inline(always)] - fn reduce_once_constant_time(&self, q:u64) -> u64{ - debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); - self - (q.wrapping_sub(*self)>>63)*q - } +pub trait WordOperations{ - #[inline(always)] - fn reduce_once_assign(&mut self, q: u64){ - debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); - *self = *self.min(&mut self.wrapping_sub(q)) - } + // Applies a parameterized modular reduction. + fn word_reduce_assign(&self, x: &mut O); - #[inline(always)] - fn reduce_once(&self, q:u64) -> u64{ - debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); - *self.min(&mut self.wrapping_sub(q)) - } -} - - -pub trait Operations{ // Assigns a + b to c. - fn add_binary_assign(&self, a: &O, b:&O, c: &mut O); + fn word_add_binary_assign(&self, a: &O, b:&O, c: &mut O); // Assigns a + b to b. - fn add_unary_assign(&self, a: &O, b: &mut O); - - // Assigns a[i] + b[i] to c[i] - fn add_vec_binary_assign(&self, a: &[O], b:&[O], c: &mut [O]); - - // Assigns a[i] + b[i] to b[i] - fn add_vec_unary_assign(&self, a: &[O], b: &mut [O]); + fn word_add_unary_assign(&self, a: &O, b: &mut O); // Assigns a - b to c. - fn sub_binary_assign(&self, a: &O, b:&O, c: &mut O); + fn word_sub_binary_assign(&self, a: &O, b:&O, c: &mut O); // Assigns b - a to b. - fn sub_unary_assign(&self, a: &O, b: &mut O); - - // Assigns a[i] - b[i] to c[i] - fn sub_vec_binary_assign(&self, a: &[O], b:&[O], c: &mut [O]); - - // Assigns a[i] - b[i] to b[i] - fn sub_vec_unary_assign(&self, a: &[O], b: &mut [O]); + fn word_sub_unary_assign(&self, a: &O, b: &mut O); // Assigns -a to a. - fn neg_assign(&self, a:&mut O); + fn word_neg_assign(&self, a:&mut O); - // Assigns -a[i] to a[i]. - fn neg_vec_assign(&self, a: &mut [O]); + // Assigns a * 2^64 to b. + fn word_prepare_montgomery_assign(&self, a: &O, b: &mut montgomery::Montgomery); // Assigns a * b to c. - fn mul_montgomery_external_binary_assign(&self, a:&montgomery::Montgomery, b:&O, c: &mut O); + fn word_mul_montgomery_external_binary_assign(&self, a:&montgomery::Montgomery, b:&O, c: &mut O); // Assigns a * b to b. - fn mul_montgomery_external_unary_assign(&self, a:&montgomery::Montgomery, b:&mut O); + fn word_mul_montgomery_external_unary_assign(&self, a:&montgomery::Montgomery, b:&mut O); +} + +pub trait VecOperations{ + + // Applies a parameterized modular reduction. + fn vec_reduce_assign(&self, x: &mut [O]); + + // Assigns a[i] + b[i] to c[i] + fn vec_add_binary_assign(&self, a: &[O], b:&[O], c: &mut [O]); + + // Assigns a[i] + b[i] to b[i] + fn vec_add_unary_assign(&self, a: &[O], b: &mut [O]); + + // Assigns a[i] - b[i] to c[i] + fn vec_sub_binary_assign(&self, a: &[O], b:&[O], c: &mut [O]); + + // Assigns a[i] - b[i] to b[i] + fn vec_sub_unary_assign(&self, a: &[O], b: &mut [O]); + + // Assigns -a[i] to a[i]. + fn vec_neg_assign(&self, a: &mut [O]); + + // Assigns a * 2^64 to b. + fn vec_prepare_montgomery_assign(&self, a: &[O], b: &mut [montgomery::Montgomery]); // Assigns a[i] * b[i] to c[i]. - fn mul_vec_montgomery_external_binary_assign(&self, a:&[montgomery::Montgomery], b:&[O], c: &mut [O]); + fn vec_mul_montgomery_external_binary_assign(&self, a:&[montgomery::Montgomery], b:&[O], c: &mut [O]); // Assigns a[i] * b[i] to b[i]. 
- fn mul_vec_montgomery_external_unary_assign(&self, a:&[montgomery::Montgomery], b:&mut [O]); + fn vec_mul_montgomery_external_unary_assign(&self, a:&[montgomery::Montgomery], b:&mut [O]); } diff --git a/src/modulus/barrett.rs b/src/modulus/barrett.rs index 9c63c49..0499fbb 100644 --- a/src/modulus/barrett.rs +++ b/src/modulus/barrett.rs @@ -1,8 +1,25 @@ +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub struct Barrett(pub O, pub O); + +impl Barrett { + + #[inline(always)] + pub fn value(&self) -> &O { + &self.0 + } + + #[inline(always)] + pub fn quotient(&self) -> &O { + &self.1 + } +} + #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct BarrettPrecomp{ pub q: O, pub lo:O, pub hi:O, + pub one: Barrett, } impl BarrettPrecomp{ diff --git a/src/modulus/impl_u64/barrett.rs b/src/modulus/impl_u64/barrett.rs index 976cea3..0641009 100644 --- a/src/modulus/impl_u64/barrett.rs +++ b/src/modulus/impl_u64/barrett.rs @@ -1,15 +1,30 @@ -use crate::modulus::barrett::BarrettPrecomp; +use crate::modulus::barrett::{Barrett, BarrettPrecomp}; use crate::modulus::ReduceOnce; use num_bigint::BigUint; use num_traits::cast::ToPrimitive; impl BarrettPrecomp{ + pub fn new(q: u64) -> BarrettPrecomp { let big_r: BigUint = (BigUint::from(1 as usize)<<((u64::BITS<<1) as usize)) / BigUint::from(q); let lo: u64 = (&big_r & BigUint::from(u64::MAX)).to_u64().unwrap(); let hi: u64 = (big_r >> u64::BITS).to_u64().unwrap(); - Self{q, lo, hi} + let mut precomp: BarrettPrecomp = Self{q, lo, hi, one:Barrett(0,0)}; + precomp.one = precomp.prepare(1); + precomp + } + + #[inline(always)] + pub fn one(&self) -> Barrett { + self.one + } + + #[inline(always)] + pub fn prepare(&self, v: u64) -> Barrett { + debug_assert!(v < self.q); + let quotient: u64 = (((v as u128) << 64) / self.q as u128) as _; + Barrett(v, quotient) } /// Returns lhs mod q. 
@@ -40,4 +55,30 @@ impl BarrettPrecomp{ let (_, mhi) = lhs.widening_mul(self.hi); *lhs = *lhs - mhi.wrapping_mul(self.q) } + + #[inline(always)] + pub fn mul_external(&self, lhs: Barrett, rhs: u64) -> u64 { + let mut r: u64 = self.mul_external_lazy(lhs, rhs); + r.reduce_once_assign(self.q); + r + } + + #[inline(always)] + pub fn mul_external_assign(&self, lhs: Barrett, rhs: &mut u64){ + self.mul_external_lazy_assign(lhs, rhs); + rhs.reduce_once_assign(self.q); + } + + #[inline(always)] + pub fn mul_external_lazy(&self, lhs: Barrett, rhs: u64) -> u64 { + let mut r: u64 = rhs; + self.mul_external_lazy_assign(lhs, &mut r); + r + } + + #[inline(always)] + pub fn mul_external_lazy_assign(&self, lhs: Barrett, rhs: &mut u64){ + let t: u64 = ((*lhs.quotient() as u128 * *rhs as u128) >> 64) as _; + *rhs = (rhs.wrapping_mul(*lhs.value())).wrapping_sub(self.q.wrapping_mul(t)); + } } \ No newline at end of file diff --git a/src/modulus/impl_u64/mod.rs b/src/modulus/impl_u64/mod.rs index 4394599..6d8942a 100644 --- a/src/modulus/impl_u64/mod.rs +++ b/src/modulus/impl_u64/mod.rs @@ -1,5 +1,32 @@ pub mod prime; pub mod barrett; pub mod montgomery; -pub mod shoup; -pub mod operations; \ No newline at end of file +pub mod operations; + +use crate::modulus::ReduceOnce; + +impl ReduceOnce for u64{ + #[inline(always)] + fn reduce_once_constant_time_assign(&mut self, q: u64){ + debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); + *self -= (q.wrapping_sub(*self)>>63)*q; + } + + #[inline(always)] + fn reduce_once_constant_time(&self, q:u64) -> u64{ + debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); + self - (q.wrapping_sub(*self)>>63)*q + } + + #[inline(always)] + fn reduce_once_assign(&mut self, q: u64){ + debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); + *self = *self.min(&mut self.wrapping_sub(q)) + } + + #[inline(always)] + fn reduce_once(&self, q:u64) -> u64{ + debug_assert!(q < 0x8000000000000000, "2q >= 2^64"); + *self.min(&mut self.wrapping_sub(q)) + } +} \ No newline at end of file diff --git a/src/modulus/impl_u64/montgomery.rs b/src/modulus/impl_u64/montgomery.rs index 3522f32..cd03a0a 100644 --- a/src/modulus/impl_u64/montgomery.rs +++ b/src/modulus/impl_u64/montgomery.rs @@ -26,12 +26,12 @@ impl MontgomeryPrecomp{ four_q: q<<2, barrett: BarrettPrecomp::new(q), q_inv: q_inv, - one: Montgomery(0), - minus_one: Montgomery(0), + one: 0, + minus_one:0, }; precomp.one = precomp.prepare::(1); - precomp.minus_one = Montgomery(q-precomp.one.value()); + precomp.minus_one = q-precomp.one; precomp } @@ -71,7 +71,7 @@ impl MontgomeryPrecomp{ TWICE=>{x.reduce_once_assign(self.two_q)}, FOURTIMES =>{x.reduce_once_assign(self.four_q)}, BARRETT =>{self.barrett.reduce_assign(x)}, - BARRETTLAZY =>{self.barrett.reduce_assign(x)}, + BARRETTLAZY =>{self.barrett.reduce_lazy_assign(x)}, _ => unreachable!("invalid REDUCE argument") } } @@ -79,7 +79,7 @@ impl MontgomeryPrecomp{ /// Returns lhs * 2^64 mod q as a Montgomery. 
#[inline(always)] pub fn prepare(&self, lhs: u64) -> Montgomery{ - let mut rhs = Montgomery(0); + let mut rhs: u64 = 0; self.prepare_assign::(lhs, &mut rhs); rhs } @@ -88,8 +88,8 @@ impl MontgomeryPrecomp{ #[inline(always)] pub fn prepare_assign(&self, lhs: u64, rhs: &mut Montgomery){ let (_, mhi) = lhs.widening_mul(*self.barrett.value_lo()); - *rhs = Montgomery((lhs.wrapping_mul(*self.barrett.value_hi()).wrapping_add(mhi)).wrapping_mul(self.q).wrapping_neg()); - self.reduce_assign::(rhs.value_mut()); + *rhs = (lhs.wrapping_mul(*self.barrett.value_hi()).wrapping_add(mhi)).wrapping_mul(self.q).wrapping_neg(); + self.reduce_assign::(rhs); } /// Returns lhs * (2^64)^-1 mod q as a u64. @@ -103,7 +103,7 @@ impl MontgomeryPrecomp{ /// Assigns lhs * (2^64)^-1 mod q to rhs. #[inline(always)] pub fn unprepare_assign(&self, lhs: Montgomery, rhs: &mut u64){ - let (_, r) = self.q.widening_mul(lhs.value().wrapping_mul(self.q_inv)); + let (_, r) = self.q.widening_mul(lhs.wrapping_mul(self.q_inv)); *rhs = self.reduce::(self.q.wrapping_sub(r)); } @@ -118,7 +118,7 @@ impl MontgomeryPrecomp{ /// Assigns lhs * rhs * (2^{64})^-1 mod q to rhs. #[inline(always)] pub fn mul_external_assign(&self, lhs: Montgomery, rhs: &mut u64){ - let (mlo, mhi) = lhs.value().widening_mul(*rhs); + let (mlo, mhi) = lhs.widening_mul(*rhs); let (_, hhi) = self.q.widening_mul(mlo.wrapping_mul(self.q_inv)); *rhs = self.reduce::(mhi.wrapping_sub(hhi).wrapping_add(self.q)); } @@ -126,31 +126,31 @@ impl MontgomeryPrecomp{ /// Returns lhs * rhs * (2^{64})^-1 mod q in range [0, 2q-1]. #[inline(always)] pub fn mul_internal(&self, lhs: Montgomery, rhs: Montgomery) -> Montgomery{ - Montgomery(self.mul_external::(lhs, *rhs.value())) + self.mul_external::(lhs, rhs) } /// Assigns lhs * rhs * (2^{64})^-1 mod q to rhs. #[inline(always)] pub fn mul_internal_assign(&self, lhs: Montgomery, rhs: &mut Montgomery){ - self.mul_external_assign::(lhs, rhs.value_mut()); + self.mul_external_assign::(lhs, rhs); } #[inline(always)] pub fn add_internal(&self, lhs: Montgomery, rhs: Montgomery) -> Montgomery{ - Montgomery(self.barrett.reduce(rhs.value() + lhs.value())) + self.barrett.reduce(rhs + lhs) } /// Assigns lhs + rhs to rhs. #[inline(always)] pub fn add_internal_lazy_assign(&self, lhs: Montgomery, rhs: &mut Montgomery){ - *rhs.value_mut() += lhs.value() + *rhs += lhs } /// Assigns lhs + rhs - q if (lhs + rhs) >= q to rhs. #[inline(always)] pub fn add_internal_reduce_once_assign(&self, lhs: Montgomery, rhs: &mut Montgomery){ self.add_internal_lazy_assign(lhs, rhs); - rhs.value_mut().reduce_once_assign(self.q); + rhs.reduce_once_assign(self.q); } /// Returns lhs mod q in range [0, 2q-1]. @@ -173,30 +173,11 @@ impl MontgomeryPrecomp{ i >>= 1; } - y.value_mut().reduce_once_assign(self.q); + y.reduce_once_assign(self.q); y } } -/// Returns x^exponent mod q. -/// This function internally instantiate a new MontgomeryPrecomp -/// To be used when called only a few times and if there -/// is no Prime instantiated with q. 
-fn pow(x:u64, exponent:u64, q:u64) -> u64{ - let montgomery: MontgomeryPrecomp = MontgomeryPrecomp::::new(q); - let mut y_mont: Montgomery = montgomery.one(); - let mut x_mont: Montgomery = montgomery.prepare::(x); - while exponent > 0{ - if exponent & 1 == 1{ - montgomery.mul_internal_assign::(x_mont, &mut y_mont); - } - - montgomery.mul_internal_assign::(x_mont, &mut x_mont); - } - - montgomery.unprepare::(y_mont) -} - #[cfg(test)] mod tests { use crate::modulus::montgomery; diff --git a/src/modulus/impl_u64/operations.rs b/src/modulus/impl_u64/operations.rs index 9d2c4e0..a91d039 100644 --- a/src/modulus/impl_u64/operations.rs +++ b/src/modulus/impl_u64/operations.rs @@ -1,84 +1,121 @@ -use crate::modulus::Operations; +use crate::modulus::{WordOperations, VecOperations}; use crate::modulus::prime::Prime; use crate::modulus::ReduceOnce; use crate::modulus::montgomery::Montgomery; -use crate::modulus::{REDUCEMOD, NONE, ONCE, BARRETT, BARRETTLAZY}; +use crate::modulus::REDUCEMOD; use crate::{apply_unary, apply_binary, apply_ternary}; use itertools::izip; -impl Operations for Prime{ +impl WordOperations for Prime{ + + /// Applies a modular reduction on x based on REDUCE: + /// - LAZY: no modular reduction. + /// - ONCE: subtracts q if x >= q. + /// - TWO: subtracts 2q if x >= 2q. + /// - FOUR: subtracts 4q if x >= 4q. + /// - BARRETT: maps x to x mod q using Barrett reduction. + /// - BARRETTLAZY: maps x to x mod q using Barrett reduction with values in [0, 2q-1]. + #[inline(always)] + fn word_reduce_assign(&self, x: &mut u64){ + self.montgomery.reduce_assign::(x); + } #[inline(always)] - fn add_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){ + fn word_add_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){ *c = a.wrapping_add(*b); - self.montgomery.reduce_assign::(c); + self.word_reduce_assign::(c); } #[inline(always)] - fn add_unary_assign(&self, a: &u64, b: &mut u64){ + fn word_add_unary_assign(&self, a: &u64, b: &mut u64){ *b = a.wrapping_add(*b); - self.montgomery.reduce_assign::(b); + self.word_reduce_assign::(b); } #[inline(always)] - fn add_vec_binary_assign(&self, a: &[u64], b:&[u64], c:&mut [u64]){ - apply_ternary!(self, Self::add_binary_assign::, a, b, c, CHUNK); - } - - #[inline(always)] - fn add_vec_unary_assign(&self, a: &[u64], b:&mut [u64]){ - apply_binary!(self, Self::add_unary_assign::, a, b, CHUNK); - } - - #[inline(always)] - fn sub_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){ + fn word_sub_binary_assign(&self, a: &u64, b: &u64, c: &mut u64){ *c = a.wrapping_add(self.q.wrapping_sub(*b)).reduce_once(self.q); } #[inline(always)] - fn sub_unary_assign(&self, a: &u64, b: &mut u64){ + fn word_sub_unary_assign(&self, a: &u64, b: &mut u64){ *b = a.wrapping_add(self.q.wrapping_sub(*b)).reduce_once(self.q); } #[inline(always)] - fn sub_vec_binary_assign(&self, a: &[u64], b:&[u64], c:&mut [u64]){ - apply_ternary!(self, Self::sub_binary_assign::, a, b, c, CHUNK); - } - - #[inline(always)] - fn sub_vec_unary_assign(&self, a: &[u64], b:&mut [u64]){ - apply_binary!(self, Self::sub_unary_assign::, a, b, CHUNK); - } - - #[inline(always)] - fn neg_assign(&self, a: &mut u64){ + fn word_neg_assign(&self, a: &mut u64){ *a = self.q.wrapping_sub(*a); - self.montgomery.reduce_assign::(a) + self.word_reduce_assign::(a) } #[inline(always)] - fn neg_vec_assign(&self, a: &mut [u64]){ - apply_unary!(self, Self::neg_assign::, a, CHUNK); + fn word_prepare_montgomery_assign(&self, a: &u64, b: &mut Montgomery){ + self.montgomery.prepare_assign::(*a, b); } #[inline(always)] - fn 
mul_montgomery_external_binary_assign(&self, a:& Montgomery, b:&u64, c: &mut u64){ + fn word_mul_montgomery_external_binary_assign(&self, a: &Montgomery, b:&u64, c: &mut u64){ *c = self.montgomery.mul_external::(*a, *b); } #[inline(always)] - fn mul_montgomery_external_unary_assign(&self, lhs:&Montgomery, rhs:&mut u64){ - *rhs = self.montgomery.mul_external::(*lhs, *rhs); - } - - #[inline(always)] - fn mul_vec_montgomery_external_binary_assign(&self, a:& [Montgomery], b:&[u64], c: &mut [u64]){ - apply_ternary!(self, Self::mul_montgomery_external_binary_assign::, a, b, c, CHUNK); - } - - #[inline(always)] - fn mul_vec_montgomery_external_unary_assign(&self, a:&[Montgomery], b:&mut [u64]){ - apply_binary!(self, Self::mul_montgomery_external_unary_assign::, a, b, CHUNK); + fn word_mul_montgomery_external_unary_assign(&self, lhs:&Montgomery, rhs:&mut u64){ + self.montgomery.mul_external_assign::(*lhs, rhs); + } +} + +impl VecOperations for Prime{ + + /// Applies a modular reduction on x based on REDUCE: + /// - LAZY: no modular reduction. + /// - ONCE: subtracts q if x >= q. + /// - TWO: subtracts 2q if x >= 2q. + /// - FOUR: subtracts 4q if x >= 4q. + /// - BARRETT: maps x to x mod q using Barrett reduction. + /// - BARRETTLAZY: maps x to x mod q using Barrett reduction with values in [0, 2q-1]. + #[inline(always)] + fn vec_reduce_assign(&self, x: &mut [u64]){ + apply_unary!(self, Self::word_reduce_assign::, x, CHUNK); + } + + #[inline(always)] + fn vec_add_binary_assign(&self, a: &[u64], b:&[u64], c:&mut [u64]){ + apply_ternary!(self, Self::word_add_binary_assign::, a, b, c, CHUNK); + } + + #[inline(always)] + fn vec_add_unary_assign(&self, a: &[u64], b:&mut [u64]){ + apply_binary!(self, Self::word_add_unary_assign::, a, b, CHUNK); + } + + #[inline(always)] + fn vec_sub_binary_assign(&self, a: &[u64], b:&[u64], c:&mut [u64]){ + apply_ternary!(self, Self::word_sub_binary_assign::, a, b, c, CHUNK); + } + + #[inline(always)] + fn vec_sub_unary_assign(&self, a: &[u64], b:&mut [u64]){ + apply_binary!(self, Self::word_sub_unary_assign::, a, b, CHUNK); + } + + #[inline(always)] + fn vec_neg_assign(&self, a: &mut [u64]){ + apply_unary!(self, Self::word_neg_assign::, a, CHUNK); + } + + #[inline(always)] + fn vec_prepare_montgomery_assign(&self, a: &[u64], b: &mut [Montgomery]){ + apply_binary!(self, Self::word_prepare_montgomery_assign::, a, b, CHUNK); + } + + #[inline(always)] + fn vec_mul_montgomery_external_binary_assign(&self, a:& [Montgomery], b:&[u64], c: &mut [u64]){ + apply_ternary!(self, Self::word_mul_montgomery_external_binary_assign::, a, b, c, CHUNK); + } + + #[inline(always)] + fn vec_mul_montgomery_external_unary_assign(&self, a:& [Montgomery], b:&mut [u64]){ + apply_binary!(self, Self::word_mul_montgomery_external_unary_assign::, a, b, CHUNK); } } diff --git a/src/modulus/impl_u64/prime.rs b/src/modulus/impl_u64/prime.rs index c7100f4..13b27eb 100644 --- a/src/modulus/impl_u64/prime.rs +++ b/src/modulus/impl_u64/prime.rs @@ -1,6 +1,6 @@ use crate::modulus::prime::Prime; use crate::modulus::montgomery::{Montgomery, MontgomeryPrecomp}; -use crate::modulus::shoup::{ShoupPrecomp}; +use crate::modulus::barrett::BarrettPrecomp; use crate::modulus::ONCE; use primality_test::is_prime; use prime_factorization::Factorization; @@ -34,11 +34,13 @@ impl Prime{ let mut prime: Prime = Self { q:q, + two_q:q<<1, + four_q:q<<2, q_base:q_base, q_power:q_power, factors: Vec::new(), montgomery:MontgomeryPrecomp::new(q), - shoup:ShoupPrecomp::new(q), + barrett:BarrettPrecomp::new(q), phi:phi, }; @@ -101,7 
+103,7 @@ impl Prime{ for &factor in &self.factors{ - if Pow(candidate, (self.q_base-1)/factor, self.q_base) == 1{ + if pow(candidate, (self.q_base-1)/factor, self.q_base) == 1{ not_found = true; break } @@ -124,7 +126,7 @@ impl Prime{ let psi: u64 = self.primitive_root(); // nth primitive root mod q_base: psi_nth^(prime.q_base-1)/nth_root mod q_base - let psi_nth_q_base: u64 = Pow(psi, (self.q_base-1)/nth_root, self.q_base); + let psi_nth_q_base: u64 = pow(psi, (self.q_base-1)/nth_root, self.q_base); // lifts nth primitive root mod q_base to q = q_base^q_power let psi_nth_q: u64 = self.hensel_lift(psi_nth_q_base, nth_root); @@ -171,7 +173,7 @@ impl Prime{ /// Returns (psi + a * q_base)^{nth_root} = 1 mod q = q_base^q_power given psi^{nth_root} = 1 mod q_base. /// Panics if psi^{nth_root} != 1 mod q_base. fn hensel_lift(&self, psi: u64, nth_root: u64) -> u64{ - assert!(Pow(psi, nth_root, self.q_base)==1, "invalid argument psi: psi^nth_root = {} != 1", Pow(psi, nth_root, self.q_base)); + assert!(pow(psi, nth_root, self.q_base)==1, "invalid argument psi: psi^nth_root = {} != 1", pow(psi, nth_root, self.q_base)); let mut psi_mont: Montgomery = self.montgomery.prepare::(psi); let nth_root_mont: Montgomery = self.montgomery.prepare::(nth_root); @@ -180,7 +182,7 @@ impl Prime{ let psi_pow: Montgomery = self.montgomery.pow(psi_mont, nth_root-1); - let num: Montgomery = Montgomery(self.montgomery.one().value() + self.q - self.montgomery.mul_internal::(psi_pow, psi_mont).value()); + let num: Montgomery = self.montgomery.one() + self.q - self.montgomery.mul_internal::(psi_pow, psi_mont); let mut den: Montgomery = self.montgomery.mul_internal::(nth_root_mont, psi_pow); @@ -197,7 +199,7 @@ impl Prime{ /// This function internally instantiate a new MontgomeryPrecomp /// To be used when called only a few times and if there /// is no Prime instantiated with q. 
-pub fn Pow(x:u64, exponent:u64, q:u64) -> u64{ +pub fn pow(x:u64, exponent:u64, q:u64) -> u64{ let montgomery: MontgomeryPrecomp = MontgomeryPrecomp::::new(q); let mut y_mont: Montgomery = montgomery.one(); let mut x_mont: Montgomery = montgomery.prepare::(x); diff --git a/src/modulus/impl_u64/shoup.rs b/src/modulus/impl_u64/shoup.rs deleted file mode 100644 index 0d7241e..0000000 --- a/src/modulus/impl_u64/shoup.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::modulus::ReduceOnce; -use crate::modulus::shoup::{ShoupPrecomp, Shoup}; - -impl ShoupPrecomp{ - - pub fn new(q: u64) -> Self { - let mut precomp: ShoupPrecomp = Self{q:q, one:Shoup(0,0)}; - precomp.one = precomp.prepare(1); - precomp - } - - #[inline(always)] - pub fn one(&self) -> Shoup { - self.one - } - - #[inline(always)] - pub fn prepare(&self, v: u64) -> Shoup { - debug_assert!(v < self.q); - let quotient: u64 = (((v as u128) << 64) / self.q as u128) as _; - Shoup(v, quotient) - } - - #[inline(always)] - pub fn mul_external(&self, lhs: Shoup, rhs: u64) -> u64 { - let mut r: u64 = self.mul_external_lazy(lhs, rhs); - r.reduce_once_assign(self.q); - r - } - - #[inline(always)] - pub fn mul_external_assign(&self, lhs: Shoup, rhs: &mut u64){ - self.mul_external_lazy_assign(lhs, rhs); - rhs.reduce_once_assign(self.q); - } - - #[inline(always)] - pub fn mul_external_lazy(&self, lhs: Shoup, rhs: u64) -> u64 { - let mut r: u64 = rhs; - self.mul_external_lazy_assign(lhs, &mut r); - r - } - - #[inline(always)] - pub fn mul_external_lazy_assign(&self, lhs: Shoup, rhs: &mut u64){ - let t: u64 = ((*lhs.quotient() as u128 * *rhs as u128) >> 64) as _; - *rhs = (rhs.wrapping_mul(*lhs.value())).wrapping_sub(self.q.wrapping_mul(t)); - } - - #[inline(always)] - pub fn reduce_assign(&self, rhs: &mut u64){ - self.reduce_assign_lazy(rhs); - rhs.reduce_once_assign(self.q); - } - - #[inline(always)] - pub fn reduce_assign_lazy(&self, rhs: &mut u64){ - *rhs = rhs.wrapping_sub(self.q.wrapping_mul(((self.one.1 as u128 * *rhs as u128) >> 64) as _)) - } -} \ No newline at end of file diff --git a/src/modulus/montgomery.rs b/src/modulus/montgomery.rs index 9435015..c3b15bc 100644 --- a/src/modulus/montgomery.rs +++ b/src/modulus/montgomery.rs @@ -2,35 +2,7 @@ use crate::modulus::barrett::BarrettPrecomp; /// Montgomery is a generic struct storing /// an element in the Montgomery domain. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub struct Montgomery(pub O); - -/// Implements helper methods on the struct Montgomery. -impl Montgomery{ - - #[inline(always)] - pub fn new(lhs: O) -> Self{ - Self(lhs) - } - - #[inline(always)] - pub fn value(&self) -> &O{ - &self.0 - } - - pub fn value_mut(&mut self) -> &mut O{ - &mut self.0 - } -} - -/// Default instantiation. -impl Default for Montgomery where O:Default { - fn default() -> Self { - Self { - 0: O::default(), - } - } -} +pub type Montgomery = O; /// MontgomeryPrecomp is a generic struct storing /// precomputations for Montgomery arithmetic. 
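Since this patch collapses `Montgomery` into a bare type alias over the backing word, the external product implemented in src/modulus/impl_u64/montgomery.rs is ordinary Montgomery (REDC) multiplication on u64. Below is a minimal self-contained sketch of that arithmetic on stable Rust, using u128 products instead of the unstable `widening_mul`. The helper names `neg_inv_pow2_64`, `to_mont` and `mont_mul` are illustrative assumptions, not this crate's API; the sketch uses the negated-inverse REDC formulation, which under the crate's q < 2^63 debug_asserts yields the same residue class as the subtractive `q_inv` variant above.

/// Returns -q^{-1} mod 2^64 for odd q, via Newton iteration.
fn neg_inv_pow2_64(q: u64) -> u64 {
    debug_assert!(q & 1 == 1);
    let mut inv: u64 = q; // an odd q is its own inverse mod 2^3
    for _ in 0..5 {
        // each step doubles the number of correct low bits: 3 -> 6 -> 12 -> 24 -> 48 -> 96
        inv = inv.wrapping_mul(2u64.wrapping_sub(q.wrapping_mul(inv)));
    }
    inv.wrapping_neg()
}

/// Maps x to x * 2^64 mod q, i.e. into the Montgomery domain (cf. `prepare`).
fn to_mont(x: u64, q: u64) -> u64 {
    (((x as u128) << 64) % (q as u128)) as u64
}

/// Returns a * b * 2^-64 mod q; with one operand in the Montgomery domain this
/// is the "external" product used throughout the NTT tables.
fn mont_mul(a: u64, b: u64, q: u64, neg_q_inv: u64) -> u64 {
    let t: u128 = (a as u128) * (b as u128);
    let m: u64 = (t as u64).wrapping_mul(neg_q_inv);
    // t + m*q is divisible by 2^64 and, since a < q < 2^63, cannot overflow u128.
    let u: u64 = ((t + (m as u128) * (q as u128)) >> 64) as u64;
    if u >= q { u - q } else { u }
}

fn main() {
    let q: u64 = 0x1fffffffffe00001; // NTT-friendly prime used in benches/ntt.rs
    let neg_q_inv = neg_inv_pow2_64(q);
    let (a, b) = (12345u64, 67890u64);
    // One Montgomery-domain operand times one plain operand gives a plain product mod q.
    let prod = mont_mul(to_mont(a, q), b, q, neg_q_inv);
    assert_eq!(prod as u128, (a as u128) * (b as u128) % (q as u128));
}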
diff --git a/src/modulus/prime.rs b/src/modulus/prime.rs
index d051763..0841ab0 100644
--- a/src/modulus/prime.rs
+++ b/src/modulus/prime.rs
@@ -1,14 +1,16 @@
 use crate::modulus::montgomery::MontgomeryPrecomp;
-use crate::modulus::shoup::ShoupPrecomp;
+use crate::modulus::barrett::BarrettPrecomp;

 #[derive(Clone, Debug, PartialEq, Eq)]
 pub struct Prime<O> {
     pub q: O, /// q_base^q_powers
+    pub two_q: O,
+    pub four_q: O,
     pub q_base: O,
     pub q_power: usize,
     pub factors: Vec<O>, /// distinct factors of q-1
     pub montgomery: MontgomeryPrecomp<O>,
-    pub shoup:ShoupPrecomp<O>,
+    pub barrett:BarrettPrecomp<O>,
     pub phi: O,
 }

diff --git a/src/modulus/shoup.rs b/src/modulus/shoup.rs
deleted file mode 100644
index 901be14..0000000
--- a/src/modulus/shoup.rs
+++ /dev/null
@@ -1,22 +0,0 @@
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub struct Shoup<O>(pub O, pub O);
-
-impl<O> Shoup<O> {
-
-    #[inline(always)]
-    pub fn value(&self) -> &O {
-        &self.0
-    }
-
-    #[inline(always)]
-    pub fn quotient(&self) -> &O {
-        &self.1
-    }
-}
-
-#[derive(Clone, Copy, Debug, PartialEq, Eq)]
-pub struct ShoupPrecomp<O>{
-    pub q: O,
-    pub one: Shoup<O>,
-}
-
diff --git a/src/ring/impl_u64/automorphism.rs b/src/ring/impl_u64/automorphism.rs
index 739cd35..18179b0 100644
--- a/src/ring/impl_u64/automorphism.rs
+++ b/src/ring/impl_u64/automorphism.rs
@@ -5,7 +5,7 @@ use crate::poly::Poly;
 /// Returns a lookup table for the automorphism X^{i} -> X^{i * k mod nth_root}.
 /// Method will panic if n or nth_root are not power-of-two.
 /// Method will panic if gal_el is not coprime with nth_root.
-pub fn automorphism_index_ntt(n: usize, nth_root:u64, gal_el: u64) -> (Vec<u64>){
+pub fn automorphism_index_ntt(n: usize, nth_root:u64, gal_el: u64) -> Vec<u64>{
     assert!(n&(n-1) != 0, "invalid n={}: not a power-of-two", n);
     assert!(nth_root&(nth_root-1) != 0, "invalid nth_root={}: not a power-of-two", n);
     assert!(gal_el & 1 == 1, "invalid gal_el={}: not coprime with nth_root={}", gal_el, nth_root);
@@ -39,6 +39,5 @@ impl Ring<u64>{
         let i_out: u64 = gal_el_i & mask;
         b_vec[i_out as usize] = ai * (sign^1) | (q - ai) * sign
     });
-
 }
\ No newline at end of file
diff --git a/src/ring/impl_u64/ring.rs b/src/ring/impl_u64/ring.rs
index 61d566b..de43a0a 100644
--- a/src/ring/impl_u64/ring.rs
+++ b/src/ring/impl_u64/ring.rs
@@ -1,7 +1,6 @@
 use crate::ring::Ring;
 use crate::dft::ntt::Table;
 use crate::modulus::prime::Prime;
-use crate::modulus::montgomery::Montgomery;
 use crate::poly::Poly;

 impl Ring<u64>{
@@ -18,15 +17,37 @@ impl Ring<u64>{
         return self.n
     }

-    fn new_poly_core<O>(&self) -> Poly<O> where O: Default + Clone {
-        Poly::<O>::new(self.n())
-    }
-
     pub fn new_poly(&self) -> Poly<u64>{
-        self.new_poly_core::<u64>()
+        Poly::<u64>::new(self.n())
     }

-    pub fn new_poly_montgomery(&self) -> Poly<Montgomery<u64>>{
-        self.new_poly_core::<Montgomery<u64>>()
+    pub fn ntt_inplace<const LAZY: bool>(&self, poly: &mut Poly<u64>){
+        match LAZY{
+            true => self.dft.forward_inplace_lazy(&mut poly.0),
+            false => self.dft.forward_inplace(&mut poly.0)
+        }
+    }
+
+    pub fn intt_inplace<const LAZY: bool>(&self, poly: &mut Poly<u64>){
+        match LAZY{
+            true => self.dft.backward_inplace_lazy(&mut poly.0),
+            false => self.dft.backward_inplace(&mut poly.0)
+        }
+    }
+
+    pub fn ntt<const LAZY: bool>(&self, poly_in: &Poly<u64>, poly_out: &mut Poly<u64>){
+        poly_out.0.copy_from_slice(&poly_in.0);
+        match LAZY{
+            true => self.dft.forward_inplace_lazy(&mut poly_out.0),
+            false => self.dft.forward_inplace(&mut poly_out.0)
+        }
+    }
+
+    pub fn intt<const LAZY: bool>(&self, poly_in: &Poly<u64>, poly_out: &mut Poly<u64>){
+        poly_out.0.copy_from_slice(&poly_in.0);
+        match LAZY{
+            true => self.dft.backward_inplace_lazy(&mut poly_out.0),
+            false => self.dft.backward_inplace(&mut poly_out.0)
+        }
+    }
 }
\ No newline at end of file
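
Note that the Shoup machinery deleted above does not disappear: `prepare`, `mul_external` and `mul_external_lazy` reappear on `BarrettPrecomp` in src/modulus/impl_u64/barrett.rs earlier in this patch, so each NTT twiddle is stored as a (value, floor(value * 2^64 / q)) pair. Below is a self-contained sketch of that precomputed-quotient multiplication; `shoup_prepare` and `shoup_mul` are illustrative names rather than crate API, and q < 2^63 is assumed as in the crate's debug_asserts.

/// Precomputes (v, floor(v * 2^64 / q)) for a fixed multiplicand v < q,
/// mirroring the new BarrettPrecomp::prepare.
fn shoup_prepare(v: u64, q: u64) -> (u64, u64) {
    debug_assert!(v < q);
    let quotient = (((v as u128) << 64) / (q as u128)) as u64;
    (v, quotient)
}

/// Returns v * x mod q using the precomputed quotient (cf. mul_external).
fn shoup_mul(w: (u64, u64), x: u64, q: u64) -> u64 {
    let (v, quotient) = w;
    // t is floor(v * x / q) or one less: a 2^64-scaled quotient estimate.
    let t = ((quotient as u128 * x as u128) >> 64) as u64;
    // Lazy product in [0, 2q) (cf. mul_external_lazy) ...
    let r = v.wrapping_mul(x).wrapping_sub(t.wrapping_mul(q));
    // ... finished by a single conditional subtraction (reduce_once).
    if r >= q { r - q } else { r }
}

fn main() {
    let q: u64 = 0x800000000004001; // prime used by the new test in src/dft/ntt.rs
    let w = shoup_prepare(3, q);
    assert_eq!(shoup_mul(w, q - 1, q), ((3 * ((q as u128) - 1)) % (q as u128)) as u64);
}

Keeping the quotient per constant is what lets the butterflies multiply by a fixed twiddle with one high multiplication and no Montgomery conversion, which is why the tables in src/dft/ntt.rs now hold Barrett-prepared twiddles instead of Shoup ones.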