move modulus into different files

2026-01-12 00:51:29 +01:00 · 2024-06-10 17:06:25 +05:30
parent e161b33402
commit 001515413b
3 changed files with 265 additions and 246 deletions
--- a/src/backend/mod.rs
+++ b/src/backend/mod.rs
@@ -0,0 +1,135 @@
+use num_traits::ToPrimitive;
+
+use crate::{Matrix, RowMut};
+
+mod modulus_u64;
+mod word_size;
+
+pub use modulus_u64::ModularOpsU64;
+pub use word_size::WordSizeModulus;
+
+pub trait Modulus {
+    type Element;
+    /// Modulus value if it fits in Element
+    fn q(&self) -> Option<Self::Element>;
+    /// Modulus value as f64 if it fits in f64
+    fn q_as_f64(&self) -> Option<f64>;
+    /// Is modulus native?
+    fn is_native(&self) -> bool;
+    /// -1 in signed representaiton
+    fn neg_one(&self) -> Self::Element;
+    /// Largest unsigned value that fits in the modulus. That is, q - 1.
+    fn largest_unsigned_value(&self) -> Self::Element;
+    /// Smallest unsigned value that fits in the modulus
+    /// Always assmed to be 0.
+    fn smallest_unsigned_value(&self) -> Self::Element;
+    /// Convert unsigned value in signed represetation to i64
+    fn map_element_to_i64(&self, v: &Self::Element) -> i64;
+    /// Convert f64 to signed represented in modulus
+    fn map_element_from_f64(&self, v: f64) -> Self::Element;
+    /// Convert i64 to signed represented in modulus
+    fn map_element_from_i64(&self, v: i64) -> Self::Element;
+}
+
+impl Modulus for u64 {
+    type Element = u64;
+    fn is_native(&self) -> bool {
+        // q of size u64 can never be a naitve modulus
+        false
+    }
+    fn largest_unsigned_value(&self) -> Self::Element {
+        self - 1
+    }
+    fn neg_one(&self) -> Self::Element {
+        self - 1
+    }
+    fn smallest_unsigned_value(&self) -> Self::Element {
+        0
+    }
+    fn map_element_to_i64(&self, v: &Self::Element) -> i64 {
+        assert!(v <= self, "{v} must be <= {self}");
+        if *v >= (self >> 1) {
+            -ToPrimitive::to_i64(&(self - v)).unwrap()
+        } else {
+            ToPrimitive::to_i64(v).unwrap()
+        }
+    }
+    fn map_element_from_f64(&self, v: f64) -> Self::Element {
+        //FIXME (Jay): Before I check whether v is smaller than 0 with `let is_neg =
+        // o.is_sign_negative() && o != 0.0; I'm ocnfused why didn't I simply check <
+        // 0.0?
+        let v = v.round();
+        if v < 0.0 {
+            self - v.abs().to_u64().unwrap()
+        } else {
+            v.to_u64().unwrap()
+        }
+    }
+    fn map_element_from_i64(&self, v: i64) -> Self::Element {
+        if v < 0 {
+            self - v.abs().to_u64().unwrap()
+        } else {
+            v.to_u64().unwrap()
+        }
+    }
+    fn q(&self) -> Option<Self::Element> {
+        Some(*self)
+    }
+    fn q_as_f64(&self) -> Option<f64> {
+        self.to_f64()
+    }
+}
+
+pub trait ModInit {
+    type M;
+    fn new(modulus: Self::M) -> Self;
+}
+
+pub trait GetModulus {
+    type Element;
+    type M: Modulus<Element = Self::Element>;
+    fn modulus(&self) -> &Self::M;
+}
+
+pub trait VectorOps {
+    type Element;
+
+    fn elwise_scalar_mul(&self, out: &mut [Self::Element], a: &[Self::Element], b: &Self::Element);
+    fn elwise_mul(&self, out: &mut [Self::Element], a: &[Self::Element], b: &[Self::Element]);
+
+    fn elwise_add_mut(&self, a: &mut [Self::Element], b: &[Self::Element]);
+    fn elwise_sub_mut(&self, a: &mut [Self::Element], b: &[Self::Element]);
+    fn elwise_mul_mut(&self, a: &mut [Self::Element], b: &[Self::Element]);
+    fn elwise_scalar_mul_mut(&self, a: &mut [Self::Element], b: &Self::Element);
+    fn elwise_neg_mut(&self, a: &mut [Self::Element]);
+    /// inplace mutates `a`: a = a + b*c
+    fn elwise_fma_mut(&self, a: &mut [Self::Element], b: &[Self::Element], c: &[Self::Element]);
+    fn elwise_fma_scalar_mut(
+        &self,
+        a: &mut [Self::Element],
+        b: &[Self::Element],
+        c: &Self::Element,
+    );
+}
+
+pub trait ArithmeticOps {
+    type Element;
+    fn mul(&self, a: &Self::Element, b: &Self::Element) -> Self::Element;
+    fn add(&self, a: &Self::Element, b: &Self::Element) -> Self::Element;
+    fn sub(&self, a: &Self::Element, b: &Self::Element) -> Self::Element;
+    fn neg(&self, a: &Self::Element) -> Self::Element;
+}
+
+pub trait ArithmeticLazyOps {
+    type Element;
+    fn mul_lazy(&self, a: &Self::Element, b: &Self::Element) -> Self::Element;
+    fn add_lazy(&self, a: &Self::Element, b: &Self::Element) -> Self::Element;
+}
+
+pub trait ShoupMatrixFMA<M: Matrix>
+where
+    M::R: RowMut,
+{
+    /// Returns summation of row-wise product of matrix a and b.
+    fn shoup_matrix_fma(&self, out: &mut M::R, a: &M, a_shoup: &M, b: &M);
+}
--- a/src/backend/modulus_u64.rs
+++ b/src/backend/modulus_u64.rs
@@ -0,0 +1,353 @@
+use std::marker::PhantomData;
+
+use itertools::izip;
+use num_traits::{PrimInt, Signed, ToPrimitive, WrappingAdd, WrappingMul, WrappingSub, Zero};
+
+use super::{
+    ArithmeticLazyOps, ArithmeticOps, GetModulus, ModInit, Modulus, ShoupMatrixFMA, VectorOps,
+};
+use crate::{utils::ShoupMul, Matrix, RowMut};
+
+pub struct ModularOpsU64<T> {
+    q: u64,
+    q_twice: u64,
+    logq: usize,
+    barrett_mu: u128,
+    barrett_alpha: usize,
+    modulus: T,
+}
+
+impl<T> ModInit for ModularOpsU64<T>
+where
+    T: Modulus<Element = u64>,
+{
+    type M = T;
+    fn new(modulus: Self::M) -> ModularOpsU64<T> {
+        assert!(!modulus.is_native());
+
+        // largest unsigned value modulus fits is modulus-1
+        let q = modulus.largest_unsigned_value() + 1;
+        let logq = 64 - (q + 1u64).leading_zeros();
+
+        // barrett calculation
+        let mu = (1u128 << (logq * 2 + 3)) / (q as u128);
+        let alpha = logq + 3;
+
+        ModularOpsU64 {
+            q,
+            q_twice: q << 1,
+            logq: logq as usize,
+            barrett_alpha: alpha as usize,
+            barrett_mu: mu,
+            modulus,
+        }
+    }
+}
+
+impl<T> ModularOpsU64<T> {
+    fn add_mod_fast(&self, a: u64, b: u64) -> u64 {
+        debug_assert!(a < self.q);
+        debug_assert!(b < self.q);
+
+        let mut o = a + b;
+        if o >= self.q {
+            o -= self.q;
+        }
+        o
+    }
+
+    fn add_mod_fast_lazy(&self, a: u64, b: u64) -> u64 {
+        debug_assert!(a < self.q_twice);
+        debug_assert!(b < self.q_twice);
+
+        let mut o = a + b;
+        if o >= self.q_twice {
+            o -= self.q_twice;
+        }
+        o
+    }
+
+    fn sub_mod_fast(&self, a: u64, b: u64) -> u64 {
+        debug_assert!(a < self.q);
+        debug_assert!(b < self.q);
+
+        if a >= b {
+            a - b
+        } else {
+            (self.q + a) - b
+        }
+    }
+
+    // returns (a * b)  % q
+    ///
+    /// - both a and b must be in range [0, 2q)
+    /// - output is in range [0 , 2q)
+    fn mul_mod_fast_lazy(&self, a: u64, b: u64) -> u64 {
+        debug_assert!(a < 2 * self.q);
+        debug_assert!(b < 2 * self.q);
+
+        let ab = a as u128 * b as u128;
+
+        // ab / (2^{n + \beta})
+        // note: \beta is assumed to -2
+        let tmp = ab >> (self.logq - 2);
+
+        // k = ((ab / (2^{n + \beta})) * \mu) / 2^{\alpha - (-2)}
+        let k = (tmp * self.barrett_mu) >> (self.barrett_alpha + 2);
+
+        // ab - k*p
+        let tmp = k * (self.q as u128);
+
+        (ab - tmp) as u64
+    }
+
+    /// returns (a * b)  % q
+    ///
+    /// - both a and b must be in range [0, 2q)
+    /// - output is in range [0 , q)
+    fn mul_mod_fast(&self, a: u64, b: u64) -> u64 {
+        debug_assert!(a < 2 * self.q);
+        debug_assert!(b < 2 * self.q);
+
+        let ab = a as u128 * b as u128;
+
+        // ab / (2^{n + \beta})
+        // note: \beta is assumed to -2
+        let tmp = ab >> (self.logq - 2);
+
+        // k = ((ab / (2^{n + \beta})) * \mu) / 2^{\alpha - (-2)}
+        let k = (tmp * self.barrett_mu) >> (self.barrett_alpha + 2);
+
+        // ab - k*p
+        let tmp = k * (self.q as u128);
+
+        let mut out = (ab - tmp) as u64;
+
+        if out >= self.q {
+            out -= self.q;
+        }
+
+        return out;
+    }
+}
+
+impl<T> ArithmeticOps for ModularOpsU64<T> {
+    type Element = u64;
+
+    fn add(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        self.add_mod_fast(*a, *b)
+    }
+
+    fn mul(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        self.mul_mod_fast(*a, *b)
+    }
+
+    fn sub(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        self.sub_mod_fast(*a, *b)
+    }
+
+    fn neg(&self, a: &Self::Element) -> Self::Element {
+        self.q - *a
+    }
+
+    // fn modulus(&self) -> Self::Element {
+    //     self.q
+    // }
+}
+
+impl<T> ArithmeticLazyOps for ModularOpsU64<T> {
+    type Element = u64;
+    fn add_lazy(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        self.add_mod_fast_lazy(*a, *b)
+    }
+    fn mul_lazy(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        self.mul_mod_fast_lazy(*a, *b)
+    }
+}
+
+impl<T> VectorOps for ModularOpsU64<T> {
+    type Element = u64;
+
+    fn elwise_add_mut(&self, a: &mut [Self::Element], b: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = self.add_mod_fast(*ai, *bi);
+        });
+    }
+
+    fn elwise_sub_mut(&self, a: &mut [Self::Element], b: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = self.sub_mod_fast(*ai, *bi);
+        });
+    }
+
+    fn elwise_mul_mut(&self, a: &mut [Self::Element], b: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = self.mul_mod_fast(*ai, *bi);
+        });
+    }
+
+    fn elwise_neg_mut(&self, a: &mut [Self::Element]) {
+        a.iter_mut().for_each(|ai| *ai = self.q - *ai);
+    }
+
+    fn elwise_scalar_mul(&self, out: &mut [Self::Element], a: &[Self::Element], b: &Self::Element) {
+        izip!(out.iter_mut(), a.iter()).for_each(|(oi, ai)| {
+            *oi = self.mul_mod_fast(*ai, *b);
+        });
+    }
+
+    fn elwise_mul(&self, out: &mut [Self::Element], a: &[Self::Element], b: &[Self::Element]) {
+        izip!(out.iter_mut(), a.iter(), b.iter()).for_each(|(oi, ai, bi)| {
+            *oi = self.mul_mod_fast(*ai, *bi);
+        });
+    }
+
+    fn elwise_scalar_mul_mut(&self, a: &mut [Self::Element], b: &Self::Element) {
+        a.iter_mut().for_each(|ai| {
+            *ai = self.mul_mod_fast(*ai, *b);
+        });
+    }
+
+    fn elwise_fma_mut(&self, a: &mut [Self::Element], b: &[Self::Element], c: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter(), c.iter()).for_each(|(ai, bi, ci)| {
+            *ai = self.add_mod_fast(*ai, self.mul_mod_fast(*bi, *ci));
+        });
+    }
+
+    fn elwise_fma_scalar_mut(
+        &self,
+        a: &mut [Self::Element],
+        b: &[Self::Element],
+        c: &Self::Element,
+    ) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = self.add_mod_fast(*ai, self.mul_mod_fast(*bi, *c));
+        });
+    }
+
+    // fn modulus(&self) -> Self::Element {
+    //     self.q
+    // }
+}
+
+impl<M: Matrix<MatElement = u64>, T> ShoupMatrixFMA<M> for ModularOpsU64<T>
+where
+    M::R: RowMut,
+{
+    fn shoup_matrix_fma(&self, out: &mut <M as Matrix>::R, a: &M, a_shoup: &M, b: &M) {
+        assert!(a.dimension() == a_shoup.dimension());
+        assert!(a.dimension() == b.dimension());
+
+        let q = self.q;
+        let q_twice = self.q << 1;
+
+        // first row (without summation)
+        izip!(
+            out.as_mut().iter_mut(),
+            a.get_row(0),
+            a_shoup.get_row(0),
+            b.get_row(0)
+        )
+        .for_each(|(o, a, a_shoup, b)| {
+            *o = ShoupMul::mul(*b, *a, *a_shoup, q);
+        });
+
+        izip!(
+            a.iter_rows().skip(1),
+            a_shoup.iter_rows().skip(1),
+            b.iter_rows().skip(1)
+        )
+        .for_each(|(a_row, a_shoup_row, b_row)| {
+            izip!(
+                out.as_mut().iter_mut(),
+                a_row.as_ref().iter(),
+                a_shoup_row.as_ref().iter(),
+                b_row.as_ref().iter()
+            )
+            .for_each(|(o, a0, a0_shoup, b0)| {
+                let quotient = ((*a0_shoup as u128 * *b0 as u128) >> 64) as u64;
+                let mut v = (a0.wrapping_mul(b0)).wrapping_add(*o);
+                v = v.wrapping_sub(q.wrapping_mul(quotient));
+
+                if v >= q_twice {
+                    v -= q_twice;
+                }
+
+                *o = v;
+            });
+        });
+    }
+}
+
+impl<T> GetModulus for ModularOpsU64<T>
+where
+    T: Modulus,
+{
+    type Element = T::Element;
+    type M = T;
+    fn modulus(&self) -> &Self::M {
+        &self.modulus
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use itertools::Itertools;
+    use rand::{thread_rng, Rng};
+    use rand_distr::Uniform;
+
+    #[test]
+    fn fma() {
+        let mut rng = thread_rng();
+        let prime = 36028797017456641;
+        let ring_size = 1 << 3;
+
+        let dist = Uniform::new(0, prime);
+        let d = 2;
+        let a0_matrix = (0..d)
+            .into_iter()
+            .map(|_| (&mut rng).sample_iter(dist).take(ring_size).collect_vec())
+            .collect_vec();
+        // a0 in shoup representation
+        let a0_shoup_matrix = a0_matrix
+            .iter()
+            .map(|r| {
+                r.iter()
+                    .map(|v| {
+                        // $(v * 2^{\beta}) / p$
+                        ((*v as u128 * (1u128 << 64)) / prime as u128) as u64
+                    })
+                    .collect_vec()
+            })
+            .collect_vec();
+        let a1_matrix = (0..d)
+            .into_iter()
+            .map(|_| (&mut rng).sample_iter(dist).take(ring_size).collect_vec())
+            .collect_vec();
+
+        let modop = ModularOpsU64::new(prime);
+
+        let mut out_shoup_fma_lazy = vec![0u64; ring_size];
+        modop.shoup_matrix_fma(
+            &mut out_shoup_fma_lazy,
+            &a0_matrix,
+            &a0_shoup_matrix,
+            &a1_matrix,
+        );
+        let out_shoup_fma = out_shoup_fma_lazy
+            .iter()
+            .map(|v| if *v >= prime { v - prime } else { *v })
+            .collect_vec();
+
+        // expected
+        let mut out_expected = vec![0u64; ring_size];
+        izip!(a0_matrix.iter(), a1_matrix.iter()).for_each(|(a_r, b_r)| {
+            izip!(out_expected.iter_mut(), a_r.iter(), b_r.iter()).for_each(|(o, a0, a1)| {
+                *o = (*o + ((*a0 as u128 * *a1 as u128) % prime as u128) as u64) % prime;
+            });
+        });
+
+        assert_eq!(out_expected, out_shoup_fma);
+    }
+}
--- a/src/backend/word_size.rs
+++ b/src/backend/word_size.rs
@@ -0,0 +1,127 @@
+use itertools::izip;
+use num_traits::{PrimInt, Signed, ToPrimitive, WrappingAdd, WrappingMul, WrappingSub, Zero};
+
+use super::{
+    ArithmeticLazyOps, ArithmeticOps, GetModulus, ModInit, Modulus, ShoupMatrixFMA, VectorOps,
+};
+use crate::{utils::ShoupMul, Matrix, RowMut};
+
+pub struct WordSizeModulus<T> {
+    modulus: T,
+}
+
+impl<T> ModInit for WordSizeModulus<T>
+where
+    T: Modulus,
+{
+    type M = T;
+    fn new(modulus: T) -> Self {
+        assert!(modulus.is_native());
+        // For now assume ModulusOpsU64 is only used for u64
+        Self { modulus: modulus }
+    }
+}
+
+impl<T> ArithmeticOps for WordSizeModulus<T>
+where
+    T: Modulus,
+    T::Element: WrappingAdd + WrappingSub + WrappingMul + Zero,
+{
+    type Element = T::Element;
+    fn add(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        T::Element::wrapping_add(a, b)
+    }
+
+    fn mul(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        T::Element::wrapping_mul(a, b)
+    }
+
+    fn neg(&self, a: &Self::Element) -> Self::Element {
+        T::Element::wrapping_sub(&T::Element::zero(), a)
+    }
+
+    fn sub(&self, a: &Self::Element, b: &Self::Element) -> Self::Element {
+        T::Element::wrapping_sub(a, b)
+    }
+}
+
+impl<T> VectorOps for WordSizeModulus<T>
+where
+    T: Modulus,
+    T::Element: WrappingAdd + WrappingSub + WrappingMul + Zero,
+{
+    type Element = T::Element;
+
+    fn elwise_add_mut(&self, a: &mut [Self::Element], b: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = T::Element::wrapping_add(ai, bi);
+        });
+    }
+
+    fn elwise_sub_mut(&self, a: &mut [Self::Element], b: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = T::Element::wrapping_sub(ai, bi);
+        });
+    }
+
+    fn elwise_mul_mut(&self, a: &mut [Self::Element], b: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = T::Element::wrapping_mul(ai, bi);
+        });
+    }
+
+    fn elwise_neg_mut(&self, a: &mut [Self::Element]) {
+        a.iter_mut()
+            .for_each(|ai| *ai = T::Element::wrapping_sub(&T::Element::zero(), ai));
+    }
+
+    fn elwise_scalar_mul(&self, out: &mut [Self::Element], a: &[Self::Element], b: &Self::Element) {
+        izip!(out.iter_mut(), a.iter()).for_each(|(oi, ai)| {
+            *oi = T::Element::wrapping_mul(ai, b);
+        });
+    }
+
+    fn elwise_mul(&self, out: &mut [Self::Element], a: &[Self::Element], b: &[Self::Element]) {
+        izip!(out.iter_mut(), a.iter(), b.iter()).for_each(|(oi, ai, bi)| {
+            *oi = T::Element::wrapping_mul(ai, bi);
+        });
+    }
+
+    fn elwise_scalar_mul_mut(&self, a: &mut [Self::Element], b: &Self::Element) {
+        a.iter_mut().for_each(|ai| {
+            *ai = T::Element::wrapping_mul(ai, b);
+        });
+    }
+
+    fn elwise_fma_mut(&self, a: &mut [Self::Element], b: &[Self::Element], c: &[Self::Element]) {
+        izip!(a.iter_mut(), b.iter(), c.iter()).for_each(|(ai, bi, ci)| {
+            *ai = T::Element::wrapping_add(ai, &T::Element::wrapping_mul(bi, ci));
+        });
+    }
+
+    fn elwise_fma_scalar_mut(
+        &self,
+        a: &mut [Self::Element],
+        b: &[Self::Element],
+        c: &Self::Element,
+    ) {
+        izip!(a.iter_mut(), b.iter()).for_each(|(ai, bi)| {
+            *ai = T::Element::wrapping_add(ai, &T::Element::wrapping_mul(bi, c));
+        });
+    }
+
+    // fn modulus(&self) -> &T {
+    //     &self.modulus
+    // }
+}
+
+impl<T> GetModulus for WordSizeModulus<T>
+where
+    T: Modulus,
+{
+    type Element = T::Element;
+    type M = T;
+    fn modulus(&self) -> &Self::M {
+        &self.modulus
+    }
+}