Distinguish between gglwe_to_ggsw key and tensor_key + update key representation

This commit is contained in:
Pro7ech
2025-10-27 11:28:53 +01:00
parent 41ca5aafcc
commit 8d4c19a304
59 changed files with 2812 additions and 1596 deletions

View File

@@ -1,17 +1,16 @@
use poulpy_hal::{
api::{
ModuleN, ScratchAvailable, ScratchTakeBasic, VecZnxBigBytesOf, VecZnxBigNormalize, VecZnxDftAddInplace, VecZnxDftApply,
VecZnxDftBytesOf, VecZnxDftCopy, VecZnxIdftApplyTmpA, VecZnxNormalize, VecZnxNormalizeTmpBytes, VmpApplyDftToDft,
VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
ScratchAvailable, ScratchTakeBasic, VecZnxBigAddSmallInplace, VecZnxBigBytesOf, VecZnxBigNormalize,
VecZnxBigNormalizeTmpBytes, VecZnxDftApply, VecZnxDftBytesOf, VecZnxIdftApplyConsume, VecZnxNormalize,
},
layouts::{Backend, DataMut, Module, Scratch, VmpPMat, ZnxInfos},
layouts::{Backend, DataMut, Module, Scratch, VecZnxBig},
};
use crate::{
GLWECopy, ScratchTakeCore,
GGLWEProduct, GLWECopy, ScratchTakeCore,
layouts::{
GGLWE, GGLWEInfos, GGLWEToRef, GGSW, GGSWInfos, GGSWToMut, GLWEInfos, LWEInfos,
prepared::{GLWETensorKeyPrepared, GLWETensorKeyPreparedToRef},
GGLWE, GGLWEInfos, GGLWEToGGSWKeyPrepared, GGLWEToGGSWKeyPreparedToRef, GGLWEToRef, GGSW, GGSWInfos, GGSWToMut, GLWE,
GLWEInfos, LWEInfos,
},
};
@@ -31,7 +30,7 @@ impl<D: DataMut> GGSW<D> {
where
M: GGSWFromGGLWE<BE>,
G: GGLWEToRef,
T: GLWETensorKeyPreparedToRef<BE>,
T: GGLWEToGGSWKeyPreparedToRef<BE>,
Scratch<BE>: ScratchTakeCore<BE>,
{
module.ggsw_from_gglwe(self, gglwe, tsk, scratch);
@@ -54,12 +53,12 @@ where
where
R: GGSWToMut,
A: GGLWEToRef,
T: GLWETensorKeyPreparedToRef<BE>,
T: GGLWEToGGSWKeyPreparedToRef<BE>,
Scratch<BE>: ScratchTakeCore<BE>,
{
let res: &mut GGSW<&mut [u8]> = &mut res.to_mut();
let a: &GGLWE<&[u8]> = &a.to_ref();
let tsk: &GLWETensorKeyPrepared<&[u8], BE> = &tsk.to_ref();
let tsk: &GGLWEToGGSWKeyPrepared<&[u8], BE> = &tsk.to_ref();
assert_eq!(res.rank(), a.rank_out());
assert_eq!(res.dnum(), a.dnum());
@@ -85,177 +84,140 @@ pub trait GGSWFromGGLWE<BE: Backend> {
where
R: GGSWToMut,
A: GGLWEToRef,
T: GLWETensorKeyPreparedToRef<BE>,
T: GGLWEToGGSWKeyPreparedToRef<BE>,
Scratch<BE>: ScratchTakeCore<BE>;
}
impl<BE: Backend> GGSWExpandRows<BE> for Module<BE> where
Self: Sized
+ ModuleN
+ VecZnxDftBytesOf
+ VmpApplyDftToDftTmpBytes
+ VecZnxBigBytesOf
+ VecZnxNormalizeTmpBytes
+ VecZnxDftBytesOf
+ VmpApplyDftToDftTmpBytes
+ VecZnxBigBytesOf
+ VecZnxNormalizeTmpBytes
+ VecZnxDftApply<BE>
+ VecZnxDftCopy<BE>
+ VmpApplyDftToDft<BE>
+ VmpApplyDftToDftAdd<BE>
+ VecZnxDftAddInplace<BE>
+ VecZnxBigNormalize<BE>
+ VecZnxIdftApplyTmpA<BE>
+ VecZnxNormalize<BE>
{
pub trait GGSWExpandRows<BE: Backend> {
fn ggsw_expand_rows_tmp_bytes<R, A>(&self, res_infos: &R, tsk_infos: &A) -> usize
where
R: GGSWInfos,
A: GGLWEInfos;
fn ggsw_expand_row<R, T>(&self, res: &mut R, tsk: &T, scratch: &mut Scratch<BE>)
where
R: GGSWToMut,
T: GGLWEToGGSWKeyPreparedToRef<BE>,
Scratch<BE>: ScratchTakeCore<BE>;
}
pub trait GGSWExpandRows<BE: Backend>
impl<BE: Backend> GGSWExpandRows<BE> for Module<BE>
where
Self: Sized
+ ModuleN
+ VecZnxDftBytesOf
+ VmpApplyDftToDftTmpBytes
+ VecZnxBigBytesOf
+ VecZnxNormalizeTmpBytes
+ VecZnxDftApply<BE>
+ VecZnxDftCopy<BE>
+ VmpApplyDftToDft<BE>
+ VmpApplyDftToDftAdd<BE>
+ VecZnxDftAddInplace<BE>
Self: GGLWEProduct<BE>
+ VecZnxBigNormalize<BE>
+ VecZnxIdftApplyTmpA<BE>
+ VecZnxNormalize<BE>,
+ VecZnxBigNormalizeTmpBytes
+ VecZnxBigBytesOf
+ VecZnxDftBytesOf
+ VecZnxDftApply<BE>
+ VecZnxNormalize<BE>
+ VecZnxBigAddSmallInplace<BE>
+ VecZnxIdftApplyConsume<BE>,
{
fn ggsw_expand_rows_tmp_bytes<R, A>(&self, res_infos: &R, tsk_infos: &A) -> usize
where
R: GGSWInfos,
A: GGLWEInfos,
{
let tsk_size: usize = tsk_infos.k().div_ceil(tsk_infos.base2k()) as usize;
let size_in: usize = res_infos
.k()
.div_ceil(tsk_infos.base2k())
.div_ceil(tsk_infos.dsize().into()) as usize;
let base2k_in: usize = res_infos.base2k().into();
let base2k_tsk: usize = tsk_infos.base2k().into();
let tmp_dft_i: usize = self.bytes_of_vec_znx_dft((tsk_infos.rank_out() + 1).into(), tsk_size);
let tmp_a: usize = self.bytes_of_vec_znx_dft(1, size_in);
let vmp: usize = self.vmp_apply_dft_to_dft_tmp_bytes(
tsk_size,
size_in,
size_in,
(tsk_infos.rank_in()).into(), // Verify if rank+1
(tsk_infos.rank_out()).into(), // Verify if rank+1
tsk_size,
);
let tmp_idft: usize = self.bytes_of_vec_znx_big(1, tsk_size);
let norm: usize = self.vec_znx_normalize_tmp_bytes();
let rank: usize = res_infos.rank().into();
let cols: usize = rank + 1;
tmp_dft_i + ((tmp_a + vmp) | (tmp_idft + norm))
let res_size = res_infos.size();
let a_size: usize = (res_infos.size() * base2k_in).div_ceil(base2k_tsk);
let a_dft = self.bytes_of_vec_znx_dft(cols - 1, a_size);
let res_dft = self.bytes_of_vec_znx_dft(cols, a_size);
let gglwe_prod: usize = self.gglwe_product_dft_tmp_bytes(res_size, a_size, tsk_infos);
let normalize = self.vec_znx_big_normalize_tmp_bytes();
(a_dft + res_dft + gglwe_prod).max(normalize)
}
fn ggsw_expand_row<R, T>(&self, res: &mut R, tsk: &T, scratch: &mut Scratch<BE>)
where
R: GGSWToMut,
T: GLWETensorKeyPreparedToRef<BE>,
T: GGLWEToGGSWKeyPreparedToRef<BE>,
Scratch<BE>: ScratchTakeCore<BE>,
{
let res: &mut GGSW<&mut [u8]> = &mut res.to_mut();
let tsk: &GLWETensorKeyPrepared<&[u8], BE> = &tsk.to_ref();
let tsk: &GGLWEToGGSWKeyPrepared<&[u8], BE> = &tsk.to_ref();
let basek_in: usize = res.base2k().into();
let basek_tsk: usize = tsk.base2k().into();
let base2k_in: usize = res.base2k().into();
let base2k_tsk: usize = tsk.base2k().into();
assert!(scratch.available() >= self.ggsw_expand_rows_tmp_bytes(res, tsk));
let rank: usize = res.rank().into();
let cols: usize = rank + 1;
let a_size: usize = (res.size() * basek_in).div_ceil(basek_tsk);
let a_size: usize = (res.size() * base2k_in).div_ceil(base2k_tsk);
// Keyswitch the j-th row of the col 0
for row_i in 0..res.dnum().into() {
let a = &res.at(row_i, 0).data;
for row in 0..res.dnum().as_usize() {
let (mut a_dft, scratch_1) = scratch.take_vec_znx_dft(self, cols - 1, a_size);
// Pre-compute DFT of (a0, a1, a2)
let (mut ci_dft, scratch_1) = scratch.take_vec_znx_dft(self, cols, a_size);
{
let glwe_mi_1: &GLWE<&[u8]> = &res.at(row, 0);
if basek_in == basek_tsk {
for i in 0..cols {
self.vec_znx_dft_apply(1, 0, &mut ci_dft, i, a, i);
}
} else {
let (mut a_conv, scratch_2) = scratch_1.take_vec_znx(self.n(), 1, a_size);
for i in 0..cols {
self.vec_znx_normalize(basek_tsk, &mut a_conv, 0, basek_in, a, i, scratch_2);
self.vec_znx_dft_apply(1, 0, &mut ci_dft, i, &a_conv, 0);
if base2k_in == base2k_tsk {
for col_i in 0..cols - 1 {
self.vec_znx_dft_apply(1, 0, &mut a_dft, col_i, glwe_mi_1.data(), col_i + 1);
}
} else {
let (mut a_conv, scratch_2) = scratch_1.take_vec_znx(self.n(), 1, a_size);
for i in 0..cols - 1 {
self.vec_znx_normalize(
base2k_tsk,
&mut a_conv,
0,
base2k_in,
glwe_mi_1.data(),
i + 1,
scratch_2,
);
self.vec_znx_dft_apply(1, 0, &mut a_dft, i, &a_conv, 0);
}
}
}
for col_j in 1..cols {
// Example for rank 3:
// Example for rank 3:
//
// Note: M is a vector (m, Bm, B^2m, B^3m, ...), so each column is
// actually composed of that many dnum and we focus on a specific row here
// implicitely given ci_dft.
//
// # Input
//
// col 0: (-(a0s0 + a1s1 + a2s2) + M[i], a0 , a1 , a2 )
// col 1: (0, 0, 0, 0)
// col 2: (0, 0, 0, 0)
// col 3: (0, 0, 0, 0)
//
// # Output
//
// col 0: (-(a0s0 + a1s1 + a2s2) + M[i], a0 , a1 , a2 )
// col 1: (-(b0s0 + b1s1 + b2s2) , b0 + M[i], b1 , b2 )
// col 2: (-(c0s0 + c1s1 + c2s2) , c0 , c1 + M[i], c2 )
// col 3: (-(d0s0 + d1s1 + d2s2) , d0 , d1 , d2 + M[i])
for col in 1..cols {
let (mut res_dft, scratch_2) = scratch_1.take_vec_znx_dft(self, cols, tsk.size()); // Todo optimise
// Performs a key-switch for each combination of s[i]*s[j], i.e. for a0, a1, a2
//
// Note: M is a vector (m, Bm, B^2m, B^3m, ...), so each column is
// actually composed of that many dnum and we focus on a specific row here
// implicitely given ci_dft.
// # Example for col=1
//
// # Input
//
// col 0: (-(a0s0 + a1s1 + a2s2) + M[i], a0 , a1 , a2 )
// col 1: (0, 0, 0, 0)
// col 2: (0, 0, 0, 0)
// col 3: (0, 0, 0, 0)
//
// # Output
//
// col 0: (-(a0s0 + a1s1 + a2s2) + M[i], a0 , a1 , a2 )
// col 1: (-(b0s0 + b1s1 + b2s2) , b0 + M[i], b1 , b2 )
// col 2: (-(c0s0 + c1s1 + c2s2) , c0 , c1 + M[i], c2 )
// col 3: (-(d0s0 + d1s1 + d2s2) , d0 , d1 , d2 + M[i])
// a0 * (-(f0s0 + f1s1 + f1s2) + s0^2, f0, f1, f2) = (-(a0f0s0 + a0f1s1 + a0f1s2) + a0s0^2, a0f0, a0f1, a0f2)
// +
// a1 * (-(g0s0 + g1s1 + g1s2) + s0s1, g0, g1, g2) = (-(a1g0s0 + a1g1s1 + a1g1s2) + a1s0s1, a1g0, a1g1, a1g2)
// +
// a2 * (-(h0s0 + h1s1 + h1s2) + s0s2, h0, h1, h2) = (-(a2h0s0 + a2h1s1 + a2h1s2) + a2s0s2, a2h0, a2h1, a2h2)
// =
// (-(x0s0 + x1s1 + x2s2) + s0(a0s0 + a1s1 + a2s2), x0, x1, x2)
self.gglwe_product_dft(&mut res_dft, &a_dft, tsk.at(col - 1), scratch_2);
let dsize: usize = tsk.dsize().into();
let (mut tmp_dft_i, scratch_2) = scratch_1.take_vec_znx_dft(self, cols, tsk.size());
let (mut tmp_a, scratch_3) = scratch_2.take_vec_znx_dft(self, 1, ci_dft.size().div_ceil(dsize));
{
// Performs a key-switch for each combination of s[i]*s[j], i.e. for a0, a1, a2
//
// # Example for col=1
//
// a0 * (-(f0s0 + f1s1 + f1s2) + s0^2, f0, f1, f2) = (-(a0f0s0 + a0f1s1 + a0f1s2) + a0s0^2, a0f0, a0f1, a0f2)
// +
// a1 * (-(g0s0 + g1s1 + g1s2) + s0s1, g0, g1, g2) = (-(a1g0s0 + a1g1s1 + a1g1s2) + a1s0s1, a1g0, a1g1, a1g2)
// +
// a2 * (-(h0s0 + h1s1 + h1s2) + s0s2, h0, h1, h2) = (-(a2h0s0 + a2h1s1 + a2h1s2) + a2s0s2, a2h0, a2h1, a2h2)
// =
// (-(x0s0 + x1s1 + x2s2) + s0(a0s0 + a1s1 + a2s2), x0, x1, x2)
for col_i in 1..cols {
let pmat: &VmpPMat<&[u8], BE> = &tsk.at(col_i - 1, col_j - 1).data; // Selects Enc(s[i]s[j])
// Extracts a[i] and multipies with Enc(s[i]s[j])
for di in 0..dsize {
tmp_a.set_size((ci_dft.size() + di) / dsize);
// Small optimization for dsize > 2
// VMP produce some error e, and since we aggregate vmp * 2^{di * B}, then
// we also aggregate ei * 2^{di * B}, with the largest error being ei * 2^{(dsize-1) * B}.
// As such we can ignore the last dsize-2 limbs safely of the sum of vmp products.
// It is possible to further ignore the last dsize-1 limbs, but this introduce
// ~0.5 to 1 bit of additional noise, and thus not chosen here to ensure that the same
// noise is kept with respect to the ideal functionality.
tmp_dft_i.set_size(tsk.size() - ((dsize - di) as isize - 2).max(0) as usize);
self.vec_znx_dft_copy(dsize, dsize - 1 - di, &mut tmp_a, 0, &ci_dft, col_i);
if di == 0 && col_i == 1 {
self.vmp_apply_dft_to_dft(&mut tmp_dft_i, &tmp_a, pmat, scratch_3);
} else {
self.vmp_apply_dft_to_dft_add(&mut tmp_dft_i, &tmp_a, pmat, di, scratch_3);
}
}
}
}
let mut res_big: VecZnxBig<&mut [u8], BE> = self.vec_znx_idft_apply_consume(res_dft);
// Adds -(sum a[i] * s[i]) + m) on the i-th column of tmp_idft_i
//
@@ -266,18 +228,17 @@ where
// (-(x0s0 + x1s1 + x2s2) + s0(a0s0 + a1s1 + a2s2), x0 -(a0s0 + a1s1 + a2s2) + M[i], x1, x2)
// =
// (-(x0s0 + x1s1 + x2s2), x0 + M[i], x1, x2)
self.vec_znx_dft_add_inplace(&mut tmp_dft_i, col_j, &ci_dft, 0);
let (mut tmp_idft, scratch_3) = scratch_2.take_vec_znx_big(self, 1, tsk.size());
for i in 0..cols {
self.vec_znx_idft_apply_tmpa(&mut tmp_idft, 0, &mut tmp_dft_i, i);
self.vec_znx_big_add_small_inplace(&mut res_big, col, res.at(row, 0).data(), 0);
for j in 0..cols {
self.vec_znx_big_normalize(
basek_in,
&mut res.at_mut(row_i, col_j).data,
i,
basek_tsk,
&tmp_idft,
0,
scratch_3,
res.base2k().as_usize(),
res.at_mut(row, col).data_mut(),
j,
tsk.base2k().as_usize(),
&res_big,
j,
scratch_2,
);
}
}

View File

@@ -1,5 +1,5 @@
use poulpy_hal::{
api::ScratchTakeBasic,
api::{ScratchTakeBasic, VecZnxNormalize, VecZnxNormalizeTmpBytes},
layouts::{Backend, DataMut, Module, Scratch, VecZnx, ZnxView, ZnxViewMut, ZnxZero},
};
@@ -8,11 +8,10 @@ use crate::{
layouts::{GGLWEInfos, GGLWEPreparedToRef, GLWE, GLWEInfos, GLWELayout, GLWEToMut, LWE, LWEInfos, LWEToRef},
};
impl<BE: Backend> GLWEFromLWE<BE> for Module<BE> where Self: GLWEKeyswitch<BE> {}
pub trait GLWEFromLWE<BE: Backend>
impl<BE: Backend> GLWEFromLWE<BE> for Module<BE>
where
Self: GLWEKeyswitch<BE>,
Self: GLWEKeyswitch<BE> + VecZnxNormalizeTmpBytes + VecZnxNormalize<BE>,
Scratch<BE>: ScratchTakeCore<BE>,
{
fn glwe_from_lwe_tmp_bytes<R, A, K>(&self, glwe_infos: &R, lwe_infos: &A, key_infos: &K) -> usize
where
@@ -41,7 +40,6 @@ where
R: GLWEToMut,
A: LWEToRef,
K: GGLWEPreparedToRef<BE> + GGLWEInfos,
Scratch<BE>: ScratchTakeCore<BE>,
{
let res: &mut GLWE<&mut [u8]> = &mut res.to_mut();
let lwe: &LWE<&[u8]> = &lwe.to_ref();
@@ -105,6 +103,23 @@ where
}
}
pub trait GLWEFromLWE<BE: Backend>
where
Self: GLWEKeyswitch<BE>,
{
fn glwe_from_lwe_tmp_bytes<R, A, K>(&self, glwe_infos: &R, lwe_infos: &A, key_infos: &K) -> usize
where
R: GLWEInfos,
A: LWEInfos,
K: GGLWEInfos;
fn glwe_from_lwe<R, A, K>(&self, res: &mut R, lwe: &A, ksk: &K, scratch: &mut Scratch<BE>)
where
R: GLWEToMut,
A: LWEToRef,
K: GGLWEPreparedToRef<BE> + GGLWEInfos;
}
impl GLWE<Vec<u8>> {
pub fn from_lwe_tmp_bytes<R, A, K, M, BE: Backend>(module: &M, glwe_infos: &R, lwe_infos: &A, key_infos: &K) -> usize
where