Applied discussed changes, everything working, but still to discuss

2026-02-10 13:16:44 +01:00 · 2025-05-01 10:33:19 +02:00
parent 4e6fce3458
commit ca5e6d46c9
14 changed files with 710 additions and 508 deletions
--- a/base2k/src/mat_znx_dft_ops.rs
+++ b/base2k/src/mat_znx_dft_ops.rs
@@ -1,8 +1,9 @@
-use crate::ffi::vec_znx_big::vec_znx_big_t;
 use crate::ffi::vec_znx_dft::vec_znx_dft_t;
 use crate::ffi::vmp;
 use crate::znx_base::{ZnxInfos, ZnxLayout};
-use crate::{Backend, FFT64, MatZnxDft, Module, VecZnx, VecZnxBig, VecZnxDft, ZnxAlloc, assert_alignement};
+use crate::{
+    Backend, FFT64, MatZnxDft, Module, VecZnx, VecZnxBig, VecZnxBigOps, VecZnxDft, VecZnxDftOps, assert_alignement, is_aligned,
+};

 /// This trait implements methods for vector matrix product,
 /// that is, multiplying a [VecZnx] with a [MatZnxDft].
@@ -13,44 +14,45 @@ pub trait MatZnxDftOps<B: Backend> {
    ///
    /// * `rows`: number of rows (number of [VecZnxDft]).
    /// * `size`: number of size (number of size of each [VecZnxDft]).
-    fn new_mat_znx_dft(&self, rows: usize, cols: usize, size: usize) -> MatZnxDft<B>;
+    fn new_mat_znx_dft(&self, rows: usize, cols_in: usize, cols_out: usize, size: usize) -> MatZnxDft<B>;

-    fn bytes_of_mat_znx_dft(&self, rows: usize, cols: usize, size: usize) -> usize;
+    fn bytes_of_mat_znx_dft(&self, rows: usize, cols_in: usize, cols_out: usize, size: usize) -> usize;

-    fn new_mat_znx_dft_from_bytes(&self, rows: usize, cols: usize, size: usize, bytes: Vec<u8>) -> MatZnxDft<FFT64>;
+    fn new_mat_znx_dft_from_bytes(
+        &self,
+        rows: usize,
+        cols_in: usize,
+        cols_out: usize,
+        size: usize,
+        bytes: Vec<u8>,
+    ) -> MatZnxDft<FFT64>;

-    fn new_mat_znx_dft_from_bytes_borrow(&self, rows: usize, cols: usize, size: usize, bytes: &mut [u8]) -> MatZnxDft<FFT64>;
+    fn new_mat_znx_dft_from_bytes_borrow(
+        &self,
+        rows: usize,
+        cols_in: usize,
+        cols_out: usize,
+        size: usize,
+        bytes: &mut [u8],
+    ) -> MatZnxDft<FFT64>;

-    /// Returns the number of bytes needed as scratch space for [MatZnxDftOps::vmp_prepare_contiguous].
-    ///
-    /// # Arguments
-    ///
-    /// * `rows`: number of rows of the [MatZnxDft] used in [MatZnxDftOps::vmp_prepare_contiguous].
-    /// * `size`: number of size of the [MatZnxDft] used in [MatZnxDftOps::vmp_prepare_contiguous].
-    fn vmp_prepare_tmp_bytes(&self, rows: usize, cols: usize, size: usize) -> usize;
-
-    /// Prepares a [MatZnxDft] from a contiguous array of [i64].
-    /// The helper struct [Matrix3D] can be used to contruct and populate
-    /// the appropriate contiguous array.
-    ///
-    /// # Arguments
-    ///
-    /// * `b`: [MatZnxDft] on which the values are encoded.
-    /// * `a`: the contiguous array of [i64] of the 3D matrix to encode on the [MatZnxDft].
-    /// * `buf`: scratch space, the size of buf can be obtained with [MatZnxDftOps::vmp_prepare_tmp_bytes].
-    fn vmp_prepare_contiguous(&self, b: &mut MatZnxDft<B>, a: &[i64], buf: &mut [u8]);
+    /// Returns the number of bytes needed as scratch space for [MatZnxDftOps::vmp_prepare_row]
+    fn vmp_prepare_row_tmp_bytes(&self, cols_out: usize, size: usize) -> usize;

    /// Prepares the ith-row of [MatZnxDft] from a [VecZnx].
    ///
    /// # Arguments
    ///
    /// * `b`: [MatZnxDft] on which the values are encoded.
-    /// * `a`: the vector of [VecZnx] to encode on the [MatZnxDft].
-    /// * `row_i`: the index of the row to prepare.
+    /// * `row_i`: the row of the [MatZnxDft] to prepare.
+    /// * `a`: the [VecZnx] to encode on the i-th row of the [MatZnxDft].
    /// * `tmp_bytes`: scratch space, the size of tmp_bytes can be obtained with [MatZnxDftOps::vmp_prepare_row_tmp_bytes].
    ///
    /// The size of tmp_bytes can be obtained with [MatZnxDftOps::vmp_prepare_row_tmp_bytes].
-    fn vmp_prepare_row(&self, b: &mut MatZnxDft<B>, a: &[i64], row_i: usize, tmp_bytes: &mut [u8]);
+    fn vmp_prepare_row(&self, b: &mut MatZnxDft<B>, b_row: usize, b_col_in: usize, a: &VecZnx, tmp_bytes: &mut [u8]);
+
+    /// Returns the number of bytes needed as scratch space for [MatZnxDftOps::vmp_extract_row]
+    fn vmp_extract_row_tmp_bytes(&self, cols_out: usize, size: usize) -> usize;

    /// Extracts the ith-row of [MatZnxDft] into a [VecZnx].
    ///
@@ -59,7 +61,15 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `b`: the [VecZnx] into which the row of the [MatZnxDft] is extracted.
    /// * `a`: [MatZnxDft] on which the values are encoded.
    /// * `row_i`: the index of the row to extract.
-    fn vmp_extract_row(&self, b: &mut VecZnxBig<B>, a: &MatZnxDft<B>, row_i: usize);
+    fn vmp_extract_row(
+        &self,
+        log_base2k: usize,
+        b: &mut VecZnx,
+        a: &MatZnxDft<B>,
+        b_row: usize,
+        b_col_in: usize,
+        tmp_bytes: &mut [u8],
+    );

    /// Prepares the ith-row of [MatZnxDft] from a [VecZnxDft].
    ///
@@ -70,7 +80,7 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `row_i`: the index of the row to prepare.
    ///
    /// The size of buf can be obtained with [MatZnxDftOps::vmp_prepare_tmp_bytes].
-    fn vmp_prepare_row_dft(&self, b: &mut MatZnxDft<B>, a: &VecZnxDft<B>, row_i: usize);
+    fn vmp_prepare_row_dft(&self, b: &mut MatZnxDft<B>, b_row: usize, b_col_in: usize, a: &VecZnxDft<B>);

    /// Extracts the ith-row of [MatZnxDft] into a [VecZnxDft].
    ///
@@ -79,7 +89,7 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `b`: the [VecZnxDft] into which the row of the [MatZnxDft] is extracted.
    /// * `a`: [MatZnxDft] on which the values are encoded.
    /// * `row_i`: the index of the row to extract.
-    fn vmp_extract_row_dft(&self, b: &mut VecZnxDft<B>, row_i: usize, a: &MatZnxDft<B>);
+    fn vmp_extract_row_dft(&self, b: &mut VecZnxDft<B>, a: &MatZnxDft<B>, a_row: usize, a_col_in: usize);

    /// Returns the size of the scratch space necessary for [MatZnxDftOps::vmp_apply_dft].
    ///
@@ -89,7 +99,15 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `a_size`: number of size of the input [VecZnx].
    /// * `rows`: number of rows of the input [MatZnxDft].
    /// * `size`: number of size of the input [MatZnxDft].
-    fn vmp_apply_dft_tmp_bytes(&self, c_size: usize, a_size: usize, b_rows: usize, b_size: usize) -> usize;
+    fn vmp_apply_dft_tmp_bytes(
+        &self,
+        c_size: usize,
+        a_size: usize,
+        b_rows: usize,
+        b_cols_in: usize,
+        b_cols_out: usize,
+        b_size: usize,
+    ) -> usize;

    /// Applies the vector matrix product [VecZnxDft] x [MatZnxDft].
    ///
@@ -117,32 +135,6 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `buf`: scratch space, the size can be obtained with [MatZnxDftOps::vmp_apply_dft_tmp_bytes].
    fn vmp_apply_dft(&self, c: &mut VecZnxDft<B>, a: &VecZnx, b: &MatZnxDft<B>, buf: &mut [u8]);

-    /// Applies the vector matrix product [VecZnxDft] x [MatZnxDft] and adds on the receiver.
-    ///
-    /// A vector matrix product is equivalent to a sum of [crate::SvpPPolOps::svp_apply_dft]
-    /// where each [crate::Scalar] is a limb of the input [VecZnxDft] (equivalent to an [crate::SvpPPol])
-    /// and each vector a [VecZnxDft] (row) of the [MatZnxDft].
-    ///
-    /// As such, given an input [VecZnx] of `i` size and a [MatZnxDft] of `i` rows and
-    /// `j` size, the output is a [VecZnx] of `j` size.
-    ///
-    /// If there is a mismatch between the dimensions the largest valid ones are used.
-    ///
-    /// ```text
-    /// |a b c d| x |e f g| = (a * |e f g| + b * |h i j| + c * |k l m|) = |n o p|
-    ///             |h i j|
-    ///             |k l m|
-    /// ```
-    /// where each element is a [VecZnxDft].
-    ///
-    /// # Arguments
-    ///
-    /// * `c`: the operand on which the output of the vector matrix product is added, as a [VecZnxDft].
-    /// * `a`: the left operand [VecZnx] of the vector matrix product.
-    /// * `b`: the right operand [MatZnxDft] of the vector matrix product.
-    /// * `buf`: scratch space, the size can be obtained with [MatZnxDftOps::vmp_apply_dft_tmp_bytes].
-    fn vmp_apply_dft_add(&self, c: &mut VecZnxDft<B>, a: &VecZnx, b: &MatZnxDft<B>, buf: &mut [u8]);
-
    /// Returns the size of the scratch space necessary for [MatZnxDftOps::vmp_apply_dft_to_dft].
    ///
    /// # Arguments
@@ -151,7 +143,17 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `a_size`: number of size of the input [VecZnxDft].
    /// * `rows`: number of rows of the input [MatZnxDft].
    /// * `size`: number of size of the input [MatZnxDft].
-    fn vmp_apply_dft_to_dft_tmp_bytes(&self, c_size: usize, a_size: usize, rows: usize, size: usize) -> usize;
+    fn vmp_apply_dft_to_dft_tmp_bytes(
+        &self,
+        c_cols: usize,
+        c_size: usize,
+        a_cols: usize,
+        a_size: usize,
+        b_rows: usize,
+        b_cols_in: usize,
+        b_cols_out: usize,
+        b_size: usize,
+    ) -> usize;

    /// Applies the vector matrix product [VecZnxDft] x [MatZnxDft].
    /// The size of `buf` is given by [MatZnxDftOps::vmp_apply_dft_to_dft_tmp_bytes].
@@ -179,308 +181,385 @@ pub trait MatZnxDftOps<B: Backend> {
    /// * `b`: the right operand [MatZnxDft] of the vector matrix product.
    /// * `buf`: scratch space, the size can be obtained with [MatZnxDftOps::vmp_apply_dft_to_dft_tmp_bytes].
    fn vmp_apply_dft_to_dft(&self, c: &mut VecZnxDft<B>, a: &VecZnxDft<B>, b: &MatZnxDft<B>, buf: &mut [u8]);
-
-    /// Applies the vector matrix product [VecZnxDft] x [MatZnxDft] and adds on top of the receiver instead of overwritting it.
-    /// The size of `buf` is given by [MatZnxDftOps::vmp_apply_dft_to_dft_tmp_bytes].
-    ///
-    /// A vector matrix product is equivalent to a sum of [crate::SvpPPolOps::svp_apply_dft]
-    /// where each [crate::Scalar] is a limb of the input [VecZnxDft] (equivalent to an [crate::SvpPPol])
-    /// and each vector a [VecZnxDft] (row) of the [MatZnxDft].
-    ///
-    /// As such, given an input [VecZnx] of `i` size and a [MatZnxDft] of `i` rows and
-    /// `j` size, the output is a [VecZnx] of `j` size.
-    ///
-    /// If there is a mismatch between the dimensions the largest valid ones are used.
-    ///
-    /// ```text
-    /// |a b c d| x |e f g| = (a * |e f g| + b * |h i j| + c * |k l m|) = |n o p|
-    ///             |h i j|
-    ///             |k l m|
-    /// ```
-    /// where each element is a [VecZnxDft].
-    ///
-    /// # Arguments
-    ///
-    /// * `c`: the operand on which the output of the vector matrix product is added, as a [VecZnxDft].
-    /// * `a`: the left operand [VecZnxDft] of the vector matrix product.
-    /// * `b`: the right operand [MatZnxDft] of the vector matrix product.
-    /// * `buf`: scratch space, the size can be obtained with [MatZnxDftOps::vmp_apply_dft_to_dft_tmp_bytes].
-    fn vmp_apply_dft_to_dft_add(&self, c: &mut VecZnxDft<B>, a: &VecZnxDft<B>, b: &MatZnxDft<B>, buf: &mut [u8]);
-
-    /// Applies the vector matrix product [VecZnxDft] x [MatZnxDft] in place.
-    /// The size of `buf` is given by [MatZnxDftOps::vmp_apply_dft_to_dft_tmp_bytes].
-    ///
-    /// A vector matrix product is equivalent to a sum of [crate::SvpPPolOps::svp_apply_dft]
-    /// where each [crate::Scalar] is a limb of the input [VecZnxDft] (equivalent to an [crate::SvpPPol])
-    /// and each vector a [VecZnxDft] (row) of the [MatZnxDft].
-    ///
-    /// As such, given an input [VecZnx] of `i` size and a [MatZnxDft] of `i` rows and
-    /// `j` size, the output is a [VecZnx] of `j` size.
-    ///
-    /// If there is a mismatch between the dimensions the largest valid ones are used.
-    ///
-    /// ```text
-    /// |a b c d| x |e f g| = (a * |e f g| + b * |h i j| + c * |k l m|) = |n o p|
-    ///             |h i j|
-    ///             |k l m|
-    /// ```
-    /// where each element is a [VecZnxDft].
-    ///
-    /// # Arguments
-    ///
-    /// * `b`: the input and output of the vector matrix product, as a [VecZnxDft].
-    /// * `a`: the right operand [MatZnxDft] of the vector matrix product.
-    /// * `buf`: scratch space, the size can be obtained with [MatZnxDftOps::vmp_apply_dft_to_dft_tmp_bytes].
-    fn vmp_apply_dft_to_dft_inplace(&self, b: &mut VecZnxDft<B>, a: &MatZnxDft<B>, buf: &mut [u8]);
 }

 impl MatZnxDftOps<FFT64> for Module<FFT64> {
-    fn new_mat_znx_dft(&self, rows: usize, cols: usize, size: usize) -> MatZnxDft<FFT64> {
-        MatZnxDft::<FFT64>::new(self, rows, cols, size)
+    fn new_mat_znx_dft(&self, rows: usize, cols_in: usize, cols_out: usize, size: usize) -> MatZnxDft<FFT64> {
+        MatZnxDft::<FFT64>::new(self, rows, cols_in, cols_out, size)
    }

-    fn bytes_of_mat_znx_dft(&self, rows: usize, cols: usize, size: usize) -> usize {
-        MatZnxDft::<FFT64>::bytes_of(self, rows, cols, size)
+    fn bytes_of_mat_znx_dft(&self, rows: usize, cols_in: usize, cols_out: usize, size: usize) -> usize {
+        MatZnxDft::<FFT64>::bytes_of(self, rows, cols_in, cols_out, size)
    }

-    fn new_mat_znx_dft_from_bytes(&self, rows: usize, cols: usize, size: usize, bytes: Vec<u8>) -> MatZnxDft<FFT64> {
-        MatZnxDft::<FFT64>::from_bytes(self, rows, cols, size, bytes)
+    fn new_mat_znx_dft_from_bytes(
+        &self,
+        rows: usize,
+        cols_in: usize,
+        cols_out: usize,
+        size: usize,
+        bytes: Vec<u8>,
+    ) -> MatZnxDft<FFT64> {
+        MatZnxDft::<FFT64>::from_bytes(self, rows, cols_in, cols_out, size, bytes)
    }

-    fn new_mat_znx_dft_from_bytes_borrow(&self, rows: usize, cols: usize, size: usize, bytes: &mut [u8]) -> MatZnxDft<FFT64> {
-        MatZnxDft::<FFT64>::from_bytes_borrow(self, rows, cols, size, bytes)
+    fn new_mat_znx_dft_from_bytes_borrow(
+        &self,
+        rows: usize,
+        cols_in: usize,
+        cols_out: usize,
+        size: usize,
+        bytes: &mut [u8],
+    ) -> MatZnxDft<FFT64> {
+        MatZnxDft::<FFT64>::from_bytes_borrow(self, rows, cols_in, cols_out, size, bytes)
    }

-    fn vmp_prepare_tmp_bytes(&self, rows: usize, cols: usize, size: usize) -> usize {
-        unsafe { vmp::vmp_prepare_tmp_bytes(self.ptr, rows as u64, (size * cols) as u64) as usize }
+    fn vmp_prepare_row_tmp_bytes(&self, cols_out: usize, size: usize) -> usize {
+        self.bytes_of_vec_znx_dft(cols_out, size)
    }

-    fn vmp_prepare_contiguous(&self, b: &mut MatZnxDft<FFT64>, a: &[i64], tmp_bytes: &mut [u8]) {
+    fn vmp_prepare_row(&self, b: &mut MatZnxDft<FFT64>, b_row: usize, b_col_in: usize, a: &VecZnx, tmp_bytes: &mut [u8]) {
        #[cfg(debug_assertions)]
        {
-            assert_eq!(a.len(), b.n() * b.poly_count());
-            assert!(tmp_bytes.len() >= self.vmp_prepare_tmp_bytes(b.rows(), b.cols(), b.size()));
-            assert_alignement(tmp_bytes.as_ptr());
+            assert_eq!(b.n(), self.n());
+            assert_eq!(a.n(), self.n());
+            assert_eq!(
+                a.cols(),
+                b.cols_out(),
+                "a.cols(): {} != b.cols_out(): {}",
+                a.cols(),
+                b.cols_out()
+            );
+            assert!(
+                b_row < b.rows(),
+                "b_row: {} >= b.rows(): {}",
+                b_row,
+                b.rows()
+            );
+            assert!(
+                b_col_in < b.cols_in(),
+                "b_col_in: {} >= b.cols_in(): {}",
+                b_col_in,
+                b.cols_in()
+            );
+            assert_eq!(
+                b.size(),
+                a.size(),
+                "b.size(): {} != a.size(): {}",
+                b.size(),
+                a.size()
+            );
+            assert!(tmp_bytes.len() >= self.vmp_prepare_row_tmp_bytes(a.cols(), a.size()));
+            assert!(is_aligned(tmp_bytes.as_ptr()))
        }
-        unsafe {
-            vmp::vmp_prepare_contiguous(
-                self.ptr,
-                b.as_mut_ptr() as *mut vmp::vmp_pmat_t,
-                a.as_ptr(),
-                b.rows() as u64,
-                (b.size() * b.cols()) as u64,
-                tmp_bytes.as_mut_ptr(),
+
+        let cols_out: usize = a.cols();
+        let a_size: usize = a.size();
+
+        let (tmp_bytes_a_dft, _) = tmp_bytes.split_at_mut(self.bytes_of_vec_znx_dft(cols_out, a_size));
+
+        let mut a_dft: VecZnxDft<FFT64> = self.new_vec_znx_dft_from_bytes_borrow(cols_out, a_size, tmp_bytes_a_dft);
+        (0..cols_out).for_each(|i| self.vec_znx_dft(&mut a_dft, i, &a, i));
+
+        Self::vmp_prepare_row_dft(&self, b, b_row, b_col_in, &a_dft);
+    }
+
+    fn vmp_extract_row_tmp_bytes(&self, cols_out: usize, size: usize) -> usize {
+        self.bytes_of_vec_znx_dft(cols_out, size) + self.vec_znx_big_normalize_tmp_bytes()
+    }
+
+    fn vmp_extract_row(
+        &self,
+        log_base2k: usize,
+        b: &mut VecZnx,
+        a: &MatZnxDft<FFT64>,
+        a_row: usize,
+        a_col_in: usize,
+        tmp_bytes: &mut [u8],
+    ) {
+        #[cfg(debug_assertions)]
+        {
+            assert_eq!(b.n(), self.n());
+            assert_eq!(a.n(), self.n());
+            assert_eq!(
+                b.cols(),
+                a.cols_out(),
+                "b.cols(): {} != a.cols_out(): {}",
+                b.cols(),
+                a.cols_out()
+            );
+            assert!(
+                a_row < a.rows(),
+                "a_row: {} >= a.rows(): {}",
+                a_row,
+                a.rows()
+            );
+            assert!(
+                a_col_in < a.cols_in(),
+                "a_col_in: {} >= a.cols_in(): {}",
+                a_col_in,
+                a.cols_in()
+            );
+            assert_eq!(
+                b.size(),
+                a.size(),
+                "b.size(): {} != a.size(): {}",
+                b.size(),
+                a.size()
+            );
+            assert!(tmp_bytes.len() >= self.vmp_extract_row_tmp_bytes(a.cols(), a.size()));
+            assert!(is_aligned(tmp_bytes.as_ptr()))
+        }
+
+        let cols_out: usize = b.cols();
+        let size: usize = b.size();
+
+        let (bytes_a_dft, tmp_bytes) = tmp_bytes.split_at_mut(self.bytes_of_vec_znx_dft(cols_out, size));
+        let mut b_dft: VecZnxDft<FFT64> = self.new_vec_znx_dft_from_bytes_borrow(cols_out, size, bytes_a_dft);
+        Self::vmp_extract_row_dft(&self, &mut b_dft, a, a_row, a_col_in);
+        let mut b_big: VecZnxBig<FFT64> = b_dft.alias_as_vec_znx_big();
+        (0..cols_out).for_each(|i| {
+            self.vec_znx_idft_tmp_a(&mut b_big, i, &mut b_dft, i);
+            self.vec_znx_big_normalize(log_base2k, b, i, &b_big, i, tmp_bytes);
+        });
+    }
+
+    fn vmp_prepare_row_dft(&self, b: &mut MatZnxDft<FFT64>, b_row: usize, b_col_in: usize, a: &VecZnxDft<FFT64>) {
+        #[cfg(debug_assertions)]
+        {
+            assert_eq!(b.n(), self.n());
+            assert_eq!(a.n(), self.n());
+            assert_eq!(
+                a.cols(),
+                b.cols_out(),
+                "a.cols(): {} != b.cols_out(): {}",
+                a.cols(),
+                b.cols_out()
+            );
+            assert!(
+                b_row < b.rows(),
+                "b_row: {} >= b.rows(): {}",
+                b_row,
+                b.rows()
+            );
+            assert!(
+                b_col_in < b.cols_in(),
+                "b_col_in: {} >= b.cols_in(): {}",
+                b_col_in,
+                b.cols_in()
+            );
+            assert_eq!(
+                b.size(),
+                a.size(),
+                "b.size(): {} != a.size(): {}",
+                b.size(),
+                a.size()
            );
        }
-    }

-    fn vmp_prepare_row(&self, b: &mut MatZnxDft<FFT64>, a: &[i64], row_i: usize, tmp_bytes: &mut [u8]) {
-        #[cfg(debug_assertions)]
-        {
-            assert_eq!(a.len(), b.size() * self.n() * b.cols());
-            assert!(tmp_bytes.len() >= self.vmp_prepare_tmp_bytes(b.rows(), b.cols(), b.size()));
-            assert_alignement(tmp_bytes.as_ptr());
-        }
-        unsafe {
-            vmp::vmp_prepare_row(
-                self.ptr,
-                b.as_mut_ptr() as *mut vmp::vmp_pmat_t,
-                a.as_ptr(),
-                row_i as u64,
-                b.rows() as u64,
-                (b.size() * b.cols()) as u64,
-                tmp_bytes.as_mut_ptr(),
-            );
-        }
-    }
-
-    fn vmp_extract_row(&self, b: &mut VecZnxBig<FFT64>, a: &MatZnxDft<FFT64>, row_i: usize) {
-        #[cfg(debug_assertions)]
-        {
-            assert_eq!(a.n(), b.n());
-            assert_eq!(a.size(), b.size());
-            assert_eq!(a.cols(), b.cols());
-        }
-        unsafe {
-            vmp::vmp_extract_row(
-                self.ptr,
-                b.as_mut_ptr() as *mut vec_znx_big_t,
-                a.as_ptr() as *const vmp::vmp_pmat_t,
-                row_i as u64,
-                a.rows() as u64,
-                (a.size() * a.cols()) as u64,
-            );
-        }
-    }
-
-    fn vmp_prepare_row_dft(&self, b: &mut MatZnxDft<FFT64>, a: &VecZnxDft<FFT64>, row_i: usize) {
-        #[cfg(debug_assertions)]
-        {
-            assert_eq!(a.n(), b.n());
-            assert_eq!(a.size(), b.size());
-        }
        unsafe {
            vmp::vmp_prepare_row_dft(
                self.ptr,
                b.as_mut_ptr() as *mut vmp::vmp_pmat_t,
                a.as_ptr() as *const vec_znx_dft_t,
-                row_i as u64,
-                b.rows() as u64,
-                b.size() as u64,
+                (b_row * b.cols_in() + b_col_in) as u64,
+                (b.rows() * b.cols_in()) as u64,
+                (b.size() * b.cols_out()) as u64,
            );
        }
    }

-    fn vmp_extract_row_dft(&self, b: &mut VecZnxDft<FFT64>, row_i: usize, a: &MatZnxDft<FFT64>) {
+    fn vmp_extract_row_dft(&self, b: &mut VecZnxDft<FFT64>, a: &MatZnxDft<FFT64>, a_row: usize, a_col_in: usize) {
        #[cfg(debug_assertions)]
        {
-            assert_eq!(a.n(), b.n());
-            assert_eq!(a.size(), b.size());
+            assert_eq!(b.n(), self.n());
+            assert_eq!(a.n(), self.n());
+            assert_eq!(
+                b.cols(),
+                a.cols_out(),
+                "b.cols(): {} != a.cols_out(): {}",
+                b.cols(),
+                a.cols_out()
+            );
+            assert!(
+                a_row < a.rows(),
+                "a_row: {} >= a.rows(): {}",
+                a_row,
+                a.rows()
+            );
+            assert!(
+                a_col_in < a.cols_in(),
+                "a_col_in: {} >= a.cols_in(): {}",
+                a_col_in,
+                a.cols_in()
+            );
+            assert_eq!(
+                b.size(),
+                a.size(),
+                "b.size(): {} != a.size(): {}",
+                b.size(),
+                a.size()
+            );
        }
        unsafe {
            vmp::vmp_extract_row_dft(
                self.ptr,
                b.as_mut_ptr() as *mut vec_znx_dft_t,
                a.as_ptr() as *const vmp::vmp_pmat_t,
-                row_i as u64,
-                a.rows() as u64,
-                a.size() as u64,
+                (a_row * a.cols_in() + a_col_in) as u64,
+                (a.rows() * a.cols_in()) as u64,
+                (a.size() * a.cols_out()) as u64,
            );
        }
    }

-    fn vmp_apply_dft_tmp_bytes(&self, res_size: usize, a_size: usize, b_rows: usize, b_size: usize) -> usize {
+    fn vmp_apply_dft_tmp_bytes(
+        &self,
+        res_size: usize,
+        a_size: usize,
+        b_rows: usize,
+        b_cols_in: usize,
+        b_cols_out: usize,
+        b_size: usize,
+    ) -> usize {
        unsafe {
            vmp::vmp_apply_dft_tmp_bytes(
                self.ptr,
                res_size as u64,
                a_size as u64,
-                b_rows as u64,
-                b_size as u64,
+                (b_rows * b_cols_in) as u64,
+                (b_size * b_cols_out) as u64,
            ) as usize
        }
    }

    fn vmp_apply_dft(&self, c: &mut VecZnxDft<FFT64>, a: &VecZnx, b: &MatZnxDft<FFT64>, tmp_bytes: &mut [u8]) {
-        debug_assert!(tmp_bytes.len() >= self.vmp_apply_dft_tmp_bytes(c.size(), a.size(), b.rows(), b.size()));
+        debug_assert!(
+            tmp_bytes.len()
+                >= self.vmp_apply_dft_tmp_bytes(
+                    c.size(),
+                    a.size(),
+                    b.rows(),
+                    b.cols_in(),
+                    b.cols_out(),
+                    b.size()
+                )
+        );
        #[cfg(debug_assertions)]
        {
+            assert_eq!(c.n(), self.n());
+            assert_eq!(b.n(), self.n());
+            assert_eq!(a.n(), self.n());
+            assert_eq!(
+                c.cols(),
+                b.cols_out(),
+                "c.cols(): {} != b.cols_out: {}",
+                c.cols(),
+                b.cols_out()
+            );
+            assert_eq!(
+                a.cols(),
+                b.cols_in(),
+                "a.cols(): {} != b.cols_in: {}",
+                a.cols(),
+                b.cols_in()
+            );
+            assert!(
+                tmp_bytes.len()
+                    >= self.vmp_apply_dft_tmp_bytes(
+                        c.size(),
+                        a.size(),
+                        b.rows(),
+                        b.cols_in(),
+                        b.cols_out(),
+                        b.size()
+                    )
+            );
            assert_alignement(tmp_bytes.as_ptr());
        }
        unsafe {
            vmp::vmp_apply_dft(
                self.ptr,
                c.as_mut_ptr() as *mut vec_znx_dft_t,
-                c.size() as u64,
+                (c.size() * c.cols()) as u64,
                a.as_ptr(),
-                a.size() as u64,
-                (a.n() * a.cols()) as u64,
+                (a.size() * a.cols()) as u64,
+                a.n() as u64,
                b.as_ptr() as *const vmp::vmp_pmat_t,
-                b.rows() as u64,
-                b.size() as u64,
+                (b.rows() * b.cols_in()) as u64,
+                (b.size() * b.cols_out()) as u64,
                tmp_bytes.as_mut_ptr(),
            )
        }
    }

-    fn vmp_apply_dft_add(&self, c: &mut VecZnxDft<FFT64>, a: &VecZnx, b: &MatZnxDft<FFT64>, tmp_bytes: &mut [u8]) {
-        debug_assert!(tmp_bytes.len() >= self.vmp_apply_dft_tmp_bytes(c.size(), a.size(), b.rows(), b.size()));
-        #[cfg(debug_assertions)]
-        {
-            assert_alignement(tmp_bytes.as_ptr());
-        }
-        unsafe {
-            vmp::vmp_apply_dft_add(
-                self.ptr,
-                c.as_mut_ptr() as *mut vec_znx_dft_t,
-                c.size() as u64,
-                a.as_ptr(),
-                a.size() as u64,
-                (a.n() * a.size()) as u64,
-                b.as_ptr() as *const vmp::vmp_pmat_t,
-                b.rows() as u64,
-                b.size() as u64,
-                tmp_bytes.as_mut_ptr(),
-            )
-        }
-    }
-
-    fn vmp_apply_dft_to_dft_tmp_bytes(&self, res_size: usize, a_size: usize, gct_rows: usize, gct_size: usize) -> usize {
+    fn vmp_apply_dft_to_dft_tmp_bytes(
+        &self,
+        res_cols: usize,
+        res_size: usize,
+        a_size: usize,
+        a_cols: usize,
+        b_rows: usize,
+        b_cols_in: usize,
+        b_cols_out: usize,
+        b_size: usize,
+    ) -> usize {
        unsafe {
            vmp::vmp_apply_dft_to_dft_tmp_bytes(
                self.ptr,
-                res_size as u64,
-                a_size as u64,
-                gct_rows as u64,
-                gct_size as u64,
+                (res_size * res_cols) as u64,
+                (a_size * a_cols) as u64,
+                (b_rows * b_cols_in) as u64,
+                (b_size * b_cols_out) as u64,
            ) as usize
        }
    }

    fn vmp_apply_dft_to_dft(&self, c: &mut VecZnxDft<FFT64>, a: &VecZnxDft<FFT64>, b: &MatZnxDft<FFT64>, tmp_bytes: &mut [u8]) {
-        debug_assert!(tmp_bytes.len() >= self.vmp_apply_dft_to_dft_tmp_bytes(c.size(), a.size(), b.rows(), b.size()));
        #[cfg(debug_assertions)]
        {
+            assert_eq!(c.n(), self.n());
+            assert_eq!(b.n(), self.n());
+            assert_eq!(a.n(), self.n());
+            assert_eq!(
+                c.cols(),
+                b.cols_out(),
+                "c.cols(): {} != b.cols_out: {}",
+                c.cols(),
+                b.cols_out()
+            );
+            assert_eq!(
+                a.cols(),
+                b.cols_in(),
+                "a.cols(): {} != b.cols_in: {}",
+                a.cols(),
+                b.cols_in()
+            );
+            assert!(
+                tmp_bytes.len()
+                    >= self.vmp_apply_dft_to_dft_tmp_bytes(
+                        c.cols(),
+                        c.size(),
+                        a.cols(),
+                        a.size(),
+                        b.rows(),
+                        b.cols_in(),
+                        b.cols_out(),
+                        b.size()
+                    )
+            );
            assert_alignement(tmp_bytes.as_ptr());
        }
        unsafe {
            vmp::vmp_apply_dft_to_dft(
                self.ptr,
                c.as_mut_ptr() as *mut vec_znx_dft_t,
-                c.size() as u64,
+                c.poly_count() as u64,
                a.as_ptr() as *const vec_znx_dft_t,
-                a.size() as u64,
+                a.poly_count() as u64,
                b.as_ptr() as *const vmp::vmp_pmat_t,
                b.rows() as u64,
-                b.size() as u64,
-                tmp_bytes.as_mut_ptr(),
-            )
-        }
-    }
-
-    fn vmp_apply_dft_to_dft_add(
-        &self,
-        c: &mut VecZnxDft<FFT64>,
-        a: &VecZnxDft<FFT64>,
-        b: &MatZnxDft<FFT64>,
-        tmp_bytes: &mut [u8],
-    ) {
-        debug_assert!(tmp_bytes.len() >= self.vmp_apply_dft_to_dft_tmp_bytes(c.size(), a.size(), b.rows(), b.size()));
-        #[cfg(debug_assertions)]
-        {
-            assert_alignement(tmp_bytes.as_ptr());
-        }
-        unsafe {
-            vmp::vmp_apply_dft_to_dft_add(
-                self.ptr,
-                c.as_mut_ptr() as *mut vec_znx_dft_t,
-                c.size() as u64,
-                a.as_ptr() as *const vec_znx_dft_t,
-                a.size() as u64,
-                b.as_ptr() as *const vmp::vmp_pmat_t,
-                b.rows() as u64,
-                b.size() as u64,
-                tmp_bytes.as_mut_ptr(),
-            )
-        }
-    }
-
-    fn vmp_apply_dft_to_dft_inplace(&self, b: &mut VecZnxDft<FFT64>, a: &MatZnxDft<FFT64>, tmp_bytes: &mut [u8]) {
-        debug_assert!(tmp_bytes.len() >= self.vmp_apply_dft_to_dft_tmp_bytes(b.size(), b.size(), a.rows(), a.size()));
-        #[cfg(debug_assertions)]
-        {
-            assert_alignement(tmp_bytes.as_ptr());
-        }
-        unsafe {
-            vmp::vmp_apply_dft_to_dft(
-                self.ptr,
-                b.as_mut_ptr() as *mut vec_znx_dft_t,
-                b.size() as u64,
-                b.as_ptr() as *mut vec_znx_dft_t,
-                b.size() as u64,
-                a.as_ptr() as *const vmp::vmp_pmat_t,
-                a.rows() as u64,
-                a.size() as u64,
+                (b.size() * b.cols()) as u64,
                tmp_bytes.as_mut_ptr(),
            )
        }
@@ -497,38 +576,52 @@ mod tests {

    #[test]
    fn vmp_prepare_row_dft() {
-        let module: Module<FFT64> = Module::<FFT64>::new(32);
-        let vpmat_rows: usize = 4;
-        let vpmat_size: usize = 5;
+        let module: Module<FFT64> = Module::<FFT64>::new(16);
        let log_base2k: usize = 8;
-        let mut a: VecZnx = module.new_vec_znx(1, vpmat_size);
-        let mut a_dft: VecZnxDft<FFT64> = module.new_vec_znx_dft(1, vpmat_size);
-        let mut a_big: VecZnxBig<FFT64> = module.new_vec_znx_big(1, vpmat_size);
-        let mut b_big: VecZnxBig<FFT64> = module.new_vec_znx_big(1, vpmat_size);
-        let mut b_dft: VecZnxDft<FFT64> = module.new_vec_znx_dft(1, vpmat_size);
-        let mut vmpmat_0: MatZnxDft<FFT64> = module.new_mat_znx_dft(vpmat_rows, 1, vpmat_size);
-        let mut vmpmat_1: MatZnxDft<FFT64> = module.new_mat_znx_dft(vpmat_rows, 1, vpmat_size);
+        let mat_rows: usize = 4;
+        let mat_cols_in: usize = 2;
+        let mat_cols_out: usize = 2;
+        let mat_size: usize = 5;
+        let mut a: VecZnx = module.new_vec_znx(mat_cols_out, mat_size);
+        let mut b: VecZnx = module.new_vec_znx(mat_cols_out, mat_size);
+        let mut a_dft: VecZnxDft<FFT64> = module.new_vec_znx_dft(mat_cols_out, mat_size);
+        let mut a_big: VecZnxBig<FFT64> = module.new_vec_znx_big(mat_cols_out, mat_size);
+        let mut b_dft: VecZnxDft<FFT64> = module.new_vec_znx_dft(mat_cols_out, mat_size);
+        let mut vmpmat_0: MatZnxDft<FFT64> = module.new_mat_znx_dft(mat_rows, mat_cols_in, mat_cols_out, mat_size);
+        let mut vmpmat_1: MatZnxDft<FFT64> = module.new_mat_znx_dft(mat_rows, mat_cols_in, mat_cols_out, mat_size);

-        let mut tmp_bytes: Vec<u8> = alloc_aligned(module.vmp_prepare_tmp_bytes(vpmat_rows, 1, vpmat_size));
+        let mut tmp_bytes: Vec<u8> =
+            alloc_aligned(module.vmp_prepare_row_tmp_bytes(mat_cols_out, mat_size) | module.vec_znx_big_normalize_tmp_bytes());

-        for row_i in 0..vpmat_rows {
-            let mut source: Source = Source::new([0u8; 32]);
-            module.fill_uniform(log_base2k, &mut a, 0, vpmat_size, &mut source);
-            module.vec_znx_dft(&mut a_dft, 0, &a, 0);
-            module.vmp_prepare_row(&mut vmpmat_0, &a.raw(), row_i, &mut tmp_bytes);
+        for col_in in 0..mat_cols_in {
+            for row_i in 0..mat_rows {
+                let mut source: Source = Source::new([0u8; 32]);

-            // Checks that prepare(mat_znx_dft, a) = prepare_dft(mat_znx_dft, a_dft)
-            module.vmp_prepare_row_dft(&mut vmpmat_1, &a_dft, row_i);
-            assert_eq!(vmpmat_0.raw(), vmpmat_1.raw());
+                (0..mat_cols_out).for_each(|col_out| {
+                    module.fill_uniform(log_base2k, &mut a, col_out, mat_size, &mut source);
+                    module.vec_znx_dft(&mut a_dft, col_out, &a, col_out);
+                });

-            // Checks that a_dft = extract_dft(prepare(mat_znx_dft, a), b_dft)
-            module.vmp_extract_row_dft(&mut b_dft, row_i, &vmpmat_0);
-            assert_eq!(a_dft.raw(), b_dft.raw());
+                module.vmp_prepare_row(&mut vmpmat_0, row_i, col_in, &a, &mut tmp_bytes);

-            // Checks that a_big = extract(prepare_dft(mat_znx_dft, a_dft), b_big)
-            module.vmp_extract_row(&mut b_big, &vmpmat_0, row_i);
-            module.vec_znx_idft(&mut a_big, 0, &a_dft, 0, &mut tmp_bytes);
-            assert_eq!(a_big.raw(), b_big.raw());
+                // Checks that prepare(mat_znx_dft, a) = prepare_dft(mat_znx_dft, a_dft)
+                module.vmp_prepare_row_dft(&mut vmpmat_1, row_i, col_in, &a_dft);
+                assert_eq!(vmpmat_0.raw(), vmpmat_1.raw());
+
+                // Checks that a_dft = extract_dft(prepare(mat_znx_dft, a), b_dft)
+                module.vmp_extract_row_dft(&mut b_dft, &vmpmat_0, row_i, col_in);
+                assert_eq!(a_dft.raw(), b_dft.raw());
+
+                // Checks that normalize(idft(a_dft)) = extract(prepare(mat_znx_dft, a), b)
+                module.vmp_extract_row(log_base2k, &mut b, &vmpmat_0, row_i, col_in, &mut tmp_bytes);
+
+                (0..mat_cols_out).for_each(|col_out| {
+                    module.vec_znx_idft(&mut a_big, col_out, &a_dft, col_out, &mut tmp_bytes);
+                    module.vec_znx_big_normalize(log_base2k, &mut a, col_out, &a_big, col_out, &mut tmp_bytes);
+                });
+
+                assert_eq!(a.raw(), b.raw());
+            }
        }

        module.free();