Merge remote-tracking branch 'origin/dev_bdd_selector' into dev_bdd_selector

This commit is contained in:
Pro7ech
2025-10-26 17:30:20 +01:00
11 changed files with 173 additions and 140 deletions

View File

@@ -194,10 +194,10 @@ unsafe impl VecZnxDftCopyImpl<Self> for FFT64Avx {
} }
unsafe impl VecZnxDftZeroImpl<Self> for FFT64Avx { unsafe impl VecZnxDftZeroImpl<Self> for FFT64Avx {
fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R) fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<Self>, R: VecZnxDftToMut<Self>,
{ {
vec_znx_dft_zero(res); vec_znx_dft_zero(res, res_col);
} }
} }

View File

@@ -4,6 +4,6 @@ use crate::FFT64Ref;
#[test] #[test]
fn test_convolution_fft64_ref() { fn test_convolution_fft64_ref() {
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(64); let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(8);
test_convolution(&module); test_convolution(&module);
} }

View File

@@ -194,10 +194,10 @@ unsafe impl VecZnxDftCopyImpl<Self> for FFT64Ref {
} }
unsafe impl VecZnxDftZeroImpl<Self> for FFT64Ref { unsafe impl VecZnxDftZeroImpl<Self> for FFT64Ref {
fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R) fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<Self>, R: VecZnxDftToMut<Self>,
{ {
vec_znx_dft_zero(res); vec_znx_dft_zero(res, res_col);
} }
} }

View File

@@ -12,7 +12,7 @@ use poulpy_hal::{
reference::{ reference::{
fft64::{ fft64::{
reim::{ReimCopy, ReimZero, reim_copy_ref, reim_negate_inplace_ref, reim_negate_ref, reim_zero_ref}, reim::{ReimCopy, ReimZero, reim_copy_ref, reim_negate_inplace_ref, reim_negate_ref, reim_zero_ref},
vec_znx_dft::vec_znx_dft_copy, vec_znx_dft::{vec_znx_dft_copy, vec_znx_dft_zero},
}, },
znx::znx_zero_ref, znx::znx_zero_ref,
}, },
@@ -426,10 +426,10 @@ impl ReimZero for FFT64Spqlios {
} }
unsafe impl VecZnxDftZeroImpl<Self> for FFT64Spqlios { unsafe impl VecZnxDftZeroImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R) fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<Self>, R: VecZnxDftToMut<Self>,
{ {
res.to_mut().data.fill(0); vec_znx_dft_zero(res, res_col);
} }
} }

View File

@@ -1,9 +1,9 @@
use crate::{ use crate::{
api::{ api::{
ModuleN, ScratchTakeBasic, SvpApplyDftToDft, SvpPPolAlloc, SvpPPolBytesOf, SvpPrepare, VecZnxDftAddScaledInplace, ModuleN, ScratchTakeBasic, SvpApplyDftToDft, SvpPPolAlloc, SvpPPolBytesOf, SvpPrepare, VecZnxDftAddScaledInplace,
VecZnxDftBytesOf, VecZnxDftBytesOf, VecZnxDftZero,
}, },
layouts::{Backend, Module, Scratch, VecZnxDftToMut, VecZnxDftToRef, VecZnxToRef, ZnxInfos, ZnxZero}, layouts::{Backend, Module, Scratch, VecZnxDftToMut, VecZnxDftToRef, VecZnxToRef, ZnxInfos},
}; };
impl<BE: Backend> Convolution<BE> for Module<BE> impl<BE: Backend> Convolution<BE> for Module<BE>
@@ -15,7 +15,8 @@ where
+ SvpPrepare<BE> + SvpPrepare<BE>
+ SvpPPolBytesOf + SvpPPolBytesOf
+ VecZnxDftBytesOf + VecZnxDftBytesOf
+ VecZnxDftAddScaledInplace<BE>, + VecZnxDftAddScaledInplace<BE>
+ VecZnxDftZero<BE>,
Scratch<BE>: ScratchTakeBasic, Scratch<BE>: ScratchTakeBasic,
{ {
} }
@@ -29,46 +30,15 @@ where
+ SvpPrepare<BE> + SvpPrepare<BE>
+ SvpPPolBytesOf + SvpPPolBytesOf
+ VecZnxDftBytesOf + VecZnxDftBytesOf
+ VecZnxDftAddScaledInplace<BE>, + VecZnxDftAddScaledInplace<BE>
+ VecZnxDftZero<BE>,
Scratch<BE>: ScratchTakeBasic, Scratch<BE>: ScratchTakeBasic,
{ {
fn convolution_tmp_bytes(&self, res_size: usize) -> usize { fn convolution_tmp_bytes(&self, b_size: usize) -> usize {
self.bytes_of_svp_ppol(1) + self.bytes_of_vec_znx_dft(1, res_size) self.bytes_of_svp_ppol(1) + self.bytes_of_vec_znx_dft(1, b_size)
} }
/// Evaluates a bivariate convolution over Z[X, Y] / (X^N + 1) where Y = 2^-K fn bivariate_convolution_full<R, A, B>(&self, k: i64, res: &mut R, a: &A, b: &B, scratch: &mut Scratch<BE>)
/// and scales the result by 2^{res_scale * K}
///
/// # Example
/// a = [a00, a10, a20, a30] = (a00 * 2^-K + a01 * 2^-2K) + (a10 * 2^-K + a11 * 2^-2K) * X ...
/// [a01, a11, a21, a31]
///
/// b = [b00, b10, b20, b30] = (b00 * 2^-K + b01 * 2^-2K) + (b10 * 2^-K + b11 * 2^-2K) * X ...
/// [b01, b11, b21, b31]
///
/// If res_scale = 0:
/// res = [ 0, 0, 0, 0] = (r01 * 2^-2K + r02 * 2^-3K + r03 * 2^-4K + r04 * 2^-5K) + ...
/// [r01, r11, r21, r31]
/// [r02, r12, r22, r32]
/// [r03, r13, r23, r33]
/// [r04, r14, r24, r34]
///
/// If res_scale = 1:
/// res = [r01, r11, r21, r31] = (r01 * 2^-K + r02 * 2^-2K + r03 * 2^-3K + r04 * 2^-4K + r05 * 2^-5K) + ...
/// [r02, r12, r22, r32]
/// [r03, r13, r23, r33]
/// [r04, r14, r24, r34]
/// [r05, r15, r25, r35]
///
/// If res_scale = -1:
/// res = [ 0, 0, 0, 0] = (r01 * 2^-3K + r02 * 2^-4K + r03 * 2^-5K) + ...
/// [ 0, 0, 0, 0]
/// [r01, r11, r21, r31]
/// [r02, r12, r22, r32]
/// [r03, r13, r23, r33]
///
/// If res.size() < a.size() + b.size() + 1 + res_scale, result is truncated accordingly in the Y dimension.
fn convolution<R, A, B>(&self, res: &mut R, res_scale: i64, a: &A, b: &B, scratch: &mut Scratch<BE>)
where where
R: VecZnxDftToMut<BE>, R: VecZnxDftToMut<BE>,
A: VecZnxToRef, A: VecZnxToRef,
@@ -78,32 +48,99 @@ where
let a: &crate::layouts::VecZnx<&[u8]> = &a.to_ref(); let a: &crate::layouts::VecZnx<&[u8]> = &a.to_ref();
let b: &crate::layouts::VecZnxDft<&[u8], BE> = &b.to_ref(); let b: &crate::layouts::VecZnxDft<&[u8], BE> = &b.to_ref();
assert!(res.cols() >= a.cols() + b.cols() - 1); let res_cols: usize = res.cols();
let a_cols: usize = a.cols();
let b_cols: usize = b.cols();
res.zero(); assert!(res_cols >= a_cols + b_cols - 1);
for res_col in 0..res_cols {
let a_min: usize = res_col.saturating_sub(b_cols - 1);
let a_max: usize = res_col.min(a_cols - 1);
self.bivariate_convolution_single(k, res, res_col, a, a_min, b, res_col - a_min, scratch);
for a_col in a_min + 1..a_max + 1 {
self.bivariate_convolution_single_add(k, res, res_col, a, a_col, b, res_col - a_col, scratch);
}
}
}
/// Evaluates a bivariate convolution over Z[X, Y] / (X^N + 1) where Y = 2^-K over the
/// selected columns and stores the result on the selected column, scaled by 2^{k * Base2K}
///
/// # Example
/// a = [a00, a10, a20, a30] = (a00 * 2^-K + a01 * 2^-2K) + (a10 * 2^-K + a11 * 2^-2K) * X ...
/// [a01, a11, a21, a31]
///
/// b = [b00, b10, b20, b30] = (b00 * 2^-K + b01 * 2^-2K) + (b10 * 2^-K + b11 * 2^-2K) * X ...
/// [b01, b11, b21, b31]
///
/// If k = 0:
/// res = [ 0, 0, 0, 0] = (r01 * 2^-2K + r02 * 2^-3K + r03 * 2^-4K + r04 * 2^-5K) + ...
/// [r01, r11, r21, r31]
/// [r02, r12, r22, r32]
/// [r03, r13, r23, r33]
/// [r04, r14, r24, r34]
///
/// If k = 1:
/// res = [r01, r11, r21, r31] = (r01 * 2^-K + r02 * 2^-2K + r03 * 2^-3K + r04 * 2^-4K + r05 * 2^-5K) + ...
/// [r02, r12, r22, r32]
/// [r03, r13, r23, r33]
/// [r04, r14, r24, r34]
/// [r05, r15, r25, r35]
///
/// If k = -1:
/// res = [ 0, 0, 0, 0] = (r01 * 2^-3K + r02 * 2^-4K + r03 * 2^-5K) + ...
/// [ 0, 0, 0, 0]
/// [r01, r11, r21, r31]
/// [r02, r12, r22, r32]
/// [r03, r13, r23, r33]
///
/// If res.size() < a.size() + b.size() + 1 + k, result is truncated accordingly in the Y dimension.
fn bivariate_convolution_single_add<R, A, B>(
&self,
k: i64,
res: &mut R,
res_col: usize,
a: &A,
a_col: usize,
b: &B,
b_col: usize,
scratch: &mut Scratch<BE>,
) where
R: VecZnxDftToMut<BE>,
A: VecZnxToRef,
B: VecZnxDftToRef<BE>,
{
let res: &mut crate::layouts::VecZnxDft<&mut [u8], BE> = &mut res.to_mut();
let a: &crate::layouts::VecZnx<&[u8]> = &a.to_ref();
let b: &crate::layouts::VecZnxDft<&[u8], BE> = &b.to_ref();
let (mut ppol, scratch_1) = scratch.take_svp_ppol(self, 1); let (mut ppol, scratch_1) = scratch.take_svp_ppol(self, 1);
let (mut res_tmp, _) = scratch_1.take_vec_znx_dft(self, 1, res.size()); let (mut res_tmp, _) = scratch_1.take_vec_znx_dft(self, 1, b.size());
for a_col in 0..a.cols() {
for a_limb in 0..a.size() { for a_limb in 0..a.size() {
// Prepares the j-th limb of the i-th col of A
self.svp_prepare(&mut ppol, 0, &a.as_scalar_znx_ref(a_col, a_limb), 0); self.svp_prepare(&mut ppol, 0, &a.as_scalar_znx_ref(a_col, a_limb), 0);
for b_col in 0..b.cols() {
// Multiplies with the i-th col of B
self.svp_apply_dft_to_dft(&mut res_tmp, 0, &ppol, 0, b, b_col); self.svp_apply_dft_to_dft(&mut res_tmp, 0, &ppol, 0, b, b_col);
self.vec_znx_dft_add_scaled_inplace(res, res_col, &res_tmp, 0, -(1 + a_limb as i64) + k);
}
}
// Adds on the [a_col + b_col] of res, scaled by 2^{-(a_limb + 1) * Base2K} fn bivariate_convolution_single<R, A, B>(
self.vec_znx_dft_add_scaled_inplace( &self,
res, k: i64,
a_col + b_col, res: &mut R,
&res_tmp, res_col: usize,
0, a: &A,
-(1 + a_limb as i64) + res_scale, a_col: usize,
); b: &B,
} b_col: usize,
} scratch: &mut Scratch<BE>,
} ) where
R: VecZnxDftToMut<BE>,
A: VecZnxToRef,
B: VecZnxDftToRef<BE>,
{
self.vec_znx_dft_zero(res, res_col);
self.bivariate_convolution_single_add(k, res, res_col, a, a_col, b, b_col, scratch);
} }
} }

View File

@@ -97,7 +97,7 @@ pub trait VecZnxDftCopy<B: Backend> {
} }
pub trait VecZnxDftZero<B: Backend> { pub trait VecZnxDftZero<B: Backend> {
fn vec_znx_dft_zero<R>(&self, res: &mut R) fn vec_znx_dft_zero<R>(&self, res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<B>; R: VecZnxDftToMut<B>;
} }

View File

@@ -200,10 +200,10 @@ impl<B> VecZnxDftZero<B> for Module<B>
where where
B: Backend + VecZnxDftZeroImpl<B>, B: Backend + VecZnxDftZeroImpl<B>,
{ {
fn vec_znx_dft_zero<R>(&self, res: &mut R) fn vec_znx_dft_zero<R>(&self, res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<B>, R: VecZnxDftToMut<B>,
{ {
B::vec_znx_dft_zero_impl(self, res); B::vec_znx_dft_zero_impl(self, res, res_col);
} }
} }

View File

@@ -188,7 +188,7 @@ pub unsafe trait VecZnxDftCopyImpl<B: Backend> {
/// * See [crate::api::VecZnxDftZero] for corresponding public API. /// * See [crate::api::VecZnxDftZero] for corresponding public API.
/// # Safety [crate::doc::backend_safety] for safety contract. /// # Safety [crate::doc::backend_safety] for safety contract.
pub unsafe trait VecZnxDftZeroImpl<B: Backend> { pub unsafe trait VecZnxDftZeroImpl<B: Backend> {
fn vec_znx_dft_zero_impl<R>(module: &Module<B>, res: &mut R) fn vec_znx_dft_zero_impl<R>(module: &Module<B>, res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<B>; R: VecZnxDftToMut<B>;
} }

View File

@@ -118,7 +118,7 @@ where
} }
} else if a_scale < 0 { } else if a_scale < 0 {
let shift: usize = (a_scale.unsigned_abs() as usize).min(res_size); let shift: usize = (a_scale.unsigned_abs() as usize).min(res_size);
let sum_size: usize = a_size.min(res_size).saturating_sub(shift); let sum_size: usize = a_size.min(res_size.saturating_sub(shift));
for j in 0..sum_size { for j in 0..sum_size {
BE::reim_add_inplace(res.at_mut(res_col, j + shift), a.at(a_col, j)); BE::reim_add_inplace(res.at_mut(res_col, j + shift), a.at(a_col, j));
} }
@@ -398,10 +398,13 @@ where
} }
} }
pub fn vec_znx_dft_zero<R, BE>(res: &mut R) pub fn vec_znx_dft_zero<R, BE>(res: &mut R, res_col: usize)
where where
R: VecZnxDftToMut<BE>, R: VecZnxDftToMut<BE>,
BE: Backend<ScalarPrep = f64> + ReimZero, BE: Backend<ScalarPrep = f64> + ReimZero,
{ {
BE::reim_zero(res.to_mut().raw_mut()); let res: &mut VecZnxDft<&mut [u8], BE> = &mut res.to_mut();
for j in 0..res.size() {
BE::reim_zero(res.at_mut(res_col, j))
}
} }

View File

@@ -1,7 +1,7 @@
use crate::{ use crate::{
api::{ api::{
Convolution, ModuleN, ScratchOwnedAlloc, ScratchOwnedBorrow, ScratchTakeBasic, TakeSlice, VecZnxBigNormalize, Convolution, ModuleN, ScratchOwnedAlloc, ScratchOwnedBorrow, ScratchTakeBasic, TakeSlice, VecZnxBigAlloc,
VecZnxDftAlloc, VecZnxDftApply, VecZnxIdftApplyConsume, VecZnxNormalizeInplace, VecZnxBigNormalize, VecZnxDftAlloc, VecZnxDftApply, VecZnxIdftApplyTmpA, VecZnxNormalizeInplace,
}, },
layouts::{ layouts::{
Backend, FillUniform, Scratch, ScratchOwned, VecZnx, VecZnxBig, VecZnxDft, VecZnxToMut, VecZnxToRef, ZnxInfos, ZnxView, Backend, FillUniform, Scratch, ScratchOwned, VecZnx, VecZnxBig, VecZnxDft, VecZnxToMut, VecZnxToRef, ZnxInfos, ZnxView,
@@ -16,9 +16,10 @@ where
+ Convolution<BE> + Convolution<BE>
+ VecZnxDftAlloc<BE> + VecZnxDftAlloc<BE>
+ VecZnxDftApply<BE> + VecZnxDftApply<BE>
+ VecZnxIdftApplyConsume<BE> + VecZnxIdftApplyTmpA<BE>
+ VecZnxBigNormalize<BE> + VecZnxBigNormalize<BE>
+ VecZnxNormalizeInplace<BE>, + VecZnxNormalizeInplace<BE>
+ VecZnxBigAlloc<BE>,
Scratch<BE>: ScratchTakeBasic, Scratch<BE>: ScratchTakeBasic,
ScratchOwned<BE>: ScratchOwnedAlloc<BE> + ScratchOwnedBorrow<BE>, ScratchOwned<BE>: ScratchOwnedAlloc<BE> + ScratchOwnedBorrow<BE>,
{ {
@@ -26,36 +27,41 @@ where
let base2k: usize = 12; let base2k: usize = 12;
for a_cols in 1..3 { let a_cols: usize = 3;
for b_cols in 1..3 { let b_cols: usize = 3;
for a_size in 1..5 { let a_size: usize = 3;
for b_size in 1..5 { let b_size: usize = 3;
let c_cols: usize = a_cols + b_cols - 1;
let c_size: usize = a_size + b_size;
let mut a: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), a_cols, a_size); let mut a: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), a_cols, a_size);
let mut b: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), b_cols, b_size); let mut b: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), b_cols, b_size);
let mut c_want: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), a_cols + b_cols - 1, b_size + a_size); let mut c_want: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), c_cols, c_size);
let mut c_have: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), c_want.cols(), c_want.size()); let mut c_have: VecZnx<Vec<u8>> = VecZnx::alloc(module.n(), c_cols, c_size);
let mut c_have_dft: VecZnxDft<Vec<u8>, BE> = module.vec_znx_dft_alloc(c_cols, c_size);
let mut c_have_big: VecZnxBig<Vec<u8>, BE> = module.vec_znx_big_alloc(c_cols, c_size);
let mut scratch: ScratchOwned<BE> = ScratchOwned::alloc(module.convolution_tmp_bytes(c_want.size())); let mut scratch: ScratchOwned<BE> = ScratchOwned::alloc(module.convolution_tmp_bytes(b_size));
a.fill_uniform(base2k, &mut source); a.fill_uniform(base2k, &mut source);
b.fill_uniform(base2k, &mut source); b.fill_uniform(base2k, &mut source);
let mut b_dft: VecZnxDft<Vec<u8>, BE> = module.vec_znx_dft_alloc(b.cols(), b.size()); let mut b_dft: VecZnxDft<Vec<u8>, BE> = module.vec_znx_dft_alloc(b_cols, b_size);
for i in 0..b.cols() { for i in 0..b.cols() {
module.vec_znx_dft_apply(1, 0, &mut b_dft, i, &b, i); module.vec_znx_dft_apply(1, 0, &mut b_dft, i, &b, i);
} }
for mut res_scale in 0..2 * c_want.size() as i64 + 1 { for mut k in 0..(2 * c_size + 1) as i64 {
res_scale -= c_want.size() as i64; k -= c_size as i64;
let mut c_have_dft: VecZnxDft<Vec<u8>, BE> = module.vec_znx_dft_alloc(c_have.cols(), c_have.size()); module.bivariate_convolution_full(k, &mut c_have_dft, &a, &b_dft, scratch.borrow());
module.convolution(&mut c_have_dft, res_scale, &a, &b_dft, scratch.borrow());
let c_have_big: VecZnxBig<Vec<u8>, BE> = module.vec_znx_idft_apply_consume(c_have_dft); for i in 0..c_cols {
module.vec_znx_idft_apply_tmpa(&mut c_have_big, i, &mut c_have_dft, i);
}
for i in 0..c_have.cols() { for i in 0..c_cols {
module.vec_znx_big_normalize( module.vec_znx_big_normalize(
base2k, base2k,
&mut c_have, &mut c_have,
@@ -67,29 +73,17 @@ where
); );
} }
convolution_naive( convolution_naive(module, base2k, k, &mut c_want, &a, &b, scratch.borrow());
module,
base2k,
&mut c_want,
res_scale,
&a,
&b,
scratch.borrow(),
);
assert_eq!(c_want, c_have); assert_eq!(c_want, c_have);
} }
}
}
}
}
} }
fn convolution_naive<R, A, B, M, BE: Backend>( fn convolution_naive<R, A, B, M, BE: Backend>(
module: &M, module: &M,
base2k: usize, base2k: usize,
k: i64,
res: &mut R, res: &mut R,
res_scale: i64,
a: &A, a: &A,
b: &B, b: &B,
scratch: &mut Scratch<BE>, scratch: &mut Scratch<BE>,
@@ -112,11 +106,11 @@ fn convolution_naive<R, A, B, M, BE: Backend>(
for a_limb in 0..a.size() { for a_limb in 0..a.size() {
for b_col in 0..b.cols() { for b_col in 0..b.cols() {
for b_limb in 0..b.size() { for b_limb in 0..b.size() {
let res_scale_abs = res_scale.unsigned_abs() as usize; let res_scale_abs = k.unsigned_abs() as usize;
let mut res_limb: usize = a_limb + b_limb + 1; let mut res_limb: usize = a_limb + b_limb + 1;
if res_scale <= 0 { if k <= 0 {
res_limb += res_scale_abs; res_limb += res_scale_abs;
if res_limb < res.size() { if res_limb < res.size() {

View File

@@ -189,12 +189,12 @@ fn execute_block_binary_extended<DataRes, DataIn, DataBrk, M, BE: Backend>(
brk.data.chunks_exact(block_size) brk.data.chunks_exact(block_size)
) )
.for_each(|(ai, ski)| { .for_each(|(ai, ski)| {
(0..extension_factor).for_each(|i| { for i in 0..extension_factor {
(0..cols).for_each(|j| { for j in 0..cols {
module.vec_znx_dft_apply(1, 0, &mut acc_dft[i], j, &acc[i], j); module.vec_znx_dft_apply(1, 0, &mut acc_dft[i], j, &acc[i], j);
}); module.vec_znx_dft_zero(&mut acc_add_dft[i], j)
module.vec_znx_dft_zero(&mut acc_add_dft[i]) }
}); }
// TODO: first & last iterations can be optimized // TODO: first & last iterations can be optimized
izip!(ai.iter(), ski.iter()).for_each(|(aii, skii)| { izip!(ai.iter(), ski.iter()).for_each(|(aii, skii)| {
@@ -342,11 +342,10 @@ fn execute_block_binary<DataRes, DataIn, DataBrk, M, BE: Backend>(
brk.data.chunks_exact(block_size) brk.data.chunks_exact(block_size)
) )
.for_each(|(ai, ski)| { .for_each(|(ai, ski)| {
(0..cols).for_each(|j| { for j in 0..cols {
module.vec_znx_dft_apply(1, 0, &mut acc_dft, j, out_mut.data_mut(), j); module.vec_znx_dft_apply(1, 0, &mut acc_dft, j, out_mut.data_mut(), j);
}); module.vec_znx_dft_zero(&mut acc_add_dft, j)
}
module.vec_znx_dft_zero(&mut acc_add_dft);
izip!(ai.iter(), ski.iter()).for_each(|(aii, skii)| { izip!(ai.iter(), ski.iter()).for_each(|(aii, skii)| {
let ai_pos: usize = ((aii + two_n as i64) & (two_n - 1) as i64) as usize; let ai_pos: usize = ((aii + two_n as i64) & (two_n - 1) as i64) as usize;