Ref. + AVX code & generic tests + benches (#85)

This commit is contained in:
Jean-Philippe Bossuat
2025-09-15 16:16:11 +02:00
committed by GitHub
parent 99b9e3e10e
commit 56dbd29c59
286 changed files with 27797 additions and 7270 deletions

View File

@@ -1,7 +1,8 @@
use poulpy_hal::{
api::{
DFT, IDFTConsume, ScratchAvailable, TakeVecZnxDft, VecZnxBigAddSmallInplace, VecZnxBigNormalize,
VecZnxBigNormalizeTmpBytes, VecZnxDftAllocBytes, VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
ScratchAvailable, TakeVecZnxDft, VecZnxBigAddSmallInplace, VecZnxBigNormalize, VecZnxBigNormalizeTmpBytes,
VecZnxDftAllocBytes, VecZnxDftApply, VecZnxIdftApplyConsume, VmpApplyDftToDft, VmpApplyDftToDftAdd,
VmpApplyDftToDftTmpBytes,
},
layouts::{Backend, DataMut, DataRef, DataViewMut, Module, Scratch, VecZnx, VecZnxBig, VecZnxDft, VmpPMat, ZnxInfos},
};
@@ -117,6 +118,63 @@ impl<DataSelf: DataRef> GLWECiphertext<DataSelf> {
)
);
}
#[allow(dead_code)]
pub(crate) fn assert_keyswitch_inplace<B: Backend, DataRhs>(
&self,
module: &Module<B>,
rhs: &GGLWESwitchingKeyPrepared<DataRhs, B>,
scratch: &Scratch<B>,
) where
DataRhs: DataRef,
Module<B>: VecZnxDftAllocBytes + VmpApplyDftToDftTmpBytes + VecZnxBigNormalizeTmpBytes,
Scratch<B>: ScratchAvailable,
{
let basek: usize = self.basek();
assert_eq!(
self.rank(),
rhs.rank_out(),
"self.rank(): {} != rhs.rank_out(): {}",
self.rank(),
rhs.rank_out()
);
assert_eq!(self.basek(), basek);
assert_eq!(rhs.n(), self.n());
assert!(
scratch.available()
>= GLWECiphertext::keyswitch_scratch_space(
module,
self.basek(),
self.k(),
self.k(),
rhs.k(),
rhs.digits(),
rhs.rank_in(),
rhs.rank_out(),
),
"scratch.available()={} < GLWECiphertext::keyswitch_scratch_space(
module,
self.basek(),
self.k(),
self.k(),
rhs.k(),
rhs.digits(),
rhs.rank_in(),
rhs.rank_out(),
)={}",
scratch.available(),
GLWECiphertext::keyswitch_scratch_space(
module,
self.basek(),
self.k(),
self.k(),
rhs.k(),
rhs.digits(),
rhs.rank_in(),
rhs.rank_out(),
)
);
}
}
impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
@@ -130,11 +188,10 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxBigNormalizeTmpBytes
+ VmpApplyDftToDftTmpBytes
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ DFT<B>
+ IDFTConsume<B>
+ VecZnxDftApply<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigAddSmallInplace<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: ScratchAvailable + TakeVecZnxDft<B>,
@@ -143,10 +200,10 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
{
self.assert_keyswitch(module, lhs, rhs, scratch);
}
let (res_dft, scratch1) = scratch.take_vec_znx_dft(self.n(), self.cols(), rhs.size()); // Todo optimise
let res_big: VecZnxBig<_, B> = lhs.keyswitch_internal(module, res_dft, rhs, scratch1);
let (res_dft, scratch_1) = scratch.take_vec_znx_dft(self.n(), self.cols(), rhs.size()); // Todo optimise
let res_big: VecZnxBig<_, B> = lhs.keyswitch_internal(module, res_dft, rhs, scratch_1);
(0..self.cols()).for_each(|i| {
module.vec_znx_big_normalize(self.basek(), &mut self.data, i, &res_big, i, scratch1);
module.vec_znx_big_normalize(self.basek(), &mut self.data, i, &res_big, i, scratch_1);
})
}
@@ -162,16 +219,21 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
+ VmpApplyDftToDftTmpBytes
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ DFT<B>
+ IDFTConsume<B>
+ VecZnxDftApply<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigAddSmallInplace<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: ScratchAvailable + TakeVecZnxDft<B>,
{
unsafe {
let self_ptr: *mut GLWECiphertext<DataSelf> = self as *mut GLWECiphertext<DataSelf>;
self.keyswitch(module, &*self_ptr, rhs, scratch);
#[cfg(debug_assertions)]
{
self.assert_keyswitch_inplace(module, rhs, scratch);
}
let (res_dft, scratch_1) = scratch.take_vec_znx_dft(self.n(), self.cols(), rhs.size()); // Todo optimise
let res_big: VecZnxBig<_, B> = self.keyswitch_internal(module, res_dft, rhs, scratch_1);
(0..self.cols()).for_each(|i| {
module.vec_znx_big_normalize(self.basek(), &mut self.data, i, &res_big, i, scratch_1);
})
}
}
@@ -192,8 +254,8 @@ impl<D: DataRef> GLWECiphertext<D> {
+ VmpApplyDftToDftTmpBytes
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ DFT<B>
+ IDFTConsume<B>
+ VecZnxDftApply<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigAddSmallInplace<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B>,
@@ -224,16 +286,17 @@ where
DataRes: DataMut,
DataIn: DataRef,
DataVmp: DataRef,
Module<B>: VecZnxDftAllocBytes + DFT<B> + VmpApplyDftToDft<B> + IDFTConsume<B> + VecZnxBigAddSmallInplace<B>,
Module<B>:
VecZnxDftAllocBytes + VecZnxDftApply<B> + VmpApplyDftToDft<B> + VecZnxIdftApplyConsume<B> + VecZnxBigAddSmallInplace<B>,
Scratch<B>: TakeVecZnxDft<B>,
{
let cols: usize = a.cols();
let (mut ai_dft, scratch1) = scratch.take_vec_znx_dft(a.n(), cols - 1, a.size());
let (mut ai_dft, scratch_1) = scratch.take_vec_znx_dft(a.n(), cols - 1, a.size());
(0..cols - 1).for_each(|col_i| {
module.dft(1, 0, &mut ai_dft, col_i, a, col_i + 1);
module.vec_znx_dft_apply(1, 0, &mut ai_dft, col_i, a, col_i + 1);
});
module.vmp_apply_dft_to_dft(&mut res_dft, &ai_dft, mat, scratch1);
let mut res_big: VecZnxBig<DataRes, B> = module.vec_znx_idft_consume(res_dft);
module.vmp_apply_dft_to_dft(&mut res_dft, &ai_dft, mat, scratch_1);
let mut res_big: VecZnxBig<DataRes, B> = module.vec_znx_idft_apply_consume(res_dft);
module.vec_znx_big_add_small_inplace(&mut res_big, 0, a, 0);
res_big
}
@@ -251,16 +314,16 @@ where
DataIn: DataRef,
DataVmp: DataRef,
Module<B>: VecZnxDftAllocBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigAddSmallInplace<B>,
Scratch<B>: TakeVecZnxDft<B>,
{
let cols: usize = a.cols();
let size: usize = a.size();
let (mut ai_dft, scratch1) = scratch.take_vec_znx_dft(a.n(), cols - 1, size.div_ceil(digits));
let (mut ai_dft, scratch_1) = scratch.take_vec_znx_dft(a.n(), cols - 1, size.div_ceil(digits));
ai_dft.data_mut().fill(0);
@@ -277,18 +340,18 @@ where
res_dft.set_size(mat.size() - ((digits - di) as isize - 2).max(0) as usize);
(0..cols - 1).for_each(|col_i| {
module.dft(digits, digits - di - 1, &mut ai_dft, col_i, a, col_i + 1);
module.vec_znx_dft_apply(digits, digits - di - 1, &mut ai_dft, col_i, a, col_i + 1);
});
if di == 0 {
module.vmp_apply_dft_to_dft(&mut res_dft, &ai_dft, mat, scratch1);
module.vmp_apply_dft_to_dft(&mut res_dft, &ai_dft, mat, scratch_1);
} else {
module.vmp_apply_dft_to_dft_add(&mut res_dft, &ai_dft, mat, di, scratch1);
module.vmp_apply_dft_to_dft_add(&mut res_dft, &ai_dft, mat, di, scratch_1);
}
});
res_dft.set_size(res_dft.max_size());
let mut res_big: VecZnxBig<DataRes, B> = module.vec_znx_idft_consume(res_dft);
let mut res_big: VecZnxBig<DataRes, B> = module.vec_znx_idft_apply_consume(res_dft);
module.vec_znx_big_add_small_inplace(&mut res_big, 0, a, 0);
res_big
}