Ref. + AVX code & generic tests + benches (#85)

This commit is contained in:
Jean-Philippe Bossuat
2025-09-15 16:16:11 +02:00
committed by GitHub
parent 99b9e3e10e
commit 56dbd29c59
286 changed files with 27797 additions and 7270 deletions

View File

@@ -1,7 +1,7 @@
use poulpy_hal::{
api::{
DFT, IDFTConsume, ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxNormalizeTmpBytes,
VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxDftApply, VecZnxIdftApplyConsume,
VecZnxNormalizeTmpBytes, VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
},
layouts::{Backend, DataMut, DataRef, Module, Scratch},
};
@@ -51,10 +51,10 @@ impl<DataSelf: DataMut> GGLWEAutomorphismKey<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{
@@ -70,10 +70,10 @@ impl<DataSelf: DataMut> GGLWEAutomorphismKey<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{

View File

@@ -1,7 +1,7 @@
use poulpy_hal::{
api::{
DFT, IDFTConsume, ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxNormalizeTmpBytes,
VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxDftApply, VecZnxIdftApplyConsume,
VecZnxNormalizeTmpBytes, VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
},
layouts::{Backend, DataMut, DataRef, Module, Scratch, ZnxZero},
};
@@ -51,10 +51,10 @@ impl<DataSelf: DataMut> GGLWESwitchingKey<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{
@@ -106,10 +106,10 @@ impl<DataSelf: DataMut> GGLWESwitchingKey<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{

View File

@@ -1,7 +1,7 @@
use poulpy_hal::{
api::{
DFT, IDFTConsume, ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxNormalizeTmpBytes,
VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxDftApply, VecZnxIdftApplyConsume,
VecZnxNormalizeTmpBytes, VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
},
layouts::{Backend, DataMut, DataRef, Module, Scratch, ZnxZero},
};
@@ -51,10 +51,10 @@ impl<DataSelf: DataMut> GGSWCiphertext<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{
@@ -116,10 +116,10 @@ impl<DataSelf: DataMut> GGSWCiphertext<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{

View File

@@ -1,7 +1,7 @@
use poulpy_hal::{
api::{
DFT, IDFTConsume, ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxNormalizeTmpBytes,
VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
ScratchAvailable, TakeVecZnxDft, VecZnxBigNormalize, VecZnxDftAllocBytes, VecZnxDftApply, VecZnxIdftApplyConsume,
VecZnxNormalizeTmpBytes, VmpApplyDftToDft, VmpApplyDftToDftAdd, VmpApplyDftToDftTmpBytes,
},
layouts::{Backend, DataMut, DataRef, DataViewMut, Module, Scratch, VecZnxBig},
};
@@ -65,10 +65,10 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{
@@ -101,8 +101,8 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
let cols: usize = rhs.rank() + 1;
let digits: usize = rhs.digits();
let (mut res_dft, scratch1) = scratch.take_vec_znx_dft(self.n(), cols, rhs.size()); // Todo optimise
let (mut a_dft, scratch2) = scratch1.take_vec_znx_dft(self.n(), cols, lhs.size().div_ceil(digits));
let (mut res_dft, scratch_1) = scratch.take_vec_znx_dft(self.n(), cols, rhs.size()); // Todo optimise
let (mut a_dft, scratch_2) = scratch_1.take_vec_znx_dft(self.n(), cols, lhs.size().div_ceil(digits));
a_dft.data_mut().fill(0);
@@ -121,21 +121,21 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
res_dft.set_size(rhs.size() - ((digits - di) as isize - 2).max(0) as usize);
(0..cols).for_each(|col_i| {
module.dft(digits, digits - 1 - di, &mut a_dft, col_i, &lhs.data, col_i);
module.vec_znx_dft_apply(digits, digits - 1 - di, &mut a_dft, col_i, &lhs.data, col_i);
});
if di == 0 {
module.vmp_apply_dft_to_dft(&mut res_dft, &a_dft, &rhs.data, scratch2);
module.vmp_apply_dft_to_dft(&mut res_dft, &a_dft, &rhs.data, scratch_2);
} else {
module.vmp_apply_dft_to_dft_add(&mut res_dft, &a_dft, &rhs.data, di, scratch2);
module.vmp_apply_dft_to_dft_add(&mut res_dft, &a_dft, &rhs.data, di, scratch_2);
}
});
}
let res_big: VecZnxBig<&mut [u8], B> = module.vec_znx_idft_consume(res_dft);
let res_big: VecZnxBig<&mut [u8], B> = module.vec_znx_idft_apply_consume(res_dft);
(0..cols).for_each(|i| {
module.vec_znx_big_normalize(basek, &mut self.data, i, &res_big, i, scratch1);
module.vec_znx_big_normalize(basek, &mut self.data, i, &res_big, i, scratch_1);
});
}
@@ -148,16 +148,81 @@ impl<DataSelf: DataMut> GLWECiphertext<DataSelf> {
Module<B>: VecZnxDftAllocBytes
+ VmpApplyDftToDftTmpBytes
+ VecZnxNormalizeTmpBytes
+ DFT<B>
+ VecZnxDftApply<B>
+ VmpApplyDftToDft<B>
+ VmpApplyDftToDftAdd<B>
+ IDFTConsume<B>
+ VecZnxIdftApplyConsume<B>
+ VecZnxBigNormalize<B>,
Scratch<B>: TakeVecZnxDft<B> + ScratchAvailable,
{
unsafe {
let self_ptr: *mut GLWECiphertext<DataSelf> = self as *mut GLWECiphertext<DataSelf>;
self.external_product(module, &*self_ptr, rhs, scratch);
let basek: usize = self.basek();
#[cfg(debug_assertions)]
{
use poulpy_hal::api::ScratchAvailable;
assert_eq!(rhs.rank(), self.rank());
assert_eq!(self.basek(), basek);
assert_eq!(rhs.n(), self.n());
assert!(
scratch.available()
>= GLWECiphertext::external_product_scratch_space(
module,
self.basek(),
self.k(),
self.k(),
rhs.k(),
rhs.digits(),
rhs.rank(),
)
);
}
let cols: usize = rhs.rank() + 1;
let digits: usize = rhs.digits();
let (mut res_dft, scratch_1) = scratch.take_vec_znx_dft(self.n(), cols, rhs.size()); // Todo optimise
let (mut a_dft, scratch_2) = scratch_1.take_vec_znx_dft(self.n(), cols, self.size().div_ceil(digits));
a_dft.data_mut().fill(0);
{
(0..digits).for_each(|di| {
// (lhs.size() + di) / digits = (a - (digit - di - 1)).div_ceil(digits)
a_dft.set_size((self.size() + di) / digits);
// Small optimization for digits > 2
// VMP produce some error e, and since we aggregate vmp * 2^{di * B}, then
// we also aggregate ei * 2^{di * B}, with the largest error being ei * 2^{(digits-1) * B}.
// As such we can ignore the last digits-2 limbs safely of the sum of vmp products.
// It is possible to further ignore the last digits-1 limbs, but this introduce
// ~0.5 to 1 bit of additional noise, and thus not chosen here to ensure that the same
// noise is kept with respect to the ideal functionality.
res_dft.set_size(rhs.size() - ((digits - di) as isize - 2).max(0) as usize);
(0..cols).for_each(|col_i| {
module.vec_znx_dft_apply(
digits,
digits - 1 - di,
&mut a_dft,
col_i,
&self.data,
col_i,
);
});
if di == 0 {
module.vmp_apply_dft_to_dft(&mut res_dft, &a_dft, &rhs.data, scratch_2);
} else {
module.vmp_apply_dft_to_dft_add(&mut res_dft, &a_dft, &rhs.data, di, scratch_2);
}
});
}
let res_big: VecZnxBig<&mut [u8], B> = module.vec_znx_idft_apply_consume(res_dft);
(0..cols).for_each(|i| {
module.vec_znx_big_normalize(basek, &mut self.data, i, &res_big, i, scratch_1);
});
}
}