Ref. + AVX code & generic tests + benches (#85)

Jean-Philippe Bossuat
2025-09-15 16:16:11 +02:00
committed by GitHub
parent 99b9e3e10e
commit 56dbd29c59
286 changed files with 27797 additions and 7270 deletions


@@ -1,60 +1,73 @@
use poulpy_hal::{
api::{TakeSlice, VecZnxIDFTTmpBytes},
api::{TakeSlice, VecZnxIdftApplyTmpBytes},
layouts::{
Backend, Data, Module, Scratch, VecZnx, VecZnxBig, VecZnxBigToMut, VecZnxDft, VecZnxDftOwned, VecZnxDftToMut,
VecZnxDftToRef, VecZnxToRef, ZnxInfos, ZnxSliceSize, ZnxView, ZnxViewMut, ZnxZero,
VecZnxDftToRef, VecZnxToRef, ZnxInfos, ZnxSliceSize, ZnxView, ZnxViewMut,
},
oep::{
DFTImpl, IDFTConsumeImpl, IDFTImpl, IDFTTmpAImpl, VecZnxDftAddImpl, VecZnxDftAddInplaceImpl, VecZnxDftAllocBytesImpl,
VecZnxDftAllocImpl, VecZnxDftCopyImpl, VecZnxDftFromBytesImpl, VecZnxDftSubABInplaceImpl, VecZnxDftSubBAInplaceImpl,
VecZnxDftSubImpl, VecZnxDftZeroImpl, VecZnxIDFTTmpBytesImpl,
VecZnxDftAddImpl, VecZnxDftAddInplaceImpl, VecZnxDftAllocBytesImpl, VecZnxDftAllocImpl, VecZnxDftApplyImpl,
VecZnxDftCopyImpl, VecZnxDftFromBytesImpl, VecZnxDftSubABInplaceImpl, VecZnxDftSubBAInplaceImpl, VecZnxDftSubImpl,
VecZnxDftZeroImpl, VecZnxIdftApplyConsumeImpl, VecZnxIdftApplyImpl, VecZnxIdftApplyTmpAImpl, VecZnxIdftApplyTmpBytesImpl,
},
reference::{
fft64::{
reim::{ReimCopy, ReimZero, reim_copy_ref, reim_negate_inplace_ref, reim_negate_ref, reim_zero_ref},
vec_znx_dft::vec_znx_dft_copy,
},
znx::znx_zero_ref,
},
};
use crate::cpu_spqlios::{
FFT64,
FFT64Spqlios,
ffi::{vec_znx_big, vec_znx_dft},
};
unsafe impl VecZnxDftFromBytesImpl<Self> for FFT64 {
unsafe impl VecZnxDftFromBytesImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_from_bytes_impl(n: usize, cols: usize, size: usize, bytes: Vec<u8>) -> VecZnxDftOwned<Self> {
VecZnxDft::<Vec<u8>, FFT64>::from_bytes(n, cols, size, bytes)
VecZnxDft::<Vec<u8>, Self>::from_bytes(n, cols, size, bytes)
}
}
unsafe impl VecZnxDftAllocBytesImpl<Self> for FFT64 {
unsafe impl VecZnxDftAllocBytesImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_alloc_bytes_impl(n: usize, cols: usize, size: usize) -> usize {
FFT64::layout_prep_word_count() * n * cols * size * size_of::<<FFT64 as Backend>::ScalarPrep>()
Self::layout_prep_word_count() * n * cols * size * size_of::<<FFT64Spqlios as Backend>::ScalarPrep>()
}
}
unsafe impl VecZnxDftAllocImpl<Self> for FFT64 {
unsafe impl VecZnxDftAllocImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_alloc_impl(n: usize, cols: usize, size: usize) -> VecZnxDftOwned<Self> {
VecZnxDftOwned::alloc(n, cols, size)
}
}
unsafe impl VecZnxIDFTTmpBytesImpl<Self> for FFT64 {
fn vec_znx_idft_tmp_bytes_impl(module: &Module<Self>) -> usize {
unsafe impl VecZnxIdftApplyTmpBytesImpl<Self> for FFT64Spqlios {
fn vec_znx_idft_apply_tmp_bytes_impl(module: &Module<Self>) -> usize {
unsafe { vec_znx_dft::vec_znx_idft_tmp_bytes(module.ptr()) as usize }
}
}
unsafe impl IDFTImpl<Self> for FFT64 {
fn idft_impl<R, A>(module: &Module<Self>, res: &mut R, res_col: usize, a: &A, a_col: usize, scratch: &mut Scratch<Self>)
where
unsafe impl VecZnxIdftApplyImpl<Self> for FFT64Spqlios {
fn vec_znx_idft_apply_impl<R, A>(
module: &Module<Self>,
res: &mut R,
res_col: usize,
a: &A,
a_col: usize,
scratch: &mut Scratch<Self>,
) where
R: VecZnxBigToMut<Self>,
A: VecZnxDftToRef<Self>,
{
let mut res: VecZnxBig<&mut [u8], FFT64> = res.to_mut();
let a: VecZnxDft<&[u8], FFT64> = a.to_ref();
let mut res: VecZnxBig<&mut [u8], Self> = res.to_mut();
let a: VecZnxDft<&[u8], Self> = a.to_ref();
#[cfg(debug_assertions)]
{
assert_eq!(res.n(), a.n())
}
let (tmp_bytes, _) = scratch.take_slice(module.vec_znx_idft_tmp_bytes());
let (tmp_bytes, _) = scratch.take_slice(module.vec_znx_idft_apply_tmp_bytes());
let min_size: usize = res.size().min(a.size());
@@ -69,47 +82,43 @@ unsafe impl IDFTImpl<Self> for FFT64 {
tmp_bytes.as_mut_ptr(),
)
});
(min_size..res.size()).for_each(|j| {
res.zero_at(res_col, j);
});
(min_size..res.size()).for_each(|j| znx_zero_ref(res.at_mut(res_col, j)));
}
}
}
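// Illustrative sketch (hypothetical standalone helper, not part of this commit):
// the limb-wise pattern used by vec_znx_idft_apply_impl above. The backward
// transform runs on the limbs both operands share, and any extra limbs of the
// result are zeroed so no stale data survives in the big-coefficient output.
fn idft_limbwise(res: &mut [Vec<i64>], a: &[Vec<f64>], idft: impl Fn(&[f64]) -> Vec<i64>) {
    let min_size = res.len().min(a.len());
    for j in 0..min_size {
        res[j] = idft(&a[j]); // backward DFT of limb j
    }
    for limb in res.iter_mut().skip(min_size) {
        limb.iter_mut().for_each(|x| *x = 0); // zero the remaining limbs
    }
}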
unsafe impl IDFTTmpAImpl<Self> for FFT64 {
fn idft_tmp_a_impl<R, A>(module: &Module<Self>, res: &mut R, res_col: usize, a: &mut A, a_col: usize)
unsafe impl VecZnxIdftApplyTmpAImpl<Self> for FFT64Spqlios {
fn vec_znx_idft_apply_tmpa_impl<R, A>(module: &Module<Self>, res: &mut R, res_col: usize, a: &mut A, a_col: usize)
where
R: VecZnxBigToMut<Self>,
A: VecZnxDftToMut<Self>,
{
let mut res_mut: VecZnxBig<&mut [u8], FFT64> = res.to_mut();
let mut a_mut: VecZnxDft<&mut [u8], FFT64> = a.to_mut();
let mut res: VecZnxBig<&mut [u8], Self> = res.to_mut();
let mut a_mut: VecZnxDft<&mut [u8], Self> = a.to_mut();
let min_size: usize = res_mut.size().min(a_mut.size());
let min_size: usize = res.size().min(a_mut.size());
unsafe {
(0..min_size).for_each(|j| {
vec_znx_dft::vec_znx_idft_tmp_a(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_big::vec_znx_big_t,
res.at_mut_ptr(res_col, j) as *mut vec_znx_big::vec_znx_big_t,
1_u64,
a_mut.at_mut_ptr(a_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1_u64,
)
});
(min_size..res_mut.size()).for_each(|j| {
res_mut.zero_at(res_col, j);
})
(min_size..res.size()).for_each(|j| znx_zero_ref(res.at_mut(res_col, j)))
}
}
}
unsafe impl IDFTConsumeImpl<Self> for FFT64 {
fn idft_consume_impl<D: Data>(module: &Module<Self>, mut a: VecZnxDft<D, FFT64>) -> VecZnxBig<D, FFT64>
unsafe impl VecZnxIdftApplyConsumeImpl<Self> for FFT64Spqlios {
fn vec_znx_idft_apply_consume_impl<D: Data>(module: &Module<Self>, mut a: VecZnxDft<D, Self>) -> VecZnxBig<D, Self>
where
VecZnxDft<D, FFT64>: VecZnxDftToMut<Self>,
VecZnxDft<D, Self>: VecZnxDftToMut<Self>,
{
let mut a_mut: VecZnxDft<&mut [u8], FFT64> = a.to_mut();
let mut a_mut: VecZnxDft<&mut [u8], Self> = a.to_mut();
unsafe {
// Reverse cols and rows because ZnxDft.sl() >= ZnxBig.sl()
@@ -130,89 +139,129 @@ unsafe impl IDFTConsumeImpl<Self> for FFT64 {
}
}
unsafe impl DFTImpl<Self> for FFT64 {
fn dft_impl<R, A>(module: &Module<Self>, step: usize, offset: usize, res: &mut R, res_col: usize, a: &A, a_col: usize)
where
unsafe impl VecZnxDftApplyImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_apply_impl<R, A>(
module: &Module<Self>,
step: usize,
offset: usize,
res: &mut R,
res_col: usize,
a: &A,
a_col: usize,
) where
R: VecZnxDftToMut<Self>,
A: VecZnxToRef,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnx<&[u8]> = a.to_ref();
let steps: usize = a_ref.size().div_ceil(step);
let min_steps: usize = res_mut.size().min(steps);
let mut res: VecZnxDft<&mut [u8], Self> = res.to_mut();
let a: VecZnx<&[u8]> = a.to_ref();
let steps: usize = a.size().div_ceil(step);
let min_steps: usize = res.size().min(steps);
unsafe {
(0..min_steps).for_each(|j| {
let limb: usize = offset + j * step;
if limb < a_ref.size() {
if limb < a.size() {
vec_znx_dft::vec_znx_dft(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1_u64,
a_ref.at_ptr(a_col, limb),
a.at_ptr(a_col, limb),
1_u64,
a_ref.sl() as u64,
a.sl() as u64,
)
}
});
(min_steps..res_mut.size()).for_each(|j| {
res_mut.zero_at(res_col, j);
});
(min_steps..res.size()).for_each(|j| reim_zero_ref(res.at_mut(res_col, j)));
}
}
}
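// Illustrative sketch (hypothetical standalone helper, not part of this commit):
// how the step/offset indexing in vec_znx_dft_apply_impl maps limbs. Output
// limb j takes the forward DFT of input limb offset + j*step; output limbs
// with no matching input limb are zeroed.
fn dft_strided(res: &mut [Vec<f64>], a: &[Vec<i64>], step: usize, offset: usize, dft: impl Fn(&[i64]) -> Vec<f64>) {
    let steps = a.len().div_ceil(step);
    let min_steps = res.len().min(steps);
    for j in 0..min_steps {
        let limb = offset + j * step;
        if limb < a.len() {
            res[j] = dft(&a[limb]);
        }
    }
    for limb in res.iter_mut().skip(min_steps) {
        limb.iter_mut().for_each(|x| *x = 0.0);
    }
}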
unsafe impl VecZnxDftAddImpl<Self> for FFT64 {
unsafe impl VecZnxDftAddImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_add_impl<R, A, D>(module: &Module<Self>, res: &mut R, res_col: usize, a: &A, a_col: usize, b: &D, b_col: usize)
where
R: VecZnxDftToMut<Self>,
A: VecZnxDftToRef<Self>,
D: VecZnxDftToRef<Self>,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnxDft<&[u8], FFT64> = a.to_ref();
let b_ref: VecZnxDft<&[u8], FFT64> = b.to_ref();
let mut res: VecZnxDft<&mut [u8], Self> = res.to_mut();
let a: VecZnxDft<&[u8], Self> = a.to_ref();
let b: VecZnxDft<&[u8], Self> = b.to_ref();
let min_size: usize = res_mut.size().min(a_ref.size()).min(b_ref.size());
let res_size: usize = res.size();
let a_size: usize = a.size();
let b_size: usize = b.size();
unsafe {
(0..min_size).for_each(|j| {
vec_znx_dft::vec_dft_add(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a_ref.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
b_ref.at_ptr(b_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
if a_size <= b_size {
let sum_size: usize = a_size.min(res_size);
let cpy_size: usize = b_size.min(res_size);
(0..sum_size).for_each(|j| {
vec_znx_dft::vec_dft_add(
module.ptr(),
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
b.at_ptr(b_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
for j in sum_size..cpy_size {
reim_copy_ref(res.at_mut(res_col, j), b.at(b_col, j));
}
for j in cpy_size..res_size {
reim_zero_ref(res.at_mut(res_col, j));
}
} else {
let sum_size: usize = b_size.min(res_size);
let cpy_size: usize = a_size.min(res_size);
(0..sum_size).for_each(|j| {
vec_znx_dft::vec_dft_add(
module.ptr(),
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
b.at_ptr(b_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
for j in sum_size..cpy_size {
reim_copy_ref(res.at_mut(res_col, j), a.at(a_col, j));
}
for j in cpy_size..res_size {
reim_zero_ref(res.at_mut(res_col, j));
}
}
}
(min_size..res_mut.size()).for_each(|j| {
res_mut.zero_at(res_col, j);
})
}
}
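// Illustrative sketch (hypothetical standalone helper, not part of this commit):
// the size-aware add pattern used by vec_znx_dft_add_impl above. Limbs present
// in both operands are summed, limbs present only in the larger operand are
// copied through, and any remaining result limbs are zeroed.
fn add_limbwise(res: &mut [Vec<f64>], a: &[Vec<f64>], b: &[Vec<f64>]) {
    let (short, long) = if a.len() <= b.len() { (a, b) } else { (b, a) };
    let sum_size = short.len().min(res.len());
    let cpy_size = long.len().min(res.len());
    for j in 0..sum_size {
        for (r, (&x, &y)) in res[j].iter_mut().zip(short[j].iter().zip(&long[j])) {
            *r = x + y;
        }
    }
    for j in sum_size..cpy_size {
        res[j].copy_from_slice(&long[j]); // only the larger operand has limb j
    }
    for j in cpy_size..res.len() {
        res[j].iter_mut().for_each(|x| *x = 0.0); // zero the tail
    }
}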
unsafe impl VecZnxDftAddInplaceImpl<Self> for FFT64 {
unsafe impl VecZnxDftAddInplaceImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_add_inplace_impl<R, A>(module: &Module<Self>, res: &mut R, res_col: usize, a: &A, a_col: usize)
where
R: VecZnxDftToMut<Self>,
A: VecZnxDftToRef<Self>,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnxDft<&[u8], FFT64> = a.to_ref();
let mut res: VecZnxDft<&mut [u8], Self> = res.to_mut();
let a: VecZnxDft<&[u8], Self> = a.to_ref();
let min_size: usize = res_mut.size().min(a_ref.size());
let min_size: usize = res.size().min(a.size());
unsafe {
(0..min_size).for_each(|j| {
vec_znx_dft::vec_dft_add(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
res_mut.at_ptr(res_col, j) as *const vec_znx_dft::vec_znx_dft_t,
res.at_ptr(res_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
a_ref.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
@@ -220,58 +269,93 @@ unsafe impl VecZnxDftAddInplaceImpl<Self> for FFT64 {
}
}
unsafe impl VecZnxDftSubImpl<Self> for FFT64 {
unsafe impl VecZnxDftSubImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_sub_impl<R, A, D>(module: &Module<Self>, res: &mut R, res_col: usize, a: &A, a_col: usize, b: &D, b_col: usize)
where
R: VecZnxDftToMut<Self>,
A: VecZnxDftToRef<Self>,
D: VecZnxDftToRef<Self>,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnxDft<&[u8], FFT64> = a.to_ref();
let b_ref: VecZnxDft<&[u8], FFT64> = b.to_ref();
let min_size: usize = res_mut.size().min(a_ref.size()).min(b_ref.size());
let mut res: VecZnxDft<&mut [u8], Self> = res.to_mut();
let a: VecZnxDft<&[u8], Self> = a.to_ref();
let b: VecZnxDft<&[u8], Self> = b.to_ref();
unsafe {
(0..min_size).for_each(|j| {
vec_znx_dft::vec_dft_sub(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a_ref.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
b_ref.at_ptr(b_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
let res_size: usize = res.size();
let a_size: usize = a.size();
let b_size: usize = b.size();
if a_size <= b_size {
let sum_size: usize = a_size.min(res_size);
let cpy_size: usize = b_size.min(res_size);
(0..sum_size).for_each(|j| {
vec_znx_dft::vec_dft_sub(
module.ptr(),
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
b.at_ptr(b_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
for j in sum_size..cpy_size {
reim_negate_ref(res.at_mut(res_col, j), b.at(b_col, j));
}
for j in cpy_size..res_size {
reim_zero_ref(res.at_mut(res_col, j));
}
} else {
let sum_size: usize = b_size.min(res_size);
let cpy_size: usize = a_size.min(res_size);
(0..sum_size).for_each(|j| {
vec_znx_dft::vec_dft_sub(
module.ptr(),
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
b.at_ptr(b_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
for j in sum_size..cpy_size {
reim_copy_ref(res.at_mut(res_col, j), a.at(a_col, j));
}
for j in cpy_size..res_size {
reim_zero_ref(res.at_mut(res_col, j));
}
}
}
(min_size..res_mut.size()).for_each(|j| {
res_mut.zero_at(res_col, j);
})
}
}
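// Illustrative sketch (hypothetical standalone helper, not part of this commit):
// the size-aware subtraction pattern used by vec_znx_dft_sub_impl above.
// Shared limbs are subtracted, limbs present only in `a` are copied (a - 0),
// limbs present only in `b` are negated (0 - b), and the rest are zeroed.
fn sub_limbwise(res: &mut [Vec<f64>], a: &[Vec<f64>], b: &[Vec<f64>]) {
    let sum_size = a.len().min(b.len()).min(res.len());
    let cpy_size = a.len().max(b.len()).min(res.len());
    for j in 0..sum_size {
        for (r, (&x, &y)) in res[j].iter_mut().zip(a[j].iter().zip(&b[j])) {
            *r = x - y;
        }
    }
    for j in sum_size..cpy_size {
        if j < a.len() {
            res[j].copy_from_slice(&a[j]);
        } else {
            for (r, &y) in res[j].iter_mut().zip(&b[j]) {
                *r = -y;
            }
        }
    }
    for j in cpy_size..res.len() {
        res[j].iter_mut().for_each(|x| *x = 0.0);
    }
}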
unsafe impl VecZnxDftSubABInplaceImpl<Self> for FFT64 {
unsafe impl VecZnxDftSubABInplaceImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_sub_ab_inplace_impl<R, A>(module: &Module<Self>, res: &mut R, res_col: usize, a: &A, a_col: usize)
where
R: VecZnxDftToMut<Self>,
A: VecZnxDftToRef<Self>,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnxDft<&[u8], FFT64> = a.to_ref();
let mut res: VecZnxDft<&mut [u8], Self> = res.to_mut();
let a: VecZnxDft<&[u8], Self> = a.to_ref();
let min_size: usize = res_mut.size().min(a_ref.size());
let min_size: usize = res.size().min(a.size());
unsafe {
(0..min_size).for_each(|j| {
vec_znx_dft::vec_dft_sub(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
res_mut.at_ptr(res_col, j) as *const vec_znx_dft::vec_znx_dft_t,
res.at_ptr(res_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
a_ref.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
@@ -279,34 +363,38 @@ unsafe impl VecZnxDftSubABInplaceImpl<Self> for FFT64 {
}
}
unsafe impl VecZnxDftSubBAInplaceImpl<Self> for FFT64 {
unsafe impl VecZnxDftSubBAInplaceImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_sub_ba_inplace_impl<R, A>(module: &Module<Self>, res: &mut R, res_col: usize, a: &A, a_col: usize)
where
R: VecZnxDftToMut<Self>,
A: VecZnxDftToRef<Self>,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnxDft<&[u8], FFT64> = a.to_ref();
let mut res: VecZnxDft<&mut [u8], Self> = res.to_mut();
let a: VecZnxDft<&[u8], Self> = a.to_ref();
let min_size: usize = res_mut.size().min(a_ref.size());
let min_size: usize = res.size().min(a.size());
unsafe {
(0..min_size).for_each(|j| {
vec_znx_dft::vec_dft_sub(
module.ptr(),
res_mut.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
res.at_mut_ptr(res_col, j) as *mut vec_znx_dft::vec_znx_dft_t,
1,
a_ref.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
a.at_ptr(a_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
res_mut.at_ptr(res_col, j) as *const vec_znx_dft::vec_znx_dft_t,
res.at_ptr(res_col, j) as *const vec_znx_dft::vec_znx_dft_t,
1,
);
});
for j in min_size..res.size() {
reim_negate_inplace_ref(res.at_mut(res_col, j));
}
}
}
}
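// Illustrative sketch (hypothetical standalone helper, not part of this commit):
// the in-place "b - a" pattern used by vec_znx_dft_sub_ba_inplace_impl above.
// It computes res <- a - res limb-wise; result limbs beyond a's size have no
// matching limb in `a`, so they are simply negated (0 - res).
fn sub_ba_inplace_limbwise(res: &mut [Vec<f64>], a: &[Vec<f64>]) {
    let min_size = res.len().min(a.len());
    for j in 0..min_size {
        for (r, &x) in res[j].iter_mut().zip(&a[j]) {
            *r = x - *r;
        }
    }
    for limb in res.iter_mut().skip(min_size) {
        limb.iter_mut().for_each(|x| *x = -*x); // negate the tail in place
    }
}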
unsafe impl VecZnxDftCopyImpl<Self> for FFT64 {
unsafe impl VecZnxDftCopyImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_copy_impl<R, A>(
_module: &Module<Self>,
step: usize,
@@ -319,27 +407,25 @@ unsafe impl VecZnxDftCopyImpl<Self> for FFT64 {
R: VecZnxDftToMut<Self>,
A: VecZnxDftToRef<Self>,
{
let mut res_mut: VecZnxDft<&mut [u8], FFT64> = res.to_mut();
let a_ref: VecZnxDft<&[u8], FFT64> = a.to_ref();
let steps: usize = a_ref.size().div_ceil(step);
let min_steps: usize = res_mut.size().min(steps);
(0..min_steps).for_each(|j| {
let limb: usize = offset + j * step;
if limb < a_ref.size() {
res_mut
.at_mut(res_col, j)
.copy_from_slice(a_ref.at(a_col, limb));
}
});
(min_steps..res_mut.size()).for_each(|j| {
res_mut.zero_at(res_col, j);
})
vec_znx_dft_copy(step, offset, res, res_col, a, a_col);
}
}
unsafe impl VecZnxDftZeroImpl<Self> for FFT64 {
impl ReimCopy for FFT64Spqlios {
#[inline(always)]
fn reim_copy(res: &mut [f64], a: &[f64]) {
reim_copy_ref(res, a);
}
}
impl ReimZero for FFT64Spqlios {
#[inline(always)]
fn reim_zero(res: &mut [f64]) {
reim_zero_ref(res);
}
}
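// Illustrative sketch (hypothetical generic routine, not part of this commit):
// with ReimCopy and ReimZero implemented for FFT64Spqlios, a reference routine
// such as vec_znx_dft_copy can be written once over a backend B supplying
// these two kernels and reused by every backend that provides them.
fn copy_limbwise<B: ReimCopy + ReimZero>(res: &mut [Vec<f64>], a: &[Vec<f64>]) {
    let min_size = res.len().min(a.len());
    for j in 0..min_size {
        B::reim_copy(&mut res[j], &a[j]); // delegate to the backend's copy kernel
    }
    for limb in res.iter_mut().skip(min_size) {
        B::reim_zero(limb); // zero the remaining limbs
    }
}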
unsafe impl VecZnxDftZeroImpl<Self> for FFT64Spqlios {
fn vec_znx_dft_zero_impl<R>(_module: &Module<Self>, res: &mut R)
where
R: VecZnxDftToMut<Self>,