Changes to build on macos (#92)

* update spqlios-arithmetic to latest e8aed63

* finally, builds on macos

* clippy

---------

Co-authored-by: Pro7ech <jeanphilippe.bossuat@gmail.com>
Janmajayamall authored 2025-09-25 14:39:27 +02:00, committed by GitHub
parent af5d8a308f
commit 4da790ea6a
18 changed files with 127 additions and 154 deletions
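The pattern applied throughout the diffs below is to compile the AVX2/FMA code only on x86 targets and to fall back to the portable reference backend everywhere else, which is what lets the workspace build on macOS/aarch64. A minimal sketch of that layering, with made-up names (`avx::kernel`, `try_simd`, `run`) rather than code from this commit:

    // Compile the SIMD module only on x86; on other targets it does not exist at all.
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    mod avx {
        /// # Safety
        /// Caller must verify AVX2/FMA support at runtime before calling.
        #[target_feature(enable = "avx2,fma")]
        pub unsafe fn kernel(data: &mut [f64]) {
            // #[target_feature] lets the compiler emit AVX2/FMA instructions here.
            for x in data.iter_mut() {
                *x *= 2.0;
            }
        }
    }

    // x86: take the SIMD path only when the CPU actually has AVX2.
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn try_simd(data: &mut [f64]) -> bool {
        if std::is_x86_feature_detected!("avx2") {
            unsafe { avx::kernel(data) };
            return true;
        }
        false
    }

    // Non-x86 (e.g. Apple silicon): the SIMD path is never available.
    #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
    fn try_simd(_data: &mut [f64]) -> bool {
        false
    }

    pub fn run(data: &mut [f64]) {
        if try_simd(data) {
            return;
        }
        // Portable scalar fallback.
        for x in data.iter_mut() {
            *x *= 2.0;
        }
    }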

View File

@@ -32,47 +32,6 @@ pub fn bench_fft_ref(c: &mut Criterion) {
group.finish();
}
pub fn bench_fft_avx2_fma(c: &mut Criterion) {
let group_name: String = "fft_avx2_fma".to_string();
let mut group = c.benchmark_group(group_name);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn runner(m: usize) -> impl FnMut() {
let mut values: Vec<f64> = vec![0f64; m << 1];
let scale = 1.0f64 / (2 * m) as f64;
values
.iter_mut()
.enumerate()
.for_each(|(i, x)| *x = (i + 1) as f64 * scale);
let table: ReimFFTTable<f64> = ReimFFTTable::<f64>::new(m);
move || {
use poulpy_backend::cpu_fft64_avx::ReimFFTAvx;
ReimFFTAvx::reim_dft_execute(&table, &mut values);
black_box(());
}
}
if std::is_x86_feature_detected!("avx2") {
for log_m in [9, 10, 11, 12, 13, 14, 15] {
let id: BenchmarkId = BenchmarkId::from_parameter(format!("n: {}", 2 << log_m));
unsafe {
let mut runner = runner(1 << log_m);
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
}
}
} else {
eprintln!("skipping: CPU lacks avx2");
return;
}
group.finish();
}
pub fn bench_fft_spqlios(c: &mut Criterion) {
let group_name: String = "fft_spqlios".to_string();
@@ -136,47 +95,6 @@ pub fn bench_ifft_ref(c: &mut Criterion) {
group.finish();
}
pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
let group_name: String = "ifft_avx2_fma".to_string();
let mut group = c.benchmark_group(group_name);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn runner(m: usize) -> impl FnMut() {
let mut values: Vec<f64> = vec![0f64; m << 1];
let scale = 1.0f64 / (2 * m) as f64;
values
.iter_mut()
.enumerate()
.for_each(|(i, x)| *x = (i + 1) as f64 * scale);
let table: ReimIFFTTable<f64> = ReimIFFTTable::<f64>::new(m);
move || {
use poulpy_backend::cpu_fft64_avx::ReimIFFTAvx;
ReimIFFTAvx::reim_dft_execute(&table, &mut values);
black_box(());
}
}
if std::is_x86_feature_detected!("avx2") {
for log_m in [9, 10, 11, 12, 13, 14, 15] {
let id: BenchmarkId = BenchmarkId::from_parameter(format!("n: {}", 2 << log_m));
unsafe {
let mut runner = runner(1 << log_m);
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
}
}
} else {
eprintln!("skipping: CPU lacks avx2");
return;
}
group.finish();
}
pub fn bench_ifft_spqlios(c: &mut Criterion) {
let group_name: String = "ifft_spqlios".to_string();
@@ -212,13 +130,98 @@ pub fn bench_ifft_spqlios(c: &mut Criterion) {
group.finish();
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
mod x86 {
use super::*;
#[allow(dead_code)]
pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
let group_name: String = "ifft_avx2_fma".to_string();
let mut group = c.benchmark_group(group_name);
if std::is_x86_feature_detected!("avx2") {
fn runner(m: usize) -> impl FnMut() {
let mut values: Vec<f64> = vec![0f64; m << 1];
let scale = 1.0f64 / (2 * m) as f64;
values
.iter_mut()
.enumerate()
.for_each(|(i, x)| *x = (i + 1) as f64 * scale);
let table: ReimIFFTTable<f64> = ReimIFFTTable::<f64>::new(m);
move || {
use poulpy_backend::cpu_fft64_avx::ReimIFFTAvx;
ReimIFFTAvx::reim_dft_execute(&table, &mut values);
black_box(());
}
}
for log_m in [9, 10, 11, 12, 13, 14, 15] {
let id: BenchmarkId = BenchmarkId::from_parameter(format!("n: {}", 2 << log_m));
let mut runner = runner(1 << log_m);
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
}
} else {
eprintln!("skipping: CPU lacks avx2");
return;
}
group.finish();
}
#[allow(dead_code)]
pub fn bench_fft_avx2_fma(c: &mut Criterion) {
let group_name: String = "fft_avx2_fma".to_string();
let mut group = c.benchmark_group(group_name);
if std::is_x86_feature_detected!("avx2") {
fn runner(m: usize) -> impl FnMut() {
let mut values: Vec<f64> = vec![0f64; m << 1];
let scale = 1.0f64 / (2 * m) as f64;
values
.iter_mut()
.enumerate()
.for_each(|(i, x)| *x = (i + 1) as f64 * scale);
let table: ReimFFTTable<f64> = ReimFFTTable::<f64>::new(m);
move || {
use poulpy_backend::cpu_fft64_avx::ReimFFTAvx;
ReimFFTAvx::reim_dft_execute(&table, &mut values);
black_box(());
}
}
for log_m in [9, 10, 11, 12, 13, 14, 15] {
let id: BenchmarkId = BenchmarkId::from_parameter(format!("n: {}", 2 << log_m));
let mut runner = runner(1 << log_m);
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
}
} else {
eprintln!("skipping: CPU lacks avx2");
return;
}
group.finish();
}
criterion_group!(benches_x86, bench_fft_avx2_fma, bench_ifft_avx2_fma,);
criterion_main!(benches_x86);
}
criterion_group!(
benches,
bench_fft_ref,
bench_fft_avx2_fma,
bench_fft_spqlios,
bench_ifft_ref,
bench_ifft_avx2_fma,
bench_ifft_spqlios
);
criterion_main!(benches);

View File

@@ -1,6 +1,6 @@
// poulpy-backend/benches/vec_znx_add.rs
use criterion::{Criterion, criterion_group, criterion_main};
use poulpy_backend::{FFT64Avx, FFT64Ref, FFT64Spqlios};
use poulpy_backend::{FFT64Ref, FFT64Spqlios};
use poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft;
fn bench_vmp_apply_dft_to_dft_cpu_spqlios_fft64(c: &mut Criterion) {
@@ -11,14 +11,23 @@ fn bench_vmp_apply_dft_to_dft_cpu_ref_fft64(c: &mut Criterion) {
bench_vmp_apply_dft_to_dft::<FFT64Ref>(c, "cpu_ref::fft64");
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
mod x86 {
use super::*;
use poulpy_backend::FFT64Avx;
#[allow(dead_code)]
fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(c: &mut Criterion) {
bench_vmp_apply_dft_to_dft::<FFT64Avx>(c, "cpu_avx::fft64");
}
criterion_group!(benches_x86, bench_vmp_apply_dft_to_dft_cpu_avx_fft64,);
criterion_main!(benches_x86);
}
criterion_group!(
benches,
bench_vmp_apply_dft_to_dft_cpu_spqlios_fft64,
bench_vmp_apply_dft_to_dft_cpu_ref_fft64,
bench_vmp_apply_dft_to_dft_cpu_avx_fft64,
);
criterion_main!(benches);

View File

@@ -10,7 +10,6 @@ mod vmp;
mod zn;
mod znx_avx;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub struct FFT64Avx {}
pub use reim::*;

View File

@@ -2,7 +2,6 @@
/// Ensured for inputs absolute value bounded by 2^50-1
/// # Safety
/// Caller must ensure the CPU supports FMA (e.g., via `is_x86_feature_detected!("fma")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "fma")]
pub fn reim_from_znx_i64_bnd50_fma(res: &mut [f64], a: &[i64]) {
#[cfg(debug_assertions)]
@@ -58,7 +57,6 @@ pub fn reim_from_znx_i64_bnd50_fma(res: &mut [f64], a: &[i64]) {
/// # Safety
/// Caller must ensure the CPU supports FMA (e.g., via `is_x86_feature_detected!("fma,avx2")`);
#[allow(dead_code)]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_to_znx_i64_bnd63_avx2_fma(res: &mut [i64], divisor: f64, a: &[f64]) {
#[cfg(debug_assertions)]
@@ -141,7 +139,6 @@ pub fn reim_to_znx_i64_bnd63_avx2_fma(res: &mut [i64], divisor: f64, a: &[f64])
/// Only ensured for inputs absoluate value bounded by 2^63-1
/// # Safety
/// Caller must ensure the CPU supports FMA (e.g., via `is_x86_feature_detected!("fma,avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_to_znx_i64_inplace_bnd63_avx2_fma(res: &mut [f64], divisor: f64) {
let sign_mask: u64 = 0x8000000000000000u64;
@@ -220,7 +217,6 @@ pub fn reim_to_znx_i64_inplace_bnd63_avx2_fma(res: &mut [f64], divisor: f64) {
/// Only ensured for inputs absoluate value bounded by 2^50-1
/// # Safety
/// Caller must ensure the CPU supports FMA (e.g., via `is_x86_feature_detected!("fma")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "fma")]
#[allow(dead_code)]
pub fn reim_to_znx_i64_avx2_bnd50_fma(res: &mut [i64], divisor: f64, a: &[f64]) {
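The `# Safety` comments above put the feature check on the caller. A hypothetical call site honoring that contract (only `reim_from_znx_i64_bnd50_fma` is from the diff; the wrapper and its scalar fallback are illustrative):

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    fn reim_from_znx_i64(res: &mut [f64], a: &[i64]) {
        if std::is_x86_feature_detected!("fma") {
            // Safety: FMA support was just verified at runtime, as the doc comment requires.
            unsafe { reim_from_znx_i64_bnd50_fma(res, a) };
        } else {
            // Scalar fallback for CPUs without FMA (exact for inputs bounded by 2^50).
            res.iter_mut().zip(a.iter()).for_each(|(r, &x)| *r = x as f64);
        }
    }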

View File

@@ -5,7 +5,6 @@ use std::arch::x86_64::{
use crate::cpu_fft64_avx::reim::{as_arr, as_arr_mut};
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub(crate) fn fft_avx2_fma(m: usize, omg: &[f64], data: &mut [f64]) {
if m < 16 {
@@ -35,7 +34,6 @@ unsafe extern "sysv64" {
unsafe fn fft16_avx2_fma_asm(re: *mut f64, im: *mut f64, omg: *const f64);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn fft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
unsafe {
@@ -43,7 +41,6 @@ fn fft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
}
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn fft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
if m <= 2048 {
@@ -58,7 +55,6 @@ fn fft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mu
pos
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn fft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
let log_m: usize = (usize::BITS - (m - 1).leading_zeros()) as usize;
@@ -99,7 +95,6 @@ fn fft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mu
pos
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn twiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64; 2]) {
unsafe {
@@ -140,7 +135,6 @@ fn twiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64; 2])
}
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn bitwiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[f64; 4]) {
unsafe {
@@ -228,7 +222,6 @@ fn bitwiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[f64;
fn test_fft_avx2_fma() {
use super::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn internal(log_m: usize) {
use poulpy_hal::reference::fft64::reim::ReimFFTRef;
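The `#[cfg(...)]` lines dropped in this file (and in the ones that follow) are redundant once the whole `cpu_fft64_avx` module is gated at the crate root (see the lib.rs hunk further down): items inside an arch-gated module are only compiled on x86 anyway, so `#[target_feature]` is all each function still needs. A toy illustration of that layering (module and function names are made up):

    // Gate once, at the module boundary; nothing inside needs its own #[cfg].
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    pub mod fft_avx_like {
        #[target_feature(enable = "avx2,fma")]
        pub unsafe fn add4(res: &mut [f64; 4], a: &[f64; 4]) {
            use core::arch::x86_64::{_mm256_add_pd, _mm256_loadu_pd, _mm256_storeu_pd};
            // Safety: caller guarantees AVX2/FMA; the pointers come from 4-element arrays.
            unsafe {
                let sum = _mm256_add_pd(_mm256_loadu_pd(res.as_ptr()), _mm256_loadu_pd(a.as_ptr()));
                _mm256_storeu_pd(res.as_mut_ptr(), sum);
            }
        }
    }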

View File

@@ -1,6 +1,5 @@
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_add_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
#[cfg(debug_assertions)]
@@ -31,7 +30,6 @@ pub fn reim_add_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_add_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
#[cfg(debug_assertions)]
@@ -59,7 +57,6 @@ pub fn reim_add_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_sub_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
#[cfg(debug_assertions)]
@@ -90,7 +87,6 @@ pub fn reim_sub_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_sub_ab_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
#[cfg(debug_assertions)]
@@ -118,7 +114,6 @@ pub fn reim_sub_ab_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_sub_ba_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
#[cfg(debug_assertions)]
@@ -146,7 +141,6 @@ pub fn reim_sub_ba_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_negate_avx2_fma(res: &mut [f64], a: &[f64]) {
#[cfg(debug_assertions)]
@@ -177,7 +171,6 @@ pub fn reim_negate_avx2_fma(res: &mut [f64], a: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_negate_inplace_avx2_fma(res: &mut [f64]) {
use std::arch::x86_64::{__m256d, _mm256_loadu_pd, _mm256_storeu_pd, _mm256_xor_pd};
@@ -200,7 +193,6 @@ pub fn reim_negate_inplace_avx2_fma(res: &mut [f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_addmul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
#[cfg(debug_assertions)]
@@ -253,7 +245,6 @@ pub fn reim_addmul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_mul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
#[cfg(debug_assertions)]
@@ -305,7 +296,6 @@ pub fn reim_mul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
pub fn reim_mul_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
#[cfg(debug_assertions)]

View File

@@ -5,7 +5,6 @@ use std::arch::x86_64::{
use crate::cpu_fft64_avx::reim::{as_arr, as_arr_mut};
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
pub(crate) fn ifft_avx2_fma(m: usize, omg: &[f64], data: &mut [f64]) {
if m < 16 {
@@ -34,7 +33,6 @@ unsafe extern "sysv64" {
unsafe fn ifft16_avx2_fma_asm(re: *mut f64, im: *mut f64, omg: *const f64);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn ifft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
unsafe {
@@ -42,7 +40,6 @@ fn ifft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
}
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
fn ifft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
if m <= 2048 {
@@ -56,7 +53,6 @@ fn ifft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], m
pos
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
fn ifft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
let log_m: usize = (usize::BITS - (m - 1).leading_zeros()) as usize;
@@ -95,7 +91,6 @@ fn ifft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], m
pos
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn inv_twiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64; 2]) {
unsafe {
@@ -133,7 +128,6 @@ fn inv_twiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64
}
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn inv_bitwiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[f64; 4]) {
unsafe {
@@ -221,7 +215,6 @@ fn inv_bitwiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[
fn test_ifft_avx2_fma() {
use super::*;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2,fma")]
fn internal(log_m: usize) {
use poulpy_hal::reference::fft64::reim::ReimIFFTRef;

View File

@@ -1,6 +1,5 @@
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx")]
pub fn reim4_extract_1blk_from_reim_avx(m: usize, rows: usize, blk: usize, dst: &mut [f64], src: &[f64]) {
use core::arch::x86_64::{__m256d, _mm256_loadu_pd, _mm256_storeu_pd};
@@ -23,7 +22,6 @@ pub fn reim4_extract_1blk_from_reim_avx(m: usize, rows: usize, blk: usize, dst:
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
pub fn reim4_save_1blk_to_reim_avx<const OVERWRITE: bool>(m: usize, blk: usize, dst: &mut [f64], src: &[f64]) {
use core::arch::x86_64::{__m256d, _mm256_add_pd, _mm256_loadu_pd, _mm256_storeu_pd};
@@ -51,7 +49,6 @@ pub fn reim4_save_1blk_to_reim_avx<const OVERWRITE: bool>(m: usize, blk: usize,
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2,fma")]
pub fn reim4_save_2blk_to_reim_avx<const OVERWRITE: bool>(
m: usize, //
@@ -94,7 +91,6 @@ pub fn reim4_save_2blk_to_reim_avx<const OVERWRITE: bool>(
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "fma")]
pub fn reim4_vec_mat1col_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v: &[f64]) {
use core::arch::x86_64::{__m256d, _mm256_fmadd_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_storeu_pd};
@@ -146,7 +142,6 @@ pub fn reim4_vec_mat1col_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "fma")]
pub fn reim4_vec_mat2cols_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v: &[f64]) {
use core::arch::x86_64::{__m256d, _mm256_fmadd_pd, _mm256_fmsub_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_storeu_pd};
@@ -216,7 +211,6 @@ pub fn reim4_vec_mat2cols_product_avx(nrows: usize, dst: &mut [f64], u: &[f64],
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "fma")]
pub fn reim4_vec_mat2cols_2ndcol_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v: &[f64]) {
use core::arch::x86_64::{__m256d, _mm256_fmadd_pd, _mm256_fmsub_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_storeu_pd};

View File

@@ -1,7 +1,6 @@
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_add_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
#[cfg(debug_assertions)]
@@ -41,7 +40,6 @@ pub fn znx_add_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_add_inplace_avx(res: &mut [i64], a: &[i64]) {
#[cfg(debug_assertions)]

View File

@@ -17,7 +17,6 @@ fn inv_mod_pow2(p: usize, bits: u32) -> usize {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2", enable = "fma")]
pub fn znx_automorphism_avx(p: i64, res: &mut [i64], a: &[i64]) {
debug_assert_eq!(res.len(), a.len());
@@ -99,12 +98,12 @@ pub fn znx_automorphism_avx(p: i64, res: &mut [i64], a: &[i64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(all(test, any(target_arch = "x86_64", target_arch = "x86")))]
mod tests {
use poulpy_hal::reference::znx::znx_automorphism_ref;
use super::*;
#[allow(dead_code)]
#[target_feature(enable = "avx2", enable = "fma")]
fn test_znx_automorphism_internal() {
let a: [i64; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];

View File

@@ -1,7 +1,6 @@
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_negate_avx(res: &mut [i64], src: &[i64]) {
#[cfg(debug_assertions)]
@@ -37,7 +36,6 @@ pub fn znx_negate_avx(res: &mut [i64], src: &[i64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_negate_inplace_avx(res: &mut [i64]) {
let n: usize = res.len();

View File

@@ -1,4 +1,3 @@
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::__m256i; use std::arch::x86_64::__m256i;
/// Vector forms of those constants (broadcast to all lanes) /// Vector forms of those constants (broadcast to all lanes)
@@ -6,7 +5,6 @@ use std::arch::x86_64::__m256i;
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
fn normalize_consts_avx(basek: usize) -> (__m256i, __m256i, __m256i, __m256i) { fn normalize_consts_avx(basek: usize) -> (__m256i, __m256i, __m256i, __m256i) {
use std::arch::x86_64::_mm256_set1_epi64x; use std::arch::x86_64::_mm256_set1_epi64x;
@@ -30,7 +28,6 @@ fn normalize_consts_avx(basek: usize) -> (__m256i, __m256i, __m256i, __m256i) {
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
fn get_digit_avx(x: __m256i, mask_k: __m256i, sign_k: __m256i) -> __m256i { fn get_digit_avx(x: __m256i, mask_k: __m256i, sign_k: __m256i) -> __m256i {
use std::arch::x86_64::{_mm256_and_si256, _mm256_sub_epi64, _mm256_xor_si256}; use std::arch::x86_64::{_mm256_and_si256, _mm256_sub_epi64, _mm256_xor_si256};
@@ -45,7 +42,6 @@ fn get_digit_avx(x: __m256i, mask_k: __m256i, sign_k: __m256i) -> __m256i {
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
unsafe fn get_carry_avx( unsafe fn get_carry_avx(
x: __m256i, x: __m256i,
@@ -66,7 +62,6 @@ unsafe fn get_carry_avx(
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_first_step_carry_only_avx(basek: usize, lsh: usize, x: &[i64], carry: &mut [i64]) { pub fn znx_normalize_first_step_carry_only_avx(basek: usize, lsh: usize, x: &[i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -118,7 +113,6 @@ pub fn znx_normalize_first_step_carry_only_avx(basek: usize, lsh: usize, x: &[i6
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_first_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) { pub fn znx_normalize_first_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -189,7 +183,6 @@ pub fn znx_normalize_first_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_first_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) { pub fn znx_normalize_first_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -272,7 +265,6 @@ pub fn znx_normalize_first_step_avx(basek: usize, lsh: usize, x: &mut [i64], a:
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_middle_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) { pub fn znx_normalize_middle_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -352,7 +344,6 @@ pub fn znx_normalize_middle_step_inplace_avx(basek: usize, lsh: usize, x: &mut [
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_middle_step_carry_only_avx(basek: usize, lsh: usize, x: &[i64], carry: &mut [i64]) { pub fn znx_normalize_middle_step_carry_only_avx(basek: usize, lsh: usize, x: &[i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -430,7 +421,6 @@ pub fn znx_normalize_middle_step_carry_only_avx(basek: usize, lsh: usize, x: &[i
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_middle_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) { pub fn znx_normalize_middle_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -520,7 +510,6 @@ pub fn znx_normalize_middle_step_avx(basek: usize, lsh: usize, x: &mut [i64], a:
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_final_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) { pub fn znx_normalize_final_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -591,7 +580,6 @@ pub fn znx_normalize_final_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i
/// # Safety /// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`); /// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias. /// all inputs must have the same length and must not alias.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub fn znx_normalize_final_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) { pub fn znx_normalize_final_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) {
#[cfg(debug_assertions)] #[cfg(debug_assertions)]
@@ -668,7 +656,6 @@ pub fn znx_normalize_final_step_avx(basek: usize, lsh: usize, x: &mut [i64], a:
} }
} }
#[cfg(all(test, any(target_arch = "x86_64", target_arch = "x86")))]
mod tests { mod tests {
use poulpy_hal::reference::znx::{ use poulpy_hal::reference::znx::{
get_carry, get_digit, znx_normalize_final_step_inplace_ref, znx_normalize_final_step_ref, get_carry, get_digit, znx_normalize_final_step_inplace_ref, znx_normalize_final_step_ref,
@@ -680,6 +667,7 @@ mod tests {
use std::arch::x86_64::{_mm256_loadu_si256, _mm256_storeu_si256};
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_get_digit_avx_internal() {
let basek: usize = 12;
@@ -716,6 +704,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_get_carry_avx_internal() {
let basek: usize = 12;
@@ -754,6 +743,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_znx_normalize_first_step_inplace_avx_internal() {
let mut y0: [i64; 4] = [
@@ -798,6 +788,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_znx_normalize_middle_step_inplace_avx_internal() {
let mut y0: [i64; 4] = [
@@ -842,6 +833,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_znx_normalize_final_step_inplace_avx_internal() {
let mut y0: [i64; 4] = [
@@ -886,6 +878,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_znx_normalize_first_step_avx_internal() {
let mut y0: [i64; 4] = [
@@ -931,6 +924,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_znx_normalize_middle_step_avx_internal() {
let mut y0: [i64; 4] = [
@@ -976,6 +970,7 @@ mod tests {
}
}
#[allow(dead_code)]
#[target_feature(enable = "avx2")]
fn test_znx_normalize_final_step_avx_internal() {
let mut y0: [i64; 4] = [

View File

@@ -1,7 +1,6 @@
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_sub_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
#[cfg(debug_assertions)]
@@ -41,7 +40,6 @@ pub fn znx_sub_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_sub_ab_inplace_avx(res: &mut [i64], a: &[i64]) {
#[cfg(debug_assertions)]
@@ -78,7 +76,6 @@ pub fn znx_sub_ab_inplace_avx(res: &mut [i64], a: &[i64]) {
/// # Safety
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
/// all inputs must have the same length and must not alias.
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")]
pub fn znx_sub_ba_inplace_avx(res: &mut [i64], a: &[i64]) {
#[cfg(debug_assertions)]

View File

@@ -1,4 +1,3 @@
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx2")] #[target_feature(enable = "avx2")]
pub unsafe fn znx_switch_ring_avx(res: &mut [i64], a: &[i64]) { pub unsafe fn znx_switch_ring_avx(res: &mut [i64], a: &[i64]) {
unsafe { unsafe {

View File

@@ -1,7 +1,11 @@
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub mod cpu_fft64_avx;
pub mod cpu_fft64_ref;
pub mod cpu_spqlios;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
pub use cpu_fft64_avx::FFT64Avx;
pub use cpu_fft64_ref::FFT64Ref;
pub use cpu_spqlios::FFT64Spqlios;
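With `FFT64Avx` re-exported only on x86, an unconditional `use poulpy_backend::FFT64Avx;` no longer compiles on other targets. Downstream code selects a backend per target instead, which is exactly what the example change at the end of this commit does with its `BackendImpl` alias; the same pattern, shown in isolation:

    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    use poulpy_backend::FFT64Avx as BackendImpl;
    #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
    use poulpy_backend::FFT64Ref as BackendImpl;

    // Everything after this refers to BackendImpl, e.g.
    // let module: Module<BackendImpl> = Module::<BackendImpl>::new(n as u64);

The same source then builds unchanged on both x86_64 and aarch64.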

View File

@@ -122,6 +122,7 @@ backend_test_suite!(
}
);
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(test)]
backend_test_suite!(
mod cpu_avx,

View File

@@ -7,14 +7,18 @@ use poulpy_core::{
};
use std::time::Instant;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
use poulpy_backend::FFT64Avx as BackendImpl;
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
use poulpy_backend::FFT64Ref as BackendImpl;
use poulpy_hal::{
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, ZnNormalizeInplace},
layouts::{Module, ScalarZnx, ScratchOwned, ZnxView, ZnxViewMut},
source::Source,
};
use poulpy_backend::FFT64Avx;
use poulpy_schemes::tfhe::{
blind_rotation::CGGI,
circuit_bootstrapping::{
@@ -27,7 +31,7 @@ fn main() {
let n_glwe: usize = 1024;
// Module provides access to the backend arithmetic
let module: Module<FFT64Avx> = Module::<FFT64Avx>::new(n_glwe as u64);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(n_glwe as u64);
// Base 2 loga
let basek: usize = 13;
@@ -75,7 +79,7 @@ fn main() {
let k_tsk: usize = (rows_tsk + 1) * basek;
// Scratch space (4MB)
let mut scratch: ScratchOwned<FFT64Avx> = ScratchOwned::alloc(1 << 22);
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(1 << 22);
// Secret key sampling source
let mut source_xs: Source = Source::new([1u8; 32]);
@@ -97,7 +101,7 @@ fn main() {
// sk_glwe.fill_zero();
// GLWE secret prepared (opaque backend dependant write only struct)
let sk_glwe_prepared: GLWESecretPrepared<Vec<u8>, FFT64Avx> = sk_glwe.prepare_alloc(&module, scratch.borrow());
let sk_glwe_prepared: GLWESecretPrepared<Vec<u8>, BackendImpl> = sk_glwe.prepare_alloc(&module, scratch.borrow());
// Plaintext value to circuit bootstrap
let data: i64 = 1 % (1 << k_lwe_pt);
@@ -142,7 +146,8 @@ fn main() {
let mut res: GGSWCiphertext<Vec<u8>> = GGSWCiphertext::alloc(n_glwe, basek, k_ggsw_res, rows_ggsw_res, 1, rank);
// Circuit bootstrapping key prepared (opaque backend dependant write only struct)
let cbt_prepared: CircuitBootstrappingKeyPrepared<Vec<u8>, CGGI, FFT64Avx> = cbt_key.prepare_alloc(&module, scratch.borrow());
let cbt_prepared: CircuitBootstrappingKeyPrepared<Vec<u8>, CGGI, BackendImpl> =
cbt_key.prepare_alloc(&module, scratch.borrow());
// Apply circuit bootstrapping: LWE(data * 2^{- (k_lwe_pt + 2)}) -> GGSW(data)
let now: Instant = Instant::now();
@@ -193,7 +198,7 @@ fn main() {
);
// Prepare GGSW output of circuit bootstrapping (opaque backend dependant write only struct)
let res_prepared: GGSWCiphertextPrepared<Vec<u8>, FFT64Avx> = res.prepare_alloc(&module, scratch.borrow());
let res_prepared: GGSWCiphertextPrepared<Vec<u8>, BackendImpl> = res.prepare_alloc(&module, scratch.borrow());
// Apply GLWE x GGSW
ct_glwe.external_product_inplace(&module, &res_prepared, scratch.borrow());