mirror of
https://github.com/arnaucube/poulpy.git
synced 2026-02-10 13:16:44 +01:00
Changes to build on macos (#92)
* update spqlios-arithmetic to latest e8aed63 * finally, builds on macos * clippy --------- Co-authored-by: Pro7ech <jeanphilippe.bossuat@gmail.com>
This commit is contained in:
@@ -10,7 +10,6 @@ mod vmp;
|
||||
mod zn;
|
||||
mod znx_avx;
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
pub struct FFT64Avx {}
|
||||
pub use reim::*;
|
||||
|
||||
|
||||
@@ -2,7 +2,6 @@
|
||||
/// Ensured for inputs whose absolute value is bounded by 2^50-1
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports FMA (e.g., via `is_x86_feature_detected!("fma")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "fma")]
|
||||
pub fn reim_from_znx_i64_bnd50_fma(res: &mut [f64], a: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -58,7 +57,6 @@ pub fn reim_from_znx_i64_bnd50_fma(res: &mut [f64], a: &[i64]) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 and FMA (e.g., via `is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma")`);
|
||||
#[allow(dead_code)]
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_to_znx_i64_bnd63_avx2_fma(res: &mut [i64], divisor: f64, a: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -141,7 +139,6 @@ pub fn reim_to_znx_i64_bnd63_avx2_fma(res: &mut [i64], divisor: f64, a: &[f64])
|
||||
/// Only ensured for inputs whose absolute value is bounded by 2^63-1
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 and FMA (e.g., via `is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_to_znx_i64_inplace_bnd63_avx2_fma(res: &mut [f64], divisor: f64) {
|
||||
let sign_mask: u64 = 0x8000000000000000u64;
|
||||
@@ -220,7 +217,6 @@ pub fn reim_to_znx_i64_inplace_bnd63_avx2_fma(res: &mut [f64], divisor: f64) {
|
||||
/// Only ensured for inputs whose absolute value is bounded by 2^50-1
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports FMA (e.g., via `is_x86_feature_detected!("fma")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "fma")]
|
||||
#[allow(dead_code)]
|
||||
pub fn reim_to_znx_i64_avx2_bnd50_fma(res: &mut [i64], divisor: f64, a: &[f64]) {
|
||||
|
||||
@@ -5,7 +5,6 @@ use std::arch::x86_64::{
|
||||
|
||||
use crate::cpu_fft64_avx::reim::{as_arr, as_arr_mut};
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub(crate) fn fft_avx2_fma(m: usize, omg: &[f64], data: &mut [f64]) {
|
||||
if m < 16 {
|
||||
@@ -35,7 +34,6 @@ unsafe extern "sysv64" {
|
||||
unsafe fn fft16_avx2_fma_asm(re: *mut f64, im: *mut f64, omg: *const f64);
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn fft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
|
||||
unsafe {
|
||||
@@ -43,7 +41,6 @@ fn fft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn fft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
|
||||
if m <= 2048 {
|
||||
@@ -58,7 +55,6 @@ fn fft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mu
|
||||
pos
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn fft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
|
||||
let log_m: usize = (usize::BITS - (m - 1).leading_zeros()) as usize;
|
||||
@@ -99,7 +95,6 @@ fn fft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mu
|
||||
pos
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn twiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64; 2]) {
|
||||
unsafe {
|
||||
@@ -140,7 +135,6 @@ fn twiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64; 2])
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn bitwiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[f64; 4]) {
|
||||
unsafe {
|
||||
@@ -228,7 +222,6 @@ fn bitwiddle_fft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[f64;
|
||||
fn test_fft_avx2_fma() {
|
||||
use super::*;
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn internal(log_m: usize) {
|
||||
use poulpy_hal::reference::fft64::reim::ReimFFTRef;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_add_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -31,7 +30,6 @@ pub fn reim_add_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_add_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -59,7 +57,6 @@ pub fn reim_add_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_sub_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -90,7 +87,6 @@ pub fn reim_sub_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_sub_ab_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -118,7 +114,6 @@ pub fn reim_sub_ab_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_sub_ba_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -146,7 +141,6 @@ pub fn reim_sub_ba_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_negate_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -177,7 +171,6 @@ pub fn reim_negate_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_negate_inplace_avx2_fma(res: &mut [f64]) {
|
||||
use std::arch::x86_64::{__m256d, _mm256_loadu_pd, _mm256_storeu_pd, _mm256_xor_pd};
|
||||
@@ -200,7 +193,6 @@ pub fn reim_negate_inplace_avx2_fma(res: &mut [f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_addmul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -253,7 +245,6 @@ pub fn reim_addmul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_mul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -305,7 +296,6 @@ pub fn reim_mul_avx2_fma(res: &mut [f64], a: &[f64], b: &[f64]) {
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim_mul_inplace_avx2_fma(res: &mut [f64], a: &[f64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
|
||||
@@ -5,7 +5,6 @@ use std::arch::x86_64::{
|
||||
|
||||
use crate::cpu_fft64_avx::reim::{as_arr, as_arr_mut};
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub(crate) fn ifft_avx2_fma(m: usize, omg: &[f64], data: &mut [f64]) {
|
||||
if m < 16 {
|
||||
@@ -34,7 +33,6 @@ unsafe extern "sysv64" {
|
||||
unsafe fn ifft16_avx2_fma_asm(re: *mut f64, im: *mut f64, omg: *const f64);
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn ifft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
|
||||
unsafe {
|
||||
@@ -42,7 +40,6 @@ fn ifft16_avx2_fma(re: &mut [f64; 16], im: &mut [f64; 16], omg: &[f64; 16]) {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn ifft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
|
||||
if m <= 2048 {
|
||||
@@ -56,7 +53,6 @@ fn ifft_rec_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], m
|
||||
pos
|
||||
}
|
||||
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn ifft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], mut pos: usize) -> usize {
|
||||
let log_m: usize = (usize::BITS - (m - 1).leading_zeros()) as usize;
|
||||
@@ -95,7 +91,6 @@ fn ifft_bfs_16_avx2_fma(m: usize, re: &mut [f64], im: &mut [f64], omg: &[f64], m
|
||||
pos
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn inv_twiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64; 2]) {
|
||||
unsafe {
|
||||
@@ -133,7 +128,6 @@ fn inv_twiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: [f64
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn inv_bitwiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[f64; 4]) {
|
||||
unsafe {
|
||||
@@ -221,7 +215,6 @@ fn inv_bitwiddle_ifft_avx2_fma(h: usize, re: &mut [f64], im: &mut [f64], omg: &[
|
||||
fn test_ifft_avx2_fma() {
|
||||
use super::*;
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
fn internal(log_m: usize) {
|
||||
use poulpy_hal::reference::fft64::reim::ReimIFFTRef;
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx")]
|
||||
pub fn reim4_extract_1blk_from_reim_avx(m: usize, rows: usize, blk: usize, dst: &mut [f64], src: &[f64]) {
|
||||
use core::arch::x86_64::{__m256d, _mm256_loadu_pd, _mm256_storeu_pd};
|
||||
@@ -23,7 +22,6 @@ pub fn reim4_extract_1blk_from_reim_avx(m: usize, rows: usize, blk: usize, dst:
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim4_save_1blk_to_reim_avx<const OVERWRITE: bool>(m: usize, blk: usize, dst: &mut [f64], src: &[f64]) {
|
||||
use core::arch::x86_64::{__m256d, _mm256_add_pd, _mm256_loadu_pd, _mm256_storeu_pd};
|
||||
@@ -51,7 +49,6 @@ pub fn reim4_save_1blk_to_reim_avx<const OVERWRITE: bool>(m: usize, blk: usize,
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2,fma")]
|
||||
pub fn reim4_save_2blk_to_reim_avx<const OVERWRITE: bool>(
|
||||
m: usize, //
|
||||
@@ -94,7 +91,6 @@ pub fn reim4_save_2blk_to_reim_avx<const OVERWRITE: bool>(
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2", enable = "fma")]
|
||||
pub fn reim4_vec_mat1col_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v: &[f64]) {
|
||||
use core::arch::x86_64::{__m256d, _mm256_fmadd_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_storeu_pd};
|
||||
@@ -146,7 +142,6 @@ pub fn reim4_vec_mat1col_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2", enable = "fma")]
|
||||
pub fn reim4_vec_mat2cols_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v: &[f64]) {
|
||||
use core::arch::x86_64::{__m256d, _mm256_fmadd_pd, _mm256_fmsub_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_storeu_pd};
|
||||
@@ -216,7 +211,6 @@ pub fn reim4_vec_mat2cols_product_avx(nrows: usize, dst: &mut [f64], u: &[f64],
|
||||
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2", enable = "fma")]
|
||||
pub fn reim4_vec_mat2cols_2ndcol_product_avx(nrows: usize, dst: &mut [f64], u: &[f64], v: &[f64]) {
|
||||
use core::arch::x86_64::{__m256d, _mm256_fmadd_pd, _mm256_fmsub_pd, _mm256_loadu_pd, _mm256_setzero_pd, _mm256_storeu_pd};
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_add_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -41,7 +40,6 @@ pub fn znx_add_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_add_inplace_avx(res: &mut [i64], a: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
|
||||
@@ -17,7 +17,6 @@ fn inv_mod_pow2(p: usize, bits: u32) -> usize {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2", enable = "fma")]
|
||||
pub fn znx_automorphism_avx(p: i64, res: &mut [i64], a: &[i64]) {
|
||||
debug_assert_eq!(res.len(), a.len());
|
||||
@@ -99,12 +98,12 @@ pub fn znx_automorphism_avx(p: i64, res: &mut [i64], a: &[i64]) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(all(test, any(target_arch = "x86_64", target_arch = "x86")))]
|
||||
mod tests {
|
||||
use poulpy_hal::reference::znx::znx_automorphism_ref;
|
||||
|
||||
use super::*;
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2", enable = "fma")]
|
||||
fn test_znx_automorphism_internal() {
|
||||
let a: [i64; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_negate_avx(res: &mut [i64], src: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -37,7 +36,6 @@ pub fn znx_negate_avx(res: &mut [i64], src: &[i64]) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_negate_inplace_avx(res: &mut [i64]) {
|
||||
let n: usize = res.len();
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
use std::arch::x86_64::__m256i;
|
||||
|
||||
/// Vector forms of those constants (broadcast to all lanes)
|
||||
@@ -6,7 +5,6 @@ use std::arch::x86_64::__m256i;
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn normalize_consts_avx(basek: usize) -> (__m256i, __m256i, __m256i, __m256i) {
|
||||
use std::arch::x86_64::_mm256_set1_epi64x;
|
||||
@@ -30,7 +28,6 @@ fn normalize_consts_avx(basek: usize) -> (__m256i, __m256i, __m256i, __m256i) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn get_digit_avx(x: __m256i, mask_k: __m256i, sign_k: __m256i) -> __m256i {
|
||||
use std::arch::x86_64::{_mm256_and_si256, _mm256_sub_epi64, _mm256_xor_si256};
|
||||
@@ -45,7 +42,6 @@ fn get_digit_avx(x: __m256i, mask_k: __m256i, sign_k: __m256i) -> __m256i {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
unsafe fn get_carry_avx(
|
||||
x: __m256i,
|
||||
@@ -66,7 +62,6 @@ unsafe fn get_carry_avx(
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_first_step_carry_only_avx(basek: usize, lsh: usize, x: &[i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -118,7 +113,6 @@ pub fn znx_normalize_first_step_carry_only_avx(basek: usize, lsh: usize, x: &[i6
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_first_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -189,7 +183,6 @@ pub fn znx_normalize_first_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_first_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -272,7 +265,6 @@ pub fn znx_normalize_first_step_avx(basek: usize, lsh: usize, x: &mut [i64], a:
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_middle_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -352,7 +344,6 @@ pub fn znx_normalize_middle_step_inplace_avx(basek: usize, lsh: usize, x: &mut [
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_middle_step_carry_only_avx(basek: usize, lsh: usize, x: &[i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -430,7 +421,6 @@ pub fn znx_normalize_middle_step_carry_only_avx(basek: usize, lsh: usize, x: &[i
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_middle_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -520,7 +510,6 @@ pub fn znx_normalize_middle_step_avx(basek: usize, lsh: usize, x: &mut [i64], a:
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_final_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -591,7 +580,6 @@ pub fn znx_normalize_final_step_inplace_avx(basek: usize, lsh: usize, x: &mut [i
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_normalize_final_step_avx(basek: usize, lsh: usize, x: &mut [i64], a: &[i64], carry: &mut [i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -668,7 +656,6 @@ pub fn znx_normalize_final_step_avx(basek: usize, lsh: usize, x: &mut [i64], a:
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, any(target_arch = "x86_64", target_arch = "x86")))]
|
||||
mod tests {
|
||||
use poulpy_hal::reference::znx::{
|
||||
get_carry, get_digit, znx_normalize_final_step_inplace_ref, znx_normalize_final_step_ref,
|
||||
@@ -680,6 +667,7 @@ mod tests {
|
||||
|
||||
use std::arch::x86_64::{_mm256_loadu_si256, _mm256_storeu_si256};
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_get_digit_avx_internal() {
|
||||
let basek: usize = 12;
|
||||
@@ -716,6 +704,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_get_carry_avx_internal() {
|
||||
let basek: usize = 12;
|
||||
@@ -754,6 +743,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_znx_normalize_first_step_inplace_avx_internal() {
|
||||
let mut y0: [i64; 4] = [
|
||||
@@ -798,6 +788,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_znx_normalize_middle_step_inplace_avx_internal() {
|
||||
let mut y0: [i64; 4] = [
|
||||
@@ -842,6 +833,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_znx_normalize_final_step_inplace_avx_internal() {
|
||||
let mut y0: [i64; 4] = [
|
||||
@@ -886,6 +878,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_znx_normalize_first_step_avx_internal() {
|
||||
let mut y0: [i64; 4] = [
|
||||
@@ -931,6 +924,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_znx_normalize_middle_step_avx_internal() {
|
||||
let mut y0: [i64; 4] = [
|
||||
@@ -976,6 +970,7 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
#[target_feature(enable = "avx2")]
|
||||
fn test_znx_normalize_final_step_avx_internal() {
|
||||
let mut y0: [i64; 4] = [
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_sub_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -41,7 +40,6 @@ pub fn znx_sub_avx(res: &mut [i64], a: &[i64], b: &[i64]) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_sub_ab_inplace_avx(res: &mut [i64], a: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
@@ -78,7 +76,6 @@ pub fn znx_sub_ab_inplace_avx(res: &mut [i64], a: &[i64]) {
|
||||
/// # Safety
|
||||
/// Caller must ensure the CPU supports AVX2 (e.g., via `is_x86_feature_detected!("avx2")`);
|
||||
/// all inputs must have the same length and must not alias.
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub fn znx_sub_ba_inplace_avx(res: &mut [i64], a: &[i64]) {
|
||||
#[cfg(debug_assertions)]
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
#[target_feature(enable = "avx2")]
|
||||
pub unsafe fn znx_switch_ring_avx(res: &mut [i64], a: &[i64]) {
|
||||
unsafe {
|
||||
|
||||
Submodule poulpy-backend/src/cpu_spqlios/spqlios-arithmetic updated: b6938df774...e8aed6384c
@@ -1,7 +1,11 @@
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
pub mod cpu_fft64_avx;
|
||||
|
||||
pub mod cpu_fft64_ref;
|
||||
pub mod cpu_spqlios;
|
||||
|
||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
||||
pub use cpu_fft64_avx::FFT64Avx;
|
||||
|
||||
pub use cpu_fft64_ref::FFT64Ref;
|
||||
pub use cpu_spqlios::FFT64Spqlios;
|
||||
|
||||
Reference in New Issue
Block a user