Merge pull request #123 from phantomzone-org/non-avx-build

Improvement to non-avx/x86 platform
This commit is contained in:
Jean-Philippe Bossuat
2025-11-21 17:16:44 +01:00
committed by GitHub
25 changed files with 370 additions and 113 deletions

View File

@@ -11,7 +11,7 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: Checkout code
- name: Checkout
uses: actions/checkout@v4
with:
submodules: recursive
@@ -21,7 +21,7 @@ jobs:
with:
components: clippy, rustfmt
- name: Cache cargo dependencies
- name: Cache cargo deps
uses: actions/cache@v4
with:
path: |
@@ -32,14 +32,48 @@ jobs:
restore-keys: |
${{ runner.os }}-cargo-
- name: Build
run: cargo build --all-targets
- name: Clippy (deny warnings)
run: cargo clippy --workspace --all-targets --all-features
# Detect whether runner supports AVX2 + FMA
- name: Detect AVX support
id: avxcheck
run: |
if lscpu | grep -qi avx2 && lscpu | grep -qi fma; then
echo "supported=true" >> $GITHUB_OUTPUT
else
echo "supported=false" >> $GITHUB_OUTPUT
fi
# rustfmt always runs — unrelated to AVX support
- name: rustfmt (check only)
run: cargo fmt --all --check
- name: Run tests
run: cargo test --all
# Build / lint / test WITH AVX
- name: Build (AVX enabled)
if: steps.avxcheck.outputs.supported == 'true'
run: |
RUSTFLAGS="-C target-feature=+avx2,+fma" \
cargo build --workspace --all-targets --features enable-avx
- name: Clippy (AVX enabled)
if: steps.avxcheck.outputs.supported == 'true'
run: |
RUSTFLAGS="-C target-feature=+avx2,+fma" \
cargo clippy --workspace --all-targets --features enable-avx -- -D warnings
- name: Tests (AVX enabled)
if: steps.avxcheck.outputs.supported == 'true'
run: |
RUSTFLAGS="-C target-feature=+avx2,+fma" \
cargo test --workspace --features enable-avx
# Build / lint / test WITHOUT AVX
- name: Build (portable mode)
if: steps.avxcheck.outputs.supported == 'false'
run: cargo build --workspace --all-targets
- name: Clippy (portable mode)
if: steps.avxcheck.outputs.supported == 'false'
run: cargo clippy --workspace --all-targets -- -D warnings
- name: Tests (portable mode)
if: steps.avxcheck.outputs.supported == 'false'
run: cargo test --workspace

View File

@@ -8,12 +8,16 @@ repository = "https://github.com/phantomzone-org/poulpy"
homepage = "https://github.com/phantomzone-org/poulpy"
documentation = "https://docs.rs/poulpy"
[features]
# Opt-in AVX2 + FMA backend.
# `poulpy-cpu-avx` gates its whole crate on its *own* `enable-avx` feature, so
# the feature must be forwarded; otherwise building only this package with
# `--features enable-avx` yields an empty `poulpy-cpu-avx` crate and
# `poulpy_cpu_avx::FFT64Avx` cannot be resolved.
enable-avx = ["dep:poulpy-cpu-avx", "poulpy-cpu-avx/enable-avx"]
# The portable reference backend is pulled in by default.
default = ["dep:poulpy-cpu-ref"]
[dependencies]
rug = {workspace = true}
criterion = {workspace = true}
poulpy-hal = {workspace = true}
poulpy-cpu-avx = {workspace = true}
poulpy-cpu-ref = {workspace = true}
poulpy-cpu-avx = {workspace = true, optional = true}
poulpy-cpu-ref = {workspace = true, optional = true}
itertools = {workspace = true}
byteorder = {workspace = true}
bytemuck = {workspace = true}

View File

@@ -6,7 +6,12 @@ use std::hint::black_box;
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use poulpy_cpu_ref::FFT64Ref;
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
use poulpy_hal::{
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow},
layouts::{Module, ScalarZnx, ScratchOwned},
@@ -26,7 +31,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
}
fn runner(p: Params) -> impl FnMut() {
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
let n: Degree = Degree(module.n() as u32);
let base2k: Base2K = p.base2k;
@@ -42,8 +47,8 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
n,
base2k,
k: k_ggsw,
dnum: dnum,
dsize: dsize,
dnum,
dsize,
rank,
};
@@ -66,7 +71,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
let mut ct_glwe_out: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_out_layout);
let pt_rgsw: ScalarZnx<Vec<u8>> = ScalarZnx::alloc(n.into(), 1);
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout)
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout)
| GLWE::external_product_tmp_bytes(&module, &glwe_out_layout, &glwe_in_layout, &ggsw_layout),
@@ -79,7 +84,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
let mut sk: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_in_layout);
sk.fill_ternary_prob(0.5, &mut source_xs);
let mut sk_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
let mut sk_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
sk_dft.prepare(&module, &sk);
ct_ggsw.encrypt_sk(
@@ -99,7 +104,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
scratch.borrow(),
);
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, FFT64Ref> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, BackendImpl> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow());
move || {
@@ -138,7 +143,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
}
fn runner(p: Params) -> impl FnMut() {
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
let n: Degree = Degree(module.n() as u32);
let base2k: Base2K = p.base2k;
@@ -153,8 +158,8 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
n,
base2k,
k: k_ggsw,
dnum: dnum,
dsize: dsize,
dnum,
dsize,
rank,
};
@@ -169,7 +174,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
let mut ct_glwe: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_layout);
let pt_rgsw: ScalarZnx<Vec<u8>> = ScalarZnx::alloc(n.into(), 1);
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout)
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout)
| GLWE::external_product_tmp_bytes(&module, &glwe_layout, &glwe_layout, &ggsw_layout),
@@ -182,7 +187,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
let mut sk: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
sk.fill_ternary_prob(0.5, &mut source_xs);
let mut sk_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
let mut sk_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
sk_dft.prepare(&module, &sk);
ct_ggsw.encrypt_sk(
@@ -202,7 +207,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
scratch.borrow(),
);
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, FFT64Ref> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, BackendImpl> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow());
move || {
let scratch_borrow = scratch.borrow();

View File

@@ -6,7 +6,13 @@ use poulpy_core::layouts::{
use std::{hint::black_box, time::Duration};
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use poulpy_cpu_ref::FFT64Ref;
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
use poulpy_hal::{
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow},
layouts::{Module, ScratchOwned},
@@ -27,7 +33,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) {
}
fn runner(p: Params) -> impl FnMut() {
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
let n: Degree = Degree(module.n() as u32);
let base2k: Base2K = p.base2k;
@@ -66,7 +72,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) {
let mut ct_in: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_in_layout);
let mut ct_out: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_out_layout);
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_atk_layout)
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout)
| GLWE::keyswitch_tmp_bytes(
@@ -84,7 +90,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) {
let mut sk_in: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_in_layout);
sk_in.fill_ternary_prob(0.5, &mut source_xs);
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
sk_in_dft.prepare(&module, &sk_in);
ksk.encrypt_sk(
@@ -150,7 +156,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) {
}
fn runner(p: Params) -> impl FnMut() {
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
let n: Degree = Degree(module.n() as u32);
let base2k: Base2K = p.base2k;
@@ -181,7 +187,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) {
let mut ksk: GLWESwitchingKey<Vec<u8>> = GLWESwitchingKey::alloc_from_infos(&gglwe_layout);
let mut ct: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_layout);
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_layout)
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout)
| GLWE::keyswitch_tmp_bytes(&module, &glwe_layout, &glwe_layout, &gglwe_layout),
@@ -194,7 +200,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) {
let mut sk_in: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
sk_in.fill_ternary_prob(0.5, &mut source_xs);
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
sk_in_dft.prepare(&module, &sk_in);
let mut sk_out: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);

View File

@@ -5,7 +5,13 @@ use poulpy_core::{
prepared::GLWESecretPrepared,
},
};
use poulpy_cpu_ref::FFT64Ref;
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
use poulpy_hal::{
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxFillUniform},
layouts::{Module, ScratchOwned},
@@ -31,7 +37,7 @@ fn main() {
let rank: Rank = Rank(1);
// Instantiate Module (DFT Tables)
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(n.0 as u64);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(n.0 as u64);
let glwe_ct_infos: GLWELayout = GLWELayout {
n,
@@ -53,7 +59,7 @@ fn main() {
let mut source_xa: Source = Source::new([2u8; 32]);
// Scratch space
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
GLWE::encrypt_sk_tmp_bytes(&module, &glwe_ct_infos) | GLWE::decrypt_tmp_bytes(&module, &glwe_ct_infos),
);
@@ -62,7 +68,7 @@ fn main() {
sk.fill_ternary_prob(0.5, &mut source_xs);
// Backend-prepared secret
let mut sk_prepared: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
let mut sk_prepared: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
sk_prepared.prepare(&module, &sk);
// Uniform plaintext

View File

@@ -358,7 +358,7 @@ where
let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1);
let mut ksk_infos: GGLWELayout = infos.gglwe_layout();
ksk_infos.rank_in = Rank(pairs);
let (data, scratch) = self.take_gglwe(infos);
let (data, scratch) = self.take_gglwe(&ksk_infos);
(GLWETensorKey(data), scratch)
}
@@ -377,7 +377,7 @@ where
let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1);
let mut ksk_infos: GGLWELayout = infos.gglwe_layout();
ksk_infos.rank_in = Rank(pairs);
let (data, scratch) = self.take_gglwe_prepared(module, infos);
let (data, scratch) = self.take_gglwe_prepared(module, &ksk_infos);
(GLWETensorKeyPrepared(data), scratch)
}
}

View File

@@ -4,10 +4,10 @@ pub mod test_suite;
mod serialization;
#[cfg(test)]
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
mod poulpy_core {
use poulpy_hal::backend_test_suite;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
backend_test_suite!(
mod cpu_avx,
backend = poulpy_cpu_avx::FFT64Avx,
@@ -69,8 +69,13 @@ mod poulpy_core {
lwe_to_glwe => crate::tests::test_suite::test_lwe_to_glwe,
}
);
}
#[cfg(test)]
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
mod poulpy_core {
use poulpy_hal::backend_test_suite;
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
backend_test_suite!(
mod cpu_ref,
backend = poulpy_cpu_ref::FFT64Ref,

View File

@@ -9,6 +9,9 @@ repository = "https://github.com/phantomzone-org/poulpy"
homepage = "https://github.com/phantomzone-org/poulpy"
documentation = "https://docs.rs/poulpy"
[features]
enable-avx = []
[dependencies]
poulpy-hal = {workspace = true}
poulpy-cpu-ref = {workspace = true}

View File

@@ -1,8 +1,51 @@
# 🐙 Poulpy-CPU-REF
# 🐙 Poulpy-CPU-AVX
**Poulpy-Backend-CPU-AVX** is a Rust crate that provides an AVX accelerated CPU implementation of **`poulpy-hal`**. This crate is used to instantiate projects implemented with **`poulpy-hal`**, **`poulpy-core`** and/or **`poulpy-schemes`**.
**Poulpy-CPU-AVX** is a Rust crate that provides an **AVX2 + FMA accelerated CPU backend for Poulpy**.
## Example
This backend implements the Poulpy HAL extension traits and can be used by:
- [`poulpy-hal`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-hal)
- [`poulpy-core`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-core)
- [`poulpy-schemes`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-schemes)
## 🚩 Safety and Requirements
To avoid illegal hardware instructions (SIGILL) on unsupported CPUs, this backend is **opt-in** and **only builds when explicitly requested**.
| Requirement | Status |
|------------|--------|
| Cargo feature flag | `--features enable-avx` **must be enabled** |
| CPU architecture | `x86_64` |
| CPU target features | `AVX2` + `FMA` |
If `enable-avx` is enabled but the target does not provide these capabilities, the build **fails immediately with a clear error message**, rather than generating invalid binaries.
When `enable-avx` is **not** enabled, this crate is simply skipped and Poulpy automatically falls back to the portable `poulpy-cpu-ref` backend. This ensures that Poulpy's workspace remains portable (e.g. for macOS ARM).
## ⚙️ Building with the AVX backend enabled
Because the compiler must generate AVX2 + FMA instructions, both the Cargo feature and CPU target flags must be specified:
```bash
RUSTFLAGS="-C target-feature=+avx2,+fma" \
cargo build --features enable-avx
```
### Running an example
```bash
RUSTFLAGS="-C target-feature=+avx2,+fma" \
cargo run --example <name> --features enable-avx
```
### Running benchmarks
```bash
RUSTFLAGS="-C target-feature=+avx2,+fma" \
cargo bench --features enable-avx
```
## Basic Usage
```rust
use poulpy_cpu_avx::FFT64Avx;
@@ -12,7 +55,24 @@ let log_n: usize = 10;
let module: Module<FFT64Avx> = Module::<FFT64Avx>::new(1 << log_n);
```
## Contributors
Once compiled with `enable-avx`, the backend is usable transparently anywhere Poulpy expects a backend type (`poulpy-hal`, `poulpy-core`, `poulpy-schemes`).
To add your own backend, implement the open extension traits from **`poulpy-hal/oep`** for a struct that implements the `Backend` trait.
This will automatically make your backend compatible with the API of **`poulpy-hal`**, **`poulpy-core`** and **`poulpy-schemes`**.
## 🤝 Contributors
To implement your own Poulpy backend (SIMD or accelerator):
1. Define a backend struct
2. Implement the open extension traits from `poulpy-hal/oep`
3. Implement the `Backend` trait
Your backend will automatically integrate with:
* `poulpy-hal`
* `poulpy-core`
* `poulpy-schemes`
No modifications to those crates are required — the HAL provides the extension points.
---
For questions or guidance, feel free to open an issue or discussion in the repository.

View File

@@ -1,10 +1,17 @@
use std::hint::black_box;
use criterion::{Criterion, criterion_group, criterion_main};
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
use poulpy_cpu_avx::{ReimFFTAvx, ReimIFFTAvx};
use poulpy_hal::reference::fft64::reim::{ReimDFTExecute, ReimFFTTable, ReimIFFTTable};
/// Fallback stub for the AVX inverse-FFT benchmark.
///
/// Compiled whenever the AVX backend is not available, i.e. when the
/// `enable-avx` feature is off or the target is not x86_64 with AVX2 + FMA.
/// It only reports on stderr that the benchmark is skipped.
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
fn bench_ifft_avx2_fma(_c: &mut Criterion) {
    // The gate above also tests the Cargo feature, so mention it in the message.
    eprintln!("Skipping: AVX IFFT benchmark requires feature `enable-avx` on x86_64 with AVX2 + FMA");
}
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
use criterion::BenchmarkId;
use poulpy_cpu_avx::ReimIFFTAvx;
use poulpy_hal::reference::fft64::reim::{ReimDFTExecute, ReimIFFTTable};
use std::hint::black_box;
let group_name: String = "ifft_avx2_fma".to_string();
let mut group = c.benchmark_group(group_name);
@@ -40,7 +47,18 @@ pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
group.finish();
}
/// Fallback stub for the AVX forward-FFT benchmark.
///
/// Compiled whenever the AVX backend is not available, i.e. when the
/// `enable-avx` feature is off or the target is not x86_64 with AVX2 + FMA.
/// It only reports on stderr that the benchmark is skipped.
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
fn bench_fft_avx2_fma(_c: &mut Criterion) {
    // The gate above also tests the Cargo feature, so mention it in the message.
    eprintln!("Skipping: AVX FFT benchmark requires feature `enable-avx` on x86_64 with AVX2 + FMA");
}
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
pub fn bench_fft_avx2_fma(c: &mut Criterion) {
use criterion::BenchmarkId;
use poulpy_cpu_avx::ReimFFTAvx;
use poulpy_hal::reference::fft64::reim::{ReimDFTExecute, ReimFFTTable};
use std::hint::black_box;
let group_name: String = "fft_avx2_fma".to_string();
let mut group = c.benchmark_group(group_name);

View File

@@ -1,20 +1,36 @@
// poulpy-backend/benches/vec_znx_add.rs
use criterion::{Criterion, criterion_group, criterion_main};
use poulpy_cpu_avx::FFT64Avx;
use poulpy_hal::reference::vec_znx::{bench_vec_znx_add, bench_vec_znx_automorphism, bench_vec_znx_normalize_inplace};
#[allow(dead_code)]
/// Fallback stub for the AVX `vec_znx_add` benchmark.
///
/// Compiled whenever the AVX backend is not available, i.e. when the
/// `enable-avx` feature is off or the target is not x86_64 with AVX2 + FMA.
/// It only reports on stderr that the benchmark is skipped.
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
fn bench_vec_znx_add_cpu_avx_fft64(_c: &mut Criterion) {
    // Name the benchmark actually being skipped (previously said "IFft").
    eprintln!("Skipping: AVX vec_znx_add benchmark requires feature `enable-avx` on x86_64 with AVX2 + FMA");
}
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
fn bench_vec_znx_add_cpu_avx_fft64(c: &mut Criterion) {
bench_vec_znx_add::<FFT64Avx>(c, "FFT64Avx");
use poulpy_cpu_avx::FFT64Avx;
poulpy_hal::reference::vec_znx::bench_vec_znx_add::<FFT64Avx>(c, "FFT64Avx");
}
#[allow(dead_code)]
/// Fallback stub for the AVX `vec_znx_normalize_inplace` benchmark.
///
/// Compiled whenever the AVX backend is not available, i.e. when the
/// `enable-avx` feature is off or the target is not x86_64 with AVX2 + FMA.
/// It only reports on stderr that the benchmark is skipped.
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(_c: &mut Criterion) {
    // Name the benchmark actually being skipped (previously said "IFft").
    eprintln!("Skipping: AVX vec_znx_normalize_inplace benchmark requires feature `enable-avx` on x86_64 with AVX2 + FMA");
}
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(c: &mut Criterion) {
bench_vec_znx_normalize_inplace::<FFT64Avx>(c, "FFT64Avx");
use poulpy_cpu_avx::FFT64Avx;
poulpy_hal::reference::vec_znx::bench_vec_znx_normalize_inplace::<FFT64Avx>(c, "FFT64Avx");
}
/// Fallback stub for the AVX `vec_znx_automorphism` benchmark.
///
/// Compiled whenever the AVX backend is not available, i.e. when the
/// `enable-avx` feature is off or the target is not x86_64 with AVX2 + FMA.
/// It only reports on stderr that the benchmark is skipped.
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
fn bench_vec_znx_automorphism_cpu_avx_fft64(_c: &mut Criterion) {
    // Name the benchmark actually being skipped (previously said "IFft").
    eprintln!("Skipping: AVX vec_znx_automorphism benchmark requires feature `enable-avx` on x86_64 with AVX2 + FMA");
}
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
fn bench_vec_znx_automorphism_cpu_avx_fft64(c: &mut Criterion) {
bench_vec_znx_automorphism::<FFT64Avx>(c, "FFT64Avx");
use poulpy_cpu_avx::FFT64Avx;
poulpy_hal::reference::vec_znx::bench_vec_znx_automorphism::<FFT64Avx>(c, "FFT64Avx");
}
criterion_group!(

View File

@@ -1,10 +1,14 @@
// poulpy-backend/benches/vec_znx_add.rs
use criterion::{Criterion, criterion_group, criterion_main};
use poulpy_cpu_avx::FFT64Avx;
use poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft;
/// Fallback stub for the AVX `vmp_apply_dft_to_dft` benchmark.
///
/// Compiled whenever the AVX backend is not available, i.e. when the
/// `enable-avx` feature is off or the target is not x86_64 with AVX2 + FMA.
/// It only reports on stderr that the benchmark is skipped.
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(_c: &mut Criterion) {
    // Name the benchmark actually being skipped (previously said "IFft").
    eprintln!("Skipping: AVX vmp_apply_dft_to_dft benchmark requires feature `enable-avx` on x86_64 with AVX2 + FMA");
}
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(c: &mut Criterion) {
bench_vmp_apply_dft_to_dft::<FFT64Avx>(c, "FFT64Avx");
use poulpy_cpu_avx::FFT64Avx;
poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft::<FFT64Avx>(c, "FFT64Avx");
}
criterion_group!(benches_x86, bench_vmp_apply_dft_to_dft_cpu_avx_fft64,);

View File

@@ -1,5 +1,10 @@
use itertools::izip;
use poulpy_cpu_avx::FFT64Avx;
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
use poulpy_hal::{
api::{
ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, SvpApplyDftToDftInplace, SvpPPolAlloc, SvpPrepare, VecZnxAddNormal,
@@ -16,9 +21,9 @@ fn main() {
let ct_size: usize = 3;
let msg_size: usize = 2;
let log_scale: usize = msg_size * base2k - 5;
let module: Module<FFT64Avx> = Module::<FFT64Avx>::new(n as u64);
let module: Module<BackendImpl> = Module::<BackendImpl>::new(n as u64);
let mut scratch: ScratchOwned<FFT64Avx> = ScratchOwned::<FFT64Avx>::alloc(module.vec_znx_big_normalize_tmp_bytes());
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::<BackendImpl>::alloc(module.vec_znx_big_normalize_tmp_bytes());
let seed: [u8; 32] = [0; 32];
let mut source: Source = Source::new(seed);
@@ -28,7 +33,7 @@ fn main() {
s.fill_ternary_prob(0, 0.5, &mut source);
// Buffer to store s in the DFT domain
let mut s_dft: SvpPPol<Vec<u8>, FFT64Avx> = module.svp_ppol_alloc(s.cols());
let mut s_dft: SvpPPol<Vec<u8>, BackendImpl> = module.svp_ppol_alloc(s.cols());
// s_dft <- DFT(s)
module.svp_prepare(&mut s_dft, 0, &s, 0);
@@ -43,7 +48,7 @@ fn main() {
// Fill the second column with random values: ct = (0, a)
module.vec_znx_fill_uniform(base2k, &mut ct, 1, &mut source);
let mut buf_dft: VecZnxDft<Vec<u8>, FFT64Avx> = module.vec_znx_dft_alloc(1, ct_size);
let mut buf_dft: VecZnxDft<Vec<u8>, BackendImpl> = module.vec_znx_dft_alloc(1, ct_size);
module.vec_znx_dft_apply(1, 0, &mut buf_dft, 0, &ct, 1);
@@ -58,7 +63,7 @@ fn main() {
// Alias scratch space (VecZnxDft<B> is always at least as big as VecZnxBig<B>)
// BIG(ct[1] * s) <- IDFT(DFT(ct[1] * s)) (not normalized)
let mut buf_big: VecZnxBig<Vec<u8>, FFT64Avx> = module.vec_znx_big_alloc(1, ct_size);
let mut buf_big: VecZnxBig<Vec<u8>, BackendImpl> = module.vec_znx_big_alloc(1, ct_size);
module.vec_znx_idft_apply_tmpa(&mut buf_big, 0, &mut buf_dft, 0);
// Creates a plaintext: VecZnx with 1 column

View File

@@ -1,3 +1,20 @@
// ─────────────────────────────────────────────────────────────
// Build the backend **only when explicitly requested** via the
// `enable-avx` Cargo feature; without it the crate is empty.
//
// The crate-level gate deliberately tests the feature flag ONLY:
// a false crate-level `#![cfg(...)]` strips every item in the
// crate, including the `compile_error!` guards below. Putting the
// architecture / target-feature conditions in this gate would
// make those guards unreachable and silently produce an empty
// crate instead of a clear diagnostic.
// ─────────────────────────────────────────────────────────────
#![cfg(feature = "enable-avx")]

// If the user enables this backend but targets a non-x86_64 CPU → abort
#[cfg(not(target_arch = "x86_64"))]
compile_error!("feature `enable-avx` requires target_arch = \"x86_64\".");

// If the user enables this backend but AVX2 isn't enabled in the target → abort
#[cfg(all(target_arch = "x86_64", not(target_feature = "avx2")))]
compile_error!("feature `enable-avx` requires AVX2. Build with RUSTFLAGS=\"-C target-feature=+avx2\".");

// If the user enables this backend but FMA isn't enabled in the target → abort
#[cfg(all(target_arch = "x86_64", not(target_feature = "fma")))]
compile_error!("feature `enable-avx` requires FMA. Build with RUSTFLAGS=\"-C target-feature=+fma\".");
mod module;
mod reim;
mod reim4;

View File

@@ -1,18 +1,93 @@
# 🐙 Poulpy-CPU-AVX
# 🐙 Poulpy-CPU-REF
**Poulpy-Backend-CPU-AVX** is a Rust crate that provides the reference CPU implementation of **`poulpy-hal`**. This crate is used to instantiate projects implemented with **`poulpy-hal`**, **`poulpy-core`** and/or **`poulpy-schemes`**.
**Poulpy-CPU-REF** is the **reference (portable) CPU backend for Poulpy**.
## Example
It implements the Poulpy HAL extension traits without requiring SIMD or specialized CPU instructions, making it suitable for:
- all CPU architectures (`x86_64`, `aarch64`, `arm`, `riscv64`, …)
- development machines and CI runners
- environments without AVX or other advanced SIMD support
This backend integrates transparently with:
- `poulpy-hal`
- `poulpy-core`
- `poulpy-schemes`
---
## When is this backend used?
`poulpy-cpu-ref` is always available and requires **no compilation flags and no CPU features**.
It is automatically selected when:
- the project does not request an optimized backend, or
- the target CPU does not support the requested SIMD backend (e.g., AVX), or
- portability and reproducibility are more important than raw performance.
No additional configuration is required to use it.
---
## 🧪 Basic Usage
```rust
use poulpy_backend_cpu_ref::FFT64Ref;
use poulpy_cpu_ref::FFT64Ref;
use poulpy_hal::{api::ModuleNew, layouts::Module};
let log_n: usize = 10;
let module = Module<FFT64Ref> = Module<FFT64Ref>::new(1<<log_n);
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << log_n);
```
## Contributors
This works on **all supported platforms and architectures**.
To add your own backend, implement the open extension traits from **`poulpy-hal/oep`** for a struct that implements the `Backend` trait.
This will automatically make your backend compatible with the API of **`poulpy-hal`**, **`poulpy-core`** and **`poulpy-schemes`**.
---
## Performance Notes
`poulpy-cpu-ref` prioritizes:
* portability
* correctness
* ease of debugging
For maximum performance on x86_64 CPUs with AVX2 + FMA support, consider enabling the optional optimized backend:
```
poulpy-cpu-avx (feature: enable-avx)
```
Benchmarks and applications can freely switch between backends without changing source code — backend selection can be handled with feature flags, for example
```rust
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
```
---
## 🤝 Contributors
To implement your own backend (SIMD or accelerator):
1. Define a backend struct
2. Implement the open extension traits from `poulpy-hal/oep`
3. Implement the `Backend` trait
Your backend will automatically integrate with:
* `poulpy-hal`
* `poulpy-core`
* `poulpy-schemes`
No modifications to those crates are necessary — the HAL provides the extension points.
---
For questions or guidance, feel free to open an issue or discussion in the repository.

View File

@@ -367,12 +367,8 @@ fn test_vec_znx_normalize_conv() {
let out_prec: u32 = (end_size * end_base2k) as u32;
let mut data_want: Vec<Float> = (0..n)
.map(|_| Float::with_val(out_prec as u32, 0))
.collect();
let mut data_res: Vec<Float> = (0..n)
.map(|_| Float::with_val(out_prec as u32, 0))
.collect();
let mut data_want: Vec<Float> = (0..n).map(|_| Float::with_val(out_prec, 0)).collect();
let mut data_res: Vec<Float> = (0..n).map(|_| Float::with_val(out_prec, 0)).collect();
have.decode_vec_float(end_base2k, 0, &mut data_want);
want.decode_vec_float(end_base2k, 0, &mut data_res);

View File

@@ -9,6 +9,9 @@ repository = "https://github.com/phantomzone-org/poulpy"
homepage = "https://github.com/phantomzone-org/poulpy"
documentation = "https://docs.rs/poulpy"
[features]
enable-avx = []
[dependencies]
poulpy-cpu-avx = {workspace = true}
poulpy-cpu-ref = {workspace = true}

View File

@@ -8,8 +8,13 @@ use poulpy_core::{
GLWESecretPreparedFactory, LWE, LWELayout, LWESecret,
},
};
use poulpy_cpu_avx::FFT64Avx;
use poulpy_cpu_ref::FFT64Ref;
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
use poulpy_hal::{
api::{ModuleN, ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxRotateInplace},
layouts::{Backend, Module, Scratch, ScratchOwned},
@@ -127,7 +132,7 @@ where
}
}
for params in [Params {
let params: Params = Params {
name: String::from("1-bit"),
extension_factor: 1,
k_pt: 1,
@@ -171,27 +176,22 @@ where
rank: 2_u32.into(),
},
},
}] {
};
let id: BenchmarkId = BenchmarkId::from_parameter(params.name.clone());
let mut runner = runner::<BE, BRA>(&params);
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
}
group.finish();
}
fn bench_circuit_bootstrapping_cpu_ref_fft64(c: &mut Criterion) {
benc_circuit_bootstrapping::<FFT64Ref, CGGI>(c, "fft64_ref");
/// Runs the circuit-bootstrapping benchmark on the compile-time selected
/// backend (`BackendImpl`), tagging the benchmark group with a matching label.
fn bench_circuit_bootstrapping_fft64(c: &mut Criterion) {
    // Same condition as the `BackendImpl` alias: AVX label only when the
    // `enable-avx` feature is on and the target is x86_64.
    let label = if cfg!(all(feature = "enable-avx", target_arch = "x86_64")) {
        "fft64_avx"
    } else {
        "fft64_ref"
    };
    benc_circuit_bootstrapping::<BackendImpl, CGGI>(c, label);
}
fn bench_circuit_bootstrapping_cpu_avx_fft64(c: &mut Criterion) {
benc_circuit_bootstrapping::<FFT64Avx, CGGI>(c, "fft64_avx");
}
criterion_group!(
benches,
bench_circuit_bootstrapping_cpu_ref_fft64,
bench_circuit_bootstrapping_cpu_avx_fft64,
);
criterion_group!(benches, bench_circuit_bootstrapping_fft64);
criterion_main!(benches);

View File

@@ -8,10 +8,10 @@ use poulpy_core::{
};
use std::time::Instant;
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
use poulpy_hal::{

View File

@@ -5,7 +5,7 @@ use poulpy_cpu_avx::FFT64Avx;
use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock<test_suite::TestContext<CGGI, FFT64Avx>> =
LazyLock::new(|| test_suite::TestContext::<CGGI, FFT64Avx>::new());
LazyLock::new(test_suite::TestContext::<CGGI, FFT64Avx>::new);
#[test]
fn glwe_blind_retriever() {

View File

@@ -2,10 +2,10 @@ use std::sync::LazyLock;
use poulpy_cpu_ref::FFT64Ref;
use crate::tfhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock<test_suite::TestContext<CGGI, FFT64Ref>> =
LazyLock::new(|| test_suite::TestContext::<CGGI, FFT64Ref>::new());
LazyLock::new(test_suite::TestContext::<CGGI, FFT64Ref>::new);
#[test]
fn glwe_blind_retriever() {

View File

@@ -1,9 +1,9 @@
pub mod test_suite;
#[cfg(test)]
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
mod fft64_ref;
#[cfg(test)]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
mod fft64_avx;

View File

@@ -1,7 +1,7 @@
use poulpy_cpu_ref::FFT64Ref;
use poulpy_hal::{api::ModuleNew, layouts::Module};
use crate::tfhe::blind_rotation::{
use crate::bin_fhe::blind_rotation::{
CGGI,
tests::test_suite::{
generic_blind_rotation::test_blind_rotation,

View File

@@ -1,9 +1,9 @@
#[cfg(test)]
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
mod fft64_ref;
#[cfg(test)]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
mod fft64_avx;
#[cfg(test)]

View File

@@ -1,9 +1,9 @@
pub mod circuit_bootstrapping;
#[cfg(test)]
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
mod fft64_ref;
#[cfg(test)]
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
mod fft64_avx;