diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 927b7fa..d2aa5ca 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest steps: - - name: Checkout code + - name: Checkout uses: actions/checkout@v4 with: submodules: recursive @@ -21,7 +21,7 @@ jobs: with: components: clippy, rustfmt - - name: Cache cargo dependencies + - name: Cache cargo deps uses: actions/cache@v4 with: path: | @@ -32,14 +32,48 @@ jobs: restore-keys: | ${{ runner.os }}-cargo- - - name: Build - run: cargo build --all-targets - - - name: Clippy (deny warnings) - run: cargo clippy --workspace --all-targets --all-features + # Detect whether runner supports AVX2 + FMA + - name: Detect AVX support + id: avxcheck + run: | + if lscpu | grep -qi avx2 && lscpu | grep -qi fma; then + echo "supported=true" >> $GITHUB_OUTPUT + else + echo "supported=false" >> $GITHUB_OUTPUT + fi + # rustfmt always runs — unrelated to AVX support - name: rustfmt (check only) run: cargo fmt --all --check - - name: Run tests - run: cargo test --all \ No newline at end of file + # Build / lint / test WITH AVX + - name: Build (AVX enabled) + if: steps.avxcheck.outputs.supported == 'true' + run: | + RUSTFLAGS="-C target-feature=+avx2,+fma" \ + cargo build --workspace --all-targets --features enable-avx + + - name: Clippy (AVX enabled) + if: steps.avxcheck.outputs.supported == 'true' + run: | + RUSTFLAGS="-C target-feature=+avx2,+fma" \ + cargo clippy --workspace --all-targets --features enable-avx -- -D warnings + + - name: Tests (AVX enabled) + if: steps.avxcheck.outputs.supported == 'true' + run: | + RUSTFLAGS="-C target-feature=+avx2,+fma" \ + cargo test --workspace --features enable-avx + + # Build / lint / test WITHOUT AVX + - name: Build (portable mode) + if: steps.avxcheck.outputs.supported == 'false' + run: cargo build --workspace --all-targets + + - name: Clippy (portable mode) + if: steps.avxcheck.outputs.supported == 'false' + run: cargo 
clippy --workspace --all-targets -- -D warnings + + - name: Tests (portable mode) + if: steps.avxcheck.outputs.supported == 'false' + run: cargo test --workspace \ No newline at end of file diff --git a/poulpy-core/Cargo.toml b/poulpy-core/Cargo.toml index 8a5e471..6ed82ab 100644 --- a/poulpy-core/Cargo.toml +++ b/poulpy-core/Cargo.toml @@ -8,12 +8,16 @@ repository = "https://github.com/phantomzone-org/poulpy" homepage = "https://github.com/phantomzone-org/poulpy" documentation = "https://docs.rs/poulpy" +[features] +enable-avx = ["dep:poulpy-cpu-avx"] +default = ["dep:poulpy-cpu-ref"] + [dependencies] rug = {workspace = true} criterion = {workspace = true} poulpy-hal = {workspace = true} -poulpy-cpu-avx = {workspace = true} -poulpy-cpu-ref = {workspace = true} +poulpy-cpu-avx = {workspace = true, optional = true} +poulpy-cpu-ref = {workspace = true, optional = true} itertools = {workspace = true} byteorder = {workspace = true} bytemuck = {workspace = true} diff --git a/poulpy-core/benches/external_product_glwe_fft64.rs b/poulpy-core/benches/external_product_glwe_fft64.rs index 3bbd4f4..fd3804d 100644 --- a/poulpy-core/benches/external_product_glwe_fft64.rs +++ b/poulpy-core/benches/external_product_glwe_fft64.rs @@ -6,7 +6,12 @@ use std::hint::black_box; use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; -use poulpy_cpu_ref::FFT64Ref; +#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))] +pub use poulpy_cpu_avx::FFT64Avx as BackendImpl; + +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))] +pub use poulpy_cpu_ref::FFT64Ref as BackendImpl; + use poulpy_hal::{ api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow}, layouts::{Module, ScalarZnx, ScratchOwned}, @@ -26,7 +31,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) { } fn runner(p: Params) -> impl FnMut() { - let module: Module = Module::::new(1 << p.log_n); + let module: Module = Module::::new(1 << p.log_n); let n: Degree = Degree(module.n() as u32); let 
base2k: Base2K = p.base2k; @@ -42,8 +47,8 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) { n, base2k, k: k_ggsw, - dnum: dnum, - dsize: dsize, + dnum, + dsize, rank, }; @@ -66,7 +71,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) { let mut ct_glwe_out: GLWE> = GLWE::alloc_from_infos(&glwe_out_layout); let pt_rgsw: ScalarZnx> = ScalarZnx::alloc(n.into(), 1); - let mut scratch: ScratchOwned = ScratchOwned::alloc( + let mut scratch: ScratchOwned = ScratchOwned::alloc( GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout) | GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout) | GLWE::external_product_tmp_bytes(&module, &glwe_out_layout, &glwe_in_layout, &ggsw_layout), @@ -79,7 +84,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) { let mut sk: GLWESecret> = GLWESecret::alloc_from_infos(&glwe_in_layout); sk.fill_ternary_prob(0.5, &mut source_xs); - let mut sk_dft: GLWESecretPrepared, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank); + let mut sk_dft: GLWESecretPrepared, BackendImpl> = GLWESecretPrepared::alloc(&module, rank); sk_dft.prepare(&module, &sk); ct_ggsw.encrypt_sk( @@ -99,7 +104,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) { scratch.borrow(), ); - let mut ggsw_prepared: GGSWPrepared, FFT64Ref> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw); + let mut ggsw_prepared: GGSWPrepared, BackendImpl> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw); ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow()); move || { @@ -138,7 +143,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) { } fn runner(p: Params) -> impl FnMut() { - let module: Module = Module::::new(1 << p.log_n); + let module: Module = Module::::new(1 << p.log_n); let n: Degree = Degree(module.n() as u32); let base2k: Base2K = p.base2k; @@ -153,8 +158,8 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) { n, base2k, k: k_ggsw, - dnum: dnum, - dsize: dsize, + dnum, + dsize, rank, }; @@ -169,7 +174,7 @@ fn 
bench_external_product_glwe_inplace_fft64(c: &mut Criterion) { let mut ct_glwe: GLWE> = GLWE::alloc_from_infos(&glwe_layout); let pt_rgsw: ScalarZnx> = ScalarZnx::alloc(n.into(), 1); - let mut scratch: ScratchOwned = ScratchOwned::alloc( + let mut scratch: ScratchOwned = ScratchOwned::alloc( GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout) | GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout) | GLWE::external_product_tmp_bytes(&module, &glwe_layout, &glwe_layout, &ggsw_layout), @@ -182,7 +187,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) { let mut sk: GLWESecret> = GLWESecret::alloc_from_infos(&glwe_layout); sk.fill_ternary_prob(0.5, &mut source_xs); - let mut sk_dft: GLWESecretPrepared, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank); + let mut sk_dft: GLWESecretPrepared, BackendImpl> = GLWESecretPrepared::alloc(&module, rank); sk_dft.prepare(&module, &sk); ct_ggsw.encrypt_sk( @@ -202,7 +207,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) { scratch.borrow(), ); - let mut ggsw_prepared: GGSWPrepared, FFT64Ref> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw); + let mut ggsw_prepared: GGSWPrepared, BackendImpl> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw); ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow()); move || { let scratch_borrow = scratch.borrow(); diff --git a/poulpy-core/benches/keyswitch_glwe_fft64.rs b/poulpy-core/benches/keyswitch_glwe_fft64.rs index 61399c0..afd64b8 100644 --- a/poulpy-core/benches/keyswitch_glwe_fft64.rs +++ b/poulpy-core/benches/keyswitch_glwe_fft64.rs @@ -6,7 +6,13 @@ use poulpy_core::layouts::{ use std::{hint::black_box, time::Duration}; use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; -use poulpy_cpu_ref::FFT64Ref; + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))] +pub use poulpy_cpu_avx::FFT64Avx as BackendImpl; + +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))] +pub use poulpy_cpu_ref::FFT64Ref as BackendImpl; 
+ use poulpy_hal::{ api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow}, layouts::{Module, ScratchOwned}, @@ -27,7 +33,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) { } fn runner(p: Params) -> impl FnMut() { - let module: Module = Module::::new(1 << p.log_n); + let module: Module = Module::::new(1 << p.log_n); let n: Degree = Degree(module.n() as u32); let base2k: Base2K = p.base2k; @@ -66,7 +72,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) { let mut ct_in: GLWE> = GLWE::alloc_from_infos(&glwe_in_layout); let mut ct_out: GLWE> = GLWE::alloc_from_infos(&glwe_out_layout); - let mut scratch: ScratchOwned = ScratchOwned::alloc( + let mut scratch: ScratchOwned = ScratchOwned::alloc( GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_atk_layout) | GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout) | GLWE::keyswitch_tmp_bytes( @@ -84,7 +90,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) { let mut sk_in: GLWESecret> = GLWESecret::alloc_from_infos(&glwe_in_layout); sk_in.fill_ternary_prob(0.5, &mut source_xs); - let mut sk_in_dft: GLWESecretPrepared, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank); + let mut sk_in_dft: GLWESecretPrepared, BackendImpl> = GLWESecretPrepared::alloc(&module, rank); sk_in_dft.prepare(&module, &sk_in); ksk.encrypt_sk( @@ -150,7 +156,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) { } fn runner(p: Params) -> impl FnMut() { - let module: Module = Module::::new(1 << p.log_n); + let module: Module = Module::::new(1 << p.log_n); let n: Degree = Degree(module.n() as u32); let base2k: Base2K = p.base2k; @@ -181,7 +187,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) { let mut ksk: GLWESwitchingKey> = GLWESwitchingKey::alloc_from_infos(&gglwe_layout); let mut ct: GLWE> = GLWE::alloc_from_infos(&glwe_layout); - let mut scratch: ScratchOwned = ScratchOwned::alloc( + let mut scratch: ScratchOwned = ScratchOwned::alloc( GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_layout) | 
GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout) | GLWE::keyswitch_tmp_bytes(&module, &glwe_layout, &glwe_layout, &gglwe_layout), @@ -194,7 +200,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) { let mut sk_in: GLWESecret> = GLWESecret::alloc_from_infos(&glwe_layout); sk_in.fill_ternary_prob(0.5, &mut source_xs); - let mut sk_in_dft: GLWESecretPrepared, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank); + let mut sk_in_dft: GLWESecretPrepared, BackendImpl> = GLWESecretPrepared::alloc(&module, rank); sk_in_dft.prepare(&module, &sk_in); let mut sk_out: GLWESecret> = GLWESecret::alloc_from_infos(&glwe_layout); diff --git a/poulpy-core/examples/encryption.rs b/poulpy-core/examples/encryption.rs index 0aa3c67..08e1595 100644 --- a/poulpy-core/examples/encryption.rs +++ b/poulpy-core/examples/encryption.rs @@ -5,7 +5,13 @@ use poulpy_core::{ prepared::GLWESecretPrepared, }, }; -use poulpy_cpu_ref::FFT64Ref; + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))] +pub use poulpy_cpu_avx::FFT64Avx as BackendImpl; + +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))] +pub use poulpy_cpu_ref::FFT64Ref as BackendImpl; + use poulpy_hal::{ api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxFillUniform}, layouts::{Module, ScratchOwned}, @@ -31,7 +37,7 @@ fn main() { let rank: Rank = Rank(1); // Instantiate Module (DFT Tables) - let module: Module = Module::::new(n.0 as u64); + let module: Module = Module::::new(n.0 as u64); let glwe_ct_infos: GLWELayout = GLWELayout { n, @@ -53,7 +59,7 @@ fn main() { let mut source_xa: Source = Source::new([2u8; 32]); // Scratch space - let mut scratch: ScratchOwned = ScratchOwned::alloc( + let mut scratch: ScratchOwned = ScratchOwned::alloc( GLWE::encrypt_sk_tmp_bytes(&module, &glwe_ct_infos) | GLWE::decrypt_tmp_bytes(&module, &glwe_ct_infos), ); @@ -62,7 +68,7 @@ fn main() { sk.fill_ternary_prob(0.5, &mut source_xs); // Backend-prepared secret - let mut sk_prepared: GLWESecretPrepared, FFT64Ref> 
= GLWESecretPrepared::alloc(&module, rank); + let mut sk_prepared: GLWESecretPrepared, BackendImpl> = GLWESecretPrepared::alloc(&module, rank); sk_prepared.prepare(&module, &sk); // Uniform plaintext diff --git a/poulpy-core/src/scratch.rs b/poulpy-core/src/scratch.rs index 1d39a9b..a6675da 100644 --- a/poulpy-core/src/scratch.rs +++ b/poulpy-core/src/scratch.rs @@ -358,7 +358,7 @@ where let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1); let mut ksk_infos: GGLWELayout = infos.gglwe_layout(); ksk_infos.rank_in = Rank(pairs); - let (data, scratch) = self.take_gglwe(infos); + let (data, scratch) = self.take_gglwe(&ksk_infos); (GLWETensorKey(data), scratch) } @@ -377,7 +377,7 @@ where let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1); let mut ksk_infos: GGLWELayout = infos.gglwe_layout(); ksk_infos.rank_in = Rank(pairs); - let (data, scratch) = self.take_gglwe_prepared(module, infos); + let (data, scratch) = self.take_gglwe_prepared(module, &ksk_infos); (GLWETensorKeyPrepared(data), scratch) } } diff --git a/poulpy-core/src/tests/mod.rs b/poulpy-core/src/tests/mod.rs index 3881fa4..9b69e20 100644 --- a/poulpy-core/src/tests/mod.rs +++ b/poulpy-core/src/tests/mod.rs @@ -4,10 +4,10 @@ pub mod test_suite; mod serialization; #[cfg(test)] +#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))] mod poulpy_core { use poulpy_hal::backend_test_suite; - #[cfg(any(target_arch = "x86_64", target_arch = "x86"))] backend_test_suite!( mod cpu_avx, backend = poulpy_cpu_avx::FFT64Avx, @@ -69,8 +69,13 @@ mod poulpy_core { lwe_to_glwe => crate::tests::test_suite::test_lwe_to_glwe, } ); +} + +#[cfg(test)] +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))] +mod poulpy_core { + use poulpy_hal::backend_test_suite; - #[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] backend_test_suite!( mod cpu_ref, backend = poulpy_cpu_ref::FFT64Ref, diff --git a/poulpy-cpu-avx/Cargo.toml b/poulpy-cpu-avx/Cargo.toml 
index 8becb6a..5704b1e 100644 --- a/poulpy-cpu-avx/Cargo.toml +++ b/poulpy-cpu-avx/Cargo.toml @@ -9,6 +9,9 @@ repository = "https://github.com/phantomzone-org/poulpy" homepage = "https://github.com/phantomzone-org/poulpy" documentation = "https://docs.rs/poulpy" +[features] +enable-avx = [] + [dependencies] poulpy-hal = {workspace = true} poulpy-cpu-ref = {workspace = true} diff --git a/poulpy-cpu-avx/README.md b/poulpy-cpu-avx/README.md index 9d7be01..8a8fccc 100644 --- a/poulpy-cpu-avx/README.md +++ b/poulpy-cpu-avx/README.md @@ -1,8 +1,51 @@ -# 🐙 Poulpy-CPU-REF +# 🐙 Poulpy-CPU-AVX -**Poulpy-Backend-CPU-AVX** is a Rust crate that provides an AVX accelerated CPU implementation of **`poulpy-hal`**. This crate is used to instantiate projects implemented with **`poulpy-hal`**, **`poulpy-core`** and/or **`poulpy-schemes`**. +**Poulpy-CPU-AVX** is a Rust crate that provides an **AVX2 + FMA accelerated CPU backend for Poulpy**. -## Example +This backend implements the Poulpy HAL extension traits and can be used by: + +- [`poulpy-hal`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-hal) +- [`poulpy-core`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-core) +- [`poulpy-schemes`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-schemes) + +## 🚩 Safety and Requirements + +To avoid illegal hardware instructions (SIGILL) on unsupported CPUs, this backend is **opt-in** and **only builds when explicitly requested**. + +| Requirement | Status | +|------------|--------| +| Cargo feature flag | `--features enable-avx` **must be enabled** | +| CPU architecture | `x86_64` | +| CPU target features | `AVX2` + `FMA` | + +If `enable-avx` is enabled but the target does not provide these capabilities, the build **fails immediately with a clear error message**, rather than generating invalid binaries. + +When `enable-avx` is **not** enabled, this crate is simply skipped and Poulpy automatically falls back to the portable `poulpy-cpu-ref` backend. 
This ensures that Poulpy's workspace remains portable (e.g. for macOS ARM). + +## ⚙️ Building with the AVX backend enabled + +Because the compiler must generate AVX2 + FMA instructions, both the Cargo feature and CPU target flags must be specified: + +```bash +RUSTFLAGS="-C target-feature=+avx2,+fma" \ +cargo build --features enable-avx +``` + +### Running an example + +```bash +RUSTFLAGS="-C target-feature=+avx2,+fma" \ +cargo run --example <example_name> --features enable-avx +``` + +### Running benchmarks + +```bash +RUSTFLAGS="-C target-feature=+avx2,+fma" \ +cargo bench --features enable-avx +``` + +## Basic Usage ```rust use poulpy_backend_cpu_avx::FFT64Avx; @@ -12,7 +55,24 @@ let log_n: usize = 10; let module = Module = Module::new(1<(c, "FFT64Avx"); + use poulpy_cpu_avx::FFT64Avx; + poulpy_hal::reference::vec_znx::bench_vec_znx_add::(c, "FFT64Avx"); } -#[allow(dead_code)] +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] +fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(_c: &mut Criterion) { + eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA"); +} + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(c: &mut Criterion) { - bench_vec_znx_normalize_inplace::(c, "FFT64Avx"); + use poulpy_cpu_avx::FFT64Avx; + poulpy_hal::reference::vec_znx::bench_vec_znx_normalize_inplace::(c, "FFT64Avx"); } +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] +fn bench_vec_znx_automorphism_cpu_avx_fft64(_c: &mut Criterion) { + eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA"); +} + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] fn bench_vec_znx_automorphism_cpu_avx_fft64(c: &mut Criterion) { - bench_vec_znx_automorphism::(c, "FFT64Avx"); + use 
poulpy_cpu_avx::FFT64Avx; + poulpy_hal::reference::vec_znx::bench_vec_znx_automorphism::(c, "FFT64Avx"); } criterion_group!( diff --git a/poulpy-cpu-avx/benches/vmp.rs b/poulpy-cpu-avx/benches/vmp.rs index 195fa36..e1d6a65 100644 --- a/poulpy-cpu-avx/benches/vmp.rs +++ b/poulpy-cpu-avx/benches/vmp.rs @@ -1,10 +1,14 @@ -// poulpy-backend/benches/vec_znx_add.rs use criterion::{Criterion, criterion_group, criterion_main}; -use poulpy_cpu_avx::FFT64Avx; -use poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft; +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] +fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(_c: &mut Criterion) { + eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA"); +} + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(c: &mut Criterion) { - bench_vmp_apply_dft_to_dft::(c, "FFT64Avx"); + use poulpy_cpu_avx::FFT64Avx; + poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft::(c, "FFT64Avx"); } criterion_group!(benches_x86, bench_vmp_apply_dft_to_dft_cpu_avx_fft64,); diff --git a/poulpy-cpu-avx/examples/rlwe_encrypt.rs b/poulpy-cpu-avx/examples/rlwe_encrypt.rs index b312757..2cc51a9 100644 --- a/poulpy-cpu-avx/examples/rlwe_encrypt.rs +++ b/poulpy-cpu-avx/examples/rlwe_encrypt.rs @@ -1,5 +1,10 @@ use itertools::izip; -use poulpy_cpu_avx::FFT64Avx; + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] +use poulpy_cpu_avx::FFT64Avx as BackendImpl; +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] +use poulpy_cpu_ref::FFT64Ref as BackendImpl; + use poulpy_hal::{ api::{ ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, SvpApplyDftToDftInplace, SvpPPolAlloc, SvpPrepare, VecZnxAddNormal, @@ -16,9 +21,9 @@ fn main() { let ct_size: usize = 3; let 
msg_size: usize = 2; let log_scale: usize = msg_size * base2k - 5; - let module: Module = Module::::new(n as u64); + let module: Module = Module::::new(n as u64); - let mut scratch: ScratchOwned = ScratchOwned::::alloc(module.vec_znx_big_normalize_tmp_bytes()); + let mut scratch: ScratchOwned = ScratchOwned::::alloc(module.vec_znx_big_normalize_tmp_bytes()); let seed: [u8; 32] = [0; 32]; let mut source: Source = Source::new(seed); @@ -28,7 +33,7 @@ fn main() { s.fill_ternary_prob(0, 0.5, &mut source); // Buffer to store s in the DFT domain - let mut s_dft: SvpPPol, FFT64Avx> = module.svp_ppol_alloc(s.cols()); + let mut s_dft: SvpPPol, BackendImpl> = module.svp_ppol_alloc(s.cols()); // s_dft <- DFT(s) module.svp_prepare(&mut s_dft, 0, &s, 0); @@ -43,7 +48,7 @@ fn main() { // Fill the second column with random values: ct = (0, a) module.vec_znx_fill_uniform(base2k, &mut ct, 1, &mut source); - let mut buf_dft: VecZnxDft, FFT64Avx> = module.vec_znx_dft_alloc(1, ct_size); + let mut buf_dft: VecZnxDft, BackendImpl> = module.vec_znx_dft_alloc(1, ct_size); module.vec_znx_dft_apply(1, 0, &mut buf_dft, 0, &ct, 1); @@ -58,7 +63,7 @@ fn main() { // Alias scratch space (VecZnxDft is always at least as big as VecZnxBig) // BIG(ct[1] * s) <- IDFT(DFT(ct[1] * s)) (not normalized) - let mut buf_big: VecZnxBig, FFT64Avx> = module.vec_znx_big_alloc(1, ct_size); + let mut buf_big: VecZnxBig, BackendImpl> = module.vec_znx_big_alloc(1, ct_size); module.vec_znx_idft_apply_tmpa(&mut buf_big, 0, &mut buf_dft, 0); // Creates a plaintext: VecZnx with 1 column diff --git a/poulpy-cpu-avx/src/lib.rs b/poulpy-cpu-avx/src/lib.rs index 4ba20c7..9a139f6 100644 --- a/poulpy-cpu-avx/src/lib.rs +++ b/poulpy-cpu-avx/src/lib.rs @@ -1,3 +1,20 @@ +// ───────────────────────────────────────────────────────────── // Build the backend **only when ALL conditions are satisfied** // 
───────────────────────────────────────────────────────────── +#![cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] + +// If the user enables this backend but targets a non-x86_64 CPU → abort +#[cfg(all(feature = "enable-avx", not(target_arch = "x86_64")))] +compile_error!("feature `enable-avx` requires target_arch = \"x86_64\"."); + +// If the user enables this backend but AVX2 isn't enabled in the target → abort +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", not(target_feature = "avx2")))] +compile_error!("feature `enable-avx` requires AVX2. Build with RUSTFLAGS=\"-C target-feature=+avx2\"."); + +// If the user enables this backend but FMA isn't enabled in the target → abort +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", not(target_feature = "fma")))] +compile_error!("feature `enable-avx` requires FMA. Build with RUSTFLAGS=\"-C target-feature=+fma\"."); + mod module; mod reim; mod reim4; diff --git a/poulpy-cpu-ref/README.md b/poulpy-cpu-ref/README.md index e108527..e11290b 100644 --- a/poulpy-cpu-ref/README.md +++ b/poulpy-cpu-ref/README.md @@ -1,18 +1,93 @@ -# 🐙 Poulpy-CPU-AVX +# 🐙 Poulpy-CPU-REF -**Poulpy-Backend-CPU-AVX** is a Rust crate that provides the reference CPU implementation of **`poulpy-hal`**. This crate is used to instantiate projects implemented with **`poulpy-hal`**, **`poulpy-core`** and/or **`poulpy-schemes`**. +**Poulpy-CPU-REF** is the **reference (portable) CPU backend for Poulpy**. 
-## Example +It implements the Poulpy HAL extension traits without requiring SIMD or specialized CPU instructions, making it suitable for: + +- all CPU architectures (`x86_64`, `aarch64`, `arm`, `riscv64`, …) +- development machines and CI runners +- environments without AVX or other advanced SIMD support + +This backend integrates transparently with: + +- `poulpy-hal` +- `poulpy-core` +- `poulpy-schemes` + +--- + +## When is this backend used? + +`poulpy-cpu-ref` is always available and requires **no compilation flags and no CPU features**. + +It is automatically selected when: + +- the project does not request an optimized backend, or +- the target CPU does not support the requested SIMD backend (e.g., AVX), or +- portability and reproducibility are more important than raw performance. + +No additional configuration is required to use it. + +--- + +## 🧪 Basic Usage ```rust -use poulpy_backend_cpu_ref::FFT64Ref; +use poulpy_cpu_ref::FFT64Ref; use poulpy_hal::{api::ModuleNew, layouts::Module}; let log_n: usize = 10; -let module = Module<FFT64Ref> = Module::new(1<<log_n); +let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << log_n); ``` -## Contributors +This works on **all supported platforms and architectures**. -To add your own backend, implement the open extension traits from **`poulpy-hal/oep`** for a struct that implements the `Backend` trait. -This will automatically make your backend compatible with the API of **`poulpy-hal`**, **`poulpy-core`** and **`poulpy-schemes`**. 
\ No newline at end of file +--- + +## Performance Notes + +`poulpy-cpu-ref` prioritizes: + +* portability +* correctness +* ease of debugging + +For maximum performance on x86_64 CPUs with AVX2 + FMA support, consider enabling the optional optimized backend: + +``` +poulpy-cpu-avx (feature: enable-avx) +``` + +Benchmarks and applications can freely switch between backends without changing source code — backend selection can be handled with feature flags, for example: + +```rust +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] +use poulpy_cpu_avx::FFT64Avx as BackendImpl; + +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] +use poulpy_cpu_ref::FFT64Ref as BackendImpl; +``` + +--- + +## 🤝 Contributors + +To implement your own backend (SIMD or accelerator): + +1. Define a backend struct +2. Implement the open extension traits from `poulpy-hal/oep` +3. Implement the `Backend` trait + +Your backend will automatically integrate with: + +* `poulpy-hal` +* `poulpy-core` +* `poulpy-schemes` + +No modifications to those crates are necessary — the HAL provides the extension points. + +--- + +For questions or guidance, feel free to open an issue or discussion in the repository. 
+ +``` diff --git a/poulpy-hal/src/reference/vec_znx/normalize.rs b/poulpy-hal/src/reference/vec_znx/normalize.rs index 139c8a5..021f57c 100644 --- a/poulpy-hal/src/reference/vec_znx/normalize.rs +++ b/poulpy-hal/src/reference/vec_znx/normalize.rs @@ -367,12 +367,8 @@ fn test_vec_znx_normalize_conv() { let out_prec: u32 = (end_size * end_base2k) as u32; - let mut data_want: Vec = (0..n) - .map(|_| Float::with_val(out_prec as u32, 0)) - .collect(); - let mut data_res: Vec = (0..n) - .map(|_| Float::with_val(out_prec as u32, 0)) - .collect(); + let mut data_want: Vec = (0..n).map(|_| Float::with_val(out_prec, 0)).collect(); + let mut data_res: Vec = (0..n).map(|_| Float::with_val(out_prec, 0)).collect(); have.decode_vec_float(end_base2k, 0, &mut data_want); want.decode_vec_float(end_base2k, 0, &mut data_res); diff --git a/poulpy-schemes/Cargo.toml b/poulpy-schemes/Cargo.toml index 2b7da12..34d15aa 100644 --- a/poulpy-schemes/Cargo.toml +++ b/poulpy-schemes/Cargo.toml @@ -9,6 +9,9 @@ repository = "https://github.com/phantomzone-org/poulpy" homepage = "https://github.com/phantomzone-org/poulpy" documentation = "https://docs.rs/poulpy" +[features] +enable-avx = [] + [dependencies] poulpy-cpu-avx = {workspace = true} poulpy-cpu-ref = {workspace = true} diff --git a/poulpy-schemes/benches/circuit_bootstrapping.rs b/poulpy-schemes/benches/circuit_bootstrapping.rs index 2a6cc0e..79f8336 100644 --- a/poulpy-schemes/benches/circuit_bootstrapping.rs +++ b/poulpy-schemes/benches/circuit_bootstrapping.rs @@ -8,8 +8,13 @@ use poulpy_core::{ GLWESecretPreparedFactory, LWE, LWELayout, LWESecret, }, }; -use poulpy_cpu_avx::FFT64Avx; -use poulpy_cpu_ref::FFT64Ref; + +#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))] +pub use poulpy_cpu_avx::FFT64Avx as BackendImpl; + +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))] +pub use poulpy_cpu_ref::FFT64Ref as BackendImpl; + use poulpy_hal::{ api::{ModuleN, ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, 
VecZnxRotateInplace}, layouts::{Backend, Module, Scratch, ScratchOwned}, @@ -127,7 +132,7 @@ where } } - for params in [Params { + let params: Params = Params { name: String::from("1-bit"), extension_factor: 1, k_pt: 1, @@ -171,27 +176,22 @@ where rank: 2_u32.into(), }, }, - }] { - let id: BenchmarkId = BenchmarkId::from_parameter(params.name.clone()); - let mut runner = runner::(¶ms); - group.bench_with_input(id, &(), |b, _| b.iter(&mut runner)); - } + }; + + let id: BenchmarkId = BenchmarkId::from_parameter(params.name.clone()); + let mut runner = runner::(¶ms); + group.bench_with_input(id, &(), |b, _| b.iter(&mut runner)); group.finish(); } -fn bench_circuit_bootstrapping_cpu_ref_fft64(c: &mut Criterion) { - benc_circuit_bootstrapping::(c, "fft64_ref"); +fn bench_circuit_bootstrapping_fft64(c: &mut Criterion) { + #[cfg(all(feature = "enable-avx", target_arch = "x86_64"))] + let label = "fft64_avx"; + #[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))] + let label = "fft64_ref"; + benc_circuit_bootstrapping::(c, label); } -fn bench_circuit_bootstrapping_cpu_avx_fft64(c: &mut Criterion) { - benc_circuit_bootstrapping::(c, "fft64_avx"); -} - -criterion_group!( - benches, - bench_circuit_bootstrapping_cpu_ref_fft64, - bench_circuit_bootstrapping_cpu_avx_fft64, -); - +criterion_group!(benches, bench_circuit_bootstrapping_fft64); criterion_main!(benches); diff --git a/poulpy-schemes/examples/circuit_bootstrapping.rs b/poulpy-schemes/examples/circuit_bootstrapping.rs index 67e1cae..8ab8909 100644 --- a/poulpy-schemes/examples/circuit_bootstrapping.rs +++ b/poulpy-schemes/examples/circuit_bootstrapping.rs @@ -8,10 +8,10 @@ use poulpy_core::{ }; use std::time::Instant; -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] use poulpy_cpu_avx::FFT64Avx as BackendImpl; -#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] 
+#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] use poulpy_cpu_ref::FFT64Ref as BackendImpl; use poulpy_hal::{ diff --git a/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_avx.rs b/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_avx.rs index 5a3c8a5..e9012ac 100644 --- a/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_avx.rs +++ b/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_avx.rs @@ -5,7 +5,7 @@ use poulpy_cpu_avx::FFT64Avx; use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI}; static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock> = - LazyLock::new(|| test_suite::TestContext::::new()); + LazyLock::new(test_suite::TestContext::::new); #[test] fn glwe_blind_retriever() { diff --git a/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_ref.rs b/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_ref.rs index 66039cd..e2e0b01 100644 --- a/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_ref.rs +++ b/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/fft64_ref.rs @@ -2,10 +2,10 @@ use std::sync::LazyLock; use poulpy_cpu_ref::FFT64Ref; -use crate::tfhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI}; +use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI}; static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock> = - LazyLock::new(|| test_suite::TestContext::::new()); + LazyLock::new(test_suite::TestContext::::new); #[test] fn glwe_blind_retriever() { diff --git a/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/mod.rs b/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/mod.rs index 18e8e5a..38bb35e 100644 --- a/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/mod.rs +++ b/poulpy-schemes/src/bin_fhe/bdd_arithmetic/tests/mod.rs @@ -1,9 +1,9 @@ pub mod test_suite; #[cfg(test)] -#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", 
target_feature = "fma")))] mod fft64_ref; #[cfg(test)] -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] mod fft64_avx; diff --git a/poulpy-schemes/src/bin_fhe/blind_rotation/tests/fft64_ref.rs b/poulpy-schemes/src/bin_fhe/blind_rotation/tests/fft64_ref.rs index 5166951..9f22652 100644 --- a/poulpy-schemes/src/bin_fhe/blind_rotation/tests/fft64_ref.rs +++ b/poulpy-schemes/src/bin_fhe/blind_rotation/tests/fft64_ref.rs @@ -1,7 +1,7 @@ use poulpy_cpu_ref::FFT64Ref; use poulpy_hal::{api::ModuleNew, layouts::Module}; -use crate::tfhe::blind_rotation::{ +use crate::bin_fhe::blind_rotation::{ CGGI, tests::test_suite::{ generic_blind_rotation::test_blind_rotation, diff --git a/poulpy-schemes/src/bin_fhe/blind_rotation/tests/mod.rs b/poulpy-schemes/src/bin_fhe/blind_rotation/tests/mod.rs index a136f7f..b5fa520 100644 --- a/poulpy-schemes/src/bin_fhe/blind_rotation/tests/mod.rs +++ b/poulpy-schemes/src/bin_fhe/blind_rotation/tests/mod.rs @@ -1,9 +1,9 @@ #[cfg(test)] -#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))] mod fft64_ref; #[cfg(test)] -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] mod fft64_avx; #[cfg(test)] diff --git a/poulpy-schemes/src/bin_fhe/circuit_bootstrapping/tests/mod.rs b/poulpy-schemes/src/bin_fhe/circuit_bootstrapping/tests/mod.rs index 5fd20ad..b685dbd 100644 --- a/poulpy-schemes/src/bin_fhe/circuit_bootstrapping/tests/mod.rs +++ b/poulpy-schemes/src/bin_fhe/circuit_bootstrapping/tests/mod.rs @@ -1,9 +1,9 @@ pub mod circuit_bootstrapping; #[cfg(test)] -#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))] +#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = 
"avx2", target_feature = "fma")))] mod fft64_ref; #[cfg(test)] -#[cfg(any(target_arch = "x86_64", target_arch = "x86"))] +#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))] mod fft64_avx;