mirror of
https://github.com/arnaucube/poulpy.git
synced 2026-02-10 05:06:44 +01:00
Update to non-avx builds
This commit is contained in:
52
.github/workflows/ci.yml
vendored
52
.github/workflows/ci.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
with:
|
||||||
submodules: recursive
|
submodules: recursive
|
||||||
@@ -21,7 +21,7 @@ jobs:
|
|||||||
with:
|
with:
|
||||||
components: clippy, rustfmt
|
components: clippy, rustfmt
|
||||||
|
|
||||||
- name: Cache cargo dependencies
|
- name: Cache cargo deps
|
||||||
uses: actions/cache@v4
|
uses: actions/cache@v4
|
||||||
with:
|
with:
|
||||||
path: |
|
path: |
|
||||||
@@ -32,14 +32,48 @@ jobs:
|
|||||||
restore-keys: |
|
restore-keys: |
|
||||||
${{ runner.os }}-cargo-
|
${{ runner.os }}-cargo-
|
||||||
|
|
||||||
- name: Build
|
# Detect whether runner supports AVX2 + FMA
|
||||||
run: cargo build --all-targets
|
- name: Detect AVX support
|
||||||
|
id: avxcheck
|
||||||
- name: Clippy (deny warnings)
|
run: |
|
||||||
run: cargo clippy --workspace --all-targets --all-features
|
if lscpu | grep -qi avx2 && lscpu | grep -qi fma; then
|
||||||
|
echo "supported=true" >> $GITHUB_OUTPUT
|
||||||
|
else
|
||||||
|
echo "supported=false" >> $GITHUB_OUTPUT
|
||||||
|
fi
|
||||||
|
|
||||||
|
# rustfmt always runs — unrelated to AVX support
|
||||||
- name: rustfmt (check only)
|
- name: rustfmt (check only)
|
||||||
run: cargo fmt --all --check
|
run: cargo fmt --all --check
|
||||||
|
|
||||||
- name: Run tests
|
# Build / lint / test WITH AVX
|
||||||
run: cargo test --all
|
- name: Build (AVX enabled)
|
||||||
|
if: steps.avxcheck.outputs.supported == 'true'
|
||||||
|
run: |
|
||||||
|
RUSTFLAGS="-C target-feature=+avx2,+fma" \
|
||||||
|
cargo build --workspace --all-targets --features enable-avx
|
||||||
|
|
||||||
|
- name: Clippy (AVX enabled)
|
||||||
|
if: steps.avxcheck.outputs.supported == 'true'
|
||||||
|
run: |
|
||||||
|
RUSTFLAGS="-C target-feature=+avx2,+fma" \
|
||||||
|
cargo clippy --workspace --all-targets --features enable-avx -- -D warnings
|
||||||
|
|
||||||
|
- name: Tests (AVX enabled)
|
||||||
|
if: steps.avxcheck.outputs.supported == 'true'
|
||||||
|
run: |
|
||||||
|
RUSTFLAGS="-C target-feature=+avx2,+fma" \
|
||||||
|
cargo test --workspace --features enable-avx
|
||||||
|
|
||||||
|
# Build / lint / test WITHOUT AVX
|
||||||
|
- name: Build (portable mode)
|
||||||
|
if: steps.avxcheck.outputs.supported == 'false'
|
||||||
|
run: cargo build --workspace --all-targets
|
||||||
|
|
||||||
|
- name: Clippy (portable mode)
|
||||||
|
if: steps.avxcheck.outputs.supported == 'false'
|
||||||
|
run: cargo clippy --workspace --all-targets -- -D warnings
|
||||||
|
|
||||||
|
- name: Tests (portable mode)
|
||||||
|
if: steps.avxcheck.outputs.supported == 'false'
|
||||||
|
run: cargo test --workspace
|
||||||
@@ -8,12 +8,16 @@ repository = "https://github.com/phantomzone-org/poulpy"
|
|||||||
homepage = "https://github.com/phantomzone-org/poulpy"
|
homepage = "https://github.com/phantomzone-org/poulpy"
|
||||||
documentation = "https://docs.rs/poulpy"
|
documentation = "https://docs.rs/poulpy"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
enable-avx = ["dep:poulpy-cpu-avx"]
|
||||||
|
default = ["dep:poulpy-cpu-ref"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
rug = {workspace = true}
|
rug = {workspace = true}
|
||||||
criterion = {workspace = true}
|
criterion = {workspace = true}
|
||||||
poulpy-hal = {workspace = true}
|
poulpy-hal = {workspace = true}
|
||||||
poulpy-cpu-avx = {workspace = true}
|
poulpy-cpu-avx = {workspace = true, optional = true}
|
||||||
poulpy-cpu-ref = {workspace = true}
|
poulpy-cpu-ref = {workspace = true, optional = true}
|
||||||
itertools = {workspace = true}
|
itertools = {workspace = true}
|
||||||
byteorder = {workspace = true}
|
byteorder = {workspace = true}
|
||||||
bytemuck = {workspace = true}
|
bytemuck = {workspace = true}
|
||||||
|
|||||||
@@ -6,7 +6,12 @@ use std::hint::black_box;
|
|||||||
|
|
||||||
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
||||||
|
|
||||||
use poulpy_cpu_ref::FFT64Ref;
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
|
||||||
|
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
|
||||||
|
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
|
||||||
use poulpy_hal::{
|
use poulpy_hal::{
|
||||||
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow},
|
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow},
|
||||||
layouts::{Module, ScalarZnx, ScratchOwned},
|
layouts::{Module, ScalarZnx, ScratchOwned},
|
||||||
@@ -26,7 +31,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn runner(p: Params) -> impl FnMut() {
|
fn runner(p: Params) -> impl FnMut() {
|
||||||
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
|
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
|
||||||
|
|
||||||
let n: Degree = Degree(module.n() as u32);
|
let n: Degree = Degree(module.n() as u32);
|
||||||
let base2k: Base2K = p.base2k;
|
let base2k: Base2K = p.base2k;
|
||||||
@@ -42,8 +47,8 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
|
|||||||
n,
|
n,
|
||||||
base2k,
|
base2k,
|
||||||
k: k_ggsw,
|
k: k_ggsw,
|
||||||
dnum: dnum,
|
dnum,
|
||||||
dsize: dsize,
|
dsize,
|
||||||
rank,
|
rank,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -66,7 +71,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
|
|||||||
let mut ct_glwe_out: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_out_layout);
|
let mut ct_glwe_out: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_out_layout);
|
||||||
let pt_rgsw: ScalarZnx<Vec<u8>> = ScalarZnx::alloc(n.into(), 1);
|
let pt_rgsw: ScalarZnx<Vec<u8>> = ScalarZnx::alloc(n.into(), 1);
|
||||||
|
|
||||||
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
|
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
|
||||||
GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout)
|
GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout)
|
||||||
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout)
|
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout)
|
||||||
| GLWE::external_product_tmp_bytes(&module, &glwe_out_layout, &glwe_in_layout, &ggsw_layout),
|
| GLWE::external_product_tmp_bytes(&module, &glwe_out_layout, &glwe_in_layout, &ggsw_layout),
|
||||||
@@ -79,7 +84,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
|
|||||||
let mut sk: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_in_layout);
|
let mut sk: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_in_layout);
|
||||||
sk.fill_ternary_prob(0.5, &mut source_xs);
|
sk.fill_ternary_prob(0.5, &mut source_xs);
|
||||||
|
|
||||||
let mut sk_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
|
let mut sk_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
|
||||||
sk_dft.prepare(&module, &sk);
|
sk_dft.prepare(&module, &sk);
|
||||||
|
|
||||||
ct_ggsw.encrypt_sk(
|
ct_ggsw.encrypt_sk(
|
||||||
@@ -99,7 +104,7 @@ fn bench_external_product_glwe_fft64(c: &mut Criterion) {
|
|||||||
scratch.borrow(),
|
scratch.borrow(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, FFT64Ref> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
|
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, BackendImpl> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
|
||||||
ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow());
|
ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow());
|
||||||
|
|
||||||
move || {
|
move || {
|
||||||
@@ -138,7 +143,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn runner(p: Params) -> impl FnMut() {
|
fn runner(p: Params) -> impl FnMut() {
|
||||||
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
|
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
|
||||||
|
|
||||||
let n: Degree = Degree(module.n() as u32);
|
let n: Degree = Degree(module.n() as u32);
|
||||||
let base2k: Base2K = p.base2k;
|
let base2k: Base2K = p.base2k;
|
||||||
@@ -153,8 +158,8 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
n,
|
n,
|
||||||
base2k,
|
base2k,
|
||||||
k: k_ggsw,
|
k: k_ggsw,
|
||||||
dnum: dnum,
|
dnum,
|
||||||
dsize: dsize,
|
dsize,
|
||||||
rank,
|
rank,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -169,7 +174,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
let mut ct_glwe: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_layout);
|
let mut ct_glwe: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_layout);
|
||||||
let pt_rgsw: ScalarZnx<Vec<u8>> = ScalarZnx::alloc(n.into(), 1);
|
let pt_rgsw: ScalarZnx<Vec<u8>> = ScalarZnx::alloc(n.into(), 1);
|
||||||
|
|
||||||
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
|
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
|
||||||
GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout)
|
GGSW::encrypt_sk_tmp_bytes(&module, &ggsw_layout)
|
||||||
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout)
|
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout)
|
||||||
| GLWE::external_product_tmp_bytes(&module, &glwe_layout, &glwe_layout, &ggsw_layout),
|
| GLWE::external_product_tmp_bytes(&module, &glwe_layout, &glwe_layout, &ggsw_layout),
|
||||||
@@ -182,7 +187,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
let mut sk: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
|
let mut sk: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
|
||||||
sk.fill_ternary_prob(0.5, &mut source_xs);
|
sk.fill_ternary_prob(0.5, &mut source_xs);
|
||||||
|
|
||||||
let mut sk_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
|
let mut sk_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
|
||||||
sk_dft.prepare(&module, &sk);
|
sk_dft.prepare(&module, &sk);
|
||||||
|
|
||||||
ct_ggsw.encrypt_sk(
|
ct_ggsw.encrypt_sk(
|
||||||
@@ -202,7 +207,7 @@ fn bench_external_product_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
scratch.borrow(),
|
scratch.borrow(),
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, FFT64Ref> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
|
let mut ggsw_prepared: GGSWPrepared<Vec<u8>, BackendImpl> = GGSWPrepared::alloc_from_infos(&module, &ct_ggsw);
|
||||||
ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow());
|
ggsw_prepared.prepare(&module, &ct_ggsw, scratch.borrow());
|
||||||
move || {
|
move || {
|
||||||
let scratch_borrow = scratch.borrow();
|
let scratch_borrow = scratch.borrow();
|
||||||
|
|||||||
@@ -6,7 +6,13 @@ use poulpy_core::layouts::{
|
|||||||
use std::{hint::black_box, time::Duration};
|
use std::{hint::black_box, time::Duration};
|
||||||
|
|
||||||
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
||||||
use poulpy_cpu_ref::FFT64Ref;
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
|
||||||
|
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
|
||||||
|
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
|
||||||
use poulpy_hal::{
|
use poulpy_hal::{
|
||||||
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow},
|
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow},
|
||||||
layouts::{Module, ScratchOwned},
|
layouts::{Module, ScratchOwned},
|
||||||
@@ -27,7 +33,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn runner(p: Params) -> impl FnMut() {
|
fn runner(p: Params) -> impl FnMut() {
|
||||||
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
|
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
|
||||||
|
|
||||||
let n: Degree = Degree(module.n() as u32);
|
let n: Degree = Degree(module.n() as u32);
|
||||||
let base2k: Base2K = p.base2k;
|
let base2k: Base2K = p.base2k;
|
||||||
@@ -66,7 +72,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) {
|
|||||||
let mut ct_in: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_in_layout);
|
let mut ct_in: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_in_layout);
|
||||||
let mut ct_out: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_out_layout);
|
let mut ct_out: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_out_layout);
|
||||||
|
|
||||||
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
|
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
|
||||||
GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_atk_layout)
|
GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_atk_layout)
|
||||||
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout)
|
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_in_layout)
|
||||||
| GLWE::keyswitch_tmp_bytes(
|
| GLWE::keyswitch_tmp_bytes(
|
||||||
@@ -84,7 +90,7 @@ fn bench_keyswitch_glwe_fft64(c: &mut Criterion) {
|
|||||||
let mut sk_in: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_in_layout);
|
let mut sk_in: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_in_layout);
|
||||||
sk_in.fill_ternary_prob(0.5, &mut source_xs);
|
sk_in.fill_ternary_prob(0.5, &mut source_xs);
|
||||||
|
|
||||||
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
|
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
|
||||||
sk_in_dft.prepare(&module, &sk_in);
|
sk_in_dft.prepare(&module, &sk_in);
|
||||||
|
|
||||||
ksk.encrypt_sk(
|
ksk.encrypt_sk(
|
||||||
@@ -150,7 +156,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn runner(p: Params) -> impl FnMut() {
|
fn runner(p: Params) -> impl FnMut() {
|
||||||
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << p.log_n);
|
let module: Module<BackendImpl> = Module::<BackendImpl>::new(1 << p.log_n);
|
||||||
|
|
||||||
let n: Degree = Degree(module.n() as u32);
|
let n: Degree = Degree(module.n() as u32);
|
||||||
let base2k: Base2K = p.base2k;
|
let base2k: Base2K = p.base2k;
|
||||||
@@ -181,7 +187,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
let mut ksk: GLWESwitchingKey<Vec<u8>> = GLWESwitchingKey::alloc_from_infos(&gglwe_layout);
|
let mut ksk: GLWESwitchingKey<Vec<u8>> = GLWESwitchingKey::alloc_from_infos(&gglwe_layout);
|
||||||
let mut ct: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_layout);
|
let mut ct: GLWE<Vec<u8>> = GLWE::alloc_from_infos(&glwe_layout);
|
||||||
|
|
||||||
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
|
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
|
||||||
GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_layout)
|
GLWESwitchingKey::encrypt_sk_tmp_bytes(&module, &gglwe_layout)
|
||||||
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout)
|
| GLWE::encrypt_sk_tmp_bytes(&module, &glwe_layout)
|
||||||
| GLWE::keyswitch_tmp_bytes(&module, &glwe_layout, &glwe_layout, &gglwe_layout),
|
| GLWE::keyswitch_tmp_bytes(&module, &glwe_layout, &glwe_layout, &gglwe_layout),
|
||||||
@@ -194,7 +200,7 @@ fn bench_keyswitch_glwe_inplace_fft64(c: &mut Criterion) {
|
|||||||
let mut sk_in: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
|
let mut sk_in: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
|
||||||
sk_in.fill_ternary_prob(0.5, &mut source_xs);
|
sk_in.fill_ternary_prob(0.5, &mut source_xs);
|
||||||
|
|
||||||
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
|
let mut sk_in_dft: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
|
||||||
sk_in_dft.prepare(&module, &sk_in);
|
sk_in_dft.prepare(&module, &sk_in);
|
||||||
|
|
||||||
let mut sk_out: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
|
let mut sk_out: GLWESecret<Vec<u8>> = GLWESecret::alloc_from_infos(&glwe_layout);
|
||||||
|
|||||||
@@ -5,7 +5,13 @@ use poulpy_core::{
|
|||||||
prepared::GLWESecretPrepared,
|
prepared::GLWESecretPrepared,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
use poulpy_cpu_ref::FFT64Ref;
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
|
||||||
|
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
|
||||||
|
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
|
||||||
use poulpy_hal::{
|
use poulpy_hal::{
|
||||||
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxFillUniform},
|
api::{ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxFillUniform},
|
||||||
layouts::{Module, ScratchOwned},
|
layouts::{Module, ScratchOwned},
|
||||||
@@ -31,7 +37,7 @@ fn main() {
|
|||||||
let rank: Rank = Rank(1);
|
let rank: Rank = Rank(1);
|
||||||
|
|
||||||
// Instantiate Module (DFT Tables)
|
// Instantiate Module (DFT Tables)
|
||||||
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(n.0 as u64);
|
let module: Module<BackendImpl> = Module::<BackendImpl>::new(n.0 as u64);
|
||||||
|
|
||||||
let glwe_ct_infos: GLWELayout = GLWELayout {
|
let glwe_ct_infos: GLWELayout = GLWELayout {
|
||||||
n,
|
n,
|
||||||
@@ -53,7 +59,7 @@ fn main() {
|
|||||||
let mut source_xa: Source = Source::new([2u8; 32]);
|
let mut source_xa: Source = Source::new([2u8; 32]);
|
||||||
|
|
||||||
// Scratch space
|
// Scratch space
|
||||||
let mut scratch: ScratchOwned<FFT64Ref> = ScratchOwned::alloc(
|
let mut scratch: ScratchOwned<BackendImpl> = ScratchOwned::alloc(
|
||||||
GLWE::encrypt_sk_tmp_bytes(&module, &glwe_ct_infos) | GLWE::decrypt_tmp_bytes(&module, &glwe_ct_infos),
|
GLWE::encrypt_sk_tmp_bytes(&module, &glwe_ct_infos) | GLWE::decrypt_tmp_bytes(&module, &glwe_ct_infos),
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -62,7 +68,7 @@ fn main() {
|
|||||||
sk.fill_ternary_prob(0.5, &mut source_xs);
|
sk.fill_ternary_prob(0.5, &mut source_xs);
|
||||||
|
|
||||||
// Backend-prepared secret
|
// Backend-prepared secret
|
||||||
let mut sk_prepared: GLWESecretPrepared<Vec<u8>, FFT64Ref> = GLWESecretPrepared::alloc(&module, rank);
|
let mut sk_prepared: GLWESecretPrepared<Vec<u8>, BackendImpl> = GLWESecretPrepared::alloc(&module, rank);
|
||||||
sk_prepared.prepare(&module, &sk);
|
sk_prepared.prepare(&module, &sk);
|
||||||
|
|
||||||
// Uniform plaintext
|
// Uniform plaintext
|
||||||
|
|||||||
@@ -358,7 +358,7 @@ where
|
|||||||
let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1);
|
let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1);
|
||||||
let mut ksk_infos: GGLWELayout = infos.gglwe_layout();
|
let mut ksk_infos: GGLWELayout = infos.gglwe_layout();
|
||||||
ksk_infos.rank_in = Rank(pairs);
|
ksk_infos.rank_in = Rank(pairs);
|
||||||
let (data, scratch) = self.take_gglwe(infos);
|
let (data, scratch) = self.take_gglwe(&ksk_infos);
|
||||||
(GLWETensorKey(data), scratch)
|
(GLWETensorKey(data), scratch)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -377,7 +377,7 @@ where
|
|||||||
let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1);
|
let pairs: u32 = (((infos.rank_out().0 + 1) * infos.rank_out().0) >> 1).max(1);
|
||||||
let mut ksk_infos: GGLWELayout = infos.gglwe_layout();
|
let mut ksk_infos: GGLWELayout = infos.gglwe_layout();
|
||||||
ksk_infos.rank_in = Rank(pairs);
|
ksk_infos.rank_in = Rank(pairs);
|
||||||
let (data, scratch) = self.take_gglwe_prepared(module, infos);
|
let (data, scratch) = self.take_gglwe_prepared(module, &ksk_infos);
|
||||||
(GLWETensorKeyPrepared(data), scratch)
|
(GLWETensorKeyPrepared(data), scratch)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -4,10 +4,10 @@ pub mod test_suite;
|
|||||||
mod serialization;
|
mod serialization;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
|
||||||
mod poulpy_core {
|
mod poulpy_core {
|
||||||
use poulpy_hal::backend_test_suite;
|
use poulpy_hal::backend_test_suite;
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
|
||||||
backend_test_suite!(
|
backend_test_suite!(
|
||||||
mod cpu_avx,
|
mod cpu_avx,
|
||||||
backend = poulpy_cpu_avx::FFT64Avx,
|
backend = poulpy_cpu_avx::FFT64Avx,
|
||||||
@@ -69,8 +69,13 @@ mod poulpy_core {
|
|||||||
lwe_to_glwe => crate::tests::test_suite::test_lwe_to_glwe,
|
lwe_to_glwe => crate::tests::test_suite::test_lwe_to_glwe,
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
|
||||||
|
mod poulpy_core {
|
||||||
|
use poulpy_hal::backend_test_suite;
|
||||||
|
|
||||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
|
|
||||||
backend_test_suite!(
|
backend_test_suite!(
|
||||||
mod cpu_ref,
|
mod cpu_ref,
|
||||||
backend = poulpy_cpu_ref::FFT64Ref,
|
backend = poulpy_cpu_ref::FFT64Ref,
|
||||||
|
|||||||
@@ -9,6 +9,9 @@ repository = "https://github.com/phantomzone-org/poulpy"
|
|||||||
homepage = "https://github.com/phantomzone-org/poulpy"
|
homepage = "https://github.com/phantomzone-org/poulpy"
|
||||||
documentation = "https://docs.rs/poulpy"
|
documentation = "https://docs.rs/poulpy"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
enable-avx = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
poulpy-hal = {workspace = true}
|
poulpy-hal = {workspace = true}
|
||||||
poulpy-cpu-ref = {workspace = true}
|
poulpy-cpu-ref = {workspace = true}
|
||||||
|
|||||||
@@ -1,8 +1,51 @@
|
|||||||
# 🐙 Poulpy-CPU-REF
|
# 🐙 Poulpy-CPU-AVX
|
||||||
|
|
||||||
**Poulpy-Backend-CPU-AVX** is a Rust crate that provides an AVX accelerated CPU implementation of **`poulpy-hal`**. This crate is used to instantiate projects implemented with **`poulpy-hal`**, **`poulpy-core`** and/or **`poulpy-schemes`**.
|
**Poulpy-CPU-AVX** is a Rust crate that provides an **AVX2 + FMA accelerated CPU backend for Poulpy**.
|
||||||
|
|
||||||
## Example
|
This backend implements the Poulpy HAL extension traits and can be used by:
|
||||||
|
|
||||||
|
- [`poulpy-hal`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-hal)
|
||||||
|
- [`poulpy-core`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-core)
|
||||||
|
- [`poulpy-schemes`](https://github.com/phantomzone-org/poulpy/tree/main/poulpy-schemes)
|
||||||
|
|
||||||
|
## 🚩 Safety and Requirements
|
||||||
|
|
||||||
|
To avoid illegal hardware instructions (SIGILL) on unsupported CPUs, this backend is **opt-in** and **only builds when explicitly requested**.
|
||||||
|
|
||||||
|
| Requirement | Status |
|
||||||
|
|------------|--------|
|
||||||
|
| Cargo feature flag | `--features enable-avx` **must be enabled** |
|
||||||
|
| CPU architecture | `x86_64` |
|
||||||
|
| CPU target features | `AVX2` + `FMA` |
|
||||||
|
|
||||||
|
If `enable-avx` is enabled but the target does not provide these capabilities, the build **fails immediately with a clear error message**, rather than generating invalid binaries.
|
||||||
|
|
||||||
|
When `enable-avx` is **not** enabled, this crate is simply skipped and Poulpy automatically falls back to the portable `poulpy-cpu-ref` backend. This ensures that Poulpy's workspace remains portable (e.g. for macOS ARM).
|
||||||
|
|
||||||
|
## ⚙️ Building with the AVX backend enabled
|
||||||
|
|
||||||
|
Because the compiler must generate AVX2 + FMA instructions, both the Cargo feature and CPU target flags must be specified:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
RUSTFLAGS="-C target-feature=+avx2,+fma" \
|
||||||
|
cargo build --features enable-avx
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running an example
|
||||||
|
|
||||||
|
```bash
|
||||||
|
RUSTFLAGS="-C target-feature=+avx2,+fma" \
|
||||||
|
cargo run --example <name> --features enable-avx
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running benchmarks
|
||||||
|
|
||||||
|
```bash
|
||||||
|
RUSTFLAGS="-C target-feature=+avx2,+fma" \
|
||||||
|
cargo bench --features enable-avx
|
||||||
|
```
|
||||||
|
|
||||||
|
## Basic Usage
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
use poulpy_backend_cpu_avx::FFT64Avx;
|
use poulpy_cpu_avx::FFT64Avx;
|
||||||
@@ -12,7 +55,24 @@ let log_n: usize = 10;
|
|||||||
let module = Module<FFT64Avx> = Module<FFT64Avx>::new(1<<log_n);
|
let module: Module<FFT64Avx> = Module::<FFT64Avx>::new(1 << log_n);
|
||||||
```
|
```
|
||||||
|
|
||||||
## Contributors
|
Once compiled with `enable-avx`, the backend is usable transparently anywhere Poulpy expects a backend type (`poulpy-hal`, `poulpy-core`, `poulpy-schemes`).
|
||||||
|
|
||||||
To add your own backend, implement the open extension traits from **`poulpy-hal/oep`** for a struct that implements the `Backend` trait.
|
## 🤝 Contributors
|
||||||
This will automatically make your backend compatible with the API of **`poulpy-hal`**, **`poulpy-core`** and **`poulpy-schemes`**.
|
|
||||||
|
To implement your own Poulpy backend (SIMD or accelerator):
|
||||||
|
|
||||||
|
1. Define a backend struct
|
||||||
|
2. Implement the open extension traits from `poulpy-hal/oep`
|
||||||
|
3. Implement the `Backend` trait
|
||||||
|
|
||||||
|
Your backend will automatically integrate with:
|
||||||
|
|
||||||
|
* `poulpy-hal`
|
||||||
|
* `poulpy-core`
|
||||||
|
* `poulpy-schemes`
|
||||||
|
|
||||||
|
No modifications to those crates are required — the HAL provides the extension points.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
For questions or guidance, feel free to open an issue or discussion in the repository.
|
||||||
|
|||||||
@@ -1,11 +1,17 @@
|
|||||||
#![cfg(target_arch = "x86_64")]
|
use criterion::{Criterion, criterion_group, criterion_main};
|
||||||
use std::hint::black_box;
|
|
||||||
|
|
||||||
use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main};
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
use poulpy_cpu_avx::{ReimFFTAvx, ReimIFFTAvx};
|
fn bench_ifft_avx2_fma(_c: &mut Criterion) {
|
||||||
use poulpy_hal::reference::fft64::reim::{ReimDFTExecute, ReimFFTTable, ReimIFFTTable};
|
eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
|
pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
|
||||||
|
use criterion::BenchmarkId;
|
||||||
|
use poulpy_cpu_avx::ReimIFFTAvx;
|
||||||
|
use poulpy_hal::reference::fft64::reim::{ReimDFTExecute, ReimIFFTTable};
|
||||||
|
use std::hint::black_box;
|
||||||
|
|
||||||
let group_name: String = "ifft_avx2_fma".to_string();
|
let group_name: String = "ifft_avx2_fma".to_string();
|
||||||
|
|
||||||
let mut group = c.benchmark_group(group_name);
|
let mut group = c.benchmark_group(group_name);
|
||||||
@@ -41,7 +47,18 @@ pub fn bench_ifft_avx2_fma(c: &mut Criterion) {
|
|||||||
group.finish();
|
group.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
|
fn bench_fft_avx2_fma(_c: &mut Criterion) {
|
||||||
|
eprintln!("Skipping: AVX FFT benchmark requires x86_64 + AVX2 + FMA");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
pub fn bench_fft_avx2_fma(c: &mut Criterion) {
|
pub fn bench_fft_avx2_fma(c: &mut Criterion) {
|
||||||
|
use criterion::BenchmarkId;
|
||||||
|
use poulpy_cpu_avx::ReimFFTAvx;
|
||||||
|
use poulpy_hal::reference::fft64::reim::{ReimDFTExecute, ReimFFTTable};
|
||||||
|
use std::hint::black_box;
|
||||||
|
|
||||||
let group_name: String = "fft_avx2_fma".to_string();
|
let group_name: String = "fft_avx2_fma".to_string();
|
||||||
|
|
||||||
let mut group = c.benchmark_group(group_name);
|
let mut group = c.benchmark_group(group_name);
|
||||||
|
|||||||
@@ -1,21 +1,36 @@
|
|||||||
#![cfg(target_arch = "x86_64")]
|
|
||||||
// poulpy-backend/benches/vec_znx_add.rs
|
|
||||||
use criterion::{Criterion, criterion_group, criterion_main};
|
use criterion::{Criterion, criterion_group, criterion_main};
|
||||||
use poulpy_cpu_avx::FFT64Avx;
|
|
||||||
use poulpy_hal::reference::vec_znx::{bench_vec_znx_add, bench_vec_znx_automorphism, bench_vec_znx_normalize_inplace};
|
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
|
fn bench_vec_znx_add_cpu_avx_fft64(_c: &mut Criterion) {
|
||||||
|
eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
fn bench_vec_znx_add_cpu_avx_fft64(c: &mut Criterion) {
|
fn bench_vec_znx_add_cpu_avx_fft64(c: &mut Criterion) {
|
||||||
bench_vec_znx_add::<FFT64Avx>(c, "FFT64Avx");
|
use poulpy_cpu_avx::FFT64Avx;
|
||||||
|
poulpy_hal::reference::vec_znx::bench_vec_znx_add::<FFT64Avx>(c, "FFT64Avx");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(dead_code)]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
|
fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(_c: &mut Criterion) {
|
||||||
|
eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(c: &mut Criterion) {
|
fn bench_vec_znx_normalize_inplace_cpu_avx_fft64(c: &mut Criterion) {
|
||||||
bench_vec_znx_normalize_inplace::<FFT64Avx>(c, "FFT64Avx");
|
use poulpy_cpu_avx::FFT64Avx;
|
||||||
|
poulpy_hal::reference::vec_znx::bench_vec_znx_normalize_inplace::<FFT64Avx>(c, "FFT64Avx");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
|
fn bench_vec_znx_automorphism_cpu_avx_fft64(_c: &mut Criterion) {
|
||||||
|
eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
fn bench_vec_znx_automorphism_cpu_avx_fft64(c: &mut Criterion) {
|
fn bench_vec_znx_automorphism_cpu_avx_fft64(c: &mut Criterion) {
|
||||||
bench_vec_znx_automorphism::<FFT64Avx>(c, "FFT64Avx");
|
use poulpy_cpu_avx::FFT64Avx;
|
||||||
|
poulpy_hal::reference::vec_znx::bench_vec_znx_automorphism::<FFT64Avx>(c, "FFT64Avx");
|
||||||
}
|
}
|
||||||
|
|
||||||
criterion_group!(
|
criterion_group!(
|
||||||
|
|||||||
@@ -1,27 +1,15 @@
|
|||||||
// poulpy-backend/benches/vec_znx_add.rs
|
|
||||||
use criterion::{Criterion, criterion_group, criterion_main};
|
use criterion::{Criterion, criterion_group, criterion_main};
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
use poulpy_cpu_avx::FFT64Avx;
|
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
|
||||||
use poulpy_cpu_ref::FFT64Ref;
|
|
||||||
|
|
||||||
use poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft;
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
|
fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(_c: &mut Criterion) {
|
||||||
|
eprintln!("Skipping: AVX IFft benchmark requires x86_64 + AVX2 + FMA");
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(c: &mut Criterion) {
|
fn bench_vmp_apply_dft_to_dft_cpu_avx_fft64(c: &mut Criterion) {
|
||||||
bench_vmp_apply_dft_to_dft::<FFT64Avx>(c, "FFT64Avx");
|
use poulpy_cpu_avx::FFT64Avx;
|
||||||
}
|
poulpy_hal::bench_suite::vmp::bench_vmp_apply_dft_to_dft::<FFT64Avx>(c, "FFT64Avx");
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
|
||||||
fn bench_vmp_apply_dft_to_dft_cpu_ref_fft64(c: &mut Criterion) {
|
|
||||||
bench_vmp_apply_dft_to_dft::<FFT64Ref>(c, "FFT64Ref");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
criterion_group!(benches_x86, bench_vmp_apply_dft_to_dft_cpu_avx_fft64,);
|
criterion_group!(benches_x86, bench_vmp_apply_dft_to_dft_cpu_avx_fft64,);
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
|
||||||
criterion_group!(benches_ref, bench_vmp_apply_dft_to_dft_cpu_ref_fft64,);
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
criterion_main!(benches_x86);
|
criterion_main!(benches_x86);
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
|
||||||
criterion_main!(benches_ref);
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
use itertools::izip;
|
use itertools::izip;
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
|
||||||
use poulpy_hal::{
|
use poulpy_hal::{
|
||||||
@@ -143,4 +143,3 @@ fn main() {
|
|||||||
println!("{}: {} {}", i, a, (*b as f64) / scale);
|
println!("{}: {} {}", i, a, (*b as f64) / scale);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,19 @@
|
|||||||
#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
|
// ─────────────────────────────────────────────────────────────
|
||||||
|
// Build the backend **only when ALL conditions are satisfied**
|
||||||
|
// ─────────────────────────────────────────────────────────────
|
||||||
|
#![cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
|
|
||||||
|
// If the user enables this backend but targets a non-x86_64 CPU → abort
|
||||||
|
#[cfg(all(feature = "enable-avx", not(target_arch = "x86_64")))]
|
||||||
|
compile_error!("feature `enable-avx` requires target_arch = \"x86_64\".");
|
||||||
|
|
||||||
|
// If the user enables this backend but AVX2 isn't enabled in the target → abort
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", not(target_feature = "avx2")))]
|
||||||
|
compile_error!("feature `enable-avx` requires AVX2. Build with RUSTFLAGS=\"-C target-feature=+avx2\".");
|
||||||
|
|
||||||
|
// If the user enables this backend but FMA isn't enabled in the target → abort
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", not(target_feature = "fma")))]
|
||||||
|
compile_error!("feature `enable-avx` requires FMA. Build with RUSTFLAGS=\"-C target-feature=+fma\".");
|
||||||
|
|
||||||
mod module;
|
mod module;
|
||||||
mod reim;
|
mod reim;
|
||||||
|
|||||||
@@ -1,18 +1,93 @@
|
|||||||
# 🐙 Poulpy-CPU-AVX
|
# 🐙 Poulpy-CPU-REF
|
||||||
|
|
||||||
**Poulpy-Backend-CPU-AVX** is a Rust crate that provides the reference CPU implementation of **`poulpy-hal`**. This crate is used to instantiate projects implemented with **`poulpy-hal`**, **`poulpy-core`** and/or **`poulpy-schemes`**.
|
**Poulpy-CPU-REF** is the **reference (portable) CPU backend for Poulpy**.
|
||||||
|
|
||||||
## Example
|
It implements the Poulpy HAL extension traits without requiring SIMD or specialized CPU instructions, making it suitable for:
|
||||||
|
|
||||||
|
- all CPU architectures (`x86_64`, `aarch64`, `arm`, `riscv64`, …)
|
||||||
|
- development machines and CI runners
|
||||||
|
- environments without AVX or other advanced SIMD support
|
||||||
|
|
||||||
|
This backend integrates transparently with:
|
||||||
|
|
||||||
|
- `poulpy-hal`
|
||||||
|
- `poulpy-core`
|
||||||
|
- `poulpy-schemes`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## When is this backend used?
|
||||||
|
|
||||||
|
`poulpy-cpu-ref` is always available and requires **no compilation flags and no CPU features**.
|
||||||
|
|
||||||
|
It is automatically selected when:
|
||||||
|
|
||||||
|
- the project does not request an optimized backend, or
|
||||||
|
- the target CPU does not support the requested SIMD backend (e.g., AVX), or
|
||||||
|
- portability and reproducibility are more important than raw performance.
|
||||||
|
|
||||||
|
No additional configuration is required to use it.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧪 Basic Usage
|
||||||
|
|
||||||
```rust
|
```rust
|
||||||
use poulpy_backend_cpu_ref::FFT64Ref;
|
use poulpy_cpu_ref::FFT64Ref;
|
||||||
use poulpy_hal::{api::ModuleNew, layouts::Module};
|
use poulpy_hal::{api::ModuleNew, layouts::Module};
|
||||||
|
|
||||||
let log_n: usize = 10;
|
let log_n: usize = 10;
|
||||||
let module = Module<FFT64Ref> = Module<FFT64Ref>::new(1<<log_n);
|
let module: Module<FFT64Ref> = Module::<FFT64Ref>::new(1 << log_n);
|
||||||
```
|
```
|
||||||
|
|
||||||
## Contributors
|
This works on **all supported platforms and architectures**.
|
||||||
|
|
||||||
To add your own backend, implement the open extension traits from **`poulpy-hal/oep`** for a struct that implements the `Backend` trait.
|
---
|
||||||
This will automatically make your backend compatible with the API of **`poulpy-hal`**, **`poulpy-core`** and **`poulpy-schemes`**.
|
|
||||||
|
## Performance Notes
|
||||||
|
|
||||||
|
`poulpy-cpu-ref` prioritizes:
|
||||||
|
|
||||||
|
* portability
|
||||||
|
* correctness
|
||||||
|
* ease of debugging
|
||||||
|
|
||||||
|
For maximum performance on x86_64 CPUs with AVX2 + FMA support, consider enabling the optional optimized backend:
|
||||||
|
|
||||||
|
```
|
||||||
|
poulpy-cpu-avx (feature: enable-avx)
|
||||||
|
```
|
||||||
|
|
||||||
|
Benchmarks and applications can freely switch between backends without changing source code — backend selection can be handled with feature flags, for example
|
||||||
|
|
||||||
|
```rust
|
||||||
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
|
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
|
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🤝 Contributors
|
||||||
|
|
||||||
|
To implement your own backend (SIMD or accelerator):
|
||||||
|
|
||||||
|
1. Define a backend struct
|
||||||
|
2. Implement the open extension traits from `poulpy-hal/oep`
|
||||||
|
3. Implement the `Backend` trait
|
||||||
|
|
||||||
|
Your backend will automatically integrate with:
|
||||||
|
|
||||||
|
* `poulpy-hal`
|
||||||
|
* `poulpy-core`
|
||||||
|
* `poulpy-schemes`
|
||||||
|
|
||||||
|
No modifications to those crates are necessary — the HAL provides the extension points.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
For questions or guidance, feel free to open an issue or discussion in the repository.
|
||||||
|
|
||||||
|
```
|
||||||
|
|||||||
@@ -367,12 +367,8 @@ fn test_vec_znx_normalize_conv() {
|
|||||||
|
|
||||||
let out_prec: u32 = (end_size * end_base2k) as u32;
|
let out_prec: u32 = (end_size * end_base2k) as u32;
|
||||||
|
|
||||||
let mut data_want: Vec<Float> = (0..n)
|
let mut data_want: Vec<Float> = (0..n).map(|_| Float::with_val(out_prec, 0)).collect();
|
||||||
.map(|_| Float::with_val(out_prec as u32, 0))
|
let mut data_res: Vec<Float> = (0..n).map(|_| Float::with_val(out_prec, 0)).collect();
|
||||||
.collect();
|
|
||||||
let mut data_res: Vec<Float> = (0..n)
|
|
||||||
.map(|_| Float::with_val(out_prec as u32, 0))
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
have.decode_vec_float(end_base2k, 0, &mut data_want);
|
have.decode_vec_float(end_base2k, 0, &mut data_want);
|
||||||
want.decode_vec_float(end_base2k, 0, &mut data_res);
|
want.decode_vec_float(end_base2k, 0, &mut data_res);
|
||||||
|
|||||||
@@ -9,6 +9,9 @@ repository = "https://github.com/phantomzone-org/poulpy"
|
|||||||
homepage = "https://github.com/phantomzone-org/poulpy"
|
homepage = "https://github.com/phantomzone-org/poulpy"
|
||||||
documentation = "https://docs.rs/poulpy"
|
documentation = "https://docs.rs/poulpy"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
enable-avx = []
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
poulpy-cpu-avx = {workspace = true}
|
poulpy-cpu-avx = {workspace = true}
|
||||||
poulpy-cpu-ref = {workspace = true}
|
poulpy-cpu-ref = {workspace = true}
|
||||||
|
|||||||
@@ -8,10 +8,12 @@ use poulpy_core::{
|
|||||||
GLWESecretPreparedFactory, LWE, LWELayout, LWESecret,
|
GLWESecretPreparedFactory, LWE, LWELayout, LWESecret,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
use poulpy_cpu_avx::FFT64Avx;
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
pub use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
use poulpy_cpu_ref::FFT64Ref;
|
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
|
||||||
|
pub use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
|
||||||
use poulpy_hal::{
|
use poulpy_hal::{
|
||||||
api::{ModuleN, ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxRotateInplace},
|
api::{ModuleN, ModuleNew, ScratchOwnedAlloc, ScratchOwnedBorrow, VecZnxRotateInplace},
|
||||||
@@ -130,7 +132,7 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for params in [Params {
|
let params: Params = Params {
|
||||||
name: String::from("1-bit"),
|
name: String::from("1-bit"),
|
||||||
extension_factor: 1,
|
extension_factor: 1,
|
||||||
k_pt: 1,
|
k_pt: 1,
|
||||||
@@ -174,27 +176,22 @@ where
|
|||||||
rank: 2_u32.into(),
|
rank: 2_u32.into(),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}] {
|
};
|
||||||
|
|
||||||
let id: BenchmarkId = BenchmarkId::from_parameter(params.name.clone());
|
let id: BenchmarkId = BenchmarkId::from_parameter(params.name.clone());
|
||||||
let mut runner = runner::<BE, BRA>(¶ms);
|
let mut runner = runner::<BE, BRA>(¶ms);
|
||||||
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
|
group.bench_with_input(id, &(), |b, _| b.iter(&mut runner));
|
||||||
}
|
|
||||||
|
|
||||||
group.finish();
|
group.finish();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
fn bench_circuit_bootstrapping_fft64(c: &mut Criterion) {
|
||||||
fn bench_circuit_bootstrapping_cpu_ref_fft64(c: &mut Criterion) {
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64"))]
|
||||||
benc_circuit_bootstrapping::<FFT64Ref, CGGI>(c, "fft64_ref");
|
let label = "fft64_avx";
|
||||||
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64")))]
|
||||||
|
let label = "fft64_ref";
|
||||||
|
benc_circuit_bootstrapping::<BackendImpl, CGGI>(c, label);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
criterion_group!(benches, bench_circuit_bootstrapping_fft64);
|
||||||
fn bench_circuit_bootstrapping_cpu_avx_fft64(c: &mut Criterion) {
|
|
||||||
benc_circuit_bootstrapping::<FFT64Avx, CGGI>(c, "fft64_avx");
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(target_arch = "x86_64")]
|
|
||||||
criterion_group!(benches, bench_circuit_bootstrapping_cpu_ref_fft64, bench_circuit_bootstrapping_cpu_avx_fft64,);
|
|
||||||
#[cfg(not(target_arch = "x86_64"))]
|
|
||||||
criterion_group!(benches, bench_circuit_bootstrapping_cpu_ref_fft64,);
|
|
||||||
criterion_main!(benches);
|
criterion_main!(benches);
|
||||||
|
|||||||
@@ -8,10 +8,10 @@ use poulpy_core::{
|
|||||||
};
|
};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
use poulpy_cpu_avx::FFT64Avx as BackendImpl;
|
||||||
|
|
||||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
use poulpy_cpu_ref::FFT64Ref as BackendImpl;
|
||||||
|
|
||||||
use poulpy_hal::{
|
use poulpy_hal::{
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use poulpy_cpu_avx::FFT64Avx;
|
|||||||
use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
|
use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
|
||||||
|
|
||||||
static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock<test_suite::TestContext<CGGI, FFT64Avx>> =
|
static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock<test_suite::TestContext<CGGI, FFT64Avx>> =
|
||||||
LazyLock::new(|| test_suite::TestContext::<CGGI, FFT64Avx>::new());
|
LazyLock::new(test_suite::TestContext::<CGGI, FFT64Avx>::new);
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn glwe_blind_retriever() {
|
fn glwe_blind_retriever() {
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ use poulpy_cpu_ref::FFT64Ref;
|
|||||||
use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
|
use crate::bin_fhe::{bdd_arithmetic::tests::test_suite, blind_rotation::CGGI};
|
||||||
|
|
||||||
static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock<test_suite::TestContext<CGGI, FFT64Ref>> =
|
static TEST_CONTEXT_CGGI_FFT64_REF: LazyLock<test_suite::TestContext<CGGI, FFT64Ref>> =
|
||||||
LazyLock::new(|| test_suite::TestContext::<CGGI, FFT64Ref>::new());
|
LazyLock::new(test_suite::TestContext::<CGGI, FFT64Ref>::new);
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn glwe_blind_retriever() {
|
fn glwe_blind_retriever() {
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
pub mod test_suite;
|
pub mod test_suite;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
mod fft64_ref;
|
mod fft64_ref;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
mod fft64_avx;
|
mod fft64_avx;
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
mod fft64_ref;
|
mod fft64_ref;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
mod fft64_avx;
|
mod fft64_avx;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|||||||
@@ -1,9 +1,9 @@
|
|||||||
pub mod circuit_bootstrapping;
|
pub mod circuit_bootstrapping;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[cfg(not(any(target_arch = "x86_64", target_arch = "x86")))]
|
#[cfg(not(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma")))]
|
||||||
mod fft64_ref;
|
mod fft64_ref;
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
|
#[cfg(all(feature = "enable-avx", target_arch = "x86_64", target_feature = "avx2", target_feature = "fma"))]
|
||||||
mod fft64_avx;
|
mod fft64_avx;
|
||||||
|
|||||||
Reference in New Issue
Block a user