added spqlios as submodule

This commit is contained in:
Jean-Philippe Bossuat
2025-01-27 14:10:59 +01:00
parent 250d1a4942
commit c30f598776
244 changed files with 51 additions and 29899 deletions

.gitmodules (vendored new file, +3 lines)
View File

@@ -0,0 +1,3 @@
[submodule "base2k/spqlios-arithmetic"]
path = base2k/spqlios-arithmetic
url = https://github.com/tfhe/spqlios-arithmetic
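With the submodule declared above, a fresh checkout needs one extra step. A minimal sketch (the clone URL is a placeholder):

```
# Clone the repository together with the new submodule (URL is a placeholder):
git clone --recurse-submodules <repo-url>

# Or, in an existing checkout, fetch the submodule declared in .gitmodules:
git submodule update --init base2k/spqlios-arithmetic
```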

View File

@@ -1,2 +1,2 @@
@@ -1,2 +1,2 @@
 [workspace]
-members = ["math", "sampling", "spqlios", "utils"]
+members = ["base2k", "rns", "sampling", "utils"]

View File

@@ -1,18 +1,19 @@
 use criterion::{criterion_group, criterion_main, BenchmarkId, Criterion};
 use math::modulus::WordOps;
-use math::ring::Ring;
 use math::poly::Poly;
+use math::ring::Ring;
 
 fn ntt(c: &mut Criterion) {
-    fn runner<'a, const INPLACE: bool, const LAZY:bool>(ring: &'a Ring<u64>) -> Box<dyn FnMut() + 'a > {
+    fn runner<'a, const INPLACE: bool, const LAZY: bool>(
+        ring: &'a Ring<u64>,
+    ) -> Box<dyn FnMut() + 'a> {
         let mut a: Poly<u64> = ring.new_poly();
         for i in 0..a.n() {
             a.0[i] = i as u64;
         }
-        if INPLACE{
+        if INPLACE {
             Box::new(move || ring.ntt_inplace::<LAZY>(&mut a))
-        }else{
+        } else {
             let mut b: Poly<u64> = ring.new_poly();
             Box::new(move || ring.ntt::<LAZY>(&a, &mut b))
         }
@@ -24,33 +25,43 @@ fn ntt(c: &mut Criterion) {
     c.benchmark_group("ntt");
     for log_n in 10..17 {
-        let ring = Ring::new(1<<log_n, q, 1);
+        let ring = Ring::new(1 << log_n, q, 1);
         let runners: [(String, Box<dyn FnMut()>); 4] = [
-            (format!("inplace=true/LAZY=true/q={}", q.log2()), { runner::<true, true>(&ring) }),
-            (format!("inplace=true/LAZY=false/q={}", q.log2()), { runner::<true, false>(&ring) }),
-            (format!("inplace=false/LAZY=true/q={}", q.log2()), { runner::<false, true>(&ring) }),
-            (format!("inplace=false/LAZY=false/q={}", q.log2()), { runner::<false, false>(&ring) }),
+            (format!("inplace=true/LAZY=true/q={}", q.log2()), {
+                runner::<true, true>(&ring)
+            }),
+            (format!("inplace=true/LAZY=false/q={}", q.log2()), {
+                runner::<true, false>(&ring)
+            }),
+            (format!("inplace=false/LAZY=true/q={}", q.log2()), {
+                runner::<false, true>(&ring)
+            }),
+            (format!("inplace=false/LAZY=false/q={}", q.log2()), {
+                runner::<false, false>(&ring)
+            }),
         ];
         for (name, mut runner) in runners {
             let id: BenchmarkId = BenchmarkId::new(name, format!("n={}", 1 << log_n));
-            b.bench_with_input(id, &(), |b: &mut criterion::Bencher<'_>, _| b.iter(&mut runner));
+            b.bench_with_input(id, &(), |b: &mut criterion::Bencher<'_>, _| {
+                b.iter(&mut runner)
+            });
         }
     }
 }
 
 fn intt(c: &mut Criterion) {
-    fn runner<'a, const INPLACE: bool, const LAZY:bool>(ring: &'a Ring<u64>) -> Box<dyn FnMut() + 'a > {
+    fn runner<'a, const INPLACE: bool, const LAZY: bool>(
+        ring: &'a Ring<u64>,
+    ) -> Box<dyn FnMut() + 'a> {
         let mut a: Poly<u64> = ring.new_poly();
         for i in 0..a.n() {
             a.0[i] = i as u64;
         }
-        if INPLACE{
+        if INPLACE {
             Box::new(move || ring.intt_inplace::<LAZY>(&mut a))
-        }else{
+        } else {
             let mut b: Poly<u64> = ring.new_poly();
             Box::new(move || ring.intt::<LAZY>(&a, &mut b))
         }
@@ -62,26 +73,31 @@ fn intt(c: &mut Criterion) {
     c.benchmark_group("intt");
     for log_n in 10..17 {
-        let ring = Ring::new(1<<log_n, q, 1);
+        let ring = Ring::new(1 << log_n, q, 1);
         let runners: [(String, Box<dyn FnMut()>); 4] = [
-            (format!("inplace=true/LAZY=true/q={}", q.log2()), { runner::<true, true>(&ring) }),
-            (format!("inplace=true/LAZY=false/q={}", q.log2()), { runner::<true, false>(&ring) }),
-            (format!("inplace=false/LAZY=true/q={}", q.log2()), { runner::<false, true>(&ring) }),
-            (format!("inplace=false/LAZY=false/q={}", q.log2()), { runner::<false, false>(&ring) }),
+            (format!("inplace=true/LAZY=true/q={}", q.log2()), {
+                runner::<true, true>(&ring)
+            }),
+            (format!("inplace=true/LAZY=false/q={}", q.log2()), {
+                runner::<true, false>(&ring)
+            }),
+            (format!("inplace=false/LAZY=true/q={}", q.log2()), {
+                runner::<false, true>(&ring)
+            }),
+            (format!("inplace=false/LAZY=false/q={}", q.log2()), {
+                runner::<false, false>(&ring)
+            }),
         ];
         for (name, mut runner) in runners {
             let id: BenchmarkId = BenchmarkId::new(name, format!("n={}", 1 << log_n));
-            b.bench_with_input(id, &(), |b: &mut criterion::Bencher<'_>, _| b.iter(&mut runner));
+            b.bench_with_input(id, &(), |b: &mut criterion::Bencher<'_>, _| {
+                b.iter(&mut runner)
+            });
         }
     }
 }
 
-criterion_group!(
-    benches,
-    ntt,
-    intt,
-);
+criterion_group!(benches, ntt, intt,);
 criterion_main!(benches);
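The code changes above are formatting-only; the benchmarks themselves are unchanged. A minimal sketch of running them (assuming the groups registered above are ordinary Criterion bench targets):

```
# Run every Criterion bench target in the workspace:
cargo bench

# Or filter on the benchmark groups defined above ("ntt" / "intt"):
cargo bench -- ntt
cargo bench -- intt
```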

View File

@@ -1,14 +0,0 @@
# Use the Google style in this project.
BasedOnStyle: Google
# Some folks prefer to write "int& foo" while others prefer "int &foo". The
# Google Style Guide only asks for consistency within a project, we chose
# "int& foo" for this project:
DerivePointerAlignment: false
PointerAlignment: Left
# The Google Style Guide only asks for consistency w.r.t. "east const" vs.
# "west const" alignment of cv-qualifiers. In this project we use "east const".
QualifierAlignment: Left
ColumnLimit: 120

View File

@@ -1,4 +0,0 @@
cmake-build-*
.idea
build/

View File

@@ -1,69 +0,0 @@
cmake_minimum_required(VERSION 3.8)
project(spqlios)
# read the current version from the manifest file
file(READ "manifest.yaml" manifest)
string(REGEX MATCH "version: +(([0-9]+)\\.([0-9]+)\\.([0-9]+))" SPQLIOS_VERSION_BLAH ${manifest})
#message(STATUS "Version: ${SPQLIOS_VERSION_BLAH}")
set(SPQLIOS_VERSION ${CMAKE_MATCH_1})
set(SPQLIOS_VERSION_MAJOR ${CMAKE_MATCH_2})
set(SPQLIOS_VERSION_MINOR ${CMAKE_MATCH_3})
set(SPQLIOS_VERSION_PATCH ${CMAKE_MATCH_4})
message(STATUS "Compiling spqlios-fft version: ${SPQLIOS_VERSION_MAJOR}.${SPQLIOS_VERSION_MINOR}.${SPQLIOS_VERSION_PATCH}")
#set(ENABLE_SPQLIOS_F128 ON CACHE BOOL "Enable float128 via libquadmath")
set(WARNING_PARANOID ON CACHE BOOL "Treat all warnings as errors")
set(ENABLE_TESTING ON CACHE BOOL "Compiles unittests and integration tests")
set(DEVMODE_INSTALL OFF CACHE BOOL "Install private headers and testlib (mainly for CI)")
if (NOT CMAKE_BUILD_TYPE OR CMAKE_BUILD_TYPE STREQUAL "")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type: Release or Debug" FORCE)
endif()
message(STATUS "Build type: ${CMAKE_BUILD_TYPE}")
if (WARNING_PARANOID)
add_compile_options(-Wall -Werror -Wno-unused-command-line-argument)
endif()
message(STATUS "CMAKE_HOST_SYSTEM_NAME: ${CMAKE_HOST_SYSTEM_NAME}")
message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS "CMAKE_SYSTEM_NAME: ${CMAKE_SYSTEM_NAME}")
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
set(X86 ON)
set(AARCH64 OFF)
else ()
set(X86 OFF)
# set(ENABLE_SPQLIOS_F128 OFF) # float128 are only supported for x86 targets
endif ()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "(aarch64)|(arm64)")
set(AARCH64 ON)
endif ()
if (CMAKE_SYSTEM_NAME MATCHES "(Windows)|(MSYS)")
set(WIN32 ON)
endif ()
if (WIN32)
#overrides for win32
set(X86 OFF)
set(AARCH64 OFF)
set(X86_WIN32 ON)
else()
set(X86_WIN32 OFF)
set(WIN32 OFF)
endif (WIN32)
message(STATUS "--> WIN32: ${WIN32}")
message(STATUS "--> X86_WIN32: ${X86_WIN32}")
message(STATUS "--> X86_LINUX: ${X86}")
message(STATUS "--> AARCH64: ${AARCH64}")
# compiles the main library in spqlios
add_subdirectory(spqlios)
# compiles and activates unittests and itests
if (${ENABLE_TESTING})
enable_testing()
add_subdirectory(test)
endif()

View File

@@ -1,77 +0,0 @@
# Contributing to SPQlios-fft
The spqlios-fft team encourages contributions.
We encourage users to fix bugs, improve the documentation, write tests, enhance the code, or ask for new features.
We encourage researchers to contribute implementations of their FFT or NTT algorithms.
In the following, we give some guidance on how to contribute effectively.
## Communication ##
Communication in the spqlios-fft project happens mainly on [GitHub](https://github.com/tfhe/spqlios-fft/issues).
All communications are public, so please maintain professional behaviour in all published comments. See the
[Code of Conduct](https://www.contributor-covenant.org/version/2/1/code_of_conduct/) for guidelines.
## Reporting Bugs or Requesting Features ##
Bugs should be filed at [https://github.com/tfhe/spqlios-fft/issues](https://github.com/tfhe/spqlios-fft/issues).
Features can also be requested there; in that case, please ensure that the features you request are self-contained,
easy to define, and generic enough to be used in different use-cases. Please provide example use-cases if
possible.
## Setting up topic branches and generating pull requests
This section applies to people who already have write access to the repository. Specific instructions for pull requests
from public forks will be given later.
To implement some changes, please follow these steps (a concrete command sketch follows the list):
- Create a "topic branch". Usually, the branch name should be `username/small-title`,
or better `username/issuenumber-small-title`, where `issuenumber` is the number of
the GitHub issue being tackled.
- Push any needed commits to your branch. Make sure it compiles in `CMAKE_BUILD_TYPE=Debug` and `=Release`, with `-DWARNING_PARANOID=ON`.
- When the branch is nearly ready for review, please open a pull request, and add the label `check-on-arm`
- Do as many commits as necessary until all CI checks pass and all PR comments have been resolved.
> _During the process, you may optionally use `git rebase -i` to clean up your commit history. If you elect to do so,
please at the very least make sure that nobody else is working on or has forked from your branch: the conflicts it would generate
and the human hours needed to fix them are not worth it. `git merge` remains the preferred option._
- Finally, when all reviews are positive and all CI checks pass, you may merge your branch via the github webpage.
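A concrete sketch of this workflow (branch name and issue number are illustrative):

```
# Topic branch for a hypothetical issue #123:
git checkout -b alice/123-small-title

# ... commit as needed, then publish the branch:
git push -u origin alice/123-small-title

# Open the pull request on GitHub and add the `check-on-arm` label.
```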
### Keep your pull requests limited to a single issue
Pull requests should be as small/atomic as possible.
### Coding Conventions
* Please make sure that your code is formatted according to the `.clang-format` file and
that all files end with a newline character.
* Please make sure that all the functions declared in the public api have relevant doxygen comments.
Preferably, functions in the private apis should also contain a brief doxygen description.
### Versions and History
* **Stable API** The project uses semantic versioning on the functions that are listed as `stable` in the documentation. A version has
the form `x.y.z`
* a patch release that increments `z` does not modify the stable API.
* a minor release that increments `y` adds a new feature to the stable API.
* In the unlikely case where we need to change or remove a feature, we will trigger a major release that
increments `x`.
> _When applicable, we will mark such features as deprecated at least six months before the major release._
* **Experimental API** Features that are not part of the stable section in the documentation are experimental: you may test them at your own risk, but keep in mind that semantic versioning does not apply to them.
> _If you have a use-case that uses an experimental feature, we encourage
> you to tell us about it, so that this feature reaches the stable section faster!_
* **Version history** The current version is reported in `manifest.yaml`, any change of version comes with a tag on the main branch, and the history between releases is summarized in `Changelog.md`. It is the main source of truth for anyone who wishes to get insight into the history of the repository (not the commit graph).
> Note: _The commit graph of git is for git's internal use only. Its main purpose is to reduce potential merge conflicts to a minimum, even in scenarios where multiple features are developed in parallel: it may therefore be non-linear. If, as humans, we like to see a linear history, please read `Changelog.md` instead!_

View File

@@ -1,18 +0,0 @@
# Changelog
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [2.0.0] - 2024-08-21
- Initial release of the `vec_znx` (except convolution products), `vec_rnx` and `zn` apis.
- Hardware acceleration available: AVX2 (most parts)
- APIs are documented in the wiki and are in "beta mode": during the 2.x -> 3.x transition, functions whose API proves satisfactory in test projects will pass into "stable mode".
## [1.0.0] - 2023-07-18
- Initial release of the double precision fft on the reim and cplx backends
- Coeffs-space conversions cplx <-> znx32 and tnx32
- FFT-space conversions cplx <-> reim4 layouts
- FFT-space multiplications on the cplx, reim and reim4 layouts.
- In this first release, the only platform supported is linux x86_64 (generic C code, and avx2/fma). It compiles on arm64, but without any acceleration.

View File

@@ -1,201 +0,0 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@@ -1,65 +0,0 @@
# SPQlios library
The SPQlios library provides fast arithmetic for Fully Homomorphic Encryption and other lattice constructions that arise in post-quantum cryptography.
<img src="docs/api-full.svg">
Namely, it is divided into 4 sections:
* The low-level DFT section supports FFT over 64-bit floats, as well as NTT modulo one fixed 120-bit modulus. It is an upgrade of the original spqlios-fft module embedded in the TFHE library since 2016. The DFT section exposes the traditional DFT, inverse DFT, and coefficient-wise multiplications in DFT space.
* The VEC_ZNX section exposes fast algebra over vectors of small integer polynomials modulo $X^N+1$. It provides, in particular, efficient (prepared) vector-matrix products, scalar-vector products, convolution products, and element-wise products: operations that naturally occur on gadget-decomposed Ring-LWE coordinates.
* The RNX section is a simpler variant of VEC_ZNX that represents single polynomials modulo $X^N+1$ (over the reals or over the torus) when the coefficient precision fits in 64-bit doubles. The small vector-matrix API of the RNX section is particularly suited to reproducing the fastest CGGI-based bootstrappings.
* The ZN section focuses on vector and matrix algebra over scalars (used by scalar LWE or scalar key-switches, but also by non-ring schemes like Frodo, FrodoPIR, and SimplePIR).
### A high-value target for hardware acceleration
SPQlios is more than a library: it is also a good target for hardware developers.
On the one hand, the arithmetic operations defined in the library have a clear, standalone mathematical definition. At the same time, the amount of work in each operation is large enough that meaningful functions only require a few of them.
This makes the SPQlios API a high-value target for hardware acceleration aimed at FHE.
### SPQlios is not an FHE library, but a huge enabler
SPQlios itself is not an FHE library: there is no ciphertext, plaintext, or key. It is a mathematical library that exposes efficient algebra over polynomials. Using the exposed functions, it is possible to quickly build efficient FHE libraries supporting the main Ring-LWE-based schemes: BFV, BGV, CGGI, DM, CKKS.
## Dependencies
The SPQlios-FFT library is a C library that can be compiled with a standard C compiler and depends only on libc and libm. The API
can be used from regular C code, and from any other language via classical foreign-function interfaces.
The unit tests and integration tests are an optional part of the code and are written in C++. These tests rely on the
[```benchmark```](https://github.com/google/benchmark) and [```gtest```](https://github.com/google/googletest) libraries, and therefore require a C++17 compiler.
Currently, the project has been tested with gcc/g++ >= 11.3.0 under Linux (x86_64). In the future, we plan to
extend compatibility to other compilers, platforms, and operating systems.
## Installation
The library uses a classical ```cmake``` build mechanism: use ```cmake``` to create a ```build``` folder in the top-level directory and run ```make``` from inside it. This assumes that ```cmake``` and an up-to-date C++ compiler (e.g. g++ >= 11.3.0) are already installed on the system.
It will compile the shared library in optimized mode, and ```make install``` installs it to the desired prefix folder (by default ```/usr/local/lib```).
If you want additional compile options (e.g. a different installation folder, debug mode, tests), you need to run cmake manually and pass the desired options:
```
mkdir build
cd build
cmake .. -DCMAKE_INSTALL_PREFIX=/usr/
make
```
The available options are the following:
| Variable Name        | Default      | Description                                                   |
| -------------------- | ------------ | ------------------------------------------------------------- |
| CMAKE_INSTALL_PREFIX | */usr/local* | installation folder (libs go in lib/ and headers in include/) |
| WARNING_PARANOID     | ON           | all warnings are shown and treated as errors                  |
| ENABLE_TESTING       | ON           | compiles unit tests and integration tests                     |
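For example, a sketch of a debug build with tests enabled (the option names come from the table and the CMakeLists.txt above):

```
mkdir build
cd build
cmake .. -DCMAKE_BUILD_TYPE=Debug -DENABLE_TESTING=ON -DWARNING_PARANOID=ON
make
make test   # runs the unit and integration tests
```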
------
<img src="docs/logo-sandboxaq-black.svg">
<img src="docs/logo-inpher1.png">

File diff suppressed because one or more lines are too long
(deleted image, 550 KiB)

Binary file not shown.
(deleted image, 24 KiB)

Binary file not shown.
(deleted image, 24 KiB)

View File

@@ -1,139 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 24.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg
version="1.1"
id="Layer_1"
x="0px"
y="0px"
viewBox="0 0 270 49.4"
style="enable-background:new 0 0 270 49.4;"
xml:space="preserve"
sodipodi:docname="logo-sandboxaq-black.svg"
inkscape:version="1.3.2 (1:1.3.2+202311252150+091e20ef0f)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg"><defs
id="defs9839">
</defs><sodipodi:namedview
id="namedview9837"
pagecolor="#ffffff"
bordercolor="#000000"
borderopacity="0.25"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
showgrid="false"
inkscape:zoom="1.194332"
inkscape:cx="135.64068"
inkscape:cy="25.118645"
inkscape:window-width="804"
inkscape:window-height="436"
inkscape:window-x="190"
inkscape:window-y="27"
inkscape:window-maximized="0"
inkscape:current-layer="Layer_1" />
<style
type="text/css"
id="style9786">
.st0{fill:#EBB028;}
.st1{fill:#FFFFFF;}
</style>
<text
transform="matrix(1 0 0 1 393.832 -491.944)"
class="st1"
style="font-family:'Satoshi-Medium'; font-size:86.2078px;"
id="text9788">SANDBOX </text>
<text
transform="matrix(1 0 0 1 896.332 -491.944)"
class="st1"
style="font-family:'Satoshi-Black'; font-size:86.2078px;"
id="text9790">AQ</text>
<g
id="g9808">
<g
id="g9800">
<g
id="g9798">
<path
class="st0"
d="m 8.9,9.7 v 3.9 l 29.6,17.1 v 2.7 c 0,1.2 -0.6,2.3 -1.6,2.9 L 31,39.8 v -4 L 1.4,18.6 V 15.9 C 1.4,14.7 2,13.6 3.1,13 Z"
id="path9792" />
<path
class="st0"
d="M 18.3,45.1 3.1,36.3 C 2.1,35.7 1.4,34.6 1.4,33.4 V 26 L 28,41.4 21.5,45.1 c -0.9,0.6 -2.2,0.6 -3.2,0 z"
id="path9794" />
<path
class="st0"
d="m 21.6,4.3 15.2,8.8 c 1,0.6 1.7,1.7 1.7,2.9 v 7.5 L 11.8,8 18.3,4.3 c 1,-0.6 2.3,-0.6 3.3,0 z"
id="path9796" />
</g>
</g>
<g
id="g9806">
<polygon
class="st0"
points="248.1,23.2 248.1,30 251.4,33.8 257.3,33.8 "
id="polygon9802" />
<path
class="st0"
d="m 246.9,31 -0.1,-0.1 h -0.1 c -0.2,0 -0.4,0 -0.6,0 -3.5,0 -5.7,-2.6 -5.7,-6.7 0,-4.1 2.2,-6.7 5.7,-6.7 3.5,0 5.7,2.6 5.7,6.7 0,0.3 0,0.6 0,0.9 l 3.6,4.2 c 0.7,-1.5 1,-3.2 1,-5.1 0,-6.5 -4.2,-11 -10.3,-11 -6.1,0 -10.3,4.5 -10.3,11 0,6.5 4.2,11 10.3,11 1.2,0 2.3,-0.2 3.4,-0.5 l 0.5,-0.2 z"
id="path9804" />
</g>
</g><g
id="g9824"
style="fill:#1a1a1a">
<path
class="st1"
d="m 58.7,13.2 c 4.6,0 7.4,2.5 7.4,6.5 h -4.6 c 0,-1.5 -1.1,-2.4 -2.9,-2.4 -1.9,0 -3.1,0.9 -3.1,2.3 0,1.3 0.7,1.9 2.2,2.2 l 3.2,0.7 c 3.8,0.8 5.6,2.6 5.6,5.9 0,4.1 -3.2,6.8 -8.1,6.8 -4.7,0 -7.8,-2.6 -7.8,-6.5 h 4.6 c 0,1.6 1.1,2.4 3.2,2.4 2.1,0 3.4,-0.8 3.4,-2.2 0,-1.2 -0.5,-1.8 -2,-2.1 l -3.2,-0.7 c -3.8,-0.8 -5.7,-2.9 -5.7,-6.4 0,-3.7 3.2,-6.5 7.8,-6.5 z"
id="path9810"
style="fill:#1a1a1a" />
<path
class="st1"
d="M 70.4,34.9 78,13.6 h 4.5 l 7.6,21.3 h -4.9 l -1.5,-4.5 h -6.9 l -1.5,4.5 z m 7.7,-8.4 h 4.2 L 80.8,22 c -0.2,-0.7 -0.5,-1.6 -0.6,-2.1 -0.1,0.5 -0.3,1.3 -0.6,2.1 z"
id="path9812"
style="fill:#1a1a1a" />
<path
class="st1"
d="M 95.3,34.9 V 13.6 h 4.6 l 9,13.5 V 13.6 h 4.6 v 21.3 h -4.6 l -9,-13.5 v 13.5 z"
id="path9814"
style="fill:#1a1a1a" />
<path
class="st1"
d="M 120.7,34.9 V 13.6 h 8 c 6.2,0 10.6,4.4 10.6,10.7 0,6.2 -4.2,10.6 -10.3,10.6 z m 4.7,-17 v 12.6 h 3.2 c 3.7,0 5.8,-2.3 5.8,-6.3 0,-4 -2.3,-6.4 -6.1,-6.4 h -2.9 z"
id="path9816"
style="fill:#1a1a1a" />
<path
class="st1"
d="m 145.4,13.6 h 8.8 c 4.3,0 6.9,2.2 6.9,5.9 0,2.3 -1,3.9 -3,4.8 2.1,0.7 3.2,2.3 3.2,4.7 0,3.8 -2.5,5.9 -7.1,5.9 h -8.8 z m 4.7,4.1 v 4.6 h 3.7 c 1.7,0 2.6,-0.8 2.6,-2.4 0,-1.5 -0.9,-2.3 -2.6,-2.3 h -3.7 z m 0,8.5 v 4.6 h 3.9 c 1.7,0 2.6,-0.8 2.6,-2.4 0,-1.4 -0.9,-2.2 -2.6,-2.2 z"
id="path9818"
style="fill:#1a1a1a" />
<path
class="st1"
d="m 176.5,35.2 c -6.1,0 -10.4,-4.5 -10.4,-11 0,-6.5 4.3,-11 10.4,-11 6.2,0 10.4,4.5 10.4,11 0,6.5 -4.2,11 -10.4,11 z m 0.1,-17.5 c -3.4,0 -5.5,2.4 -5.5,6.5 0,4.1 2.1,6.5 5.5,6.5 3.4,0 5.5,-2.5 5.5,-6.5 0,-4 -2.1,-6.5 -5.5,-6.5 z"
id="path9820"
style="fill:#1a1a1a" />
<path
class="st1"
d="m 190.4,13.6 h 5.5 l 1.8,2.8 c 0.8,1.2 1.5,2.5 2.5,4.3 l 4.3,-7 h 5.4 l -6.7,10.6 6.7,10.6 h -5.5 L 203,32.7 c -1.1,-1.7 -1.8,-3 -2.8,-4.9 l -4.6,7.1 h -5.5 l 7.1,-10.6 z"
id="path9822"
style="fill:#1a1a1a" />
</g><path
class="st0"
d="m 229,34.9 h 4.7 L 226,13.6 h -4.3 L 214,34.8 h 4.6 l 1.6,-4.5 h 7.1 z m -5.1,-14.6 c 0,0 0,0 0,0 0,-0.1 0,-0.1 0,0 l 2.2,6.2 h -4.4 z"
id="path9826" /><g
id="g9832">
<path
class="st1"
d="m 259.5,11.2 h 3.9 v 1 h -1.3 v 3.1 h -1.3 v -3.1 h -1.3 z m 4.5,0 h 1.7 l 0.6,2.5 0.6,-2.5 h 1.7 v 4.1 h -1 v -3.1 l -0.8,3.1 h -0.9 l -0.8,-3.1 v 3.1 h -1 v -4.1 z"
id="path9830" />
</g>
</svg>

(deleted image preview: 5.0 KiB)

View File

@@ -1,133 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Generator: Adobe Illustrator 24.2.1, SVG Export Plug-In . SVG Version: 6.00 Build 0) -->
<svg
version="1.1"
id="Layer_1"
x="0px"
y="0px"
viewBox="0 0 270 49.4"
style="enable-background:new 0 0 270 49.4;"
xml:space="preserve"
sodipodi:docname="logo-sandboxaq-white.svg"
inkscape:version="1.2.2 (1:1.2.2+202212051551+b0a8486541)"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg"><defs
id="defs9839" /><sodipodi:namedview
id="namedview9837"
pagecolor="#ffffff"
bordercolor="#000000"
borderopacity="0.25"
inkscape:showpageshadow="2"
inkscape:pageopacity="0.0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
showgrid="false"
inkscape:zoom="2.3886639"
inkscape:cx="135.22204"
inkscape:cy="25.327967"
inkscape:window-width="1072"
inkscape:window-height="688"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="Layer_1" />
<style
type="text/css"
id="style9786">
.st0{fill:#EBB028;}
.st1{fill:#FFFFFF;}
</style>
<text
transform="matrix(1 0 0 1 393.832 -491.944)"
class="st1"
style="font-family:'Satoshi-Medium'; font-size:86.2078px;"
id="text9788">SANDBOX </text>
<text
transform="matrix(1 0 0 1 896.332 -491.944)"
class="st1"
style="font-family:'Satoshi-Black'; font-size:86.2078px;"
id="text9790">AQ</text>
<g
id="g9834">
<g
id="g9828">
<g
id="g9808">
<g
id="g9800">
<g
id="g9798">
<path
class="st0"
d="M8.9,9.7v3.9l29.6,17.1v2.7c0,1.2-0.6,2.3-1.6,2.9L31,39.8v-4L1.4,18.6v-2.7c0-1.2,0.6-2.3,1.7-2.9 L8.9,9.7z"
id="path9792" />
<path
class="st0"
d="M18.3,45.1L3.1,36.3c-1-0.6-1.7-1.7-1.7-2.9V26L28,41.4l-6.5,3.7C20.6,45.7,19.3,45.7,18.3,45.1z"
id="path9794" />
<path
class="st0"
d="M21.6,4.3l15.2,8.8c1,0.6,1.7,1.7,1.7,2.9v7.5L11.8,8l6.5-3.7C19.3,3.7,20.6,3.7,21.6,4.3z"
id="path9796" />
</g>
</g>
<g
id="g9806">
<polygon
class="st0"
points="248.1,23.2 248.1,30 251.4,33.8 257.3,33.8 "
id="polygon9802" />
<path
class="st0"
d="M246.9,31l-0.1-0.1l-0.1,0c-0.2,0-0.4,0-0.6,0c-3.5,0-5.7-2.6-5.7-6.7c0-4.1,2.2-6.7,5.7-6.7 s5.7,2.6,5.7,6.7c0,0.3,0,0.6,0,0.9l3.6,4.2c0.7-1.5,1-3.2,1-5.1c0-6.5-4.2-11-10.3-11c-6.1,0-10.3,4.5-10.3,11s4.2,11,10.3,11 c1.2,0,2.3-0.2,3.4-0.5l0.5-0.2L246.9,31z"
id="path9804" />
</g>
</g>
<g
id="g9824">
<path
class="st1"
d="M58.7,13.2c4.6,0,7.4,2.5,7.4,6.5h-4.6c0-1.5-1.1-2.4-2.9-2.4c-1.9,0-3.1,0.9-3.1,2.3c0,1.3,0.7,1.9,2.2,2.2 l3.2,0.7c3.8,0.8,5.6,2.6,5.6,5.9c0,4.1-3.2,6.8-8.1,6.8c-4.7,0-7.8-2.6-7.8-6.5h4.6c0,1.6,1.1,2.4,3.2,2.4 c2.1,0,3.4-0.8,3.4-2.2c0-1.2-0.5-1.8-2-2.1l-3.2-0.7c-3.8-0.8-5.7-2.9-5.7-6.4C50.9,16,54.1,13.2,58.7,13.2z"
id="path9810" />
<path
class="st1"
d="M70.4,34.9L78,13.6h4.5l7.6,21.3h-4.9l-1.5-4.5h-6.9l-1.5,4.5H70.4z M78.1,26.5h4.2L80.8,22 c-0.2-0.7-0.5-1.6-0.6-2.1c-0.1,0.5-0.3,1.3-0.6,2.1L78.1,26.5z"
id="path9812" />
<path
class="st1"
d="M95.3,34.9V13.6h4.6l9,13.5V13.6h4.6v21.3h-4.6l-9-13.5v13.5H95.3z"
id="path9814" />
<path
class="st1"
d="M120.7,34.9V13.6h8c6.2,0,10.6,4.4,10.6,10.7c0,6.2-4.2,10.6-10.3,10.6H120.7z M125.4,17.9v12.6h3.2 c3.7,0,5.8-2.3,5.8-6.3c0-4-2.3-6.4-6.1-6.4H125.4z"
id="path9816" />
<path
class="st1"
d="M145.4,13.6h8.8c4.3,0,6.9,2.2,6.9,5.9c0,2.3-1,3.9-3,4.8c2.1,0.7,3.2,2.3,3.2,4.7c0,3.8-2.5,5.9-7.1,5.9 h-8.8V13.6z M150.1,17.7v4.6h3.7c1.7,0,2.6-0.8,2.6-2.4c0-1.5-0.9-2.3-2.6-2.3H150.1z M150.1,26.2v4.6h3.9c1.7,0,2.6-0.8,2.6-2.4 c0-1.4-0.9-2.2-2.6-2.2H150.1z"
id="path9818" />
<path
class="st1"
d="M176.5,35.2c-6.1,0-10.4-4.5-10.4-11s4.3-11,10.4-11c6.2,0,10.4,4.5,10.4,11S182.7,35.2,176.5,35.2z M176.6,17.7c-3.4,0-5.5,2.4-5.5,6.5c0,4.1,2.1,6.5,5.5,6.5c3.4,0,5.5-2.5,5.5-6.5C182.1,20.2,180,17.7,176.6,17.7z"
id="path9820" />
<path
class="st1"
d="M190.4,13.6h5.5l1.8,2.8c0.8,1.2,1.5,2.5,2.5,4.3l4.3-7h5.4l-6.7,10.6l6.7,10.6h-5.5l-1.4-2.2 c-1.1-1.7-1.8-3-2.8-4.9l-4.6,7.1h-5.5l7.1-10.6L190.4,13.6z"
id="path9822" />
</g>
<path
class="st0"
d="M229,34.9h4.7L226,13.6h-4.3l-7.7,21.2h4.6l1.6-4.5h7.1L229,34.9z M223.9,20.3 C223.9,20.3,223.9,20.3,223.9,20.3C223.9,20.2,223.9,20.2,223.9,20.3l2.2,6.2h-4.4L223.9,20.3z"
id="path9826" />
</g>
<g
id="g9832">
<path
class="st1"
d="M259.5,11.2h3.9v1h-1.3v3.1h-1.3v-3.1h-1.3V11.2L259.5,11.2z M264,11.2h1.7l0.6,2.5l0.6-2.5h1.7v4.1h-1v-3.1 l-0.8,3.1h-0.9l-0.8-3.1v3.1h-1V11.2L264,11.2z"
id="path9830" />
</g>
</g>
</svg>

(deleted image preview: 4.7 KiB)

View File

@@ -1,2 +0,0 @@
library: spqlios-fft
version: 2.0.0

View File

@@ -1,27 +0,0 @@
#!/bin/sh
# this script generates one tag if there is a version change in manifest.yaml
cd `dirname $0`/..
if [ "v$1" = "v-y" ]; then
echo "production mode!";
fi
changes=`git diff HEAD~1..HEAD -- manifest.yaml | grep 'version:'`
oldversion=$(echo "$changes" | grep '^-version:' | cut '-d ' -f2)
version=$(echo "$changes" | grep '^+version:' | cut '-d ' -f2)
echo "Versions: $oldversion --> $version"
if [ "v$oldversion" = "v$version" ]; then
echo "Same version - nothing to do"; exit 0;
fi
if [ "v$1" = "v-y" ]; then
git config user.name github-actions
git config user.email github-actions@github.com
git tag -a "v$version" -m "Version $version"
git push origin "v$version"
else
cat <<EOF
# the script would do:
git tag -a "v$version" -m "Version $version"
git push origin "v$version"
EOF
fi
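Usage, as implied by the `$1` check above (the script's file name is not shown here, so the path below is hypothetical):

```
# Dry run: only prints the tag/push commands it would execute
sh scripts/make-tag.sh

# Production mode (-y): actually creates and pushes the tag
sh scripts/make-tag.sh -y
```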

View File

@@ -1,102 +0,0 @@
#!/bin/sh
# ONLY USE A PREFIX YOU ARE CONFIDENT YOU CAN WIPE OUT ENTIRELY
CI_INSTALL_PREFIX=/opt/spqlios
CI_REPO_URL=https://spq-dav.algonics.net/ci
WORKDIR=`pwd`
if [ "x$DESTDIR" = "x" ]; then
DESTDIR=/
else
mkdir -p $DESTDIR
DESTDIR=`realpath $DESTDIR`
fi
DIR=`dirname "$0"`
cd $DIR/..
DIR=`pwd`
FULL_UNAME=`uname -a | tr '[A-Z]' '[a-z]'`
HOST=`echo $FULL_UNAME | sed 's/ .*//'`
ARCH=none
case "$HOST" in
*linux*)
DISTRIB=`lsb_release -c | awk '{print $2}' | tr '[A-Z]' '[a-z]'`
HOST=linux-$DISTRIB
;;
*darwin*)
HOST=darwin
;;
*mingw*|*msys*)
DISTRIB=`echo $MSYSTEM | tr '[A-Z]' '[a-z]'`
HOST=msys64-$DISTRIB
;;
*)
echo "Host unknown: $HOST";
exit 1
esac
case "$FULL_UNAME" in
*x86_64*)
ARCH=x86_64
;;
*aarch64*)
ARCH=aarch64
;;
*arm64*)
ARCH=arm64
;;
*)
echo "Architecture unknown: $FULL_UNAME";
exit 1
esac
UNAME="$HOST-$ARCH"
CMH=
if [ -d lib/spqlios/.git ]; then
CMH=`git submodule status | sed 's/\(..........\).*/\1/'`
else
CMH=`git rev-parse HEAD | sed 's/\(..........\).*/\1/'`
fi
FNAME=spqlios-arithmetic-$CMH-$UNAME.tar.gz
cat <<EOF
================= CI MINI-PACKAGER ==================
Work Dir: WORKDIR=$WORKDIR
Spq Dir: DIR=$DIR
Install Root: DESTDIR=$DESTDIR
Install Prefix: CI_INSTALL_PREFIX=$CI_INSTALL_PREFIX
Archive Name: FNAME=$FNAME
CI WebDav: CI_REPO_URL=$CI_REPO_URL
=====================================================
EOF
if [ "x$1" = "xcreate" ]; then
rm -rf dist
cmake -B build -S . -DCMAKE_INSTALL_PREFIX="$CI_INSTALL_PREFIX" -DCMAKE_BUILD_TYPE=Release -DENABLE_TESTING=ON -DWARNING_PARANOID=ON -DDEVMODE_INSTALL=ON || exit 1
cmake --build build || exit 1
rm -rf "$DIR/dist" 2>/dev/null
rm -f "$DIR/$FNAME" 2>/dev/null
DESTDIR="$DIR/dist" cmake --install build || exit 1
if [ -d "$DIR/dist$CI_INSTALL_PREFIX" ]; then
tar -C "$DIR/dist" -cvzf "$DIR/$FNAME" .
else
# fix since msys can mess up the paths
REAL_DEST=`find "$DIR/dist" -type d -exec test -d "{}$CI_INSTALL_PREFIX" \; -print`
echo "REAL_DEST: $REAL_DEST"
[ -d "$REAL_DEST$CI_INSTALL_PREFIX" ] && tar -C "$REAL_DEST" -cvzf "$DIR/$FNAME" .
fi
[ -f "$DIR/$FNAME" ] || { echo "failed to create $DIR/$FNAME"; exit 1; }
[ "x$CI_CREDS" = "x" ] && { echo "CI_CREDS is not set: not uploading"; exit 1; }
curl -u "$CI_CREDS" -T "$DIR/$FNAME" "$CI_REPO_URL/$FNAME"
fi
if [ "x$1" = "xinstall" ]; then
[ "x$CI_CREDS" = "x" ] && { echo "CI_CREDS is not set: not downloading"; exit 1; }
# cleaning
rm -rf "$DESTDIR$CI_INSTALL_PREFIX"/* 2>/dev/null
rm -f "$DIR/$FNAME" 2>/dev/null
# downloading
curl -u "$CI_CREDS" -o "$DIR/$FNAME" "$CI_REPO_URL/$FNAME"
[ -f "$DIR/$FNAME" ] || { echo "failed to download $DIR/$FNAME"; exit 0; }
# installing
mkdir -p $DESTDIR
tar -C "$DESTDIR" -xvzf "$DIR/$FNAME"
exit 0
fi
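Invocation sketch following the `create`/`install` branches above (the script path is hypothetical and the credentials are placeholders):

```
# Build, package, and upload the artifact (requires WebDAV credentials)
CI_CREDS=user:password sh scripts/ci-package.sh create

# Download the matching artifact and unpack it into a staging root
DESTDIR=/tmp/spq-root sh scripts/ci-package.sh install
```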

View File

@@ -1,181 +0,0 @@
#!/usr/bin/perl
##
## This script will help update manifest.yaml and Changelog.md before a release
## Any merge to master that changes the version line in manifest.yaml
## is considered a new release.
##
## When ready to make a release, please run ./scripts/prepare-release
## and commit push the final result!
use File::Basename;
use Cwd 'abs_path';
# find its way to the root of git's repository
my $scriptsdirname = dirname(abs_path(__FILE__));
chdir "$scriptsdirname/..";
print "✓ Entering directory:".`pwd`;
# ensures that the current branch is ahead of origin/main
my $diff= `git diff`;
chop $diff;
if ($diff =~ /./) {
die("ERROR: Please commit all the changes before calling the prepare-release script.");
} else {
print("✓ All changes are committed.\n");
}
system("git fetch origin");
my $vcount = `git rev-list --left-right --count origin/main...HEAD`;
$vcount =~ /^([0-9]+)[ \t]*([0-9]+)$/;
if ($1>0) {
die("ERROR: the current HEAD is not ahead of origin/main. Please use git merge origin/main.\n");
} else {
print("✓ Current HEAD is up to date with origin/main.\n");
}
mkdir ".changes";
my $currentbranch = `git rev-parse --abbrev-ref HEAD`;
chop $currentbranch;
$currentbranch =~ s/[^a-zA-Z._-]+/-/g;
my $changefile=".changes/$currentbranch.md";
my $origmanifestfile=".changes/$currentbranch--manifest.yaml";
my $origchangelogfile=".changes/$currentbranch--Changelog.md";
my $exit_code=system("wget -O $origmanifestfile https://raw.githubusercontent.com/tfhe/spqlios-fft/main/manifest.yaml");
if ($exit_code!=0 or ! -f $origmanifestfile) {
die("ERROR: failed to download manifest.yaml");
}
$exit_code=system("wget -O $origchangelogfile https://raw.githubusercontent.com/tfhe/spqlios-fft/main/Changelog.md");
if ($exit_code!=0 or ! -f $origchangelogfile) {
die("ERROR: failed to download Changelog.md");
}
# read the current version (from origin/main manifest)
my $vmajor = 0;
my $vminor = 0;
my $vpatch = 0;
my $versionline = `grep '^version: ' $origmanifestfile | cut -d" " -f2`;
chop $versionline;
if (not $versionline =~ /^([0-9]+)\.([0-9]+)\.([0-9]+)$/) {
die("ERROR: invalid version in manifest file: $versionline\n");
} else {
$vmajor = int($1);
$vminor = int($2);
$vpatch = int($3);
}
print "Version in manifest file: $vmajor.$vminor.$vpatch\n";
if (not -f $changefile) {
## create a changes file
open F,">$changefile";
print F "# Changefile for branch $currentbranch\n\n";
print F "## Type of release (major,minor,patch)?\n\n";
print F "releasetype: patch\n\n";
print F "## What has changed (please edit)?\n\n";
print F "- This has changed.\n";
close F;
}
system("editor $changefile");
# compute the new version
my $nvmajor;
my $nvminor;
my $nvpatch;
my $changelog;
my $recordchangelog=0;
open F,"$changefile";
while ($line=<F>) {
chop $line;
if ($recordchangelog) {
($line =~ /^$/) and next;
$changelog .= "$line\n";
next;
}
if ($line =~ /^releasetype *: *patch *$/) {
$nvmajor=$vmajor;
$nvminor=$vminor;
$nvpatch=$vpatch+1;
}
if ($line =~ /^releasetype *: *minor *$/) {
$nvmajor=$vmajor;
$nvminor=$vminor+1;
$nvpatch=0;
}
if ($line =~ /^releasetype *: *major *$/) {
$nvmajor=$vmajor+1;
$nvminor=0;
$nvpatch=0;
}
if ($line =~ /^## What has changed/) {
$recordchangelog=1;
}
}
close F;
print "New version: $nvmajor.$nvminor.$nvpatch\n";
print "Changes:\n$changelog";
# updating manifest.yaml
open F,"manifest.yaml";
open G,">.changes/manifest.yaml";
while ($line=<F>) {
if ($line =~ /^version *: */) {
print G "version: $nvmajor.$nvminor.$nvpatch\n";
next;
}
print G $line;
}
close F;
close G;
# updating Changelog.md
open F,"$origchangelogfile";
open G,">.changes/Changelog.md";
print G <<EOF
# Changelog
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
EOF
;
print G "## [$nvmajor.$nvminor.$nvpatch] - ".`date '+%Y-%m-%d'`."\n";
print G "$changelog\n";
my $skip_section=1;
while ($line=<F>) {
if ($line =~ /^## +\[([0-9]+)\.([0-9]+)\.([0-9]+)\] +/) {
if ($1>$nvmajor) {
die("ERROR: found larger version $1.$2.$3 in the Changelog.md\n");
} elsif ($1<$nvmajor) {
$skip_section=0;
} elsif ($2>$nvminor) {
die("ERROR: found larger version $1.$2.$3 in the Changelog.md\n");
} elsif ($2<$nvminor) {
$skip_section=0;
} elsif ($3>$nvpatch) {
die("ERROR: found larger version $1.$2.$3 in the Changelog.md\n");
} elsif ($3<$nvpatch) {
$skip_section=0;
} else {
$skip_section=1;
}
}
($skip_section) and next;
print G $line;
}
close F;
close G;
print "-------------------------------------\n";
print "THIS WILL BE UPDATED:\n";
print "-------------------------------------\n";
system("diff -u manifest.yaml .changes/manifest.yaml");
system("diff -u Changelog.md .changes/Changelog.md");
print "-------------------------------------\n";
print "To proceed: press <enter> otherwise <CTRL+C>\n";
my $bla;
$bla=<STDIN>;
system("cp -vf .changes/manifest.yaml manifest.yaml");
system("cp -vf .changes/Changelog.md Changelog.md");
system("git commit -a -m \"Update version and changelog.\"");
system("git push");
print("✓ Changes have been committed and pushed!\n");
print("✓ A new release will be created when this branch is merged to main.\n");

View File

@@ -1,223 +0,0 @@
enable_language(ASM)
# C source files that are compiled for all targets (i.e. reference code)
set(SRCS_GENERIC
commons.c
commons_private.c
coeffs/coeffs_arithmetic.c
arithmetic/vec_znx.c
arithmetic/vec_znx_dft.c
arithmetic/vector_matrix_product.c
cplx/cplx_common.c
cplx/cplx_conversions.c
cplx/cplx_fft_asserts.c
cplx/cplx_fft_ref.c
cplx/cplx_fftvec_ref.c
cplx/cplx_ifft_ref.c
cplx/spqlios_cplx_fft.c
reim4/reim4_arithmetic_ref.c
reim4/reim4_fftvec_addmul_ref.c
reim4/reim4_fftvec_conv_ref.c
reim/reim_conversions.c
reim/reim_fft_ifft.c
reim/reim_fft_ref.c
reim/reim_fftvec_addmul_ref.c
reim/reim_ifft_ref.c
reim/reim_to_tnx_ref.c
q120/q120_ntt.c
q120/q120_arithmetic_ref.c
q120/q120_arithmetic_simple.c
arithmetic/scalar_vector_product.c
arithmetic/vec_znx_big.c
arithmetic/znx_small.c
arithmetic/module_api.c
arithmetic/zn_vmp_int8_ref.c
arithmetic/zn_vmp_int16_ref.c
arithmetic/zn_vmp_int32_ref.c
arithmetic/zn_vmp_ref.c
arithmetic/zn_api.c
arithmetic/zn_conversions_ref.c
arithmetic/zn_approxdecomp_ref.c
arithmetic/vec_rnx_api.c
arithmetic/vec_rnx_conversions_ref.c
arithmetic/vec_rnx_svp_ref.c
reim/reim_execute.c
cplx/cplx_execute.c
reim4/reim4_execute.c
arithmetic/vec_rnx_arithmetic.c
arithmetic/vec_rnx_approxdecomp_ref.c
arithmetic/vec_rnx_vmp_ref.c
)
# C or assembly source files compiled only on x86 targets
set(SRCS_X86
)
# C or assembly source files compiled only on aarch64 targets
set(SRCS_AARCH64
cplx/cplx_fallbacks_aarch64.c
reim/reim_fallbacks_aarch64.c
reim4/reim4_fallbacks_aarch64.c
q120/q120_fallbacks_aarch64.c
reim/reim_fft_neon.c
)
# C or assembly source files compiled only on x86: avx, avx2, fma targets
set(SRCS_FMA_C
arithmetic/vector_matrix_product_avx.c
cplx/cplx_conversions_avx2_fma.c
cplx/cplx_fft_avx2_fma.c
cplx/cplx_fft_sse.c
cplx/cplx_fftvec_avx2_fma.c
cplx/cplx_ifft_avx2_fma.c
reim4/reim4_arithmetic_avx2.c
reim4/reim4_fftvec_conv_fma.c
reim4/reim4_fftvec_addmul_fma.c
reim/reim_conversions_avx.c
reim/reim_fft4_avx_fma.c
reim/reim_fft8_avx_fma.c
reim/reim_ifft4_avx_fma.c
reim/reim_ifft8_avx_fma.c
reim/reim_fft_avx2.c
reim/reim_ifft_avx2.c
reim/reim_to_tnx_avx.c
reim/reim_fftvec_addmul_fma.c
)
set(SRCS_FMA_ASM
cplx/cplx_fft16_avx_fma.s
cplx/cplx_ifft16_avx_fma.s
reim/reim_fft16_avx_fma.s
reim/reim_ifft16_avx_fma.s
)
set(SRCS_FMA_WIN32_ASM
cplx/cplx_fft16_avx_fma_win32.s
cplx/cplx_ifft16_avx_fma_win32.s
reim/reim_fft16_avx_fma_win32.s
reim/reim_ifft16_avx_fma_win32.s
)
set_source_files_properties(${SRCS_FMA_C} PROPERTIES COMPILE_OPTIONS "-mfma;-mavx;-mavx2")
set_source_files_properties(${SRCS_FMA_ASM} PROPERTIES COMPILE_OPTIONS "-mfma;-mavx;-mavx2")
# C or assembly source files compiled only on x86: avx512f/vl/dq + fma targets
set(SRCS_AVX512
cplx/cplx_fft_avx512.c
)
set_source_files_properties(${SRCS_AVX512} PROPERTIES COMPILE_OPTIONS "-mfma;-mavx512f;-mavx512vl;-mavx512dq")
# C or assembly source files compiled only on x86: avx2 + bmi targets
set(SRCS_AVX2
arithmetic/vec_znx_avx.c
coeffs/coeffs_arithmetic_avx.c
arithmetic/vec_znx_dft_avx2.c
arithmetic/zn_vmp_int8_avx.c
arithmetic/zn_vmp_int16_avx.c
arithmetic/zn_vmp_int32_avx.c
q120/q120_arithmetic_avx2.c
q120/q120_ntt_avx2.c
arithmetic/vec_rnx_arithmetic_avx.c
arithmetic/vec_rnx_approxdecomp_avx.c
arithmetic/vec_rnx_vmp_avx.c
)
set_source_files_properties(${SRCS_AVX2} PROPERTIES COMPILE_OPTIONS "-mbmi2;-mavx2")
# C source files on float128 via libquadmath on x86 targets targets
set(SRCS_F128
cplx_f128/cplx_fft_f128.c
cplx_f128/cplx_fft_f128.h
)
# H header files containing the public API (these headers are installed)
set(HEADERSPUBLIC
commons.h
arithmetic/vec_znx_arithmetic.h
arithmetic/vec_rnx_arithmetic.h
arithmetic/zn_arithmetic.h
cplx/cplx_fft.h
reim/reim_fft.h
q120/q120_common.h
q120/q120_arithmetic.h
q120/q120_ntt.h
)
# H header files containing the private API (these headers are used internally)
set(HEADERSPRIVATE
commons_private.h
cplx/cplx_fft_internal.h
cplx/cplx_fft_private.h
reim4/reim4_arithmetic.h
reim4/reim4_fftvec_internal.h
reim4/reim4_fftvec_private.h
reim4/reim4_fftvec_public.h
reim/reim_fft_internal.h
reim/reim_fft_private.h
q120/q120_arithmetic_private.h
q120/q120_ntt_private.h
arithmetic/vec_znx_arithmetic.h
arithmetic/vec_rnx_arithmetic_private.h
arithmetic/vec_rnx_arithmetic_plugin.h
arithmetic/zn_arithmetic_private.h
arithmetic/zn_arithmetic_plugin.h
coeffs/coeffs_arithmetic.h
reim/reim_fft_core_template.h
)
set(SPQLIOSSOURCES
${SRCS_GENERIC}
${HEADERSPUBLIC}
${HEADERSPRIVATE}
)
if (${X86})
set(SPQLIOSSOURCES ${SPQLIOSSOURCES}
${SRCS_X86}
${SRCS_FMA_C}
${SRCS_FMA_ASM}
${SRCS_AVX2}
${SRCS_AVX512}
)
elseif (${X86_WIN32})
set(SPQLIOSSOURCES ${SPQLIOSSOURCES}
#${SRCS_X86}
${SRCS_FMA_C}
${SRCS_FMA_WIN32_ASM}
${SRCS_AVX2}
${SRCS_AVX512}
)
elseif (${AARCH64})
set(SPQLIOSSOURCES ${SPQLIOSSOURCES}
${SRCS_AARCH64}
)
endif ()
set(SPQLIOSLIBDEP
m # libm dependency for cosine/sine functions
)
if (ENABLE_SPQLIOS_F128)
find_library(quadmath REQUIRED NAMES quadmath)
set(SPQLIOSSOURCES ${SPQLIOSSOURCES} ${SRCS_F128})
set(SPQLIOSLIBDEP ${SPQLIOSLIBDEP} quadmath)
endif (ENABLE_SPQLIOS_F128)
add_library(libspqlios-static STATIC ${SPQLIOSSOURCES})
add_library(libspqlios SHARED ${SPQLIOSSOURCES})
set_property(TARGET libspqlios-static PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET libspqlios PROPERTY OUTPUT_NAME spqlios)
set_property(TARGET libspqlios-static PROPERTY OUTPUT_NAME spqlios)
set_property(TARGET libspqlios PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET libspqlios PROPERTY SOVERSION ${SPQLIOS_VERSION_MAJOR})
set_property(TARGET libspqlios PROPERTY VERSION ${SPQLIOS_VERSION})
if (NOT APPLE)
target_link_options(libspqlios-static PUBLIC -Wl,--no-undefined)
target_link_options(libspqlios PUBLIC -Wl,--no-undefined)
endif()
target_link_libraries(libspqlios ${SPQLIOSLIBDEP})
target_link_libraries(libspqlios-static ${SPQLIOSLIBDEP})
install(TARGETS libspqlios-static)
install(TARGETS libspqlios)
# install the public headers only
foreach (file ${HEADERSPUBLIC})
get_filename_component(dir ${file} DIRECTORY)
install(FILES ${file} DESTINATION include/spqlios/${dir})
endforeach ()
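Once installed, linking against the library is a one-liner; a minimal sketch (the output name `spqlios` and the libm dependency come from the target properties above):

```
# Compile a C program against the installed shared library
cc -o demo demo.c -lspqlios -lm
```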

View File

@@ -1,164 +0,0 @@
#include <string.h>
#include "vec_znx_arithmetic_private.h"
static void fill_generic_virtual_table(MODULE* module) {
// TODO add default ref handler here
module->func.vec_znx_zero = vec_znx_zero_ref;
module->func.vec_znx_copy = vec_znx_copy_ref;
module->func.vec_znx_negate = vec_znx_negate_ref;
module->func.vec_znx_add = vec_znx_add_ref;
module->func.vec_znx_sub = vec_znx_sub_ref;
module->func.vec_znx_rotate = vec_znx_rotate_ref;
module->func.vec_znx_automorphism = vec_znx_automorphism_ref;
module->func.vec_znx_normalize_base2k = vec_znx_normalize_base2k_ref;
module->func.vec_znx_normalize_base2k_tmp_bytes = vec_znx_normalize_base2k_tmp_bytes_ref;
if (CPU_SUPPORTS("avx2")) {
// TODO add avx handlers here
module->func.vec_znx_negate = vec_znx_negate_avx;
module->func.vec_znx_add = vec_znx_add_avx;
module->func.vec_znx_sub = vec_znx_sub_avx;
}
}
static void fill_fft64_virtual_table(MODULE* module) {
// TODO add default ref handler here
// module->func.vec_znx_dft = ...;
module->func.vec_znx_big_normalize_base2k = fft64_vec_znx_big_normalize_base2k;
module->func.vec_znx_big_normalize_base2k_tmp_bytes = fft64_vec_znx_big_normalize_base2k_tmp_bytes;
module->func.vec_znx_big_range_normalize_base2k = fft64_vec_znx_big_range_normalize_base2k;
module->func.vec_znx_big_range_normalize_base2k_tmp_bytes = fft64_vec_znx_big_range_normalize_base2k_tmp_bytes;
module->func.vec_znx_dft = fft64_vec_znx_dft;
module->func.vec_znx_idft = fft64_vec_znx_idft;
module->func.vec_znx_idft_tmp_bytes = fft64_vec_znx_idft_tmp_bytes;
module->func.vec_znx_idft_tmp_a = fft64_vec_znx_idft_tmp_a;
module->func.vec_znx_big_add = fft64_vec_znx_big_add;
module->func.vec_znx_big_add_small = fft64_vec_znx_big_add_small;
module->func.vec_znx_big_add_small2 = fft64_vec_znx_big_add_small2;
module->func.vec_znx_big_sub = fft64_vec_znx_big_sub;
module->func.vec_znx_big_sub_small_a = fft64_vec_znx_big_sub_small_a;
module->func.vec_znx_big_sub_small_b = fft64_vec_znx_big_sub_small_b;
module->func.vec_znx_big_sub_small2 = fft64_vec_znx_big_sub_small2;
module->func.vec_znx_big_rotate = fft64_vec_znx_big_rotate;
module->func.vec_znx_big_automorphism = fft64_vec_znx_big_automorphism;
module->func.svp_prepare = fft64_svp_prepare_ref;
module->func.svp_apply_dft = fft64_svp_apply_dft_ref;
module->func.znx_small_single_product = fft64_znx_small_single_product;
module->func.znx_small_single_product_tmp_bytes = fft64_znx_small_single_product_tmp_bytes;
module->func.vmp_prepare_contiguous = fft64_vmp_prepare_contiguous_ref;
module->func.vmp_prepare_contiguous_tmp_bytes = fft64_vmp_prepare_contiguous_tmp_bytes;
module->func.vmp_apply_dft = fft64_vmp_apply_dft_ref;
module->func.vmp_apply_dft_tmp_bytes = fft64_vmp_apply_dft_tmp_bytes;
module->func.vmp_apply_dft_to_dft = fft64_vmp_apply_dft_to_dft_ref;
module->func.vmp_apply_dft_to_dft_tmp_bytes = fft64_vmp_apply_dft_to_dft_tmp_bytes;
module->func.bytes_of_vec_znx_dft = fft64_bytes_of_vec_znx_dft;
module->func.bytes_of_vec_znx_big = fft64_bytes_of_vec_znx_big;
module->func.bytes_of_svp_ppol = fft64_bytes_of_svp_ppol;
module->func.bytes_of_vmp_pmat = fft64_bytes_of_vmp_pmat;
if (CPU_SUPPORTS("avx2")) {
// TODO add avx handlers here
// TODO: enable when avx implementation is done
module->func.vmp_prepare_contiguous = fft64_vmp_prepare_contiguous_avx;
module->func.vmp_apply_dft = fft64_vmp_apply_dft_avx;
module->func.vmp_apply_dft_to_dft = fft64_vmp_apply_dft_to_dft_avx;
}
}
static void fill_ntt120_virtual_table(MODULE* module) {
// TODO add default ref handler here
// module->func.vec_znx_dft = ...;
if (CPU_SUPPORTS("avx2")) {
// TODO add avx handlers here
module->func.vec_znx_dft = ntt120_vec_znx_dft_avx;
module->func.vec_znx_idft = ntt120_vec_znx_idft_avx;
module->func.vec_znx_idft_tmp_bytes = ntt120_vec_znx_idft_tmp_bytes_avx;
module->func.vec_znx_idft_tmp_a = ntt120_vec_znx_idft_tmp_a_avx;
}
}
static void fill_virtual_table(MODULE* module) {
fill_generic_virtual_table(module);
switch (module->module_type) {
case FFT64:
fill_fft64_virtual_table(module);
break;
case NTT120:
fill_ntt120_virtual_table(module);
break;
default:
NOT_SUPPORTED(); // invalid type
}
}
static void fill_fft64_precomp(MODULE* module) {
// fill any necessary precomp stuff
module->mod.fft64.p_conv = new_reim_from_znx64_precomp(module->m, 50);
module->mod.fft64.p_fft = new_reim_fft_precomp(module->m, 0);
module->mod.fft64.p_reim_to_znx = new_reim_to_znx64_precomp(module->m, module->m, 63);
module->mod.fft64.p_ifft = new_reim_ifft_precomp(module->m, 0);
module->mod.fft64.p_addmul = new_reim_fftvec_addmul_precomp(module->m);
module->mod.fft64.mul_fft = new_reim_fftvec_mul_precomp(module->m);
}
static void fill_ntt120_precomp(MODULE* module) {
// fill any necessary precomp stuff
if (CPU_SUPPORTS("avx2")) {
module->mod.q120.p_ntt = q120_new_ntt_bb_precomp(module->nn);
module->mod.q120.p_intt = q120_new_intt_bb_precomp(module->nn);
}
}
static void fill_module_precomp(MODULE* module) {
switch (module->module_type) {
case FFT64:
fill_fft64_precomp(module);
break;
case NTT120:
fill_ntt120_precomp(module);
break;
default:
NOT_SUPPORTED(); // invalid type
}
}
static void fill_module(MODULE* module, uint64_t nn, MODULE_TYPE mtype) {
// init to zero to ensure that any non-initialized field bug is detected
// by at least a "proper" segfault
memset(module, 0, sizeof(MODULE));
module->module_type = mtype;
module->nn = nn;
module->m = nn >> 1;
fill_module_precomp(module);
fill_virtual_table(module);
}
EXPORT MODULE* new_module_info(uint64_t N, MODULE_TYPE mtype) {
MODULE* m = (MODULE*)malloc(sizeof(MODULE));
fill_module(m, N, mtype);
return m;
}
EXPORT void delete_module_info(MODULE* mod) {
switch (mod->module_type) {
case FFT64:
free(mod->mod.fft64.p_conv);
free(mod->mod.fft64.p_fft);
free(mod->mod.fft64.p_ifft);
free(mod->mod.fft64.p_reim_to_znx);
free(mod->mod.fft64.mul_fft);
free(mod->mod.fft64.p_addmul);
break;
case NTT120:
if (CPU_SUPPORTS("avx2")) {
q120_del_ntt_bb_precomp(mod->mod.q120.p_ntt);
q120_del_intt_bb_precomp(mod->mod.q120.p_intt);
}
break;
default:
break;
}
free(mod);
}
EXPORT uint64_t module_get_n(const MODULE* module) { return module->nn; }
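/* --- editor's usage sketch (not part of the original file) ----------------
 * Minimal MODULE lifecycle, using only the constructor/accessor/destructor
 * defined above; the surrounding public declarations are assumed visible. */
static void module_lifecycle_example(void) {
  MODULE* m = new_module_info(1024, FFT64);  // ring dimension N = 1024
  uint64_t n = module_get_n(m);              // n == 1024
  (void)n;  // ... call the vec_znx_* / svp_* / vmp_* wrappers with m ...
  delete_module_info(m);                     // releases precomp tables + struct
}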

View File

@@ -1,63 +0,0 @@
#include <string.h>
#include "vec_znx_arithmetic_private.h"
EXPORT uint64_t bytes_of_svp_ppol(const MODULE* module) { return module->func.bytes_of_svp_ppol(module); }
EXPORT uint64_t fft64_bytes_of_svp_ppol(const MODULE* module) { return module->nn * sizeof(double); }
EXPORT SVP_PPOL* new_svp_ppol(const MODULE* module) { return spqlios_alloc(bytes_of_svp_ppol(module)); }
EXPORT void delete_svp_ppol(SVP_PPOL* ppol) { spqlios_free(ppol); }
// public wrappers
EXPORT void svp_prepare(const MODULE* module, // N
SVP_PPOL* ppol, // output
const int64_t* pol // a
) {
module->func.svp_prepare(module, ppol, pol);
}
/** @brief prepares a svp polynomial */
EXPORT void fft64_svp_prepare_ref(const MODULE* module, // N
SVP_PPOL* ppol, // output
const int64_t* pol // a
) {
reim_from_znx64(module->mod.fft64.p_conv, ppol, pol);
reim_fft(module->mod.fft64.p_fft, (double*)ppol);
}
EXPORT void svp_apply_dft(const MODULE* module, // N
const VEC_ZNX_DFT* res, uint64_t res_size, // output
const SVP_PPOL* ppol, // prepared pol
const int64_t* a, uint64_t a_size, uint64_t a_sl) {
module->func.svp_apply_dft(module, // N
res,
res_size, // output
ppol, // prepared pol
a, a_size, a_sl);
}
// result = ppol * a
EXPORT void fft64_svp_apply_dft_ref(const MODULE* module, // N
const VEC_ZNX_DFT* res, uint64_t res_size, // output
const SVP_PPOL* ppol, // prepared pol
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->nn;
double* const dres = (double*)res;
double* const dppol = (double*)ppol;
const uint64_t auto_end_idx = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < auto_end_idx; ++i) {
const int64_t* a_ptr = a + i * a_sl;
double* const res_ptr = dres + i * nn;
// copy the polynomial to res, apply fft in place, call fftvec_mul in place.
reim_from_znx64(module->mod.fft64.p_conv, res_ptr, a_ptr);
reim_fft(module->mod.fft64.p_fft, res_ptr);
reim_fftvec_mul(module->mod.fft64.mul_fft, res_ptr, res_ptr, dppol);
}
// then extend with zeros
memset(dres + auto_end_idx * nn, 0, (res_size - auto_end_idx) * nn * sizeof(double));
}

View File

@@ -1,318 +0,0 @@
#include <string.h>
#include "vec_rnx_arithmetic_private.h"
void fft64_init_rnx_module_precomp(MOD_RNX* module) {
// Add here initialization of items that are in the precomp
const uint64_t m = module->m;
module->precomp.fft64.p_fft = new_reim_fft_precomp(m, 0);
module->precomp.fft64.p_ifft = new_reim_ifft_precomp(m, 0);
module->precomp.fft64.p_fftvec_mul = new_reim_fftvec_mul_precomp(m);
module->precomp.fft64.p_fftvec_addmul = new_reim_fftvec_addmul_precomp(m);
}
void fft64_finalize_rnx_module_precomp(MOD_RNX* module) {
// Add here deleters for items that are in the precomp
delete_reim_fft_precomp(module->precomp.fft64.p_fft);
delete_reim_ifft_precomp(module->precomp.fft64.p_ifft);
delete_reim_fftvec_mul_precomp(module->precomp.fft64.p_fftvec_mul);
delete_reim_fftvec_addmul_precomp(module->precomp.fft64.p_fftvec_addmul);
}
void fft64_init_rnx_module_vtable(MOD_RNX* module) {
// Add function pointers here
module->vtable.vec_rnx_add = vec_rnx_add_ref;
module->vtable.vec_rnx_zero = vec_rnx_zero_ref;
module->vtable.vec_rnx_copy = vec_rnx_copy_ref;
module->vtable.vec_rnx_negate = vec_rnx_negate_ref;
module->vtable.vec_rnx_sub = vec_rnx_sub_ref;
module->vtable.vec_rnx_rotate = vec_rnx_rotate_ref;
module->vtable.vec_rnx_automorphism = vec_rnx_automorphism_ref;
module->vtable.vec_rnx_mul_xp_minus_one = vec_rnx_mul_xp_minus_one_ref;
module->vtable.rnx_vmp_apply_dft_to_dft_tmp_bytes = fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref;
module->vtable.rnx_vmp_apply_dft_to_dft = fft64_rnx_vmp_apply_dft_to_dft_ref;
module->vtable.rnx_vmp_apply_tmp_a_tmp_bytes = fft64_rnx_vmp_apply_tmp_a_tmp_bytes_ref;
module->vtable.rnx_vmp_apply_tmp_a = fft64_rnx_vmp_apply_tmp_a_ref;
module->vtable.rnx_vmp_prepare_contiguous_tmp_bytes = fft64_rnx_vmp_prepare_contiguous_tmp_bytes_ref;
module->vtable.rnx_vmp_prepare_contiguous = fft64_rnx_vmp_prepare_contiguous_ref;
module->vtable.bytes_of_rnx_vmp_pmat = fft64_bytes_of_rnx_vmp_pmat;
module->vtable.rnx_approxdecomp_from_tnxdbl = rnx_approxdecomp_from_tnxdbl_ref;
module->vtable.vec_rnx_to_znx32 = vec_rnx_to_znx32_ref;
module->vtable.vec_rnx_from_znx32 = vec_rnx_from_znx32_ref;
module->vtable.vec_rnx_to_tnx32 = vec_rnx_to_tnx32_ref;
module->vtable.vec_rnx_from_tnx32 = vec_rnx_from_tnx32_ref;
module->vtable.vec_rnx_to_tnxdbl = vec_rnx_to_tnxdbl_ref;
module->vtable.bytes_of_rnx_svp_ppol = fft64_bytes_of_rnx_svp_ppol;
module->vtable.rnx_svp_prepare = fft64_rnx_svp_prepare_ref;
module->vtable.rnx_svp_apply = fft64_rnx_svp_apply_ref;
// Add optimized function pointers here
if (CPU_SUPPORTS("avx")) {
module->vtable.vec_rnx_add = vec_rnx_add_avx;
module->vtable.vec_rnx_sub = vec_rnx_sub_avx;
module->vtable.vec_rnx_negate = vec_rnx_negate_avx;
module->vtable.rnx_vmp_apply_dft_to_dft_tmp_bytes = fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_avx;
module->vtable.rnx_vmp_apply_dft_to_dft = fft64_rnx_vmp_apply_dft_to_dft_avx;
module->vtable.rnx_vmp_apply_tmp_a_tmp_bytes = fft64_rnx_vmp_apply_tmp_a_tmp_bytes_avx;
module->vtable.rnx_vmp_apply_tmp_a = fft64_rnx_vmp_apply_tmp_a_avx;
module->vtable.rnx_vmp_prepare_contiguous_tmp_bytes = fft64_rnx_vmp_prepare_contiguous_tmp_bytes_avx;
module->vtable.rnx_vmp_prepare_contiguous = fft64_rnx_vmp_prepare_contiguous_avx;
module->vtable.rnx_approxdecomp_from_tnxdbl = rnx_approxdecomp_from_tnxdbl_avx;
}
}
void init_rnx_module_info(MOD_RNX* module, //
uint64_t n, RNX_MODULE_TYPE mtype) {
memset(module, 0, sizeof(MOD_RNX));
module->n = n;
module->m = n >> 1;
module->mtype = mtype;
switch (mtype) {
case FFT64:
fft64_init_rnx_module_precomp(module);
fft64_init_rnx_module_vtable(module);
break;
default:
NOT_SUPPORTED(); // unknown mtype
}
}
void finalize_rnx_module_info(MOD_RNX* module) {
if (module->custom) module->custom_deleter(module->custom);
switch (module->mtype) {
case FFT64:
fft64_finalize_rnx_module_precomp(module);
// fft64_finalize_rnx_module_vtable(module); // nothing to finalize
break;
default:
NOT_SUPPORTED(); // unknown mtype
}
}
EXPORT MOD_RNX* new_rnx_module_info(uint64_t nn, RNX_MODULE_TYPE mtype) {
MOD_RNX* res = (MOD_RNX*)malloc(sizeof(MOD_RNX));
init_rnx_module_info(res, nn, mtype);
return res;
}
EXPORT void delete_rnx_module_info(MOD_RNX* module_info) {
finalize_rnx_module_info(module_info);
free(module_info);
}
EXPORT uint64_t rnx_module_get_n(const MOD_RNX* module) { return module->n; }
/** @brief allocates a prepared matrix (release with delete_rnx_vmp_pmat) */
EXPORT RNX_VMP_PMAT* new_rnx_vmp_pmat(const MOD_RNX* module, // N
uint64_t nrows, uint64_t ncols) { // dimensions
return (RNX_VMP_PMAT*)spqlios_alloc(bytes_of_rnx_vmp_pmat(module, nrows, ncols));
}
EXPORT void delete_rnx_vmp_pmat(RNX_VMP_PMAT* ptr) { spqlios_free(ptr); }
//////////////// wrappers //////////////////
/** @brief sets res = a + b */
EXPORT void vec_rnx_add( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
) {
module->vtable.vec_rnx_add(module, res, res_size, res_sl, a, a_size, a_sl, b, b_size, b_sl);
}
/** @brief sets res = 0 */
EXPORT void vec_rnx_zero( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl // res
) {
module->vtable.vec_rnx_zero(module, res, res_size, res_sl);
}
/** @brief sets res = a */
EXPORT void vec_rnx_copy( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_copy(module, res, res_size, res_sl, a, a_size, a_sl);
}
/** @brief sets res = -a */
EXPORT void vec_rnx_negate( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_negate(module, res, res_size, res_sl, a, a_size, a_sl);
}
/** @brief sets res = a - b */
EXPORT void vec_rnx_sub( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
) {
module->vtable.vec_rnx_sub(module, res, res_size, res_sl, a, a_size, a_sl, b, b_size, b_sl);
}
/** @brief sets res = a . X^p */
EXPORT void vec_rnx_rotate( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_rotate(module, p, res, res_size, res_sl, a, a_size, a_sl);
}
/** @brief sets res = a(X^p) */
EXPORT void vec_rnx_automorphism( //
const MOD_RNX* module, // N
int64_t p, // X -> X^p
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_automorphism(module, p, res, res_size, res_sl, a, a_size, a_sl);
}
EXPORT void vec_rnx_mul_xp_minus_one( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_mul_xp_minus_one(module, p, res, res_size, res_sl, a, a_size, a_sl);
}
/** @brief number of bytes in a RNX_VMP_PMAT (for manual allocation) */
EXPORT uint64_t bytes_of_rnx_vmp_pmat(const MOD_RNX* module, // N
uint64_t nrows, uint64_t ncols) { // dimensions
return module->vtable.bytes_of_rnx_vmp_pmat(module, nrows, ncols);
}
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void rnx_vmp_prepare_contiguous( //
const MOD_RNX* module, // N
RNX_VMP_PMAT* pmat, // output
const double* a, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
) {
module->vtable.rnx_vmp_prepare_contiguous(module, pmat, a, nrows, ncols, tmp_space);
}
/** @brief number of scratch bytes necessary to prepare a matrix */
EXPORT uint64_t rnx_vmp_prepare_contiguous_tmp_bytes(const MOD_RNX* module) {
return module->vtable.rnx_vmp_prepare_contiguous_tmp_bytes(module);
}
/** @brief applies a vmp product res = a x pmat */
EXPORT void rnx_vmp_apply_tmp_a( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
double* tmpa, uint64_t a_size, uint64_t a_sl, // a (will be overwritten)
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
) {
module->vtable.rnx_vmp_apply_tmp_a(module, res, res_size, res_sl, tmpa, a_size, a_sl, pmat, nrows, ncols, tmp_space);
}
EXPORT uint64_t rnx_vmp_apply_tmp_a_tmp_bytes( //
const MOD_RNX* module, // N
uint64_t res_size, // res size
uint64_t a_size, // a size
uint64_t nrows, uint64_t ncols // prep matrix dims
) {
return module->vtable.rnx_vmp_apply_tmp_a_tmp_bytes(module, res_size, a_size, nrows, ncols);
}
/** @brief minimal size of the tmp_space */
EXPORT void rnx_vmp_apply_dft_to_dft( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl, // a
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
) {
module->vtable.rnx_vmp_apply_dft_to_dft(module, res, res_size, res_sl, a_dft, a_size, a_sl, pmat, nrows, ncols,
tmp_space);
}
/** @brief minimal size of the tmp_space */
EXPORT uint64_t rnx_vmp_apply_dft_to_dft_tmp_bytes( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
return module->vtable.rnx_vmp_apply_dft_to_dft_tmp_bytes(module, res_size, a_size, nrows, ncols);
}
EXPORT uint64_t bytes_of_rnx_svp_ppol(const MOD_RNX* module) { return module->vtable.bytes_of_rnx_svp_ppol(module); }
EXPORT void rnx_svp_prepare(const MOD_RNX* module, // N
RNX_SVP_PPOL* ppol, // output
const double* pol // a
) {
module->vtable.rnx_svp_prepare(module, ppol, pol);
}
EXPORT void rnx_svp_apply( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // output
const RNX_SVP_PPOL* ppol, // prepared pol
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.rnx_svp_apply(module, // N
res, res_size, res_sl, // output
ppol, // prepared pol
a, a_size, a_sl);
}
EXPORT void rnx_approxdecomp_from_tnxdbl( //
const MOD_RNX* module, // N
const TNXDBL_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a) { // a
module->vtable.rnx_approxdecomp_from_tnxdbl(module, gadget, res, res_size, res_sl, a);
}
EXPORT void vec_rnx_to_znx32( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_to_znx32(module, res, res_size, res_sl, a, a_size, a_sl);
}
EXPORT void vec_rnx_from_znx32( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_from_znx32(module, res, res_size, res_sl, a, a_size, a_sl);
}
EXPORT void vec_rnx_to_tnx32( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_to_tnx32(module, res, res_size, res_sl, a, a_size, a_sl);
}
EXPORT void vec_rnx_from_tnx32( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_from_tnx32(module, res, res_size, res_sl, a, a_size, a_sl);
}
EXPORT void vec_rnx_to_tnxdbl( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
module->vtable.vec_rnx_to_tnxdbl(module, res, res_size, res_sl, a, a_size, a_sl);
}

View File

@@ -1,59 +0,0 @@
#include <string.h>
#include <immintrin.h>
#include "vec_rnx_arithmetic_private.h"
/** @brief sets res = gadget_decompose(a) */
EXPORT void rnx_approxdecomp_from_tnxdbl_avx( //
const MOD_RNX* module, // N
const TNXDBL_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a // a
) {
const uint64_t nn = module->n;
if (nn < 4) {
// tiny rings cannot use 4-wide vectors: defer to the portable version
rnx_approxdecomp_from_tnxdbl_ref(module, gadget, res, res_size, res_sl, a);
return;
}
const uint64_t ell = gadget->ell;
const __m256i k = _mm256_set1_epi64x(gadget->k);
const __m256d add_cst = _mm256_set1_pd(gadget->add_cst);
const __m256i and_mask = _mm256_set1_epi64x(gadget->and_mask);
const __m256i or_mask = _mm256_set1_epi64x(gadget->or_mask);
const __m256d sub_cst = _mm256_set1_pd(gadget->sub_cst);
const uint64_t msize = res_size <= ell ? res_size : ell;
// gadget decompose column by column
if (msize == ell) {
// this is the main scenario when msize == ell
double* const last_r = res + (msize - 1) * res_sl;
for (uint64_t j = 0; j < nn; j += 4) {
double* rr = last_r + j;
const double* aa = a + j;
__m256d t_dbl = _mm256_add_pd(_mm256_loadu_pd(aa), add_cst);
__m256i t_int = _mm256_castpd_si256(t_dbl);
do {
__m256i u_int = _mm256_or_si256(_mm256_and_si256(t_int, and_mask), or_mask);
_mm256_storeu_pd(rr, _mm256_sub_pd(_mm256_castsi256_pd(u_int), sub_cst));
t_int = _mm256_srlv_epi64(t_int, k);
rr -= res_sl;
} while (rr >= res);
}
} else if (msize > 0) {
// otherwise, if msize < ell: there is one additional rshift
const __m256i first_rsh = _mm256_set1_epi64x((ell - msize) * gadget->k);
double* const last_r = res + (msize - 1) * res_sl;
for (uint64_t j = 0; j < nn; j += 4) {
double* rr = last_r + j;
const double* aa = a + j;
__m256d t_dbl = _mm256_add_pd(_mm256_loadu_pd(aa), add_cst);
__m256i t_int = _mm256_srlv_epi64(_mm256_castpd_si256(t_dbl), first_rsh);
do {
__m256i u_int = _mm256_or_si256(_mm256_and_si256(t_int, and_mask), or_mask);
_mm256_storeu_pd(rr, _mm256_sub_pd(_mm256_castsi256_pd(u_int), sub_cst));
t_int = _mm256_srlv_epi64(t_int, k);
rr -= res_sl;
} while (rr >= res);
}
}
// zero-out the last slices (if any)
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}

View File

@@ -1,75 +0,0 @@
#include <string.h>
#include "vec_rnx_arithmetic_private.h"
typedef union di {
double dv;
uint64_t uv;
} di_t;
/** @brief new gadget: delete with delete_tnxdbl_approxdecomp_gadget */
EXPORT TNXDBL_APPROXDECOMP_GADGET* new_tnxdbl_approxdecomp_gadget( //
const MOD_RNX* module, // N
uint64_t k, uint64_t ell // base 2^K and size
) {
if (k * ell > 50) return spqlios_error("gadget requires a too large fp precision");
TNXDBL_APPROXDECOMP_GADGET* res = spqlios_alloc(sizeof(TNXDBL_APPROXDECOMP_GADGET));
res->k = k;
res->ell = ell;
// double add_cst; // double(3.2^(51-ell.K) + 1/2.(sum 2^(-iK)) for i=[0,ell[)
union di add_cst;
add_cst.dv = UINT64_C(3) << (51 - ell * k);
for (uint64_t i = 0; i < ell; ++i) {
add_cst.uv |= UINT64_C(1) << ((i + 1) * k - 1);
}
res->add_cst = add_cst.dv;
// uint64_t and_mask; // uint64(2^(K)-1)
res->and_mask = (UINT64_C(1) << k) - 1;
// uint64_t or_mask; // double(2^52)
union di or_mask;
or_mask.dv = (UINT64_C(1) << 52);
res->or_mask = or_mask.uv;
// double sub_cst; // double(2^52 + 2^(K-1))
res->sub_cst = ((UINT64_C(1) << 52) + (UINT64_C(1) << (k - 1)));
return res;
}
EXPORT void delete_tnxdbl_approxdecomp_gadget(TNXDBL_APPROXDECOMP_GADGET* gadget) { spqlios_free(gadget); }
/** @brief sets res = gadget_decompose(a) */
EXPORT void rnx_approxdecomp_from_tnxdbl_ref( //
const MOD_RNX* module, // N
const TNXDBL_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a // a
) {
const uint64_t nn = module->n;
const uint64_t k = gadget->k;
const uint64_t ell = gadget->ell;
const double add_cst = gadget->add_cst;
const uint64_t and_mask = gadget->and_mask;
const uint64_t or_mask = gadget->or_mask;
const double sub_cst = gadget->sub_cst;
const uint64_t msize = res_size <= ell ? res_size : ell;
const uint64_t first_rsh = (ell - msize) * k;
// gadget decompose column by column
if (msize > 0) {
double* const last_r = res + (msize - 1) * res_sl;
for (uint64_t j = 0; j < nn; ++j) {
double* rr = last_r + j;
di_t t = {.dv = a[j] + add_cst};
if (msize < ell) t.uv >>= first_rsh;
do {
di_t u;
u.uv = (t.uv & and_mask) | or_mask;
*rr = u.dv - sub_cst;
t.uv >>= k;
rr -= res_sl;
} while (rr >= res);
}
}
// zero-out the last slices (if any)
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
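/* --- editor's worked example (not part of the original file) --------------
 * Demonstrates the constants built above. Adding add_cst aligns the
 * significand of a + add_cst so that the ell base-2^k digits of
 * round(a * 2^(ell*k)) occupy consecutive k-bit fields of the mantissa;
 * (field | 2^52) - (2^52 + 2^(k-1)) then re-centers each digit into
 * [-2^(k-1), 2^(k-1)). Reconstruction error is at most 2^(-ell*k-1). */
static void gadget_trick_demo(void) {
  const uint64_t k = 8, ell = 3;        // k*ell = 24 <= 50 (precision bound)
  const double a = 0.3141592653589793;  // torus element in [-0.5, 0.5)
  di_t add_cst = {.dv = (double)(UINT64_C(3) << (51 - ell * k))};
  for (uint64_t i = 0; i < ell; ++i) add_cst.uv |= UINT64_C(1) << ((i + 1) * k - 1);
  const uint64_t and_mask = (UINT64_C(1) << k) - 1;
  di_t or_mask = {.dv = (double)(UINT64_C(1) << 52)};
  const double sub_cst = (double)((UINT64_C(1) << 52) + (UINT64_C(1) << (k - 1)));
  di_t t = {.dv = a + add_cst.dv};
  double rec = 0.0, w = 1.0;
  for (uint64_t i = 0; i < ell * k; ++i) w *= 0.5;  // w = 2^(-ell*k)
  for (uint64_t i = 0; i < ell; ++i) {              // least-significant digit first
    di_t u = {.uv = (t.uv & and_mask) | or_mask.uv};
    rec += (u.dv - sub_cst) * w;                    // centered digit * its weight
    t.uv >>= k;
    w *= (double)(UINT64_C(1) << k);
  }
  (void)rec;  // here |a - rec| <= 2^(-25)
}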

View File

@@ -1,223 +0,0 @@
#include <string.h>
#include "../coeffs/coeffs_arithmetic.h"
#include "vec_rnx_arithmetic_private.h"
void rnx_add_ref(uint64_t nn, double* res, const double* a, const double* b) {
for (uint64_t i = 0; i < nn; ++i) {
res[i] = a[i] + b[i];
}
}
void rnx_sub_ref(uint64_t nn, double* res, const double* a, const double* b) {
for (uint64_t i = 0; i < nn; ++i) {
res[i] = a[i] - b[i];
}
}
void rnx_negate_ref(uint64_t nn, double* res, const double* a) {
for (uint64_t i = 0; i < nn; ++i) {
res[i] = -a[i];
}
}
/** @brief sets res = a + b */
EXPORT void vec_rnx_add_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->n;
if (a_size < b_size) {
const uint64_t msize = res_size < a_size ? res_size : a_size;
const uint64_t nsize = res_size < b_size ? res_size : b_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_add_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
memcpy(res + i * res_sl, b + i * b_sl, nn * sizeof(double));
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
} else {
const uint64_t msize = res_size < b_size ? res_size : b_size;
const uint64_t nsize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_add_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
memcpy(res + i * res_sl, a + i * a_sl, nn * sizeof(double));
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
}
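/* --- editor's note (not part of the original file) -------------------------
 * Worked example of the size semantics above, with res_size=4, a_size=2,
 * b_size=3 (one line per slice of n coefficients):
 *   slices 0-1 : res = a + b   (both operands present)
 *   slice 2    : res = b       (only the longer operand remains: copy)
 *   slice 3    : res = 0       (beyond both operands: zero padding)
 * The same min/copy/zero pattern recurs in vec_rnx_sub below and in the
 * AVX variants. */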
/** @brief sets res = 0 */
EXPORT void vec_rnx_zero_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl // res
) {
const uint64_t nn = module->n;
for (uint64_t i = 0; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief sets res = a */
EXPORT void vec_rnx_copy_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t rot_end_idx = res_size < a_size ? res_size : a_size;
// copy up to the smallest dimension
for (uint64_t i = 0; i < rot_end_idx; ++i) {
double* res_ptr = res + i * res_sl;
const double* a_ptr = a + i * a_sl;
memcpy(res_ptr, a_ptr, nn * sizeof(double));
}
// then extend with zeros
for (uint64_t i = rot_end_idx; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief sets res = -a */
EXPORT void vec_rnx_negate_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t rot_end_idx = res_size < a_size ? res_size : a_size;
// negate up to the smallest dimension
for (uint64_t i = 0; i < rot_end_idx; ++i) {
double* res_ptr = res + i * res_sl;
const double* a_ptr = a + i * a_sl;
rnx_negate_ref(nn, res_ptr, a_ptr);
}
// then extend with zeros
for (uint64_t i = rot_end_idx; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief sets res = a - b */
EXPORT void vec_rnx_sub_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->n;
if (a_size < b_size) {
const uint64_t msize = res_size < a_size ? res_size : a_size;
const uint64_t nsize = res_size < b_size ? res_size : b_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_sub_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
rnx_negate_ref(nn, res + i * res_sl, b + i * b_sl);
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
} else {
const uint64_t msize = res_size < b_size ? res_size : b_size;
const uint64_t nsize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_sub_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
memcpy(res + i * res_sl, a + i * a_sl, nn * sizeof(double));
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
}
/** @brief sets res = a . X^p */
EXPORT void vec_rnx_rotate_ref( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t rot_end_idx = res_size < a_size ? res_size : a_size;
// rotate up to the smallest dimension
for (uint64_t i = 0; i < rot_end_idx; ++i) {
double* res_ptr = res + i * res_sl;
const double* a_ptr = a + i * a_sl;
if (res_ptr == a_ptr) {
rnx_rotate_inplace_f64(nn, p, res_ptr);
} else {
rnx_rotate_f64(nn, p, res_ptr, a_ptr);
}
}
// then extend with zeros
for (uint64_t i = rot_end_idx; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief sets res = a(X^p) */
EXPORT void vec_rnx_automorphism_ref( //
const MOD_RNX* module, // N
int64_t p, // X -> X^p
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t rot_end_idx = res_size < a_size ? res_size : a_size;
// apply the automorphism up to the smallest dimension
for (uint64_t i = 0; i < rot_end_idx; ++i) {
double* res_ptr = res + i * res_sl;
const double* a_ptr = a + i * a_sl;
if (res_ptr == a_ptr) {
rnx_automorphism_inplace_f64(nn, p, res_ptr);
} else {
rnx_automorphism_f64(nn, p, res_ptr, a_ptr);
}
}
// then extend with zeros
for (uint64_t i = rot_end_idx; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief sets res = a . (X^p - 1) */
EXPORT void vec_rnx_mul_xp_minus_one_ref( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t rot_end_idx = res_size < a_size ? res_size : a_size;
// multiply by (X^p - 1) up to the smallest dimension
for (uint64_t i = 0; i < rot_end_idx; ++i) {
double* res_ptr = res + i * res_sl;
const double* a_ptr = a + i * a_sl;
if (res_ptr == a_ptr) {
rnx_mul_xp_minus_one_inplace(nn, p, res_ptr);
} else {
rnx_mul_xp_minus_one(nn, p, res_ptr, a_ptr);
}
}
// then extend with zeros
for (uint64_t i = rot_end_idx; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}

View File

@@ -1,340 +0,0 @@
#ifndef SPQLIOS_VEC_RNX_ARITHMETIC_H
#define SPQLIOS_VEC_RNX_ARITHMETIC_H
#include <stdint.h>
#include "../commons.h"
/**
* We support the following module families:
* - FFT64:
* the overall precision should fit at all times over 52 bits.
*/
typedef enum rnx_module_type_t { FFT64 } RNX_MODULE_TYPE;
/** @brief opaque structure that describes the modules (RnX,ZnX,TnX) and the hardware */
typedef struct rnx_module_info_t MOD_RNX;
/**
* @brief obtain a module info for ring dimension N
* the module-info knows about:
* - the dimension N (or the complex dimension m=N/2)
* - any precomputed fft or ntt items
* - the hardware (avx, arm64, x86, ...)
*/
EXPORT MOD_RNX* new_rnx_module_info(uint64_t N, RNX_MODULE_TYPE mode);
EXPORT void delete_rnx_module_info(MOD_RNX* module_info);
EXPORT uint64_t rnx_module_get_n(const MOD_RNX* module);
// basic arithmetic
/** @brief sets res = 0 */
EXPORT void vec_rnx_zero( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl // res
);
/** @brief sets res = a */
EXPORT void vec_rnx_copy( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = -a */
EXPORT void vec_rnx_negate( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a + b */
EXPORT void vec_rnx_add( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a - b */
EXPORT void vec_rnx_sub( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a . X^p */
EXPORT void vec_rnx_rotate( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a . (X^p - 1) */
EXPORT void vec_rnx_mul_xp_minus_one( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a(X^p) */
EXPORT void vec_rnx_automorphism( //
const MOD_RNX* module, // N
int64_t p, // X -> X^p
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
///////////////////////////////////////////////////////////////////
// conversions //
///////////////////////////////////////////////////////////////////
EXPORT void vec_rnx_to_znx32( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_from_znx32( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_to_tnx32( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_from_tnx32( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_to_tnx32x2( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_from_tnx32x2( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_to_tnxdbl( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
///////////////////////////////////////////////////////////////////
// isolated products (n.log(n), but not particularly optimized //
///////////////////////////////////////////////////////////////////
/** @brief res = a * b : small polynomial product */
EXPORT void rnx_small_single_product( //
const MOD_RNX* module, // N
double* res, // output
const double* a, // a
const double* b, // b
uint8_t* tmp); // scratch space
EXPORT uint64_t rnx_small_single_product_tmp_bytes(const MOD_RNX* module);
/** @brief res = a * b centermod 1: small polynomial product */
EXPORT void tnxdbl_small_single_product( //
const MOD_RNX* module, // N
double* torus_res, // output
const double* int_a, // a
const double* torus_b, // b
uint8_t* tmp); // scratch space
EXPORT uint64_t tnxdbl_small_single_product_tmp_bytes(const MOD_RNX* module);
/** @brief res = a * b: small polynomial product */
EXPORT void znx32_small_single_product( //
const MOD_RNX* module, // N
int32_t* int_res, // output
const int32_t* int_a, // a
const int32_t* int_b, // b
uint8_t* tmp); // scratch space
EXPORT uint64_t znx32_small_single_product_tmp_bytes(const MOD_RNX* module);
/** @brief res = a * b centermod 1: small polynomial product */
EXPORT void tnx32_small_single_product( //
const MOD_RNX* module, // N
int32_t* torus_res, // output
const int32_t* int_a, // a
const int32_t* torus_b, // b
uint8_t* tmp); // scratch space
EXPORT uint64_t tnx32_small_single_product_tmp_bytes(const MOD_RNX* module);
///////////////////////////////////////////////////////////////////
// prepared gadget decompositions (optimized) //
///////////////////////////////////////////////////////////////////
// decompose from tnx32
typedef struct tnx32_approxdecomp_gadget_t TNX32_APPROXDECOMP_GADGET;
/** @brief new gadget: delete with delete_tnx32_approxdecomp_gadget */
EXPORT TNX32_APPROXDECOMP_GADGET* new_tnx32_approxdecomp_gadget( //
const MOD_RNX* module, // N
uint64_t k, uint64_t ell // base 2^K and size
);
EXPORT void delete_tnx32_approxdecomp_gadget(const MOD_RNX* module);
/** @brief sets res = gadget_decompose(a) */
EXPORT void rnx_approxdecomp_from_tnx32( //
const MOD_RNX* module, // N
const TNX32_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a // a
);
// decompose from tnx32x2
typedef struct tnx32x2_approxdecomp_gadget_t TNX32X2_APPROXDECOMP_GADGET;
/** @brief new gadget: delete with delete_tnx32x2_approxdecomp_gadget */
EXPORT TNX32X2_APPROXDECOMP_GADGET* new_tnx32x2_approxdecomp_gadget(const MOD_RNX* module, uint64_t ka, uint64_t ella,
uint64_t kb, uint64_t ellb);
EXPORT void delete_tnx32x2_approxdecomp_gadget(const MOD_RNX* module);
/** @brief sets res = gadget_decompose(a) */
EXPORT void rnx_approxdecomp_from_tnx32x2( //
const MOD_RNX* module, // N
const TNX32X2_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a // a
);
// decompose from tnxdbl
typedef struct tnxdbl_approxdecomp_gadget_t TNXDBL_APPROXDECOMP_GADGET;
/** @brief new gadget: delete with delete_tnxdbl_approxdecomp_gadget */
EXPORT TNXDBL_APPROXDECOMP_GADGET* new_tnxdbl_approxdecomp_gadget( //
const MOD_RNX* module, // N
uint64_t k, uint64_t ell // base 2^K and size
);
EXPORT void delete_tnxdbl_approxdecomp_gadget(TNXDBL_APPROXDECOMP_GADGET* gadget);
/** @brief sets res = gadget_decompose(a) */
EXPORT void rnx_approxdecomp_from_tnxdbl( //
const MOD_RNX* module, // N
const TNXDBL_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a); // a
///////////////////////////////////////////////////////////////////
// prepared scalar-vector product (optimized) //
///////////////////////////////////////////////////////////////////
/** @brief opaque type that represents a polynomial of RnX prepared for a scalar-vector product */
typedef struct rnx_svp_ppol_t RNX_SVP_PPOL;
/** @brief number of bytes in a RNX_VMP_PMAT (for manual allocation) */
EXPORT uint64_t bytes_of_rnx_svp_ppol(const MOD_RNX* module); // N
/** @brief allocates a prepared vector (release with delete_rnx_svp_ppol) */
EXPORT RNX_SVP_PPOL* new_rnx_svp_ppol(const MOD_RNX* module); // N
/** @brief frees memory for a prepared vector */
EXPORT void delete_rnx_svp_ppol(RNX_SVP_PPOL* res);
/** @brief prepares a svp polynomial */
EXPORT void rnx_svp_prepare(const MOD_RNX* module, // N
RNX_SVP_PPOL* ppol, // output
const double* pol // a
);
/** @brief apply a svp product, result = ppol * a, presented in DFT space */
EXPORT void rnx_svp_apply( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // output
const RNX_SVP_PPOL* ppol, // prepared pol
const double* a, uint64_t a_size, uint64_t a_sl // a
);
///////////////////////////////////////////////////////////////////
// prepared vector-matrix product (optimized) //
///////////////////////////////////////////////////////////////////
typedef struct rnx_vmp_pmat_t RNX_VMP_PMAT;
/** @brief number of bytes in a RNX_VMP_PMAT (for manual allocation) */
EXPORT uint64_t bytes_of_rnx_vmp_pmat(const MOD_RNX* module, // N
uint64_t nrows, uint64_t ncols); // dimensions
/** @brief allocates a prepared matrix (release with delete_rnx_vmp_pmat) */
EXPORT RNX_VMP_PMAT* new_rnx_vmp_pmat(const MOD_RNX* module, // N
uint64_t nrows, uint64_t ncols); // dimensions
EXPORT void delete_rnx_vmp_pmat(RNX_VMP_PMAT* ptr);
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void rnx_vmp_prepare_contiguous( //
const MOD_RNX* module, // N
RNX_VMP_PMAT* pmat, // output
const double* a, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
/** @brief number of scratch bytes necessary to prepare a matrix */
EXPORT uint64_t rnx_vmp_prepare_contiguous_tmp_bytes(const MOD_RNX* module);
/** @brief applies a vmp product res = a x pmat */
EXPORT void rnx_vmp_apply_tmp_a( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
double* tmpa, uint64_t a_size, uint64_t a_sl, // a (will be overwritten)
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
);
EXPORT uint64_t rnx_vmp_apply_tmp_a_tmp_bytes( //
const MOD_RNX* module, // N
uint64_t res_size, // res size
uint64_t a_size, // a size
uint64_t nrows, uint64_t ncols // prep matrix dims
);
/** @brief minimal size of the tmp_space */
EXPORT void rnx_vmp_apply_dft_to_dft( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl, // a
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
);
/** @brief minimal size of the tmp_space */
EXPORT uint64_t rnx_vmp_apply_dft_to_dft_tmp_bytes( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
/** @brief sets res = DFT(a) */
EXPORT void vec_rnx_dft(const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = iDFT(a_dft) -- idft is not normalized */
EXPORT void vec_rnx_idft(const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl // a
);
#endif // SPQLIOS_VEC_RNX_ARITHMETIC_H
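/* --- editor's usage sketch (not part of the original file; assumes
 * <stdlib.h> for calloc/free) -----------------------------------------------
 * Every vec_rnx_* entry point takes (ptr, size, stride) triples; a
 * contiguous vector of `size` polynomials simply uses stride = n. */
static void rnx_demo(void) {
  const uint64_t n = 64, size = 2;
  MOD_RNX* mod = new_rnx_module_info(n, FFT64);
  double* a = (double*)calloc(size * n, sizeof(double));
  double* b = (double*)calloc(size * n, sizeof(double));
  double* r = (double*)calloc(size * n, sizeof(double));
  a[0] = 0.25; b[n] = -0.5;                              // a = 1/4, b-slice 1 = -1/2
  vec_rnx_add(mod, r, size, n, a, size, n, b, size, n);  // r = a + b
  vec_rnx_rotate(mod, 3, r, size, n, r, size, n);        // r = r * X^3
  free(a); free(b); free(r);
  delete_rnx_module_info(mod);
}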

View File

@@ -1,189 +0,0 @@
#include <immintrin.h>
#include <string.h>
#include "vec_rnx_arithmetic_private.h"
void rnx_add_avx(uint64_t nn, double* res, const double* a, const double* b) {
if (nn < 8) {
if (nn == 4) {
_mm256_storeu_pd(res, _mm256_add_pd(_mm256_loadu_pd(a), _mm256_loadu_pd(b)));
} else if (nn == 2) {
_mm_storeu_pd(res, _mm_add_pd(_mm_loadu_pd(a), _mm_loadu_pd(b)));
} else if (nn == 1) {
*res = *a + *b;
} else {
NOT_SUPPORTED(); // not a power of 2
}
return;
}
// general case: nn >= 8
__m256d x0, x1, x2, x3, x4, x5;
const double* aa = a;
const double* bb = b;
double* rr = res;
double* const rrend = res + nn;
do {
x0 = _mm256_loadu_pd(aa);
x1 = _mm256_loadu_pd(aa + 4);
x2 = _mm256_loadu_pd(bb);
x3 = _mm256_loadu_pd(bb + 4);
x4 = _mm256_add_pd(x0, x2);
x5 = _mm256_add_pd(x1, x3);
_mm256_storeu_pd(rr, x4);
_mm256_storeu_pd(rr + 4, x5);
aa += 8;
bb += 8;
rr += 8;
} while (rr < rrend);
}
void rnx_sub_avx(uint64_t nn, double* res, const double* a, const double* b) {
if (nn < 8) {
if (nn == 4) {
_mm256_storeu_pd(res, _mm256_sub_pd(_mm256_loadu_pd(a), _mm256_loadu_pd(b)));
} else if (nn == 2) {
_mm_storeu_pd(res, _mm_sub_pd(_mm_loadu_pd(a), _mm_loadu_pd(b)));
} else if (nn == 1) {
*res = *a - *b;
} else {
NOT_SUPPORTED(); // not a power of 2
}
return;
}
// general case: nn >= 8
__m256d x0, x1, x2, x3, x4, x5;
const double* aa = a;
const double* bb = b;
double* rr = res;
double* const rrend = res + nn;
do {
x0 = _mm256_loadu_pd(aa);
x1 = _mm256_loadu_pd(aa + 4);
x2 = _mm256_loadu_pd(bb);
x3 = _mm256_loadu_pd(bb + 4);
x4 = _mm256_sub_pd(x0, x2);
x5 = _mm256_sub_pd(x1, x3);
_mm256_storeu_pd(rr, x4);
_mm256_storeu_pd(rr + 4, x5);
aa += 8;
bb += 8;
rr += 8;
} while (rr < rrend);
}
void rnx_negate_avx(uint64_t nn, double* res, const double* b) {
if (nn < 8) {
if (nn == 4) {
_mm256_storeu_pd(res, _mm256_sub_pd(_mm256_set1_pd(0), _mm256_loadu_pd(b)));
} else if (nn == 2) {
_mm_storeu_pd(res, _mm_sub_pd(_mm_set1_pd(0), _mm_loadu_pd(b)));
} else if (nn == 1) {
*res = -*b;
} else {
NOT_SUPPORTED(); // not a power of 2
}
return;
}
// general case: nn >= 8
__m256d x2, x3, x4, x5;
const __m256d ZERO = _mm256_set1_pd(0);
const double* bb = b;
double* rr = res;
double* const rrend = res + nn;
do {
x2 = _mm256_loadu_pd(bb);
x3 = _mm256_loadu_pd(bb + 4);
x4 = _mm256_sub_pd(ZERO, x2);
x5 = _mm256_sub_pd(ZERO, x3);
_mm256_storeu_pd(rr, x4);
_mm256_storeu_pd(rr + 4, x5);
bb += 8;
rr += 8;
} while (rr < rrend);
}
/** @brief sets res = a + b */
EXPORT void vec_rnx_add_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->n;
if (a_size < b_size) {
const uint64_t msize = res_size < a_size ? res_size : a_size;
const uint64_t nsize = res_size < b_size ? res_size : b_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_add_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
memcpy(res + i * res_sl, b + i * b_sl, nn * sizeof(double));
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
} else {
const uint64_t msize = res_size < b_size ? res_size : b_size;
const uint64_t nsize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_add_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
memcpy(res + i * res_sl, a + i * a_sl, nn * sizeof(double));
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
}
/** @brief sets res = -a */
EXPORT void vec_rnx_negate_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_negate_avx(nn, res + i * res_sl, a + i * a_sl);
}
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief sets res = a - b */
EXPORT void vec_rnx_sub_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->n;
if (a_size < b_size) {
const uint64_t msize = res_size < a_size ? res_size : a_size;
const uint64_t nsize = res_size < b_size ? res_size : b_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_sub_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
rnx_negate_avx(nn, res + i * res_sl, b + i * b_sl);
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
} else {
const uint64_t msize = res_size < b_size ? res_size : b_size;
const uint64_t nsize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
rnx_sub_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
for (uint64_t i = msize; i < nsize; ++i) {
memcpy(res + i * res_sl, a + i * a_sl, nn * sizeof(double));
}
for (uint64_t i = nsize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
}

View File

@@ -1,88 +0,0 @@
#ifndef SPQLIOS_VEC_RNX_ARITHMETIC_PLUGIN_H
#define SPQLIOS_VEC_RNX_ARITHMETIC_PLUGIN_H
#include "vec_rnx_arithmetic.h"
typedef typeof(vec_rnx_zero) VEC_RNX_ZERO_F;
typedef typeof(vec_rnx_copy) VEC_RNX_COPY_F;
typedef typeof(vec_rnx_negate) VEC_RNX_NEGATE_F;
typedef typeof(vec_rnx_add) VEC_RNX_ADD_F;
typedef typeof(vec_rnx_sub) VEC_RNX_SUB_F;
typedef typeof(vec_rnx_rotate) VEC_RNX_ROTATE_F;
typedef typeof(vec_rnx_mul_xp_minus_one) VEC_RNX_MUL_XP_MINUS_ONE_F;
typedef typeof(vec_rnx_automorphism) VEC_RNX_AUTOMORPHISM_F;
typedef typeof(vec_rnx_to_znx32) VEC_RNX_TO_ZNX32_F;
typedef typeof(vec_rnx_from_znx32) VEC_RNX_FROM_ZNX32_F;
typedef typeof(vec_rnx_to_tnx32) VEC_RNX_TO_TNX32_F;
typedef typeof(vec_rnx_from_tnx32) VEC_RNX_FROM_TNX32_F;
typedef typeof(vec_rnx_to_tnx32x2) VEC_RNX_TO_TNX32X2_F;
typedef typeof(vec_rnx_from_tnx32x2) VEC_RNX_FROM_TNX32X2_F;
typedef typeof(vec_rnx_to_tnxdbl) VEC_RNX_TO_TNXDBL_F;
// typedef typeof(vec_rnx_from_tnxdbl) VEC_RNX_FROM_TNXDBL_F;
typedef typeof(rnx_small_single_product) RNX_SMALL_SINGLE_PRODUCT_F;
typedef typeof(rnx_small_single_product_tmp_bytes) RNX_SMALL_SINGLE_PRODUCT_TMP_BYTES_F;
typedef typeof(tnxdbl_small_single_product) TNXDBL_SMALL_SINGLE_PRODUCT_F;
typedef typeof(tnxdbl_small_single_product_tmp_bytes) TNXDBL_SMALL_SINGLE_PRODUCT_TMP_BYTES_F;
typedef typeof(znx32_small_single_product) ZNX32_SMALL_SINGLE_PRODUCT_F;
typedef typeof(znx32_small_single_product_tmp_bytes) ZNX32_SMALL_SINGLE_PRODUCT_TMP_BYTES_F;
typedef typeof(tnx32_small_single_product) TNX32_SMALL_SINGLE_PRODUCT_F;
typedef typeof(tnx32_small_single_product_tmp_bytes) TNX32_SMALL_SINGLE_PRODUCT_TMP_BYTES_F;
typedef typeof(rnx_approxdecomp_from_tnx32) RNX_APPROXDECOMP_FROM_TNX32_F;
typedef typeof(rnx_approxdecomp_from_tnx32x2) RNX_APPROXDECOMP_FROM_TNX32X2_F;
typedef typeof(rnx_approxdecomp_from_tnxdbl) RNX_APPROXDECOMP_FROM_TNXDBL_F;
typedef typeof(bytes_of_rnx_svp_ppol) BYTES_OF_RNX_SVP_PPOL_F;
typedef typeof(rnx_svp_prepare) RNX_SVP_PREPARE_F;
typedef typeof(rnx_svp_apply) RNX_SVP_APPLY_F;
typedef typeof(bytes_of_rnx_vmp_pmat) BYTES_OF_RNX_VMP_PMAT_F;
typedef typeof(rnx_vmp_prepare_contiguous) RNX_VMP_PREPARE_CONTIGUOUS_F;
typedef typeof(rnx_vmp_prepare_contiguous_tmp_bytes) RNX_VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F;
typedef typeof(rnx_vmp_apply_tmp_a) RNX_VMP_APPLY_TMP_A_F;
typedef typeof(rnx_vmp_apply_tmp_a_tmp_bytes) RNX_VMP_APPLY_TMP_A_TMP_BYTES_F;
typedef typeof(rnx_vmp_apply_dft_to_dft) RNX_VMP_APPLY_DFT_TO_DFT_F;
typedef typeof(rnx_vmp_apply_dft_to_dft_tmp_bytes) RNX_VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F;
typedef typeof(vec_rnx_dft) VEC_RNX_DFT_F;
typedef typeof(vec_rnx_idft) VEC_RNX_IDFT_F;
typedef struct rnx_module_vtable_t RNX_MODULE_VTABLE;
struct rnx_module_vtable_t {
VEC_RNX_ZERO_F* vec_rnx_zero;
VEC_RNX_COPY_F* vec_rnx_copy;
VEC_RNX_NEGATE_F* vec_rnx_negate;
VEC_RNX_ADD_F* vec_rnx_add;
VEC_RNX_SUB_F* vec_rnx_sub;
VEC_RNX_ROTATE_F* vec_rnx_rotate;
VEC_RNX_MUL_XP_MINUS_ONE_F* vec_rnx_mul_xp_minus_one;
VEC_RNX_AUTOMORPHISM_F* vec_rnx_automorphism;
VEC_RNX_TO_ZNX32_F* vec_rnx_to_znx32;
VEC_RNX_FROM_ZNX32_F* vec_rnx_from_znx32;
VEC_RNX_TO_TNX32_F* vec_rnx_to_tnx32;
VEC_RNX_FROM_TNX32_F* vec_rnx_from_tnx32;
VEC_RNX_TO_TNX32X2_F* vec_rnx_to_tnx32x2;
VEC_RNX_FROM_TNX32X2_F* vec_rnx_from_tnx32x2;
VEC_RNX_TO_TNXDBL_F* vec_rnx_to_tnxdbl;
RNX_SMALL_SINGLE_PRODUCT_F* rnx_small_single_product;
RNX_SMALL_SINGLE_PRODUCT_TMP_BYTES_F* rnx_small_single_product_tmp_bytes;
TNXDBL_SMALL_SINGLE_PRODUCT_F* tnxdbl_small_single_product;
TNXDBL_SMALL_SINGLE_PRODUCT_TMP_BYTES_F* tnxdbl_small_single_product_tmp_bytes;
ZNX32_SMALL_SINGLE_PRODUCT_F* znx32_small_single_product;
ZNX32_SMALL_SINGLE_PRODUCT_TMP_BYTES_F* znx32_small_single_product_tmp_bytes;
TNX32_SMALL_SINGLE_PRODUCT_F* tnx32_small_single_product;
TNX32_SMALL_SINGLE_PRODUCT_TMP_BYTES_F* tnx32_small_single_product_tmp_bytes;
RNX_APPROXDECOMP_FROM_TNX32_F* rnx_approxdecomp_from_tnx32;
RNX_APPROXDECOMP_FROM_TNX32X2_F* rnx_approxdecomp_from_tnx32x2;
RNX_APPROXDECOMP_FROM_TNXDBL_F* rnx_approxdecomp_from_tnxdbl;
BYTES_OF_RNX_SVP_PPOL_F* bytes_of_rnx_svp_ppol;
RNX_SVP_PREPARE_F* rnx_svp_prepare;
RNX_SVP_APPLY_F* rnx_svp_apply;
BYTES_OF_RNX_VMP_PMAT_F* bytes_of_rnx_vmp_pmat;
RNX_VMP_PREPARE_CONTIGUOUS_F* rnx_vmp_prepare_contiguous;
RNX_VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F* rnx_vmp_prepare_contiguous_tmp_bytes;
RNX_VMP_APPLY_TMP_A_F* rnx_vmp_apply_tmp_a;
RNX_VMP_APPLY_TMP_A_TMP_BYTES_F* rnx_vmp_apply_tmp_a_tmp_bytes;
RNX_VMP_APPLY_DFT_TO_DFT_F* rnx_vmp_apply_dft_to_dft;
RNX_VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F* rnx_vmp_apply_dft_to_dft_tmp_bytes;
VEC_RNX_DFT_F* vec_rnx_dft;
VEC_RNX_IDFT_F* vec_rnx_idft;
};
#endif // SPQLIOS_VEC_RNX_ARITHMETIC_PLUGIN_H
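/* --- editor's note (not part of the original file) -------------------------
 * The typeof-typedefs above derive each vtable slot's type directly from
 * the public prototype, so slots cannot drift out of sync with the API.
 * Minimal sketch of the same pattern (GNU C / C23 typeof):
 *
 *   void f(uint64_t n, double* x);                 // public prototype
 *   typedef typeof(f) F_F;                         // function type
 *   struct vt { F_F* f; };                         // slot = pointer to it
 *   void f(uint64_t n, double* x) { tbl.f(n, x); } // wrapper dispatches
 */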

View File

@@ -1,284 +0,0 @@
#ifndef SPQLIOS_VEC_RNX_ARITHMETIC_PRIVATE_H
#define SPQLIOS_VEC_RNX_ARITHMETIC_PRIVATE_H
#include "../commons_private.h"
#include "../reim/reim_fft.h"
#include "vec_rnx_arithmetic.h"
#include "vec_rnx_arithmetic_plugin.h"
typedef struct fft64_rnx_module_precomp_t FFT64_RNX_MODULE_PRECOMP;
struct fft64_rnx_module_precomp_t {
REIM_FFT_PRECOMP* p_fft;
REIM_IFFT_PRECOMP* p_ifft;
REIM_FFTVEC_MUL_PRECOMP* p_fftvec_mul;
REIM_FFTVEC_ADDMUL_PRECOMP* p_fftvec_addmul;
};
typedef union rnx_module_precomp_t RNX_MODULE_PRECOMP;
union rnx_module_precomp_t {
FFT64_RNX_MODULE_PRECOMP fft64;
};
void fft64_init_rnx_module_precomp(MOD_RNX* module);
void fft64_finalize_rnx_module_precomp(MOD_RNX* module);
/** @brief opaque structure that describes the modules (RnX,ZnX,TnX) and the hardware */
struct rnx_module_info_t {
uint64_t n;
uint64_t m;
RNX_MODULE_TYPE mtype;
RNX_MODULE_VTABLE vtable;
RNX_MODULE_PRECOMP precomp;
void* custom;
void (*custom_deleter)(void*);
};
void init_rnx_module_info(MOD_RNX* module, //
uint64_t n, RNX_MODULE_TYPE mtype);
void finalize_rnx_module_info(MOD_RNX* module);
void fft64_init_rnx_module_vtable(MOD_RNX* module);
///////////////////////////////////////////////////////////////////
// prepared gadget decompositions (optimized) //
///////////////////////////////////////////////////////////////////
struct tnx32_approxdecomp_gadget_t {
uint64_t k;
uint64_t ell;
int32_t add_cst; // 1/2.(sum 2^-(i+1)K)
int32_t rshift_base; // 32 - K
int64_t and_mask; // 2^K-1
int64_t or_mask; // double(2^52)
double sub_cst; // double(2^52 + 2^(K-1))
uint8_t rshifts[8]; // 32 - (i+1).K
};
struct tnx32x2_approxdecomp_gadget_t {
// TODO
};
struct tnxdbl_approxdecomp_gadget_t {
uint64_t k;
uint64_t ell;
double add_cst; // double(3.2^(51-ell.K) + 1/2.(sum 2^(-iK)) for i=[0,ell[)
uint64_t and_mask; // uint64(2^(K)-1)
uint64_t or_mask; // double(2^52)
double sub_cst; // double(2^52 + 2^(K-1))
};
EXPORT void vec_rnx_add_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_rnx_add_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = 0 */
EXPORT void vec_rnx_zero_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl // res
);
/** @brief sets res = a */
EXPORT void vec_rnx_copy_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = -a */
EXPORT void vec_rnx_negate_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = -a */
EXPORT void vec_rnx_negate_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a - b */
EXPORT void vec_rnx_sub_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a - b */
EXPORT void vec_rnx_sub_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl, // a
const double* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a . X^p */
EXPORT void vec_rnx_rotate_ref( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a(X^p) */
EXPORT void vec_rnx_automorphism_ref( //
const MOD_RNX* module, // N
int64_t p, // X -> X^p
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief number of bytes in a RNX_VMP_PMAT (for manual allocation) */
EXPORT uint64_t fft64_bytes_of_rnx_vmp_pmat(const MOD_RNX* module, // N
uint64_t nrows, uint64_t ncols);
EXPORT void fft64_rnx_vmp_apply_dft_to_dft_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl, // a
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
);
EXPORT void fft64_rnx_vmp_apply_dft_to_dft_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl, // a
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
);
EXPORT uint64_t fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
EXPORT uint64_t fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_avx( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
EXPORT void fft64_rnx_vmp_prepare_contiguous_ref( //
const MOD_RNX* module, // N
RNX_VMP_PMAT* pmat, // output
const double* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
EXPORT void fft64_rnx_vmp_prepare_contiguous_avx( //
const MOD_RNX* module, // N
RNX_VMP_PMAT* pmat, // output
const double* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
EXPORT uint64_t fft64_rnx_vmp_prepare_contiguous_tmp_bytes_ref(const MOD_RNX* module);
EXPORT uint64_t fft64_rnx_vmp_prepare_contiguous_tmp_bytes_avx(const MOD_RNX* module);
EXPORT void fft64_rnx_vmp_apply_tmp_a_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res (addr must be != a)
double* tmpa, uint64_t a_size, uint64_t a_sl, // a (will be overwritten)
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
);
EXPORT void fft64_rnx_vmp_apply_tmp_a_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res (addr must be != a)
double* tmpa, uint64_t a_size, uint64_t a_sl, // a (will be overwritten)
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
);
EXPORT uint64_t fft64_rnx_vmp_apply_tmp_a_tmp_bytes_ref( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
EXPORT uint64_t fft64_rnx_vmp_apply_tmp_a_tmp_bytes_avx( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
/// gadget decompositions
/** @brief sets res = gadget_decompose(a) */
EXPORT void rnx_approxdecomp_from_tnxdbl_ref( //
const MOD_RNX* module, // N
const TNXDBL_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a); // a
EXPORT void rnx_approxdecomp_from_tnxdbl_avx( //
const MOD_RNX* module, // N
const TNXDBL_APPROXDECOMP_GADGET* gadget, // output base 2^K
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a); // a
EXPORT void vec_rnx_mul_xp_minus_one_ref( //
const MOD_RNX* module, // N
const int64_t p, // rotation value
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_to_znx32_ref( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_from_znx32_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_to_tnx32_ref( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_from_tnx32_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_rnx_to_tnxdbl_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT uint64_t fft64_bytes_of_rnx_svp_ppol(const MOD_RNX* module); // N
/** @brief prepares a svp polynomial */
EXPORT void fft64_rnx_svp_prepare_ref(const MOD_RNX* module, // N
RNX_SVP_PPOL* ppol, // output
const double* pol // a
);
/** @brief apply a svp product, result = ppol * a, presented in DFT space */
EXPORT void fft64_rnx_svp_apply_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // output
const RNX_SVP_PPOL* ppol, // prepared pol
const double* a, uint64_t a_size, uint64_t a_sl // a
);
#endif // SPQLIOS_VEC_RNX_ARITHMETIC_PRIVATE_H

View File

@@ -1,91 +0,0 @@
#include <memory.h>
#include "vec_rnx_arithmetic_private.h"
#include "zn_arithmetic_private.h"
EXPORT void vec_rnx_to_znx32_ref( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
dbl_round_to_i32_ref(NULL, res + i * res_sl, nn, a + i * a_sl, nn);
}
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(int32_t));
}
}
EXPORT void vec_rnx_from_znx32_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
i32_to_dbl_ref(NULL, res + i * res_sl, nn, a + i * a_sl, nn);
}
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
EXPORT void vec_rnx_to_tnx32_ref( //
const MOD_RNX* module, // N
int32_t* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
dbl_to_tn32_ref(NULL, res + i * res_sl, nn, a + i * a_sl, nn);
}
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(int32_t));
}
}
EXPORT void vec_rnx_from_tnx32_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const int32_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
tn32_to_dbl_ref(NULL, res + i * res_sl, nn, a + i * a_sl, nn);
}
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
static void dbl_to_tndbl_ref( //
const void* UNUSED, // N
double* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
) {
static const double OFF_CST = INT64_C(3) << 51;
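// OFF_CST = 3*2^51 = 2^52 + 2^51: for |x| < 2^51, the double x + OFF_CST lies
// in [2^52, 2^53) where the ulp is 1, so (x + OFF_CST) - OFF_CST == round(x).
// The subtraction below therefore keeps the centered fractional part of a[i]
// in [-0.5, 0.5], i.e. its representative modulo 1.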
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
double ai = a[i] + OFF_CST;
res[i] = a[i] - (ai - OFF_CST);
}
memset(res + msize, 0, (res_size - msize) * sizeof(double));
}
EXPORT void vec_rnx_to_tnxdbl_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
const uint64_t msize = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < msize; ++i) {
dbl_to_tndbl_ref(NULL, res + i * res_sl, nn, a + i * a_sl, nn);
}
for (uint64_t i = msize; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
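/* Illustrative sketch (not part of the original file): every vec_rnx function
 * views a vector as `size` limbs of nn = module->n coefficients, limb i
 * starting at ptr + i*sl. With a contiguous layout (sl == nn), a conversion
 * call looks like:
 *
 *   vec_rnx_to_znx32_ref(module, out, ell, nn, in, ell, nn);
 *
 * When res_size > a_size, the surplus limbs are zeroed, as in the loops above.
 */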

View File

@@ -1,47 +0,0 @@
#include <string.h>
#include "../coeffs/coeffs_arithmetic.h"
#include "vec_rnx_arithmetic_private.h"
EXPORT uint64_t fft64_bytes_of_rnx_svp_ppol(const MOD_RNX* module) { return module->n * sizeof(double); }
EXPORT RNX_SVP_PPOL* new_rnx_svp_ppol(const MOD_RNX* module) { return spqlios_alloc(bytes_of_rnx_svp_ppol(module)); }
EXPORT void delete_rnx_svp_ppol(RNX_SVP_PPOL* ppol) { spqlios_free(ppol); }
/** @brief prepares a svp polynomial */
EXPORT void fft64_rnx_svp_prepare_ref(const MOD_RNX* module, // N
RNX_SVP_PPOL* ppol, // output
const double* pol // a
) {
double* const dppol = (double*)ppol;
rnx_divide_by_m_ref(module->n, module->m, dppol, pol);
reim_fft(module->precomp.fft64.p_fft, dppol);
}
EXPORT void fft64_rnx_svp_apply_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // output
const RNX_SVP_PPOL* ppol, // prepared pol
const double* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->n;
double* const dppol = (double*)ppol;
const uint64_t auto_end_idx = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < auto_end_idx; ++i) {
const double* a_ptr = a + i * a_sl;
double* const res_ptr = res + i * res_sl;
// copy the polynomial to res, apply fft in place, call fftvec_mul, apply ifft in place.
memcpy(res_ptr, a_ptr, nn * sizeof(double));
reim_fft(module->precomp.fft64.p_fft, (double*)res_ptr);
reim_fftvec_mul(module->precomp.fft64.p_fftvec_mul, res_ptr, res_ptr, dppol);
reim_ifft(module->precomp.fft64.p_ifft, res_ptr);
}
// then extend with zeros
for (uint64_t i = auto_end_idx; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
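/* Illustrative sketch (not part of the original file): typical use of the svp
 * pair, assuming `module`, `pol`, `a` and `res` are already allocated with
 * contiguous limbs of nn = module->n coefficients. prepare stores
 * reim_fft(pol / m), which presumably compensates the scaling of the
 * unnormalized ifft performed inside apply:
 *
 *   RNX_SVP_PPOL* ppol = new_rnx_svp_ppol(module);
 *   fft64_rnx_svp_prepare_ref(module, ppol, pol);
 *   fft64_rnx_svp_apply_ref(module, res, ell, nn, ppol, a, ell, nn);
 *   delete_rnx_svp_ppol(ppol);
 */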

View File

@@ -1,196 +0,0 @@
#include <assert.h>
#include <immintrin.h>
#include <string.h>
#include "../coeffs/coeffs_arithmetic.h"
#include "../reim/reim_fft.h"
#include "../reim4/reim4_arithmetic.h"
#include "vec_rnx_arithmetic_private.h"
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_rnx_vmp_prepare_contiguous_avx( //
const MOD_RNX* module, // N
RNX_VMP_PMAT* pmat, // output
const double* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
) {
// there is an edge case if nn < 8
const uint64_t nn = module->n;
const uint64_t m = module->m;
double* const dtmp = (double*)tmp_space;
double* const output_mat = (double*)pmat;
double* start_addr = (double*)pmat;
uint64_t offset = nrows * ncols * 8;
if (nn >= 8) {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
rnx_divide_by_m_avx(nn, m, dtmp, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->precomp.fft64.p_fft, dtmp);
if (col_i == (ncols - 1) && (ncols % 2 == 1)) {
// special case: last column out of an odd column number
start_addr = output_mat + col_i * nrows * 8 // col == ncols-1
+ row_i * 8;
} else {
// general case: columns go by pair
start_addr = output_mat + (col_i / 2) * (2 * nrows) * 8 // second: col pair index
+ row_i * 2 * 8 // third: row index
+ (col_i % 2) * 8;
}
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
// extract blk from tmp and save it
reim4_extract_1blk_from_reim_avx(m, blk_i, start_addr + blk_i * offset, dtmp);
}
}
}
} else {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
double* res = output_mat + (col_i * nrows + row_i) * nn;
rnx_divide_by_m_avx(nn, m, res, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->precomp.fft64.p_fft, res);
}
}
}
}
/** @brief applies a vmp product res = a x pmat, with a and res in DFT space */
EXPORT void fft64_rnx_vmp_apply_dft_to_dft_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl, // a
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
) {
const uint64_t m = module->m;
const uint64_t nn = module->n;
double* mat2cols_output = (double*)tmp_space; // 128 bytes
double* extracted_blk = (double*)tmp_space + 16; // 64*min(nrows,a_size) bytes
double* mat_input = (double*)pmat;
const uint64_t row_max = nrows < a_size ? nrows : a_size;
const uint64_t col_max = ncols < res_size ? ncols : res_size;
if (row_max > 0 && col_max > 0) {
if (nn >= 8) {
// let's do some prefetching of the GSW key, since on some cpus,
// it helps
const uint64_t ms4 = m >> 2; // m/4
const uint64_t gsw_iter_doubles = 8 * nrows * ncols;
const uint64_t pref_doubles = 1200;
const double* gsw_pref_ptr = mat_input;
const double* const gsw_ptr_end = mat_input + ms4 * gsw_iter_doubles;
const double* gsw_pref_ptr_target = mat_input + pref_doubles;
for (; gsw_pref_ptr < gsw_pref_ptr_target; gsw_pref_ptr += 8) {
__builtin_prefetch(gsw_pref_ptr, 0, _MM_HINT_T0);
}
const double* mat_blk_start;
uint64_t blk_i;
for (blk_i = 0, mat_blk_start = mat_input; blk_i < ms4; blk_i++, mat_blk_start += gsw_iter_doubles) {
// prefetch the next iteration
if (gsw_pref_ptr_target < gsw_ptr_end) {
gsw_pref_ptr_target += gsw_iter_doubles;
if (gsw_pref_ptr_target > gsw_ptr_end) gsw_pref_ptr_target = gsw_ptr_end;
for (; gsw_pref_ptr < gsw_pref_ptr_target; gsw_pref_ptr += 8) {
__builtin_prefetch(gsw_pref_ptr, 0, _MM_HINT_T0);
}
}
reim4_extract_1blk_from_contiguous_reim_sl_avx(m, a_sl, row_max, blk_i, extracted_blk, a_dft);
// apply mat2cols
for (uint64_t col_i = 0; col_i < col_max - 1; col_i += 2) {
uint64_t col_offset = col_i * (8 * nrows);
reim4_vec_mat2cols_product_avx2(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
reim4_save_1blk_to_reim_avx(m, blk_i, res + col_i * res_sl, mat2cols_output);
reim4_save_1blk_to_reim_avx(m, blk_i, res + (col_i + 1) * res_sl, mat2cols_output + 8);
}
// check if col_max is odd, then special case
if (col_max % 2 == 1) {
uint64_t last_col = col_max - 1;
uint64_t col_offset = last_col * (8 * nrows);
// the last column is alone in the pmat: vec_mat1col
if (ncols == col_max) {
reim4_vec_mat1col_product_avx2(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
} else {
// the last column is part of a colpair in the pmat: vec_mat2cols and ignore the second position
reim4_vec_mat2cols_product_avx2(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
}
reim4_save_1blk_to_reim_avx(m, blk_i, res + last_col * res_sl, mat2cols_output);
}
}
} else {
const double* in;
uint64_t in_sl;
if (res == a_dft) {
// it is in place: copy the input vector
in = (double*)tmp_space;
in_sl = nn;
// vec_rnx_copy(module, (double*)tmp_space, row_max, nn, a_dft, row_max, a_sl);
for (uint64_t row_i = 0; row_i < row_max; row_i++) {
memcpy((double*)tmp_space + row_i * nn, a_dft + row_i * a_sl, nn * sizeof(double));
}
} else {
// it is out of place: do the product directly
in = a_dft;
in_sl = a_sl;
}
for (uint64_t col_i = 0; col_i < col_max; col_i++) {
double* pmat_col = mat_input + col_i * nrows * nn;
{
reim_fftvec_mul(module->precomp.fft64.p_fftvec_mul, //
res + col_i * res_sl, //
in, //
pmat_col);
}
for (uint64_t row_i = 1; row_i < row_max; row_i++) {
reim_fftvec_addmul(module->precomp.fft64.p_fftvec_addmul, //
res + col_i * res_sl, //
in + row_i * in_sl, //
pmat_col + row_i * nn);
}
}
}
}
// zero out remaining bytes (if any)
for (uint64_t i = col_max; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief applies a vmp product res = a x pmat */
EXPORT void fft64_rnx_vmp_apply_tmp_a_avx( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res (addr must be != a)
double* tmpa, uint64_t a_size, uint64_t a_sl, // a (will be overwritten)
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
) {
const uint64_t nn = module->n;
const uint64_t rows = nrows < a_size ? nrows : a_size;
const uint64_t cols = ncols < res_size ? ncols : res_size;
// fft is done in place on the input (tmpa is destroyed)
for (uint64_t i = 0; i < rows; ++i) {
reim_fft(module->precomp.fft64.p_fft, tmpa + i * a_sl);
}
fft64_rnx_vmp_apply_dft_to_dft_avx(module, //
res, cols, res_sl, //
tmpa, rows, a_sl, //
pmat, nrows, ncols, //
tmp_space);
// ifft is done in place on the output
for (uint64_t i = 0; i < cols; ++i) {
reim_ifft(module->precomp.fft64.p_ifft, res + i * res_sl);
}
// zero out the remaining positions
for (uint64_t i = cols; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}

View File

@@ -1,251 +0,0 @@
#include <assert.h>
#include <string.h>
#include "../coeffs/coeffs_arithmetic.h"
#include "../reim/reim_fft.h"
#include "../reim4/reim4_arithmetic.h"
#include "vec_rnx_arithmetic_private.h"
/** @brief number of bytes in a RNX_VMP_PMAT (for manual allocation) */
EXPORT uint64_t fft64_bytes_of_rnx_vmp_pmat(const MOD_RNX* module, // N
uint64_t nrows, uint64_t ncols) { // dimensions
return nrows * ncols * module->n * sizeof(double);
}
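// For example, with N = 2048 and a 4x2 matrix the prepared matrix occupies
// 4 * 2 * 2048 * sizeof(double) = 131072 bytes: one double per coefficient,
// whatever block ordering the preparation below chooses.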
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_rnx_vmp_prepare_contiguous_ref( //
const MOD_RNX* module, // N
RNX_VMP_PMAT* pmat, // output
const double* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
) {
// there is an edge case if nn < 8
const uint64_t nn = module->n;
const uint64_t m = module->m;
double* const dtmp = (double*)tmp_space;
double* const output_mat = (double*)pmat;
double* start_addr = (double*)pmat;
uint64_t offset = nrows * ncols * 8;
if (nn >= 8) {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
rnx_divide_by_m_ref(nn, m, dtmp, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->precomp.fft64.p_fft, dtmp);
if (col_i == (ncols - 1) && (ncols % 2 == 1)) {
// special case: last column out of an odd column number
start_addr = output_mat + col_i * nrows * 8 // col == ncols-1
+ row_i * 8;
} else {
// general case: columns go by pair
start_addr = output_mat + (col_i / 2) * (2 * nrows) * 8 // second: col pair index
+ row_i * 2 * 8 // third: row index
+ (col_i % 2) * 8;
}
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
// extract blk from tmp and save it
reim4_extract_1blk_from_reim_ref(m, blk_i, start_addr + blk_i * offset, dtmp);
}
}
}
} else {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
double* res = output_mat + (col_i * nrows + row_i) * nn;
rnx_divide_by_m_ref(nn, m, res, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->precomp.fft64.p_fft, res);
}
}
}
}
/** @brief number of scratch bytes necessary to prepare a matrix */
EXPORT uint64_t fft64_rnx_vmp_prepare_contiguous_tmp_bytes_ref(const MOD_RNX* module) {
const uint64_t nn = module->n;
return nn * sizeof(int64_t);
}
/** @brief applies a vmp product res = a x pmat, with a and res in DFT space */
EXPORT void fft64_rnx_vmp_apply_dft_to_dft_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res
const double* a_dft, uint64_t a_size, uint64_t a_sl, // a
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
) {
const uint64_t m = module->m;
const uint64_t nn = module->n;
double* mat2cols_output = (double*)tmp_space; // 128 bytes
double* extracted_blk = (double*)tmp_space + 16; // 64*min(nrows,a_size) bytes
double* mat_input = (double*)pmat;
const uint64_t row_max = nrows < a_size ? nrows : a_size;
const uint64_t col_max = ncols < res_size ? ncols : res_size;
if (row_max > 0 && col_max > 0) {
if (nn >= 8) {
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
double* mat_blk_start = mat_input + blk_i * (8 * nrows * ncols);
reim4_extract_1blk_from_contiguous_reim_sl_ref(m, a_sl, row_max, blk_i, extracted_blk, a_dft);
// apply mat2cols
for (uint64_t col_i = 0; col_i < col_max - 1; col_i += 2) {
uint64_t col_offset = col_i * (8 * nrows);
reim4_vec_mat2cols_product_ref(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
reim4_save_1blk_to_reim_ref(m, blk_i, res + col_i * res_sl, mat2cols_output);
reim4_save_1blk_to_reim_ref(m, blk_i, res + (col_i + 1) * res_sl, mat2cols_output + 8);
}
// check if col_max is odd, then special case
if (col_max % 2 == 1) {
uint64_t last_col = col_max - 1;
uint64_t col_offset = last_col * (8 * nrows);
// the last column is alone in the pmat: vec_mat1col
if (ncols == col_max) {
reim4_vec_mat1col_product_ref(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
} else {
// the last column is part of a colpair in the pmat: vec_mat2cols and ignore the second position
reim4_vec_mat2cols_product_ref(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
}
reim4_save_1blk_to_reim_ref(m, blk_i, res + last_col * res_sl, mat2cols_output);
}
}
} else {
const double* in;
uint64_t in_sl;
if (res == a_dft) {
// it is in place: copy the input vector
in = (double*)tmp_space;
in_sl = nn;
// vec_rnx_copy(module, (double*)tmp_space, row_max, nn, a_dft, row_max, a_sl);
for (uint64_t row_i = 0; row_i < row_max; row_i++) {
memcpy((double*)tmp_space + row_i * nn, a_dft + row_i * a_sl, nn * sizeof(double));
}
} else {
// it is out of place: do the product directly
in = a_dft;
in_sl = a_sl;
}
for (uint64_t col_i = 0; col_i < col_max; col_i++) {
double* pmat_col = mat_input + col_i * nrows * nn;
{
reim_fftvec_mul(module->precomp.fft64.p_fftvec_mul, //
res + col_i * res_sl, //
in, //
pmat_col);
}
for (uint64_t row_i = 1; row_i < row_max; row_i++) {
reim_fftvec_addmul(module->precomp.fft64.p_fftvec_addmul, //
res + col_i * res_sl, //
in + row_i * in_sl, //
pmat_col + row_i * nn);
}
}
}
}
// zero out remaining bytes (if any)
for (uint64_t i = col_max; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
/** @brief applies a vmp product res = a x pmat */
EXPORT void fft64_rnx_vmp_apply_tmp_a_ref( //
const MOD_RNX* module, // N
double* res, uint64_t res_size, uint64_t res_sl, // res (addr must be != a)
double* tmpa, uint64_t a_size, uint64_t a_sl, // a (will be overwritten)
const RNX_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
) {
const uint64_t nn = module->n;
const uint64_t rows = nrows < a_size ? nrows : a_size;
const uint64_t cols = ncols < res_size ? ncols : res_size;
// fft is done in place on the input (tmpa is destroyed)
for (uint64_t i = 0; i < rows; ++i) {
reim_fft(module->precomp.fft64.p_fft, tmpa + i * a_sl);
}
fft64_rnx_vmp_apply_dft_to_dft_ref(module, //
res, cols, res_sl, //
tmpa, rows, a_sl, //
pmat, nrows, ncols, //
tmp_space);
// ifft is done in place on the output
for (uint64_t i = 0; i < cols; ++i) {
reim_ifft(module->precomp.fft64.p_ifft, res + i * res_sl);
}
// zero out the remaining positions
for (uint64_t i = cols; i < res_size; ++i) {
memset(res + i * res_sl, 0, nn * sizeof(double));
}
}
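/* Illustrative sketch (not part of the original file): a complete vmp product
 * with the ref implementation, assuming contiguous limbs (stride nn) and a
 * plain malloc for scratch (an aligned allocator may be preferable):
 *
 *   uint64_t pb = fft64_rnx_vmp_prepare_contiguous_tmp_bytes_ref(module);
 *   uint64_t ab = fft64_rnx_vmp_apply_tmp_a_tmp_bytes_ref(module, ell_out, ell_in, nrows, ncols);
 *   uint8_t* tmp = malloc(pb > ab ? pb : ab);
 *   fft64_rnx_vmp_prepare_contiguous_ref(module, pmat, mat, nrows, ncols, tmp);
 *   fft64_rnx_vmp_apply_tmp_a_ref(module, res, ell_out, nn, tmpa, ell_in, nn, pmat, nrows, ncols, tmp);
 *   free(tmp);
 */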
/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
const uint64_t row_max = nrows < a_size ? nrows : a_size;
return (128) + (64 * row_max);
}
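// e.g. nrows = 4 and a_size >= 4 give row_max = 4, so the function returns
// 128 + 64*4 = 384 bytes: 128 bytes for the mat2cols accumulator plus one
// 64-byte reim4 block per usable input row.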
#ifdef __APPLE__
EXPORT uint64_t fft64_rnx_vmp_apply_tmp_a_tmp_bytes_ref( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
return fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref(module, res_size, a_size, nrows, ncols);
}
#else
EXPORT uint64_t fft64_rnx_vmp_apply_tmp_a_tmp_bytes_ref( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) __attribute((alias("fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref")));
#endif
// avx aliases that need to be defined in the same .c file
/** @brief number of scratch bytes necessary to prepare a matrix */
#ifdef __APPLE__
#pragma weak fft64_rnx_vmp_prepare_contiguous_tmp_bytes_avx = fft64_rnx_vmp_prepare_contiguous_tmp_bytes_ref
#else
EXPORT uint64_t fft64_rnx_vmp_prepare_contiguous_tmp_bytes_avx(const MOD_RNX* module)
__attribute((alias("fft64_rnx_vmp_prepare_contiguous_tmp_bytes_ref")));
#endif
/** @brief minimal size of the tmp_space */
#ifdef __APPLE__
#pragma weak fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_avx = fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref
#else
EXPORT uint64_t fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_avx( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) __attribute((alias("fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref")));
#endif
#ifdef __APPLE__
#pragma weak fft64_rnx_vmp_apply_tmp_a_tmp_bytes_avx = fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref
#else
EXPORT uint64_t fft64_rnx_vmp_apply_tmp_a_tmp_bytes_avx( //
const MOD_RNX* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) __attribute((alias("fft64_rnx_vmp_apply_dft_to_dft_tmp_bytes_ref")));
#endif
// wrappers

View File

@@ -1,333 +0,0 @@
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "../coeffs/coeffs_arithmetic.h"
#include "../q120/q120_arithmetic.h"
#include "../q120/q120_ntt.h"
#include "../reim/reim_fft_internal.h"
#include "../reim4/reim4_arithmetic.h"
#include "vec_znx_arithmetic.h"
#include "vec_znx_arithmetic_private.h"
// general function (virtual dispatch)
EXPORT void vec_znx_add(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
module->func.vec_znx_add(module, // N
res, res_size, res_sl, // res
a, a_size, a_sl, // a
b, b_size, b_sl // b
);
}
EXPORT void vec_znx_sub(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
module->func.vec_znx_sub(module, // N
res, res_size, res_sl, // res
a, a_size, a_sl, // a
b, b_size, b_sl // b
);
}
EXPORT void vec_znx_rotate(const MODULE* module, // N
const int64_t p, // rotation value
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
module->func.vec_znx_rotate(module, // N
p, // p
res, res_size, res_sl, // res
a, a_size, a_sl // a
);
}
EXPORT void vec_znx_automorphism(const MODULE* module, // N
const int64_t p, // X->X^p
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
module->func.vec_znx_automorphism(module, // N
p, // p
res, res_size, res_sl, // res
a, a_size, a_sl // a
);
}
EXPORT void vec_znx_normalize_base2k(const MODULE* module, // N
uint64_t log2_base2k, // output base 2^K
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
uint8_t* tmp_space // scratch space of size >= N
) {
module->func.vec_znx_normalize_base2k(module, // N
log2_base2k, // log2_base2k
res, res_size, res_sl, // res
a, a_size, a_sl, // a
tmp_space);
}
EXPORT uint64_t vec_znx_normalize_base2k_tmp_bytes(const MODULE* module // N
) {
return module->func.vec_znx_normalize_base2k_tmp_bytes(module // N
);
}
// specialized function (ref)
EXPORT void vec_znx_add_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->nn;
if (a_size <= b_size) {
const uint64_t sum_idx = res_size < a_size ? res_size : a_size;
const uint64_t copy_idx = res_size < b_size ? res_size : b_size;
// add up to the smallest dimension
for (uint64_t i = 0; i < sum_idx; ++i) {
znx_add_i64_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then copy to the largest dimension
for (uint64_t i = sum_idx; i < copy_idx; ++i) {
znx_copy_i64_ref(nn, res + i * res_sl, b + i * b_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
} else {
const uint64_t sum_idx = res_size < b_size ? res_size : b_size;
const uint64_t copy_idx = res_size < a_size ? res_size : a_size;
// add up to the smallest dimension
for (uint64_t i = 0; i < sum_idx; ++i) {
znx_add_i64_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then copy to the largest dimension
for (uint64_t i = sum_idx; i < copy_idx; ++i) {
znx_copy_i64_ref(nn, res + i * res_sl, a + i * a_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
}
EXPORT void vec_znx_sub_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->nn;
if (a_size <= b_size) {
const uint64_t sub_idx = res_size < a_size ? res_size : a_size;
const uint64_t copy_idx = res_size < b_size ? res_size : b_size;
// subtract up to the smallest dimension
for (uint64_t i = 0; i < sub_idx; ++i) {
znx_sub_i64_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then negate to the largest dimension
for (uint64_t i = sub_idx; i < copy_idx; ++i) {
znx_negate_i64_ref(nn, res + i * res_sl, b + i * b_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
} else {
const uint64_t sub_idx = res_size < b_size ? res_size : b_size;
const uint64_t copy_idx = res_size < a_size ? res_size : a_size;
// subtract up to the smallest dimension
for (uint64_t i = 0; i < sub_idx; ++i) {
znx_sub_i64_ref(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then copy to the largest dimension
for (uint64_t i = sub_idx; i < copy_idx; ++i) {
znx_copy_i64_ref(nn, res + i * res_sl, a + i * a_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
}
EXPORT void vec_znx_rotate_ref(const MODULE* module, // N
const int64_t p, // rotation value
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->nn;
const uint64_t rot_end_idx = res_size < a_size ? res_size : a_size;
// rotate up to the smallest dimension
for (uint64_t i = 0; i < rot_end_idx; ++i) {
int64_t* res_ptr = res + i * res_sl;
const int64_t* a_ptr = a + i * a_sl;
if (res_ptr == a_ptr) {
znx_rotate_inplace_i64(nn, p, res_ptr);
} else {
znx_rotate_i64(nn, p, res_ptr, a_ptr);
}
}
// then extend with zeros
for (uint64_t i = rot_end_idx; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
EXPORT void vec_znx_automorphism_ref(const MODULE* module, // N
const int64_t p, // X->X^p
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->nn;
const uint64_t auto_end_idx = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < auto_end_idx; ++i) {
int64_t* res_ptr = res + i * res_sl;
const int64_t* a_ptr = a + i * a_sl;
if (res_ptr == a_ptr) {
znx_automorphism_inplace_i64(nn, p, res_ptr);
} else {
znx_automorphism_i64(nn, p, res_ptr, a_ptr);
}
}
// then extend with zeros
for (uint64_t i = auto_end_idx; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
EXPORT void vec_znx_normalize_base2k_ref(const MODULE* module, // N
uint64_t log2_base2k, // output base 2^K
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
uint8_t* tmp_space // scratch space of size >= N
) {
const uint64_t nn = module->nn;
// carries are propagated through a scratch limb stored in tmp_space
int64_t* cout = (int64_t*)tmp_space;
int64_t* cin = 0x0;
// propagate carry until first limb of res
int64_t i = a_size - 1;
for (; i >= (int64_t)res_size; --i) {  // cast keeps the comparison signed
znx_normalize(nn, log2_base2k, 0x0, cout, a + i * a_sl, cin);
cin = cout;
}
// propagate carry and normalize
for (; i >= 1; --i) {
znx_normalize(nn, log2_base2k, res + i * res_sl, cout, a + i * a_sl, cin);
cin = cout;
}
// normalize last limb
znx_normalize(nn, log2_base2k, res, 0x0, a, cin);
// extend result with zeros
for (uint64_t i = a_size; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
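// Worked example (assuming the centered digit range [-2^(K-1), 2^(K-1))
// produced by znx_normalize): with log2_base2k = 10, a limb value 1537 becomes
// the digit -511 with carry 2, since 1537 = 2*1024 - 511; the carry (cout) is
// folded into the next more significant limb on the following iteration.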
EXPORT uint64_t vec_znx_normalize_base2k_tmp_bytes_ref(const MODULE* module // N
) {
const uint64_t nn = module->nn;
return nn * sizeof(int64_t);
}
// alias have to be defined in this unit: do not move
#ifdef __APPLE__
EXPORT uint64_t fft64_vec_znx_big_range_normalize_base2k_tmp_bytes( //
const MODULE* module // N
) {
return vec_znx_normalize_base2k_tmp_bytes_ref(module);
}
EXPORT uint64_t fft64_vec_znx_big_normalize_base2k_tmp_bytes( //
const MODULE* module // N
) {
return vec_znx_normalize_base2k_tmp_bytes_ref(module);
}
#else
EXPORT uint64_t fft64_vec_znx_big_normalize_base2k_tmp_bytes( //
const MODULE* module // N
) __attribute((alias("vec_znx_normalize_base2k_tmp_bytes_ref")));
EXPORT uint64_t fft64_vec_znx_big_range_normalize_base2k_tmp_bytes( //
const MODULE* module // N
) __attribute((alias("vec_znx_normalize_base2k_tmp_bytes_ref")));
#endif
/** @brief sets res = 0 */
EXPORT void vec_znx_zero(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl // res
) {
module->func.vec_znx_zero(module, res, res_size, res_sl);
}
/** @brief sets res = a */
EXPORT void vec_znx_copy(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
module->func.vec_znx_copy(module, res, res_size, res_sl, a, a_size, a_sl);
}
/** @brief sets res = -a */
EXPORT void vec_znx_negate(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
module->func.vec_znx_negate(module, res, res_size, res_sl, a, a_size, a_sl);
}
EXPORT void vec_znx_zero_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl // res
) {
uint64_t nn = module->nn;
for (uint64_t i = 0; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
EXPORT void vec_znx_copy_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
uint64_t nn = module->nn;
uint64_t smin = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < smin; ++i) {
znx_copy_i64_ref(nn, res + i * res_sl, a + i * a_sl);
}
for (uint64_t i = smin; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}
EXPORT void vec_znx_negate_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
uint64_t nn = module->nn;
uint64_t smin = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < smin; ++i) {
znx_negate_i64_ref(nn, res + i * res_sl, a + i * a_sl);
}
for (uint64_t i = smin; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}

View File

@@ -1,357 +0,0 @@
#ifndef SPQLIOS_VEC_ZNX_ARITHMETIC_H
#define SPQLIOS_VEC_ZNX_ARITHMETIC_H
#include <stdint.h>
#include "../commons.h"
#include "../reim/reim_fft.h"
/**
* We support the following module families:
* - FFT64:
* all the polynomials should fit at all times within 52 bits.
* for FHE implementations, the recommended limb-sizes are
* between K=10 and 20, which is good for low multiplicative depths.
* - NTT120:
* all the polynomials should fit at all times within 119 bits.
* for FHE implementations, the recommended limb-sizes are
* between K=20 and 40, which is good for large multiplicative depths.
*/
typedef enum module_type_t { FFT64, NTT120 } MODULE_TYPE;
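/* Illustrative sketch (not part of the original header): creating a module and
 * calling one of the vector functions declared below; buffers are plain int64
 * arrays with contiguous limbs of N coefficients (error handling omitted):
 *
 *   MODULE* m = new_module_info(4096, FFT64);
 *   vec_znx_add(m, res, ell, 4096, a, ell, 4096, b, ell, 4096);
 *   delete_module_info(m);
 */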
/** @brief opaque structure that describes the modules (ZnX,TnX) and the hardware */
typedef struct module_info_t MODULE;
/** @brief opaque type that represents a prepared matrix */
typedef struct vmp_pmat_t VMP_PMAT;
/** @brief opaque type that represents a vector of znx in DFT space */
typedef struct vec_znx_dft_t VEC_ZNX_DFT;
/** @brief opaque type that represents a vector of znx in large coeffs space */
typedef struct vec_znx_bigcoeff_t VEC_ZNX_BIG;
/** @brief opaque type that represents a prepared scalar vector product */
typedef struct svp_ppol_t SVP_PPOL;
/** @brief opaque type that represents a prepared left convolution vector product */
typedef struct cnv_pvec_l_t CNV_PVEC_L;
/** @brief opaque type that represents a prepared right convolution vector product */
typedef struct cnv_pvec_r_t CNV_PVEC_R;
/** @brief bytes needed for a vec_znx in DFT space */
EXPORT uint64_t bytes_of_vec_znx_dft(const MODULE* module, // N
uint64_t size);
/** @brief allocates a vec_znx in DFT space */
EXPORT VEC_ZNX_DFT* new_vec_znx_dft(const MODULE* module, // N
uint64_t size);
/** @brief frees memory from a vec_znx in DFT space */
EXPORT void delete_vec_znx_dft(VEC_ZNX_DFT* res);
/** @brief bytes needed for a vec_znx_big */
EXPORT uint64_t bytes_of_vec_znx_big(const MODULE* module, // N
uint64_t size);
/** @brief allocates a vec_znx_big */
EXPORT VEC_ZNX_BIG* new_vec_znx_big(const MODULE* module, // N
uint64_t size);
/** @brief frees memory from a vec_znx_big */
EXPORT void delete_vec_znx_big(VEC_ZNX_BIG* res);
/** @brief bytes needed for a prepared vector */
EXPORT uint64_t bytes_of_svp_ppol(const MODULE* module); // N
/** @brief allocates a prepared vector */
EXPORT SVP_PPOL* new_svp_ppol(const MODULE* module); // N
/** @brief frees memory for a prepared vector */
EXPORT void delete_svp_ppol(SVP_PPOL* res);
/** @brief bytes needed for a prepared matrix */
EXPORT uint64_t bytes_of_vmp_pmat(const MODULE* module, // N
uint64_t nrows, uint64_t ncols);
/** @brief allocates a prepared matrix */
EXPORT VMP_PMAT* new_vmp_pmat(const MODULE* module, // N
uint64_t nrows, uint64_t ncols);
/** @brief frees memory for a prepared matrix */
EXPORT void delete_vmp_pmat(VMP_PMAT* res);
/**
* @brief obtain a module info for ring dimension N
* the module-info knows about:
* - the dimension N (or the complex dimension m=N/2)
* - any precomputed fft or ntt items
* - the hardware (avx, arm64, x86, ...)
*/
EXPORT MODULE* new_module_info(uint64_t N, MODULE_TYPE mode);
EXPORT void delete_module_info(MODULE* module_info);
EXPORT uint64_t module_get_n(const MODULE* module);
/** @brief sets res = 0 */
EXPORT void vec_znx_zero(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl // res
);
/** @brief sets res = a */
EXPORT void vec_znx_copy(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = -a */
EXPORT void vec_znx_negate(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a + b */
EXPORT void vec_znx_add(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a - b */
EXPORT void vec_znx_sub(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = k-normalize-reduce(a) */
EXPORT void vec_znx_normalize_base2k(const MODULE* module, // N
uint64_t log2_base2k, // output base 2^K
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
uint8_t* tmp_space // scratch space (size >= N)
);
/** @brief returns the minimal byte length of scratch space for vec_znx_normalize_base2k */
EXPORT uint64_t vec_znx_normalize_base2k_tmp_bytes(const MODULE* module // N
);
/** @brief sets res = a . X^p */
EXPORT void vec_znx_rotate(const MODULE* module, // N
const int64_t p, // rotation value
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = a(X^p) */
EXPORT void vec_znx_automorphism(const MODULE* module, // N
const int64_t p, // X->X^p
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void vmp_prepare_contiguous(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
/** @brief prepares a vmp matrix (mat[row*ncols+col] points to the item) */
EXPORT void vmp_prepare_dblptr(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t** mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
/** @brief sets res = 0 */
EXPORT void vec_dft_zero(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size // res
);
/** @brief sets res = a+b */
EXPORT void vec_dft_add(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a, uint64_t a_size, // a
const VEC_ZNX_DFT* b, uint64_t b_size // b
);
/** @brief sets res = a-b */
EXPORT void vec_dft_sub(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a, uint64_t a_size, // a
const VEC_ZNX_DFT* b, uint64_t b_size // b
);
/** @brief sets res = DFT(a) */
EXPORT void vec_znx_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = iDFT(a_dft) -- output in big coeffs space */
EXPORT void vec_znx_idft(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
uint8_t* tmp // scratch space
);
/** @brief tmp bytes required for vec_znx_idft */
EXPORT uint64_t vec_znx_idft_tmp_bytes(const MODULE* module);
/**
* @brief sets res = iDFT(a_dft) -- output in big coeffs space
*
* @note a_dft is overwritten
*/
EXPORT void vec_znx_idft_tmp_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
VEC_ZNX_DFT* a_dft, uint64_t a_size // a is overwritten
);
/** @brief sets res = a+b */
EXPORT void vec_znx_big_add(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
);
/** @brief sets res = a+b */
EXPORT void vec_znx_big_add_small(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_znx_big_add_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a-b */
EXPORT void vec_znx_big_sub(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
);
EXPORT void vec_znx_big_sub_small_b(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_znx_big_sub_small_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
);
EXPORT void vec_znx_big_sub_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = k-normalize(a) -- output in int64 coeffs space */
EXPORT void vec_znx_big_normalize_base2k(const MODULE* module, // N
uint64_t log2_base2k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
uint8_t* tmp_space // temp space
);
/** @brief returns the minimal byte length of scratch space for vec_znx_big_normalize_base2k */
EXPORT uint64_t vec_znx_big_normalize_base2k_tmp_bytes(const MODULE* module // N
);
/** @brief apply a svp product, result = ppol * a, presented in DFT space */
EXPORT void fft64_svp_apply_dft(const MODULE* module, // N
const VEC_ZNX_DFT* res, uint64_t res_size, // output
const SVP_PPOL* ppol, // prepared pol
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = k-normalize(a.subrange) -- output in int64 coeffs space */
EXPORT void vec_znx_big_range_normalize_base2k( //
const MODULE* module, // N
uint64_t log2_base2k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_range_begin, uint64_t a_range_xend, uint64_t a_range_step, // range
uint8_t* tmp_space // temp space
);
/** @brief returns the minimal byte length of scratch space for vec_znx_big_range_normalize_base2k */
EXPORT uint64_t vec_znx_big_range_normalize_base2k_tmp_bytes( //
const MODULE* module // N
);
/** @brief sets res = a . X^p */
EXPORT void vec_znx_big_rotate(const MODULE* module, // N
int64_t p, // rotation value
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
);
/** @brief sets res = a(X^p) */
EXPORT void vec_znx_big_automorphism(const MODULE* module, // N
int64_t p, // X->X^p
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
);
/** @brief apply a svp product, result = ppol * a, presented in DFT space */
EXPORT void svp_apply_dft(const MODULE* module, // N
const VEC_ZNX_DFT* res, uint64_t res_size, // output
const SVP_PPOL* ppol, // prepared pol
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief prepares a svp polynomial */
EXPORT void svp_prepare(const MODULE* module, // N
SVP_PPOL* ppol, // output
const int64_t* pol // a
);
/** @brief res = a * b : small integer polynomial product */
EXPORT void znx_small_single_product(const MODULE* module, // N
int64_t* res, // output
const int64_t* a, // a
const int64_t* b, // b
uint8_t* tmp);
/** @brief tmp bytes required for znx_small_single_product */
EXPORT uint64_t znx_small_single_product_tmp_bytes(const MODULE* module);
/** @brief minimal scratch space byte-size required for the vmp_prepare function */
EXPORT uint64_t vmp_prepare_contiguous_tmp_bytes(const MODULE* module, // N
uint64_t nrows, uint64_t ncols);
/** @brief applies a vmp product (result in DFT space) */
EXPORT void vmp_apply_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
);
/** @brief minimal size of the tmp_space */
EXPORT uint64_t vmp_apply_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
/** @brief applies a vmp product res = a x pmat (inputs and outputs in DFT space) */
EXPORT void vmp_apply_dft_to_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, const uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
const VMP_PMAT* pmat, const uint64_t nrows,
const uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
);
/** @brief minimal size of the tmp_space */
EXPORT uint64_t vmp_apply_dft_to_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
#endif // SPQLIOS_VEC_ZNX_ARITHMETIC_H

View File

@@ -1,481 +0,0 @@
#ifndef SPQLIOS_VEC_ZNX_ARITHMETIC_PRIVATE_H
#define SPQLIOS_VEC_ZNX_ARITHMETIC_PRIVATE_H
#include "../commons_private.h"
#include "../q120/q120_ntt.h"
#include "vec_znx_arithmetic.h"
/**
* Layouts families:
*
* fft64:
* K: <= 20, N: <= 65536, ell: <= 200
* vec<ZnX> normalized: represented by int64
* vec<ZnX> large: represented by int64 (expect <=52 bits)
* vec<ZnX> DFT: represented by double (reim_fft space)
* On AVX2 infrastructure, PMAT, LCNV, RCNV use a special reim4_fft space
*
* ntt120:
* K: <= 50, N: <= 65536, ell: <= 80
* vec<ZnX> normalized: represented by int64
* vec<ZnX> large: represented by int128 (expect <=120 bits)
* vec<ZnX> DFT: represented by int64x4 (ntt120 space)
* On AVX2 infrastructure, PMAT, LCNV, RCNV use a special ntt120 space
*
* ntt104:
* K: <= 40, N: <= 65536, ell: <= 80
* vec<ZnX> normalized: represented by int64
* vec<ZnX> large: represented by int128 (expect <=120 bits)
* vec<ZnX> DFT: represented by int64x4 (ntt120 space)
* On AVX512 infrastructure, PMAT, LCNV, RCNV use a special ntt104 space
*/
struct fft64_module_info_t {
// pre-computation for reim_fft
REIM_FFT_PRECOMP* p_fft;
// pre-computation for mul_fft
REIM_FFTVEC_MUL_PRECOMP* mul_fft;
// pre-computation for reim_from_znx6
REIM_FROM_ZNX64_PRECOMP* p_conv;
// pre-computation for reim_tp_znx6
REIM_TO_ZNX64_PRECOMP* p_reim_to_znx;
// pre-computation for reim_fft
REIM_IFFT_PRECOMP* p_ifft;
// pre-computation for reim_fftvec_addmul
REIM_FFTVEC_ADDMUL_PRECOMP* p_addmul;
};
struct q120_module_info_t {
// pre-computation for q120b to q120b ntt
q120_ntt_precomp* p_ntt;
// pre-computation for q120b to q120b intt
q120_ntt_precomp* p_intt;
};
// TODO add function types here
typedef typeof(vec_znx_zero) VEC_ZNX_ZERO_F;
typedef typeof(vec_znx_copy) VEC_ZNX_COPY_F;
typedef typeof(vec_znx_negate) VEC_ZNX_NEGATE_F;
typedef typeof(vec_znx_add) VEC_ZNX_ADD_F;
typedef typeof(vec_znx_dft) VEC_ZNX_DFT_F;
typedef typeof(vec_znx_idft) VEC_ZNX_IDFT_F;
typedef typeof(vec_znx_idft_tmp_bytes) VEC_ZNX_IDFT_TMP_BYTES_F;
typedef typeof(vec_znx_idft_tmp_a) VEC_ZNX_IDFT_TMP_A_F;
typedef typeof(vec_znx_sub) VEC_ZNX_SUB_F;
typedef typeof(vec_znx_rotate) VEC_ZNX_ROTATE_F;
typedef typeof(vec_znx_automorphism) VEC_ZNX_AUTOMORPHISM_F;
typedef typeof(vec_znx_normalize_base2k) VEC_ZNX_NORMALIZE_BASE2K_F;
typedef typeof(vec_znx_normalize_base2k_tmp_bytes) VEC_ZNX_NORMALIZE_BASE2K_TMP_BYTES_F;
typedef typeof(vec_znx_big_normalize_base2k) VEC_ZNX_BIG_NORMALIZE_BASE2K_F;
typedef typeof(vec_znx_big_normalize_base2k_tmp_bytes) VEC_ZNX_BIG_NORMALIZE_BASE2K_TMP_BYTES_F;
typedef typeof(vec_znx_big_range_normalize_base2k) VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_F;
typedef typeof(vec_znx_big_range_normalize_base2k_tmp_bytes) VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_TMP_BYTES_F;
typedef typeof(vec_znx_big_add) VEC_ZNX_BIG_ADD_F;
typedef typeof(vec_znx_big_add_small) VEC_ZNX_BIG_ADD_SMALL_F;
typedef typeof(vec_znx_big_add_small2) VEC_ZNX_BIG_ADD_SMALL2_F;
typedef typeof(vec_znx_big_sub) VEC_ZNX_BIG_SUB_F;
typedef typeof(vec_znx_big_sub_small_a) VEC_ZNX_BIG_SUB_SMALL_A_F;
typedef typeof(vec_znx_big_sub_small_b) VEC_ZNX_BIG_SUB_SMALL_B_F;
typedef typeof(vec_znx_big_sub_small2) VEC_ZNX_BIG_SUB_SMALL2_F;
typedef typeof(vec_znx_big_rotate) VEC_ZNX_BIG_ROTATE_F;
typedef typeof(vec_znx_big_automorphism) VEC_ZNX_BIG_AUTOMORPHISM_F;
typedef typeof(svp_prepare) SVP_PREPARE;
typedef typeof(svp_apply_dft) SVP_APPLY_DFT_F;
typedef typeof(znx_small_single_product) ZNX_SMALL_SINGLE_PRODUCT_F;
typedef typeof(znx_small_single_product_tmp_bytes) ZNX_SMALL_SINGLE_PRODUCT_TMP_BYTES_F;
typedef typeof(vmp_prepare_contiguous) VMP_PREPARE_CONTIGUOUS_F;
typedef typeof(vmp_prepare_contiguous_tmp_bytes) VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F;
typedef typeof(vmp_apply_dft) VMP_APPLY_DFT_F;
typedef typeof(vmp_apply_dft_tmp_bytes) VMP_APPLY_DFT_TMP_BYTES_F;
typedef typeof(vmp_apply_dft_to_dft) VMP_APPLY_DFT_TO_DFT_F;
typedef typeof(vmp_apply_dft_to_dft_tmp_bytes) VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F;
typedef typeof(bytes_of_vec_znx_dft) BYTES_OF_VEC_ZNX_DFT_F;
typedef typeof(bytes_of_vec_znx_big) BYTES_OF_VEC_ZNX_BIG_F;
typedef typeof(bytes_of_svp_ppol) BYTES_OF_SVP_PPOL_F;
typedef typeof(bytes_of_vmp_pmat) BYTES_OF_VMP_PMAT_F;
struct module_virtual_functions_t {
// TODO add functions here
VEC_ZNX_ZERO_F* vec_znx_zero;
VEC_ZNX_COPY_F* vec_znx_copy;
VEC_ZNX_NEGATE_F* vec_znx_negate;
VEC_ZNX_ADD_F* vec_znx_add;
VEC_ZNX_DFT_F* vec_znx_dft;
VEC_ZNX_IDFT_F* vec_znx_idft;
VEC_ZNX_IDFT_TMP_BYTES_F* vec_znx_idft_tmp_bytes;
VEC_ZNX_IDFT_TMP_A_F* vec_znx_idft_tmp_a;
VEC_ZNX_SUB_F* vec_znx_sub;
VEC_ZNX_ROTATE_F* vec_znx_rotate;
VEC_ZNX_AUTOMORPHISM_F* vec_znx_automorphism;
VEC_ZNX_NORMALIZE_BASE2K_F* vec_znx_normalize_base2k;
VEC_ZNX_NORMALIZE_BASE2K_TMP_BYTES_F* vec_znx_normalize_base2k_tmp_bytes;
VEC_ZNX_BIG_NORMALIZE_BASE2K_F* vec_znx_big_normalize_base2k;
VEC_ZNX_BIG_NORMALIZE_BASE2K_TMP_BYTES_F* vec_znx_big_normalize_base2k_tmp_bytes;
VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_F* vec_znx_big_range_normalize_base2k;
VEC_ZNX_BIG_RANGE_NORMALIZE_BASE2K_TMP_BYTES_F* vec_znx_big_range_normalize_base2k_tmp_bytes;
VEC_ZNX_BIG_ADD_F* vec_znx_big_add;
VEC_ZNX_BIG_ADD_SMALL_F* vec_znx_big_add_small;
VEC_ZNX_BIG_ADD_SMALL2_F* vec_znx_big_add_small2;
VEC_ZNX_BIG_SUB_F* vec_znx_big_sub;
VEC_ZNX_BIG_SUB_SMALL_A_F* vec_znx_big_sub_small_a;
VEC_ZNX_BIG_SUB_SMALL_B_F* vec_znx_big_sub_small_b;
VEC_ZNX_BIG_SUB_SMALL2_F* vec_znx_big_sub_small2;
VEC_ZNX_BIG_ROTATE_F* vec_znx_big_rotate;
VEC_ZNX_BIG_AUTOMORPHISM_F* vec_znx_big_automorphism;
SVP_PREPARE* svp_prepare;
SVP_APPLY_DFT_F* svp_apply_dft;
ZNX_SMALL_SINGLE_PRODUCT_F* znx_small_single_product;
ZNX_SMALL_SINGLE_PRODUCT_TMP_BYTES_F* znx_small_single_product_tmp_bytes;
VMP_PREPARE_CONTIGUOUS_F* vmp_prepare_contiguous;
VMP_PREPARE_CONTIGUOUS_TMP_BYTES_F* vmp_prepare_contiguous_tmp_bytes;
VMP_APPLY_DFT_F* vmp_apply_dft;
VMP_APPLY_DFT_TMP_BYTES_F* vmp_apply_dft_tmp_bytes;
VMP_APPLY_DFT_TO_DFT_F* vmp_apply_dft_to_dft;
VMP_APPLY_DFT_TO_DFT_TMP_BYTES_F* vmp_apply_dft_to_dft_tmp_bytes;
BYTES_OF_VEC_ZNX_DFT_F* bytes_of_vec_znx_dft;
BYTES_OF_VEC_ZNX_BIG_F* bytes_of_vec_znx_big;
BYTES_OF_SVP_PPOL_F* bytes_of_svp_ppol;
BYTES_OF_VMP_PMAT_F* bytes_of_vmp_pmat;
};
union backend_module_info_t {
struct fft64_module_info_t fft64;
struct q120_module_info_t q120;
};
struct module_info_t {
// generic parameters
MODULE_TYPE module_type;
uint64_t nn;
uint64_t m;
// backend_dependent functions
union backend_module_info_t mod;
// virtual functions
struct module_virtual_functions_t func;
};
EXPORT uint64_t fft64_bytes_of_vec_znx_dft(const MODULE* module, // N
uint64_t size);
EXPORT uint64_t fft64_bytes_of_vec_znx_big(const MODULE* module, // N
uint64_t size);
EXPORT uint64_t fft64_bytes_of_svp_ppol(const MODULE* module); // N
EXPORT uint64_t fft64_bytes_of_vmp_pmat(const MODULE* module, // N
uint64_t nrows, uint64_t ncols);
EXPORT void vec_znx_zero_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl // res
);
EXPORT void vec_znx_copy_ref(const MODULE* precomp, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_znx_negate_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_znx_negate_avx(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_znx_add_ref(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_znx_add_avx(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_znx_sub_ref(const MODULE* precomp, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_znx_sub_avx(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void vec_znx_normalize_base2k_ref(const MODULE* module, // N
uint64_t log2_base2k, // output base 2^K
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // inp
uint8_t* tmp_space // scratch space
);
EXPORT uint64_t vec_znx_normalize_base2k_tmp_bytes_ref(const MODULE* module // N
);
EXPORT void vec_znx_rotate_ref(const MODULE* module, // N
const int64_t p, // rotation value
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_znx_automorphism_ref(const MODULE* module, // N
const int64_t p, // X->X^p
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vmp_prepare_ref(const MODULE* precomp, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols // a
);
EXPORT void vmp_apply_dft_ref(const MODULE* precomp, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols // prep matrix
);
EXPORT void vec_dft_zero_ref(const MODULE* precomp, // N
VEC_ZNX_DFT* res, uint64_t res_size // res
);
EXPORT void vec_dft_add_ref(const MODULE* precomp, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a, uint64_t a_size, // a
const VEC_ZNX_DFT* b, uint64_t b_size // b
);
EXPORT void vec_dft_sub_ref(const MODULE* precomp, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a, uint64_t a_size, // a
const VEC_ZNX_DFT* b, uint64_t b_size // b
);
EXPORT void vec_dft_ref(const MODULE* precomp, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void vec_idft_ref(const MODULE* precomp, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size);
EXPORT void vec_znx_big_normalize_ref(const MODULE* precomp, // N
uint64_t k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
);
/** @brief apply a svp product, result = ppol * a, presented in DFT space */
EXPORT void fft64_svp_apply_dft_ref(const MODULE* module, // N
const VEC_ZNX_DFT* res, uint64_t res_size, // output
const SVP_PPOL* ppol, // prepared pol
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
/** @brief sets res = k-normalize(a) -- output in int64 coeffs space */
EXPORT void fft64_vec_znx_big_normalize_base2k(const MODULE* module, // N
uint64_t k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
uint8_t* tmp_space // temp space
);
/** @brief returns the minimal byte length of scratch space for vec_znx_big_normalize_base2k */
EXPORT uint64_t fft64_vec_znx_big_normalize_base2k_tmp_bytes(const MODULE* module // N
);
/** @brief sets res = k-normalize(a.subrange) -- output in int64 coeffs space */
EXPORT void fft64_vec_znx_big_range_normalize_base2k(const MODULE* module, // N
uint64_t log2_base2k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_range_begin, // a
uint64_t a_range_xend, uint64_t a_range_step, // range
uint8_t* tmp_space // temp space
);
/** @brief returns the minimal byte length of scratch space for vec_znx_big_range_normalize_base2k */
EXPORT uint64_t fft64_vec_znx_big_range_normalize_base2k_tmp_bytes(const MODULE* module // N
);
EXPORT void fft64_vec_znx_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void fft64_vec_znx_idft(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
uint8_t* tmp // scratch space
);
EXPORT uint64_t fft64_vec_znx_idft_tmp_bytes(const MODULE* module);
EXPORT void fft64_vec_znx_idft_tmp_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
VEC_ZNX_DFT* a_dft, uint64_t a_size // a is overwritten
);
EXPORT void ntt120_vec_znx_dft_avx(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
);
EXPORT void ntt120_vec_znx_idft_avx(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
uint8_t* tmp // scratch space
);
EXPORT uint64_t ntt120_vec_znx_idft_tmp_bytes_avx(const MODULE* module);
EXPORT void ntt120_vec_znx_idft_tmp_a_avx(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
VEC_ZNX_DFT* a_dft, uint64_t a_size // a is overwritten
);
// big additions/subtractions
/** @brief sets res = a+b */
EXPORT void fft64_vec_znx_big_add(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
);
/** @brief sets res = a+b */
EXPORT void fft64_vec_znx_big_add_small(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void fft64_vec_znx_big_add_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a-b */
EXPORT void fft64_vec_znx_big_sub(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
);
EXPORT void fft64_vec_znx_big_sub_small_b(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
EXPORT void fft64_vec_znx_big_sub_small_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
);
EXPORT void fft64_vec_znx_big_sub_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
);
/** @brief sets res = a . X^p */
EXPORT void fft64_vec_znx_big_rotate(const MODULE* module, // N
int64_t p, // rotation value
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
);
/** @brief sets res = a(X^p) */
EXPORT void fft64_vec_znx_big_automorphism(const MODULE* module, // N
int64_t p, // X-X^p
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
);
/** @brief prepares a svp polynomial */
EXPORT void fft64_svp_prepare_ref(const MODULE* module, // N
SVP_PPOL* ppol, // output
const int64_t* pol // a
);
/** @brief res = a * b : small integer polynomial product */
EXPORT void fft64_znx_small_single_product(const MODULE* module, // N
int64_t* res, // output
const int64_t* a, // a
const int64_t* b, // b
uint8_t* tmp);
/** @brief tmp bytes required for znx_small_single_product */
EXPORT uint64_t fft64_znx_small_single_product_tmp_bytes(const MODULE* module);
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_vmp_prepare_contiguous_ref(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_vmp_prepare_contiguous_avx(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
);
/** @brief minimal scratch space byte-size required for the vmp_prepare function */
EXPORT uint64_t fft64_vmp_prepare_contiguous_tmp_bytes(const MODULE* module, // N
uint64_t nrows, uint64_t ncols);
/** @brief applies a vmp product (result in DFT space) */
EXPORT void fft64_vmp_apply_dft_ref(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
);
/** @brief applies a vmp product (result in DFT space) */
EXPORT void fft64_vmp_apply_dft_avx(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
);
/** @brief applies a vmp product when the input is already in DFT space */
EXPORT void fft64_vmp_apply_dft_to_dft_ref(const MODULE* module, // N
VEC_ZNX_DFT* res, const uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
const VMP_PMAT* pmat, const uint64_t nrows,
const uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
);
/** @brief applies a vmp product when the input is already in DFT space */
EXPORT void fft64_vmp_apply_dft_to_dft_avx(const MODULE* module, // N
VEC_ZNX_DFT* res, const uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
const VMP_PMAT* pmat, const uint64_t nrows,
const uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
);
/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_vmp_apply_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_vmp_apply_dft_to_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
);
#endif // SPQLIOS_VEC_ZNX_ARITHMETIC_PRIVATE_H
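Taken together, the declarations above compose into a standard pipeline: lift int64 coefficient vectors into DFT space, compute there, come back through a VEC_ZNX_BIG, and base-2^k-normalize the result. A minimal round-trip sketch against these declarations (the helper name is ours, and we assume `module` is an fft64 module of ring dimension nn created elsewhere):

#include "vec_znx_arithmetic_private.h"
// hypothetical sketch: DFT round-trip followed by base-2^k normalization
void dft_roundtrip_sketch(const MODULE* module, uint64_t nn, uint64_t k,
                          const int64_t* a, uint64_t size, int64_t* res) {
  VEC_ZNX_DFT* a_dft = new_vec_znx_dft(module, size);
  VEC_ZNX_BIG* a_big = new_vec_znx_big(module, size);
  uint8_t* tmp = spqlios_alloc(fft64_vec_znx_big_normalize_base2k_tmp_bytes(module));
  fft64_vec_znx_dft(module, a_dft, size, a, size, nn);         // coeffs -> DFT
  fft64_vec_znx_idft_tmp_a(module, a_big, size, a_dft, size);  // DFT -> big (clobbers a_dft)
  fft64_vec_znx_big_normalize_base2k(module, k, res, size, nn, a_big, size, tmp);
  spqlios_free(tmp);
  delete_vec_znx_big(a_big);
  delete_vec_znx_dft(a_dft);
}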

View File

@@ -1,103 +0,0 @@
#include <string.h>
#include "../coeffs/coeffs_arithmetic.h"
#include "../reim4/reim4_arithmetic.h"
#include "vec_znx_arithmetic_private.h"
// specialized functions (ref)
// Note: copy and zero have no dedicated avx variant, so the _avx names below alias the ref implementations.
#define znx_copy_i64_avx znx_copy_i64_ref
#define znx_zero_i64_avx znx_zero_i64_ref
EXPORT void vec_znx_add_avx(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->nn;
if (a_size <= b_size) {
const uint64_t sum_idx = res_size < a_size ? res_size : a_size;
const uint64_t copy_idx = res_size < b_size ? res_size : b_size;
// add up to the smallest dimension
for (uint64_t i = 0; i < sum_idx; ++i) {
znx_add_i64_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then copy to the largest dimension
for (uint64_t i = sum_idx; i < copy_idx; ++i) {
znx_copy_i64_avx(nn, res + i * res_sl, b + i * b_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_avx(nn, res + i * res_sl);
}
} else {
const uint64_t sum_idx = res_size < b_size ? res_size : b_size;
const uint64_t copy_idx = res_size < a_size ? res_size : a_size;
// add up to the smallest dimension
for (uint64_t i = 0; i < sum_idx; ++i) {
znx_add_i64_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then copy to the largest dimension
for (uint64_t i = sum_idx; i < copy_idx; ++i) {
znx_copy_i64_avx(nn, res + i * res_sl, a + i * a_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_avx(nn, res + i * res_sl);
}
}
}
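The size-mismatch policy above deserves a spelled-out example: the result is a+b over the common prefix, then a copy of the longer operand's tail, then zeros. A standalone illustration with plain arrays (not library code), for res_size=4, a_size=2, b_size=3:

#include <stdio.h>
int main(void) {
  long a[2] = {1, 2}, b[3] = {10, 20, 30}, res[4];
  unsigned sum_idx = 2;   // min(res_size, a_size): add both operands here
  unsigned copy_idx = 3;  // min(res_size, b_size): copy the longer tail here
  for (unsigned i = 0; i < sum_idx; ++i) res[i] = a[i] + b[i];
  for (unsigned i = sum_idx; i < copy_idx; ++i) res[i] = b[i];
  for (unsigned i = copy_idx; i < 4; ++i) res[i] = 0;
  for (unsigned i = 0; i < 4; ++i) printf("%ld ", res[i]);  // prints: 11 22 30 0
  return 0;
}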
EXPORT void vec_znx_sub_avx(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t nn = module->nn;
if (a_size <= b_size) {
const uint64_t sub_idx = res_size < a_size ? res_size : a_size;
const uint64_t copy_idx = res_size < b_size ? res_size : b_size;
// subtract up to the smallest dimension
for (uint64_t i = 0; i < sub_idx; ++i) {
znx_sub_i64_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then negate to the largest dimension
for (uint64_t i = sub_idx; i < copy_idx; ++i) {
znx_negate_i64_avx(nn, res + i * res_sl, b + i * b_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_avx(nn, res + i * res_sl);
}
} else {
const uint64_t sub_idx = res_size < b_size ? res_size : b_size;
const uint64_t copy_idx = res_size < a_size ? res_size : a_size;
// subtract up to the smallest dimension
for (uint64_t i = 0; i < sub_idx; ++i) {
znx_sub_i64_avx(nn, res + i * res_sl, a + i * a_sl, b + i * b_sl);
}
// then copy to the largest dimension
for (uint64_t i = sub_idx; i < copy_idx; ++i) {
znx_copy_i64_avx(nn, res + i * res_sl, a + i * a_sl);
}
// then extend with zeros
for (uint64_t i = copy_idx; i < res_size; ++i) {
znx_zero_i64_avx(nn, res + i * res_sl);
}
}
}
EXPORT void vec_znx_negate_avx(const MODULE* module, // N
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
uint64_t nn = module->nn;
uint64_t smin = res_size < a_size ? res_size : a_size;
for (uint64_t i = 0; i < smin; ++i) {
znx_negate_i64_avx(nn, res + i * res_sl, a + i * a_sl);
}
for (uint64_t i = smin; i < res_size; ++i) {
znx_zero_i64_ref(nn, res + i * res_sl);
}
}

View File

@@ -1,270 +0,0 @@
#include "vec_znx_arithmetic_private.h"
EXPORT uint64_t bytes_of_vec_znx_big(const MODULE* module, // N
uint64_t size) {
return module->func.bytes_of_vec_znx_big(module, size);
}
// public wrappers
/** @brief sets res = a+b */
EXPORT void vec_znx_big_add(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
) {
module->func.vec_znx_big_add(module, res, res_size, a, a_size, b, b_size);
}
/** @brief sets res = a+b */
EXPORT void vec_znx_big_add_small(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
module->func.vec_znx_big_add_small(module, res, res_size, a, a_size, b, b_size, b_sl);
}
EXPORT void vec_znx_big_add_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
module->func.vec_znx_big_add_small2(module, res, res_size, a, a_size, a_sl, b, b_size, b_sl);
}
/** @brief sets res = a-b */
EXPORT void vec_znx_big_sub(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
) {
module->func.vec_znx_big_sub(module, res, res_size, a, a_size, b, b_size);
}
EXPORT void vec_znx_big_sub_small_b(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
module->func.vec_znx_big_sub_small_b(module, res, res_size, a, a_size, b, b_size, b_sl);
}
EXPORT void vec_znx_big_sub_small_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
) {
module->func.vec_znx_big_sub_small_a(module, res, res_size, a, a_size, a_sl, b, b_size);
}
EXPORT void vec_znx_big_sub_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
module->func.vec_znx_big_sub_small2(module, res, res_size, a, a_size, a_sl, b, b_size, b_sl);
}
/** @brief sets res = a . X^p */
EXPORT void vec_znx_big_rotate(const MODULE* module, // N
int64_t p, // rotation value
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
) {
module->func.vec_znx_big_rotate(module, p, res, res_size, a, a_size);
}
/** @brief sets res = a(X^p) */
EXPORT void vec_znx_big_automorphism(const MODULE* module, // N
int64_t p, // X-X^p
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
) {
module->func.vec_znx_big_automorphism(module, p, res, res_size, a, a_size);
}
// private wrappers
EXPORT uint64_t fft64_bytes_of_vec_znx_big(const MODULE* module, // N
uint64_t size) {
return module->nn * size * sizeof(double);
}
EXPORT VEC_ZNX_BIG* new_vec_znx_big(const MODULE* module, // N
uint64_t size) {
return spqlios_alloc(bytes_of_vec_znx_big(module, size));
}
EXPORT void delete_vec_znx_big(VEC_ZNX_BIG* res) { spqlios_free(res); }
/** @brief sets res = a+b */
EXPORT void fft64_vec_znx_big_add(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
) {
const uint64_t n = module->nn;
vec_znx_add(module, //
(int64_t*)res, res_size, n, //
(int64_t*)a, a_size, n, //
(int64_t*)b, b_size, n);
}
/** @brief sets res = a+b */
EXPORT void fft64_vec_znx_big_add_small(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t n = module->nn;
vec_znx_add(module, //
(int64_t*)res, res_size, n, //
(int64_t*)a, a_size, n, //
b, b_size, b_sl);
}
EXPORT void fft64_vec_znx_big_add_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t n = module->nn;
vec_znx_add(module, //
(int64_t*)res, res_size, n, //
a, a_size, a_sl, //
b, b_size, b_sl);
}
/** @brief sets res = a-b */
EXPORT void fft64_vec_znx_big_sub(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
) {
const uint64_t n = module->nn;
vec_znx_sub(module, //
(int64_t*)res, res_size, n, //
(int64_t*)a, a_size, n, //
(int64_t*)b, b_size, n);
}
EXPORT void fft64_vec_znx_big_sub_small_b(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t n = module->nn;
vec_znx_sub(module, //
(int64_t*)res, res_size, n, //
(int64_t*)a, a_size, n, //
b, b_size, b_sl);
}
EXPORT void fft64_vec_znx_big_sub_small_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VEC_ZNX_BIG* b, uint64_t b_size // b
) {
const uint64_t n = module->nn;
vec_znx_sub(module, //
(int64_t*)res, res_size, n, //
a, a_size, a_sl, //
(int64_t*)b, b_size, n);
}
EXPORT void fft64_vec_znx_big_sub_small2(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const int64_t* b, uint64_t b_size, uint64_t b_sl // b
) {
const uint64_t n = module->nn;
vec_znx_sub(module, //
(int64_t*)res, res_size, n, //
a, a_size, a_sl, //
b, b_size, b_sl);
}
/** @brief sets res = a . X^p */
EXPORT void fft64_vec_znx_big_rotate(const MODULE* module, // N
int64_t p, // rotation value
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
) {
uint64_t nn = module->nn;
vec_znx_rotate(module, p, (int64_t*)res, res_size, nn, (int64_t*)a, a_size, nn);
}
/** @brief sets res = a(X^p) */
EXPORT void fft64_vec_znx_big_automorphism(const MODULE* module, // N
int64_t p, // X-X^p
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_BIG* a, uint64_t a_size // a
) {
uint64_t nn = module->nn;
vec_znx_automorphism(module, p, (int64_t*)res, res_size, nn, (int64_t*)a, a_size, nn);
}
EXPORT void vec_znx_big_normalize_base2k(const MODULE* module, // N
uint64_t k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
uint8_t* tmp_space // temp space
) {
module->func.vec_znx_big_normalize_base2k(module, // N
k, // base-2^k
res, res_size, res_sl, // res
a, a_size, // a
tmp_space);
}
EXPORT uint64_t vec_znx_big_normalize_base2k_tmp_bytes(const MODULE* module // N
) {
return module->func.vec_znx_big_normalize_base2k_tmp_bytes(module // N
);
}
/** @brief sets res = k-normalize(a.subrange) -- output in int64 coeffs space */
EXPORT void vec_znx_big_range_normalize_base2k( //
const MODULE* module, // N
uint64_t log2_base2k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_range_begin, uint64_t a_range_xend, uint64_t a_range_step, // range
uint8_t* tmp_space // temp space
) {
module->func.vec_znx_big_range_normalize_base2k(module, log2_base2k, res, res_size, res_sl, a, a_range_begin,
a_range_xend, a_range_step, tmp_space);
}
/** @brief returns the minimal byte length of scratch space for vec_znx_big_range_normalize_base2k */
EXPORT uint64_t vec_znx_big_range_normalize_base2k_tmp_bytes( //
const MODULE* module // N
) {
return module->func.vec_znx_big_range_normalize_base2k_tmp_bytes(module);
}
EXPORT void fft64_vec_znx_big_normalize_base2k(const MODULE* module, // N
uint64_t k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_size, // a
uint8_t* tmp_space) {
uint64_t a_sl = module->nn;
module->func.vec_znx_normalize_base2k(module, // N
k, // log2_base2k
res, res_size, res_sl, // res
(int64_t*)a, a_size, a_sl, // a
tmp_space);
}
EXPORT void fft64_vec_znx_big_range_normalize_base2k( //
const MODULE* module, // N
uint64_t k, // base-2^k
int64_t* res, uint64_t res_size, uint64_t res_sl, // res
const VEC_ZNX_BIG* a, uint64_t a_begin, uint64_t a_end, uint64_t a_step, // a
uint8_t* tmp_space) {
// convert the range indexes to int64[] slices
const int64_t* a_st = ((int64_t*)a) + module->nn * a_begin;
const uint64_t a_size = (a_end + a_step - 1 - a_begin) / a_step;
const uint64_t a_sl = module->nn * a_step;
// forward the call
module->func.vec_znx_normalize_base2k(module, // N
k, // log2_base2k
res, res_size, res_sl, // res
a_st, a_size, a_sl, // a
tmp_space);
}
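The range-to-slice conversion above is just a strided view over the limbs of a: with a_begin=2, a_range_xend=7 (exclusive) and a_step=2 it selects limbs 2, 4 and 6. A standalone check of that arithmetic (not library code):

#include <assert.h>
int main(void) {
  unsigned long a_begin = 2, a_end = 7, a_step = 2, nn = 1024;
  unsigned long a_size = (a_end + a_step - 1 - a_begin) / a_step;  // ceil((7-2)/2) = 3
  unsigned long a_sl = nn * a_step;  // stride between selected limbs, in int64 units
  assert(a_size == 3 && a_sl == 2048);
  return 0;
}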

View File

@@ -1,162 +0,0 @@
#include <string.h>
#include "../q120/q120_arithmetic.h"
#include "vec_znx_arithmetic_private.h"
EXPORT void vec_znx_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
return module->func.vec_znx_dft(module, res, res_size, a, a_size, a_sl);
}
EXPORT void vec_znx_idft(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
uint8_t* tmp // scratch space
) {
return module->func.vec_znx_idft(module, res, res_size, a_dft, a_size, tmp);
}
EXPORT uint64_t vec_znx_idft_tmp_bytes(const MODULE* module) { return module->func.vec_znx_idft_tmp_bytes(module); }
EXPORT void vec_znx_idft_tmp_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
VEC_ZNX_DFT* a_dft, uint64_t a_size // a is overwritten
) {
return module->func.vec_znx_idft_tmp_a(module, res, res_size, a_dft, a_size);
}
EXPORT uint64_t bytes_of_vec_znx_dft(const MODULE* module, // N
uint64_t size) {
return module->func.bytes_of_vec_znx_dft(module, size);
}
// fft64 backend
EXPORT uint64_t fft64_bytes_of_vec_znx_dft(const MODULE* module, // N
uint64_t size) {
return module->nn * size * sizeof(double);
}
EXPORT VEC_ZNX_DFT* new_vec_znx_dft(const MODULE* module, // N
uint64_t size) {
return spqlios_alloc(bytes_of_vec_znx_dft(module, size));
}
EXPORT void delete_vec_znx_dft(VEC_ZNX_DFT* res) { spqlios_free(res); }
EXPORT void fft64_vec_znx_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t smin = res_size < a_size ? res_size : a_size;
const uint64_t nn = module->nn;
for (uint64_t i = 0; i < smin; i++) {
reim_from_znx64(module->mod.fft64.p_conv, ((double*)res) + i * nn, a + i * a_sl);
reim_fft(module->mod.fft64.p_fft, ((double*)res) + i * nn);
}
// fill up remaining part with 0's
double* const dres = (double*)res;
memset(dres + smin * nn, 0, (res_size - smin) * nn * sizeof(double));
}
EXPORT void fft64_vec_znx_idft(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
uint8_t* tmp // unused
) {
const uint64_t nn = module->nn;
const uint64_t smin = res_size < a_size ? res_size : a_size;
if ((double*)res != (double*)a_dft) {
memcpy(res, a_dft, smin * nn * sizeof(double));
}
for (uint64_t i = 0; i < smin; i++) {
reim_ifft(module->mod.fft64.p_ifft, ((double*)res) + i * nn);
reim_to_znx64(module->mod.fft64.p_reim_to_znx, ((int64_t*)res) + i * nn, ((int64_t*)res) + i * nn);
}
// fill up remaining part with 0's
int64_t* const dres = (int64_t*)res;
memset(dres + smin * nn, 0, (res_size - smin) * nn * sizeof(double));
}
EXPORT uint64_t fft64_vec_znx_idft_tmp_bytes(const MODULE* module) { return 0; }
EXPORT void fft64_vec_znx_idft_tmp_a(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
VEC_ZNX_DFT* a_dft, uint64_t a_size // a is overwritten
) {
const uint64_t nn = module->nn;
const uint64_t smin = res_size < a_size ? res_size : a_size;
int64_t* const tres = (int64_t*)res;
double* const ta = (double*)a_dft;
for (uint64_t i = 0; i < smin; i++) {
reim_ifft(module->mod.fft64.p_ifft, ta + i * nn);
reim_to_znx64(module->mod.fft64.p_reim_to_znx, tres + i * nn, ta + i * nn);
}
// fill up remaining part with 0's
memset(tres + smin * nn, 0, (res_size - smin) * nn * sizeof(double));
}
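The two fft64 iDFT entry points above trade a copy for in-place work: fft64_vec_znx_idft preserves its input (it copies into res first), fft64_vec_znx_idft_tmp_a transforms a_dft in place, and neither needs scratch on this backend (tmp_bytes is 0, and the tmp parameter is unused). A hypothetical chooser sketch (the function name and flag are illustrative, not library API):

#include <stddef.h>
#include "vec_znx_arithmetic_private.h"
void idft_choose_sketch(const MODULE* module, VEC_ZNX_BIG* res, uint64_t size,
                        VEC_ZNX_DFT* a_dft, int a_dft_is_disposable) {
  if (a_dft_is_disposable) {
    fft64_vec_znx_idft_tmp_a(module, res, size, a_dft, size);  // clobbers a_dft, no copy
  } else {
    fft64_vec_znx_idft(module, res, size, a_dft, size, NULL);  // tmp unused on fft64
  }
}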
// ntt120 backend
EXPORT void ntt120_vec_znx_dft_avx(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl // a
) {
const uint64_t nn = module->nn;
const uint64_t smin = res_size < a_size ? res_size : a_size;
int64_t* tres = (int64_t*)res;
for (uint64_t i = 0; i < smin; i++) {
q120_b_from_znx64_simple(nn, (q120b*)(tres + i * nn * 4), a + i * a_sl);
q120_ntt_bb_avx2(module->mod.q120.p_ntt, (q120b*)(tres + i * nn * 4));
}
// fill up remaining part with 0's
memset(tres + smin * nn * 4, 0, (res_size - smin) * nn * 4 * sizeof(int64_t));
}
EXPORT void ntt120_vec_znx_idft_avx(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
uint8_t* tmp) {
const uint64_t nn = module->nn;
const uint64_t smin = res_size < a_size ? res_size : a_size;
__int128_t* const tres = (__int128_t*)res;
const int64_t* const ta = (int64_t*)a_dft;
for (uint64_t i = 0; i < smin; i++) {
memcpy(tmp, ta + i * nn * 4, nn * 4 * sizeof(uint64_t));
q120_intt_bb_avx2(module->mod.q120.p_intt, (q120b*)tmp);
q120_b_to_znx128_simple(nn, tres + i * nn, (q120b*)tmp);
}
// fill up remaining part with 0's
memset(tres + smin * nn, 0, (res_size - smin) * nn * sizeof(*tres));
}
EXPORT uint64_t ntt120_vec_znx_idft_tmp_bytes_avx(const MODULE* module) { return module->nn * 4 * sizeof(uint64_t); }
EXPORT void ntt120_vec_znx_idft_tmp_a_avx(const MODULE* module, // N
VEC_ZNX_BIG* res, uint64_t res_size, // res
VEC_ZNX_DFT* a_dft, uint64_t a_size // a is overwritten
) {
const uint64_t nn = module->nn;
const uint64_t smin = res_size < a_size ? res_size : a_size;
__int128_t* const tres = (__int128_t*)res;
int64_t* const ta = (int64_t*)a_dft;
for (uint64_t i = 0; i < smin; i++) {
q120_intt_bb_avx2(module->mod.q120.p_intt, (q120b*)(ta + i * nn * 4));
q120_b_to_znx128_simple(nn, tres + i * nn, (q120b*)(ta + i * nn * 4));
}
// fill up remaining part with 0's
memset(tres + smin * nn, 0, (res_size - smin) * nn * sizeof(*tres));
}

View File

@@ -1 +0,0 @@
#include "vec_znx_arithmetic_private.h"

View File

@@ -1,240 +0,0 @@
#include <string.h>
#include "../reim4/reim4_arithmetic.h"
#include "vec_znx_arithmetic_private.h"
EXPORT uint64_t bytes_of_vmp_pmat(const MODULE* module, // N
uint64_t nrows, uint64_t ncols // dimensions
) {
return module->func.bytes_of_vmp_pmat(module, nrows, ncols);
}
// fft64
EXPORT uint64_t fft64_bytes_of_vmp_pmat(const MODULE* module, // N
uint64_t nrows, uint64_t ncols // dimensions
) {
return module->nn * nrows * ncols * sizeof(double);
}
EXPORT VMP_PMAT* new_vmp_pmat(const MODULE* module, // N
uint64_t nrows, uint64_t ncols // dimensions
) {
return spqlios_alloc(bytes_of_vmp_pmat(module, nrows, ncols));
}
EXPORT void delete_vmp_pmat(VMP_PMAT* res) { spqlios_free(res); }
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void vmp_prepare_contiguous(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
) {
module->func.vmp_prepare_contiguous(module, pmat, mat, nrows, ncols, tmp_space);
}
/** @brief minimal scratch space byte-size required for the vmp_prepare function */
EXPORT uint64_t vmp_prepare_contiguous_tmp_bytes(const MODULE* module, // N
uint64_t nrows, uint64_t ncols) {
return module->func.vmp_prepare_contiguous_tmp_bytes(module, nrows, ncols);
}
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_vmp_prepare_contiguous_ref(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
) {
// edge case: a reim4 block packs 8 doubles, so the blocked layout below requires nn >= 8;
// smaller nn falls back to a plain column-major layout
const uint64_t nn = module->nn;
const uint64_t m = module->m;
double* output_mat = (double*)pmat;
double* start_addr = (double*)pmat;
uint64_t offset = nrows * ncols * 8;
if (nn >= 8) {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
reim_from_znx64(module->mod.fft64.p_conv, (SVP_PPOL*)tmp_space, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->mod.fft64.p_fft, (double*)tmp_space);
if (col_i == (ncols - 1) && (ncols % 2 == 1)) {
// special case: last column out of an odd column number
start_addr = output_mat + col_i * nrows * 8 // col == ncols-1
+ row_i * 8;
} else {
// general case: columns go by pair
start_addr = output_mat + (col_i / 2) * (2 * nrows) * 8 // second: col pair index
+ row_i * 2 * 8 // third: row index
+ (col_i % 2) * 8;
}
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
// extract blk from tmp and save it
reim4_extract_1blk_from_reim_ref(m, blk_i, start_addr + blk_i * offset, (double*)tmp_space);
}
}
}
} else {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
double* res = (double*)pmat + (col_i * nrows + row_i) * nn;
reim_from_znx64(module->mod.fft64.p_conv, (SVP_PPOL*)res, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->mod.fft64.p_fft, res);
}
}
}
}
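In the nn >= 8 branch above, the prepared matrix is laid out as reim4 blocks of 8 doubles, block-index major, with columns packed in pairs plus a lone last column when ncols is odd. A standalone replica of the addressing (not library code) makes the formula concrete:

#include <stdio.h>
#include <stdint.h>
// double-offset of the reim4 block (row_i, col_i, blk_i) in a prepared pmat
static uint64_t pmat_blk_offset(uint64_t nrows, uint64_t ncols,
                                uint64_t row_i, uint64_t col_i, uint64_t blk_i) {
  uint64_t layer = nrows * ncols * 8;  // doubles per block index
  uint64_t start;
  if (col_i == ncols - 1 && (ncols % 2 == 1)) {
    start = col_i * nrows * 8 + row_i * 8;  // lone last column
  } else {
    start = (col_i / 2) * (2 * nrows) * 8   // column-pair index
            + row_i * 2 * 8                 // row inside the pair
            + (col_i % 2) * 8;              // left/right half of the pair
  }
  return start + blk_i * layer;
}
int main(void) {
  // 2x3 matrix: columns 0 and 1 form a pair, column 2 is alone
  printf("%lu\n", (unsigned long)pmat_blk_offset(2, 3, 1, 1, 0));  // 24 (paired column)
  printf("%lu\n", (unsigned long)pmat_blk_offset(2, 3, 1, 2, 0));  // 40 (lone last column)
  return 0;
}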
/** @brief minimal scratch space byte-size required for the vmp_prepare function */
EXPORT uint64_t fft64_vmp_prepare_contiguous_tmp_bytes(const MODULE* module, // N
uint64_t nrows, uint64_t ncols) {
const uint64_t nn = module->nn;
return nn * sizeof(int64_t);
}
/** @brief applies a vmp product (result in DFT space) */
EXPORT void fft64_vmp_apply_dft_ref(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
) {
const uint64_t nn = module->nn;
const uint64_t rows = nrows < a_size ? nrows : a_size;
VEC_ZNX_DFT* a_dft = (VEC_ZNX_DFT*)tmp_space;
uint8_t* new_tmp_space = (uint8_t*)tmp_space + rows * nn * sizeof(double);
fft64_vec_znx_dft(module, a_dft, rows, a, a_size, a_sl);
fft64_vmp_apply_dft_to_dft_ref(module, res, res_size, a_dft, a_size, pmat, nrows, ncols, new_tmp_space);
}
/** @brief applies a vmp product when the input is already in DFT space */
EXPORT void fft64_vmp_apply_dft_to_dft_ref(const MODULE* module, // N
VEC_ZNX_DFT* res, const uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
const VMP_PMAT* pmat, const uint64_t nrows,
const uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
) {
const uint64_t m = module->m;
const uint64_t nn = module->nn;
double* mat2cols_output = (double*)tmp_space; // 128 bytes
double* extracted_blk = (double*)tmp_space + 16; // 64*min(nrows,a_size) bytes
double* mat_input = (double*)pmat;
double* vec_input = (double*)a_dft;
double* vec_output = (double*)res;
const uint64_t row_max = nrows < a_size ? nrows : a_size;
const uint64_t col_max = ncols < res_size ? ncols : res_size;
if (nn >= 8) {
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
double* mat_blk_start = mat_input + blk_i * (8 * nrows * ncols);
reim4_extract_1blk_from_contiguous_reim_ref(m, row_max, blk_i, (double*)extracted_blk, (double*)a_dft);
// apply mat2cols
for (uint64_t col_i = 0; col_i < col_max - 1; col_i += 2) {
uint64_t col_offset = col_i * (8 * nrows);
reim4_vec_mat2cols_product_ref(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
reim4_save_1blk_to_reim_ref(m, blk_i, vec_output + col_i * nn, mat2cols_output);
reim4_save_1blk_to_reim_ref(m, blk_i, vec_output + (col_i + 1) * nn, mat2cols_output + 8);
}
// check if col_max is odd, then special case
if (col_max % 2 == 1) {
uint64_t last_col = col_max - 1;
uint64_t col_offset = last_col * (8 * nrows);
// the last column is alone in the pmat: vec_mat1col
if (ncols == col_max) {
reim4_vec_mat1col_product_ref(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
} else {
// the last column is part of a colpair in the pmat: vec_mat2cols and ignore the second position
reim4_vec_mat2cols_product_ref(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
}
reim4_save_1blk_to_reim_ref(m, blk_i, vec_output + last_col * nn, mat2cols_output);
}
}
} else {
for (uint64_t col_i = 0; col_i < col_max; col_i++) {
double* pmat_col = mat_input + col_i * nrows * nn;
      // row 0 initializes the accumulator with a plain product
      reim_fftvec_mul(module->mod.fft64.mul_fft, vec_output + col_i * nn, vec_input, pmat_col);
for (uint64_t row_i = 1; row_i < row_max; row_i++) {
reim_fftvec_addmul(module->mod.fft64.p_addmul, vec_output + col_i * nn, vec_input + row_i * nn,
pmat_col + row_i * nn);
}
}
}
// zero out remaining bytes
memset(vec_output + col_max * nn, 0, (res_size - col_max) * nn * sizeof(double));
}
/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_vmp_apply_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
const uint64_t nn = module->nn;
const uint64_t row_max = nrows < a_size ? nrows : a_size;
return (row_max * nn * sizeof(double)) + (128) + (64 * row_max);
}
/** @brief minimal size of the tmp_space */
EXPORT uint64_t fft64_vmp_apply_dft_to_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
const uint64_t row_max = nrows < a_size ? nrows : a_size;
return (128) + (64 * row_max);
}
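Concretely, the two scratch formulas above differ only by the buffer that holds the on-the-fly DFT of a. A standalone evaluation (not library code) for nn=1024, a_size=4, nrows=6:

#include <stdint.h>
#include <stdio.h>
int main(void) {
  uint64_t nn = 1024, a_size = 4, nrows = 6;
  uint64_t row_max = nrows < a_size ? nrows : a_size;  // 4
  uint64_t apply_dft = row_max * nn * sizeof(double) + 128 + 64 * row_max;
  uint64_t apply_dft_to_dft = 128 + 64 * row_max;
  printf("%lu %lu\n", (unsigned long)apply_dft, (unsigned long)apply_dft_to_dft);  // 33152 384
  return 0;
}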
EXPORT void vmp_apply_dft_to_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, const uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
const VMP_PMAT* pmat, const uint64_t nrows,
const uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
) {
module->func.vmp_apply_dft_to_dft(module, res, res_size, a_dft, a_size, pmat, nrows, ncols, tmp_space);
}
EXPORT uint64_t vmp_apply_dft_to_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
return module->func.vmp_apply_dft_to_dft_tmp_bytes(module, res_size, a_size, nrows, ncols);
}
/** @brief applies a vmp product (result in DFT space) */
EXPORT void vmp_apply_dft(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
) {
module->func.vmp_apply_dft(module, res, res_size, a, a_size, a_sl, pmat, nrows, ncols, tmp_space);
}
/** @brief minimal size of the tmp_space */
EXPORT uint64_t vmp_apply_dft_tmp_bytes(const MODULE* module, // N
uint64_t res_size, // res
uint64_t a_size, // a
uint64_t nrows, uint64_t ncols // prep matrix
) {
return module->func.vmp_apply_dft_tmp_bytes(module, res_size, a_size, nrows, ncols);
}
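End to end, the public wrappers above compose as prepare-then-apply. A hypothetical sketch (not part of the library; the scratch buffer is shared across both calls by taking the larger of the two requirements):

#include "vec_znx_arithmetic_private.h"
void vmp_sketch(const MODULE* module, uint64_t nn,
                const int64_t* mat, uint64_t nrows, uint64_t ncols,
                const int64_t* a, uint64_t a_size,
                VEC_ZNX_DFT* res, uint64_t res_size) {
  VMP_PMAT* pmat = new_vmp_pmat(module, nrows, ncols);
  uint64_t t1 = vmp_prepare_contiguous_tmp_bytes(module, nrows, ncols);
  uint64_t t2 = vmp_apply_dft_tmp_bytes(module, res_size, a_size, nrows, ncols);
  uint8_t* tmp = spqlios_alloc(t1 > t2 ? t1 : t2);
  vmp_prepare_contiguous(module, pmat, mat, nrows, ncols, tmp);
  vmp_apply_dft(module, res, res_size, a, a_size, nn, pmat, nrows, ncols, tmp);
  spqlios_free(tmp);
  delete_vmp_pmat(pmat);
}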

View File

@@ -1,137 +0,0 @@
#include <string.h>
#include "../reim4/reim4_arithmetic.h"
#include "vec_znx_arithmetic_private.h"
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void fft64_vmp_prepare_contiguous_avx(const MODULE* module, // N
VMP_PMAT* pmat, // output
const int64_t* mat, uint64_t nrows, uint64_t ncols, // a
uint8_t* tmp_space // scratch space
) {
// edge case: a reim4 block packs 8 doubles, so the blocked layout below requires nn >= 8;
// smaller nn falls back to a plain column-major layout
const uint64_t nn = module->nn;
const uint64_t m = module->m;
double* output_mat = (double*)pmat;
double* start_addr = (double*)pmat;
uint64_t offset = nrows * ncols * 8;
if (nn >= 8) {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
reim_from_znx64(module->mod.fft64.p_conv, (SVP_PPOL*)tmp_space, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->mod.fft64.p_fft, (double*)tmp_space);
if (col_i == (ncols - 1) && (ncols % 2 == 1)) {
// special case: last column out of an odd column number
start_addr = output_mat + col_i * nrows * 8 // col == ncols-1
+ row_i * 8;
} else {
// general case: columns go by pair
start_addr = output_mat + (col_i / 2) * (2 * nrows) * 8 // second: col pair index
+ row_i * 2 * 8 // third: row index
+ (col_i % 2) * 8;
}
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
// extract blk from tmp and save it
reim4_extract_1blk_from_reim_avx(m, blk_i, start_addr + blk_i * offset, (double*)tmp_space);
}
}
}
} else {
for (uint64_t row_i = 0; row_i < nrows; row_i++) {
for (uint64_t col_i = 0; col_i < ncols; col_i++) {
double* res = (double*)pmat + (col_i * nrows + row_i) * nn;
reim_from_znx64(module->mod.fft64.p_conv, (SVP_PPOL*)res, mat + (row_i * ncols + col_i) * nn);
reim_fft(module->mod.fft64.p_fft, res);
}
}
}
}
/** @brief applies a vmp product (result in DFT space) */
EXPORT void fft64_vmp_apply_dft_avx(const MODULE* module, // N
VEC_ZNX_DFT* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size, uint64_t a_sl, // a
const VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space
) {
const uint64_t nn = module->nn;
const uint64_t rows = nrows < a_size ? nrows : a_size;
VEC_ZNX_DFT* a_dft = (VEC_ZNX_DFT*)tmp_space;
uint8_t* new_tmp_space = (uint8_t*)tmp_space + rows * nn * sizeof(double);
fft64_vec_znx_dft(module, a_dft, rows, a, a_size, a_sl);
fft64_vmp_apply_dft_to_dft_avx(module, res, res_size, a_dft, a_size, pmat, nrows, ncols, new_tmp_space);
}
/** @brief applies a vmp product when the input is already in DFT space */
EXPORT void fft64_vmp_apply_dft_to_dft_avx(const MODULE* module, // N
VEC_ZNX_DFT* res, const uint64_t res_size, // res
const VEC_ZNX_DFT* a_dft, uint64_t a_size, // a
const VMP_PMAT* pmat, const uint64_t nrows,
const uint64_t ncols, // prep matrix
uint8_t* tmp_space // scratch space (a_size*sizeof(reim4) bytes)
) {
const uint64_t m = module->m;
const uint64_t nn = module->nn;
double* mat2cols_output = (double*)tmp_space; // 128 bytes
double* extracted_blk = (double*)tmp_space + 16; // 64*min(nrows,a_size) bytes
double* mat_input = (double*)pmat;
double* vec_input = (double*)a_dft;
double* vec_output = (double*)res;
const uint64_t row_max = nrows < a_size ? nrows : a_size;
const uint64_t col_max = ncols < res_size ? ncols : res_size;
if (nn >= 8) {
for (uint64_t blk_i = 0; blk_i < m / 4; blk_i++) {
double* mat_blk_start = mat_input + blk_i * (8 * nrows * ncols);
reim4_extract_1blk_from_contiguous_reim_avx(m, row_max, blk_i, (double*)extracted_blk, (double*)a_dft);
// apply mat2cols
for (uint64_t col_i = 0; col_i < col_max - 1; col_i += 2) {
uint64_t col_offset = col_i * (8 * nrows);
reim4_vec_mat2cols_product_avx2(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
reim4_save_1blk_to_reim_avx(m, blk_i, vec_output + col_i * nn, mat2cols_output);
reim4_save_1blk_to_reim_avx(m, blk_i, vec_output + (col_i + 1) * nn, mat2cols_output + 8);
}
// check if col_max is odd, then special case
if (col_max % 2 == 1) {
uint64_t last_col = col_max - 1;
uint64_t col_offset = last_col * (8 * nrows);
// the last column is alone in the pmat: vec_mat1col
if (ncols == col_max)
reim4_vec_mat1col_product_avx2(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
else {
// the last column is part of a colpair in the pmat: vec_mat2cols and ignore the second position
reim4_vec_mat2cols_product_avx2(row_max, mat2cols_output, extracted_blk, mat_blk_start + col_offset);
}
reim4_save_1blk_to_reim_avx(m, blk_i, vec_output + last_col * nn, mat2cols_output);
}
}
} else {
for (uint64_t col_i = 0; col_i < col_max; col_i++) {
double* pmat_col = mat_input + col_i * nrows * nn;
      // row 0 initializes the accumulator with a plain product
      reim_fftvec_mul(module->mod.fft64.mul_fft, vec_output + col_i * nn, vec_input, pmat_col);
for (uint64_t row_i = 1; row_i < row_max; row_i++) {
reim_fftvec_addmul(module->mod.fft64.p_addmul, vec_output + col_i * nn, vec_input + row_i * nn,
pmat_col + row_i * nn);
}
}
}
// zero out remaining bytes
memset(vec_output + col_max * nn, 0, (res_size - col_max) * nn * sizeof(double));
}

View File

@@ -1,169 +0,0 @@
#include <string.h>
#include "zn_arithmetic_private.h"
void default_init_z_module_precomp(MOD_Z* module) {
// Add here initialization of items that are in the precomp
}
void default_finalize_z_module_precomp(MOD_Z* module) {
// Add here deleters for items that are in the precomp
}
void default_init_z_module_vtable(MOD_Z* module) {
// Add function pointers here
module->vtable.i8_approxdecomp_from_tndbl = default_i8_approxdecomp_from_tndbl_ref;
module->vtable.i16_approxdecomp_from_tndbl = default_i16_approxdecomp_from_tndbl_ref;
module->vtable.i32_approxdecomp_from_tndbl = default_i32_approxdecomp_from_tndbl_ref;
module->vtable.zn32_vmp_prepare_contiguous = default_zn32_vmp_prepare_contiguous_ref;
module->vtable.zn32_vmp_apply_i8 = default_zn32_vmp_apply_i8_ref;
module->vtable.zn32_vmp_apply_i16 = default_zn32_vmp_apply_i16_ref;
module->vtable.zn32_vmp_apply_i32 = default_zn32_vmp_apply_i32_ref;
module->vtable.dbl_to_tn32 = dbl_to_tn32_ref;
module->vtable.tn32_to_dbl = tn32_to_dbl_ref;
module->vtable.dbl_round_to_i32 = dbl_round_to_i32_ref;
module->vtable.i32_to_dbl = i32_to_dbl_ref;
module->vtable.dbl_round_to_i64 = dbl_round_to_i64_ref;
module->vtable.i64_to_dbl = i64_to_dbl_ref;
// Add optimized function pointers here
if (CPU_SUPPORTS("avx")) {
module->vtable.zn32_vmp_apply_i8 = default_zn32_vmp_apply_i8_avx;
module->vtable.zn32_vmp_apply_i16 = default_zn32_vmp_apply_i16_avx;
module->vtable.zn32_vmp_apply_i32 = default_zn32_vmp_apply_i32_avx;
}
}
void init_z_module_info(MOD_Z* module, //
Z_MODULE_TYPE mtype) {
memset(module, 0, sizeof(MOD_Z));
module->mtype = mtype;
switch (mtype) {
case DEFAULT:
default_init_z_module_precomp(module);
default_init_z_module_vtable(module);
break;
default:
NOT_SUPPORTED(); // unknown mtype
}
}
void finalize_z_module_info(MOD_Z* module) {
if (module->custom) module->custom_deleter(module->custom);
switch (module->mtype) {
case DEFAULT:
default_finalize_z_module_precomp(module);
// fft64_finalize_rnx_module_vtable(module); // nothing to finalize
break;
default:
NOT_SUPPORTED(); // unknown mtype
}
}
EXPORT MOD_Z* new_z_module_info(Z_MODULE_TYPE mtype) {
MOD_Z* res = (MOD_Z*)malloc(sizeof(MOD_Z));
init_z_module_info(res, mtype);
return res;
}
EXPORT void delete_z_module_info(MOD_Z* module_info) {
finalize_z_module_info(module_info);
free(module_info);
}
//////////////// wrappers //////////////////
/** @brief sets res = gadget_decompose(a) (int8_t* output) */
EXPORT void i8_approxdecomp_from_tndbl(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int8_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size) { // a
module->vtable.i8_approxdecomp_from_tndbl(module, gadget, res, res_size, a, a_size);
}
/** @brief sets res = gadget_decompose(a) (int16_t* output) */
EXPORT void i16_approxdecomp_from_tndbl(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int16_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size) { // a
module->vtable.i16_approxdecomp_from_tndbl(module, gadget, res, res_size, a, a_size);
}
/** @brief sets res = gadget_decompose(a) (int32_t* output) */
EXPORT void i32_approxdecomp_from_tndbl(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int32_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size) { // a
module->vtable.i32_approxdecomp_from_tndbl(module, gadget, res, res_size, a, a_size);
}
EXPORT void zn32_vmp_prepare_contiguous( //
const MOD_Z* module,
ZN32_VMP_PMAT* pmat, // output
const int32_t* mat, uint64_t nrows, uint64_t ncols) { // a
module->vtable.zn32_vmp_prepare_contiguous(module, pmat, mat, nrows, ncols);
}
/** @brief applies a vmp product (int32_t* input) */
EXPORT void zn32_vmp_apply_i32(const MOD_Z* module, int32_t* res, uint64_t res_size, const int32_t* a, uint64_t a_size,
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols) {
module->vtable.zn32_vmp_apply_i32(module, res, res_size, a, a_size, pmat, nrows, ncols);
}
/** @brief applies a vmp product (int16_t* input) */
EXPORT void zn32_vmp_apply_i16(const MOD_Z* module, int32_t* res, uint64_t res_size, const int16_t* a, uint64_t a_size,
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols) {
module->vtable.zn32_vmp_apply_i16(module, res, res_size, a, a_size, pmat, nrows, ncols);
}
/** @brief applies a vmp product (int8_t* input) */
EXPORT void zn32_vmp_apply_i8(const MOD_Z* module, int32_t* res, uint64_t res_size, const int8_t* a, uint64_t a_size,
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols) {
module->vtable.zn32_vmp_apply_i8(module, res, res_size, a, a_size, pmat, nrows, ncols);
}
/** reduction mod 1, output in torus32 space */
EXPORT void dbl_to_tn32(const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
) {
module->vtable.dbl_to_tn32(module, res, res_size, a, a_size);
}
/** real centerlift mod 1, output in double space */
EXPORT void tn32_to_dbl(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size // a
) {
module->vtable.tn32_to_dbl(module, res, res_size, a, a_size);
}
/** round to the nearest int, output in i32 space */
EXPORT void dbl_round_to_i32(const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
) {
module->vtable.dbl_round_to_i32(module, res, res_size, a, a_size);
}
/** small int (int32 space) to double */
EXPORT void i32_to_dbl(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size // a
) {
module->vtable.i32_to_dbl(module, res, res_size, a, a_size);
}
/** round to the nearest int, output in int64 space */
EXPORT void dbl_round_to_i64(const MOD_Z* module, //
int64_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
) {
module->vtable.dbl_round_to_i64(module, res, res_size, a, a_size);
}
/** small int (int64 space, <= 2^50) to double */
EXPORT void i64_to_dbl(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size // a
) {
module->vtable.i64_to_dbl(module, res, res_size, a, a_size);
}

View File

@@ -1,81 +0,0 @@
#include <memory.h>
#include "zn_arithmetic_private.h"
EXPORT TNDBL_APPROXDECOMP_GADGET* new_tndbl_approxdecomp_gadget(const MOD_Z* module, //
uint64_t k, uint64_t ell) {
if (k * ell > 50) {
return spqlios_error("approx decomposition requested is too precise for doubles");
}
if (k < 1) {
return spqlios_error("approx decomposition supports k>=1");
}
TNDBL_APPROXDECOMP_GADGET* res = malloc(sizeof(TNDBL_APPROXDECOMP_GADGET));
memset(res, 0, sizeof(TNDBL_APPROXDECOMP_GADGET));
res->k = k;
res->ell = ell;
double add_cst = INT64_C(3) << (51 - k * ell);
for (uint64_t i = 0; i < ell; ++i) {
add_cst += pow(2., -(double)(i * k + 1));
}
res->add_cst = add_cst;
res->and_mask = (UINT64_C(1) << k) - 1;
res->sub_cst = UINT64_C(1) << (k - 1);
for (uint64_t i = 0; i < ell; ++i) res->rshifts[i] = (ell - 1 - i) * k;
return res;
}
EXPORT void delete_tndbl_approxdecomp_gadget(TNDBL_APPROXDECOMP_GADGET* ptr) { free(ptr); }
EXPORT int default_init_tndbl_approxdecomp_gadget(const MOD_Z* module, //
TNDBL_APPROXDECOMP_GADGET* res, //
uint64_t k, uint64_t ell) {
return 0;
}
typedef union {
double dv;
uint64_t uv;
} du_t;
#define IMPL_ixx_approxdecomp_from_tndbl_ref(ITYPE) \
if (res_size != a_size * gadget->ell) NOT_IMPLEMENTED(); \
const uint64_t ell = gadget->ell; \
const double add_cst = gadget->add_cst; \
const uint8_t* const rshifts = gadget->rshifts; \
const ITYPE and_mask = gadget->and_mask; \
const ITYPE sub_cst = gadget->sub_cst; \
ITYPE* rr = res; \
const double* aa = a; \
const double* aaend = a + a_size; \
while (aa < aaend) { \
du_t t = {.dv = *aa + add_cst}; \
for (uint64_t i = 0; i < ell; ++i) { \
ITYPE v = (ITYPE)(t.uv >> rshifts[i]); \
*rr = (v & and_mask) - sub_cst; \
++rr; \
} \
++aa; \
}
/** @brief sets res = gadget_decompose(a) (int8_t* output) */
EXPORT void default_i8_approxdecomp_from_tndbl_ref(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int8_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size //
) {
  IMPL_ixx_approxdecomp_from_tndbl_ref(int8_t)
}
/** @brief sets res = gadget_decompose(a) (int16_t* output) */
EXPORT void default_i16_approxdecomp_from_tndbl_ref(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int16_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
) {
  IMPL_ixx_approxdecomp_from_tndbl_ref(int16_t)
}
/** @brief sets res = gadget_decompose(a) (int32_t* output) */
EXPORT void default_i32_approxdecomp_from_tndbl_ref(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
) {
IMPL_ixx_approxdecomp_from_tndbl_ref(int32_t)
}
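The macro above reads base-2^k digits straight out of a double's mantissa: adding 3*2^(51-k*ell) pins the exponent so the top k*ell fractional bits of a land at fixed mantissa positions, and the 2^-(i*k+1) terms recenter each digit so that subtracting 2^(k-1) yields a signed digit. A standalone demonstration of the same trick (not library code), for k=4, ell=3:

#include <math.h>
#include <stdint.h>
#include <stdio.h>
int main(void) {
  const uint64_t k = 4, ell = 3;
  double add_cst = (double)(INT64_C(3) << (51 - k * ell));
  for (uint64_t i = 0; i < ell; ++i) add_cst += pow(2., -(double)(i * k + 1));
  const uint64_t and_mask = (UINT64_C(1) << k) - 1;
  const int64_t sub_cst = INT64_C(1) << (k - 1);
  double a = 0.3, rec = 0;
  union { double dv; uint64_t uv; } t = {.dv = a + add_cst};
  for (uint64_t i = 0; i < ell; ++i) {
    int64_t digit = (int64_t)((t.uv >> ((ell - 1 - i) * k)) & and_mask) - sub_cst;
    rec += (double)digit * pow(2., -(double)((i + 1) * k));  // digit i has weight 2^-((i+1)k)
    printf("digit[%lu] = %ld\n", (unsigned long)i, (long)digit);  // 5, -3, -3
  }
  printf("a = %f, reconstructed = %f\n", a, rec);  // equal up to ~2^-(k*ell)
  return 0;
}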

View File

@@ -1,135 +0,0 @@
#ifndef SPQLIOS_ZN_ARITHMETIC_H
#define SPQLIOS_ZN_ARITHMETIC_H
#include <stdint.h>
#include "../commons.h"
typedef enum z_module_type_t { DEFAULT } Z_MODULE_TYPE;
/** @brief opaque structure that describes the module and the hardware */
typedef struct z_module_info_t MOD_Z;
/**
* @brief obtain a module info for ring dimension N
* the module-info knows about:
* - the dimension N (or the complex dimension m=N/2)
* - any precomputed fft or ntt items
* - the hardware (avx, arm64, x86, ...)
*/
EXPORT MOD_Z* new_z_module_info(Z_MODULE_TYPE mode);
EXPORT void delete_z_module_info(MOD_Z* module_info);
typedef struct tndbl_approxdecomp_gadget_t TNDBL_APPROXDECOMP_GADGET;
EXPORT TNDBL_APPROXDECOMP_GADGET* new_tndbl_approxdecomp_gadget(const MOD_Z* module, //
uint64_t k,
uint64_t ell); // base 2^k, and size
EXPORT void delete_tndbl_approxdecomp_gadget(TNDBL_APPROXDECOMP_GADGET* ptr);
/** @brief sets res = gadget_decompose(a) (int8_t* output) */
EXPORT void i8_approxdecomp_from_tndbl(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int8_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size); // a
/** @brief sets res = gadget_decompose(a) (int16_t* output) */
EXPORT void i16_approxdecomp_from_tndbl(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int16_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size); // a
/** @brief sets res = gadget_decompose(a) (int32_t* output) */
EXPORT void i32_approxdecomp_from_tndbl(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int32_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size); // a
/** @brief opaque type that represents a prepared matrix */
typedef struct zn32_vmp_pmat_t ZN32_VMP_PMAT;
/** @brief size in bytes of a prepared matrix (for custom allocation) */
EXPORT uint64_t bytes_of_zn32_vmp_pmat(const MOD_Z* module, // N
uint64_t nrows, uint64_t ncols); // dimensions
/** @brief allocates a prepared matrix (release with delete_zn32_vmp_pmat) */
EXPORT ZN32_VMP_PMAT* new_zn32_vmp_pmat(const MOD_Z* module, // N
uint64_t nrows, uint64_t ncols); // dimensions
/** @brief deletes a prepared matrix (previously allocated with new_zn32_vmp_pmat) */
EXPORT void delete_zn32_vmp_pmat(ZN32_VMP_PMAT* ptr);
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void zn32_vmp_prepare_contiguous( //
const MOD_Z* module,
ZN32_VMP_PMAT* pmat, // output
const int32_t* mat, uint64_t nrows, uint64_t ncols); // a
/** @brief applies a vmp product (int32_t* input) */
EXPORT void zn32_vmp_apply_i32( //
const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int16_t* input) */
EXPORT void zn32_vmp_apply_i16( //
const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const int16_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int8_t* input) */
EXPORT void zn32_vmp_apply_i8( //
const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const int8_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
// explicit conversions
/** reduction mod 1, output in torus32 space */
EXPORT void dbl_to_tn32(const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
);
/** real centerlift mod 1, output in double space */
EXPORT void tn32_to_dbl(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size // a
);
/** round to the nearest int, output in i32 space.
* WARNING: ||a||_inf must be <= 2^18 in this function
*/
EXPORT void dbl_round_to_i32(const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
);
/** small int (int32 space) to double
* WARNING: ||a||_inf must be <= 2^18 in this function
*/
EXPORT void i32_to_dbl(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size // a
);
/** round to the nearest int, output in int64 space
* WARNING: ||a||_inf must be <= 2^50 in this function
*/
EXPORT void dbl_round_to_i64(const MOD_Z* module, //
int64_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
);
/** small int (int64 space, <= 2^50) to double
* WARNING: ||a||_inf must be <= 2^50 in this function
*/
EXPORT void i64_to_dbl(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size // a
);
#endif // SPQLIOS_ZN_ARITHMETIC_H
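A typical consumer of this header decomposes a real vector into small digits and pushes them through a prepared 32-bit matrix. A hypothetical sketch (the function name is ours; base 2^8 with 4 digits is one choice within the k*ell <= 50 limit mentioned above):

#include <stdlib.h>
#include "zn_arithmetic.h"
void zn32_sketch(const int32_t* mat, uint64_t nrows, uint64_t ncols,
                 const double* a, uint64_t a_size,
                 int32_t* res, uint64_t res_size) {
  MOD_Z* module = new_z_module_info(DEFAULT);
  TNDBL_APPROXDECOMP_GADGET* gadget = new_tndbl_approxdecomp_gadget(module, 8, 4);
  int8_t* dec = malloc(4 * a_size);  // ell = 4 digits per input coefficient
  i8_approxdecomp_from_tndbl(module, gadget, dec, 4 * a_size, a, a_size);
  ZN32_VMP_PMAT* pmat = new_zn32_vmp_pmat(module, nrows, ncols);
  zn32_vmp_prepare_contiguous(module, pmat, mat, nrows, ncols);
  zn32_vmp_apply_i8(module, res, res_size, dec, 4 * a_size, pmat, nrows, ncols);
  delete_zn32_vmp_pmat(pmat);
  delete_tndbl_approxdecomp_gadget(gadget);
  free(dec);
  delete_z_module_info(module);
}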

View File

@@ -1,39 +0,0 @@
#ifndef SPQLIOS_ZN_ARITHMETIC_PLUGIN_H
#define SPQLIOS_ZN_ARITHMETIC_PLUGIN_H
#include "zn_arithmetic.h"
typedef typeof(i8_approxdecomp_from_tndbl) I8_APPROXDECOMP_FROM_TNDBL_F;
typedef typeof(i16_approxdecomp_from_tndbl) I16_APPROXDECOMP_FROM_TNDBL_F;
typedef typeof(i32_approxdecomp_from_tndbl) I32_APPROXDECOMP_FROM_TNDBL_F;
typedef typeof(bytes_of_zn32_vmp_pmat) BYTES_OF_ZN32_VMP_PMAT_F;
typedef typeof(zn32_vmp_prepare_contiguous) ZN32_VMP_PREPARE_CONTIGUOUS_F;
typedef typeof(zn32_vmp_apply_i32) ZN32_VMP_APPLY_I32_F;
typedef typeof(zn32_vmp_apply_i16) ZN32_VMP_APPLY_I16_F;
typedef typeof(zn32_vmp_apply_i8) ZN32_VMP_APPLY_I8_F;
typedef typeof(dbl_to_tn32) DBL_TO_TN32_F;
typedef typeof(tn32_to_dbl) TN32_TO_DBL_F;
typedef typeof(dbl_round_to_i32) DBL_ROUND_TO_I32_F;
typedef typeof(i32_to_dbl) I32_TO_DBL_F;
typedef typeof(dbl_round_to_i64) DBL_ROUND_TO_I64_F;
typedef typeof(i64_to_dbl) I64_TO_DBL_F;
typedef struct z_module_vtable_t Z_MODULE_VTABLE;
struct z_module_vtable_t {
I8_APPROXDECOMP_FROM_TNDBL_F* i8_approxdecomp_from_tndbl;
I16_APPROXDECOMP_FROM_TNDBL_F* i16_approxdecomp_from_tndbl;
I32_APPROXDECOMP_FROM_TNDBL_F* i32_approxdecomp_from_tndbl;
BYTES_OF_ZN32_VMP_PMAT_F* bytes_of_zn32_vmp_pmat;
ZN32_VMP_PREPARE_CONTIGUOUS_F* zn32_vmp_prepare_contiguous;
ZN32_VMP_APPLY_I32_F* zn32_vmp_apply_i32;
ZN32_VMP_APPLY_I16_F* zn32_vmp_apply_i16;
ZN32_VMP_APPLY_I8_F* zn32_vmp_apply_i8;
DBL_TO_TN32_F* dbl_to_tn32;
TN32_TO_DBL_F* tn32_to_dbl;
DBL_ROUND_TO_I32_F* dbl_round_to_i32;
I32_TO_DBL_F* i32_to_dbl;
DBL_ROUND_TO_I64_F* dbl_round_to_i64;
I64_TO_DBL_F* i64_to_dbl;
};
#endif // SPQLIOS_ZN_ARITHMETIC_PLUGIN_H

View File

@@ -1,150 +0,0 @@
#ifndef SPQLIOS_ZN_ARITHMETIC_PRIVATE_H
#define SPQLIOS_ZN_ARITHMETIC_PRIVATE_H
#include "../commons_private.h"
#include "zn_arithmetic.h"
#include "zn_arithmetic_plugin.h"
typedef struct main_z_module_precomp_t MAIN_Z_MODULE_PRECOMP;
struct main_z_module_precomp_t {
// TODO
};
typedef union z_module_precomp_t Z_MODULE_PRECOMP;
union z_module_precomp_t {
MAIN_Z_MODULE_PRECOMP main;
};
void default_init_z_module_precomp(MOD_Z* module);
void default_finalize_z_module_precomp(MOD_Z* module);
/** @brief opaque structure that describes the modules (RnX,ZnX,TnX) and the hardware */
struct z_module_info_t {
Z_MODULE_TYPE mtype;
Z_MODULE_VTABLE vtable;
Z_MODULE_PRECOMP precomp;
void* custom;
void (*custom_deleter)(void*);
};
void init_z_module_info(MOD_Z* module, Z_MODULE_TYPE mtype);
void default_init_z_module_vtable(MOD_Z* module);
struct tndbl_approxdecomp_gadget_t {
uint64_t k;
uint64_t ell;
double add_cst; // 3*2^(51-K*ell) + sum_{i<ell} 2^-(i*K+1)
int64_t and_mask; // (2^K)-1
int64_t sub_cst; // 2^(K-1)
uint8_t rshifts[64]; // (ell-1-i)*K for i in [0:ell-1]
};
/** @brief sets res = gadget_decompose(a) (int8_t* output) */
EXPORT void default_i8_approxdecomp_from_tndbl_ref(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int8_t* res, uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size); // a
/** @brief sets res = gadget_decompose(a) (int16_t* output) */
EXPORT void default_i16_approxdecomp_from_tndbl_ref(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int16_t* res,
uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size); // a
/** @brief sets res = gadget_decompose(a) (int32_t* output) */
EXPORT void default_i32_approxdecomp_from_tndbl_ref(const MOD_Z* module, // N
const TNDBL_APPROXDECOMP_GADGET* gadget, // gadget
int32_t* res,
uint64_t res_size, // res (in general, size ell.a_size)
const double* a, uint64_t a_size); // a
/** @brief prepares a vmp matrix (contiguous row-major version) */
EXPORT void default_zn32_vmp_prepare_contiguous_ref( //
const MOD_Z* module,
ZN32_VMP_PMAT* pmat, // output
const int32_t* mat, uint64_t nrows, uint64_t ncols // a
);
/** @brief applies a vmp product (int32_t* input) */
EXPORT void default_zn32_vmp_apply_i32_ref( //
const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int16_t* input) */
EXPORT void default_zn32_vmp_apply_i16_ref( //
const MOD_Z* module, // N
int32_t* res, uint64_t res_size, // res
const int16_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int8_t* input) */
EXPORT void default_zn32_vmp_apply_i8_ref( //
const MOD_Z* module, // N
int32_t* res, uint64_t res_size, // res
const int8_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int32_t* input) */
EXPORT void default_zn32_vmp_apply_i32_avx( //
const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int16_t* input) */
EXPORT void default_zn32_vmp_apply_i16_avx( //
const MOD_Z* module, // N
int32_t* res, uint64_t res_size, // res
const int16_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
/** @brief applies a vmp product (int8_t* input) */
EXPORT void default_zn32_vmp_apply_i8_avx( //
const MOD_Z* module, // N
int32_t* res, uint64_t res_size, // res
const int8_t* a, uint64_t a_size, // a
const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols); // prep matrix
// explicit conversions
/** reduction mod 1, output in torus32 space */
EXPORT void dbl_to_tn32_ref(const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
);
/** real centerlift mod 1, output in double space */
EXPORT void tn32_to_dbl_ref(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size // a
);
/** round to the nearest int, output in i32 space */
EXPORT void dbl_round_to_i32_ref(const MOD_Z* module, //
int32_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
);
/** small int (int32 space) to double */
EXPORT void i32_to_dbl_ref(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int32_t* a, uint64_t a_size // a
);
/** round to the nearest int, output in int64 space */
EXPORT void dbl_round_to_i64_ref(const MOD_Z* module, //
int64_t* res, uint64_t res_size, // res
const double* a, uint64_t a_size // a
);
/** small int (int64 space) to double */
EXPORT void i64_to_dbl_ref(const MOD_Z* module, //
double* res, uint64_t res_size, // res
const int64_t* a, uint64_t a_size // a
);
#endif // SPQLIOS_ZN_ARITHMETIC_PRIVATE_H

View File

@@ -1,108 +0,0 @@
#include <memory.h>
#include "zn_arithmetic_private.h"
typedef union {
double dv;
int64_t s64v;
int32_t s32v;
uint64_t u64v;
uint32_t u32v;
} di_t;
/** reduction mod 1, output in torus32 space */
EXPORT void dbl_to_tn32_ref(const MOD_Z* module,              //
                            int32_t* res, uint64_t res_size,  // res
                            const double* a, uint64_t a_size  // a
) {
  // Adding 0.5 + 1.5*2^20 pins the ULP of the sum to 2^-32, so the low 32
  // mantissa bits hold round((a[i] + 0.5) * 2^32) mod 2^32; the final XOR
  // with 2^31 cancels the 0.5 offset in torus32 units.
  static const double ADD_CST = 0.5 + (double)(INT64_C(3) << (51 - 32));
  static const int32_t XOR_CST = (INT32_C(1) << 31);
  const uint64_t msize = res_size < a_size ? res_size : a_size;
  for (uint64_t i = 0; i < msize; ++i) {
    di_t t = {.dv = a[i] + ADD_CST};
    res[i] = t.s32v ^ XOR_CST;
  }
  // zero-pad the tail when res is longer than a
  memset(res + msize, 0, (res_size - msize) * sizeof(int32_t));
}
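// Worked example (default round-to-nearest FP mode assumed): for a[i] = 0.25,
// the low 32 mantissa bits of a[i] + ADD_CST are 0x40000000 + 0x80000000 =
// 0xC0000000; the XOR with 0x80000000 yields 0x40000000, i.e. 0.25 in
// torus32 units (one unit = 2^-32).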
/** real centerlift mod 1, output in double space */
EXPORT void tn32_to_dbl_ref(const MOD_Z* module,               //
                            double* res, uint64_t res_size,    // res
                            const int32_t* a, uint64_t a_size  // a
) {
  // The XOR offsets a[i] by 2^31 into unsigned range; ORing that value into
  // the mantissa of 2^20 builds the double 2^20 + (a[i] + 2^31) * 2^-32
  // exactly, and subtracting 2^20 + 0.5 leaves the centered representative
  // a[i] * 2^-32 in [-0.5, 0.5).
  static const uint32_t XOR_CST = (UINT32_C(1) << 31);
  static const di_t OR_CST = {.dv = (double)(INT64_C(1) << (52 - 32))};
  static const double SUB_CST = 0.5 + (double)(INT64_C(1) << (52 - 32));
  const uint64_t msize = res_size < a_size ? res_size : a_size;
  for (uint64_t i = 0; i < msize; ++i) {
    uint32_t ai = a[i] ^ XOR_CST;
    di_t t = {.u64v = OR_CST.u64v | (uint64_t)ai};
    res[i] = t.dv - SUB_CST;
  }
  memset(res + msize, 0, (res_size - msize) * sizeof(double));
}
/** round to the nearest int, output in i32 space */
EXPORT void dbl_round_to_i32_ref(const MOD_Z* module,              //
                                 int32_t* res, uint64_t res_size,  // res
                                 const double* a, uint64_t a_size  // a
) {
  // 1.5*2^52 pins the ULP of the sum to 1, so the low 32 mantissa bits hold
  // round(a[i]) + 2^31 mod 2^32 (ties resolved by the FP rounding mode,
  // round-to-nearest-even by default); the XOR with 2^31 removes the offset.
  // Valid for |a[i]| < 2^31.
  static const double ADD_CST = (double)((INT64_C(3) << (51)) + (INT64_C(1) << (31)));
  static const int32_t XOR_CST = INT32_C(1) << 31;
  const uint64_t msize = res_size < a_size ? res_size : a_size;
  for (uint64_t i = 0; i < msize; ++i) {
    di_t t = {.dv = a[i] + ADD_CST};
    res[i] = t.s32v ^ XOR_CST;
  }
  memset(res + msize, 0, (res_size - msize) * sizeof(int32_t));
}
/** small int (int32 space) to double */
EXPORT void i32_to_dbl_ref(const MOD_Z* module,               //
                           double* res, uint64_t res_size,    // res
                           const int32_t* a, uint64_t a_size  // a
) {
  // The XOR offsets a[i] by 2^31 into unsigned range; OR into the mantissa of
  // 2^52 builds the double 2^52 + a[i] + 2^31 exactly, and the subtraction
  // recovers a[i].
  static const uint32_t XOR_CST = (UINT32_C(1) << 31);
  static const di_t OR_CST = {.dv = (double)(INT64_C(1) << 52)};
  static const double SUB_CST = (double)((INT64_C(1) << 52) + (INT64_C(1) << 31));
  const uint64_t msize = res_size < a_size ? res_size : a_size;
  for (uint64_t i = 0; i < msize; ++i) {
    uint32_t ai = a[i] ^ XOR_CST;
    di_t t = {.u64v = OR_CST.u64v | (uint64_t)ai};
    res[i] = t.dv - SUB_CST;
  }
  memset(res + msize, 0, (res_size - msize) * sizeof(double));
}
/** round to the nearest int, output in int64 space */
EXPORT void dbl_round_to_i64_ref(const MOD_Z* module,              //
                                 int64_t* res, uint64_t res_size,  // res
                                 const double* a, uint64_t a_size  // a
) {
  // 1.5*2^52 pins the ULP of the sum to 1; masking the low 52 mantissa bits
  // yields round(a[i]) + 2^51, and subtracting 2^51 recovers round(a[i]).
  // Valid for |a[i]| < 2^51.
  static const double ADD_CST = (double)(INT64_C(3) << (51));
  static const int64_t AND_CST = (INT64_C(1) << 52) - 1;
  static const int64_t SUB_CST = INT64_C(1) << 51;
  const uint64_t msize = res_size < a_size ? res_size : a_size;
  for (uint64_t i = 0; i < msize; ++i) {
    di_t t = {.dv = a[i] + ADD_CST};
    res[i] = (t.s64v & AND_CST) - SUB_CST;
  }
  memset(res + msize, 0, (res_size - msize) * sizeof(int64_t));
}
/** small int (int64 space) to double */
EXPORT void i64_to_dbl_ref(const MOD_Z* module,               //
                           double* res, uint64_t res_size,    // res
                           const int64_t* a, uint64_t a_size  // a
) {
  // (a[i] + 2^51) & (2^52 - 1) maps a[i] into [0, 2^52); OR with the bit
  // pattern of 2^52 builds the double 2^52 + a[i] + 2^51 exactly, and
  // subtracting 3*2^51 = 2^52 + 2^51 recovers a[i]. Valid for |a[i]| < 2^51.
  static const uint64_t ADD_CST = UINT64_C(1) << 51;
  static const uint64_t AND_CST = (UINT64_C(1) << 52) - 1;
  static const di_t OR_CST = {.dv = (INT64_C(1) << 52)};
  static const double SUB_CST = INT64_C(3) << 51;
  const uint64_t msize = res_size < a_size ? res_size : a_size;
  for (uint64_t i = 0; i < msize; ++i) {
    di_t t = {.u64v = ((a[i] + ADD_CST) & AND_CST) | OR_CST.u64v};
    res[i] = t.dv - SUB_CST;
  }
  memset(res + msize, 0, (res_size - msize) * sizeof(double));
}
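
Taken together, dbl_to_tn32_ref and tn32_to_dbl_ref are inverse up to the 2^-32 quantization step. A minimal sanity sketch (the helper name is ours, not the library's; NULL stands in for the module handle only because the reference implementations above never dereference it, and the error bound assumes the default round-to-nearest FP mode):

#include <assert.h>
#include <math.h>
#include "zn_arithmetic_private.h"

static void tn32_roundtrip_check(void) {
  const double in[4] = {0.25, -0.3, 0.49, -0.5};
  int32_t t32[4];
  double out[4];
  dbl_to_tn32_ref(NULL, t32, 4, in, 4);   // reduce mod 1 into torus32
  tn32_to_dbl_ref(NULL, out, 4, t32, 4);  // centerlift back to double
  for (int i = 0; i < 4; ++i) {
    assert(fabs(out[i] - in[i]) <= 0x1p-32);  // within one torus32 step
  }
}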

View File

@@ -1,4 +0,0 @@
#define INTTYPE int16_t
#define INTSN i16
#include "zn_vmp_int32_avx.c"
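
This three-line file is an instantiation trick: it sets the element-type macros and then re-includes the generic int32 AVX implementation, so the same body is compiled once per input width. The included file is not part of this diff; a plausible shape for how it might consume the macros (names below are illustrative only, via standard token pasting):

// zn_vmp_int32_avx.c (sketch, not the actual file):
#ifndef INTTYPE
#define INTTYPE int32_t  // default instantiation
#define INTSN i32
#endif

#define CONCAT_(a, b) a##b
#define CONCAT(a, b) CONCAT_(a, b)
// expands to default_zn32_vmp_apply_i16_avx when INTSN is i16
#define APPLY_FN CONCAT(CONCAT(default_zn32_vmp_apply_, INTSN), _avx)

EXPORT void APPLY_FN(const MOD_Z* module,               // N
                     int32_t* res, uint64_t res_size,   // res
                     const INTTYPE* a, uint64_t a_size, // a
                     const ZN32_VMP_PMAT* pmat, uint64_t nrows, uint64_t ncols) {
  // ... generic body, reading the inputs through INTTYPE ...
}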
