mirror of
https://github.com/arnaucube/poulpy.git
synced 2026-02-10 13:16:44 +01:00
478 lines
17 KiB
C++
478 lines
17 KiB
C++
#include <inttypes.h>
|
|
|
|
#include <cmath>
|
|
|
|
#include "gtest/gtest.h"
|
|
#include "spqlios/commons_private.h"
|
|
#include "spqlios/cplx/cplx_fft_internal.h"
|
|
#include "spqlios/reim/reim_fft_internal.h"
|
|
#include "spqlios/reim/reim_fft_private.h"
|
|
|
|
#ifdef __x86_64__
|
|
// Checks that reim_fft_avx2_fma and reim_fft_ref produce the same output (up to
// nn * 1e-10 per coefficient) on identical random input, for several sizes nn.
// Both routines share the same precomputed tables built for m = nn / 2.
TEST(fft, reim_fft_avx2_vs_fft_reim_ref) {
  for (uint64_t nn : {16, 32, 64, 1024, 8192, 65536}) {
    const uint64_t m = nn / 2;
    const uint64_t nbytes = m * sizeof(CPLX);
    REIM_FFT_PRECOMP* reimtables = new_reim_fft_precomp(m, 0);
    CPLX* a = (CPLX*)spqlios_alloc_custom_align(32, nbytes);
    double* a1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* a2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    const int64_t p = 1 << 16;
    for (uint64_t i = 0; i < m; i++) {
      a[i][0] = (rand() % p) - p / 2;  // between -p/2 and p/2
      a[i][1] = (rand() % p) - p / 2;
    }
    // Both implementations start from byte-identical input.
    memcpy(a1, a, nbytes);
    memcpy(a2, a, nbytes);
    reim_fft_ref(reimtables, a2);
    reim_fft_avx2_fma(reimtables, a1);
    // d is the largest absolute difference over both halves of the buffers.
    double d = 0;
    for (uint64_t i = 0; i < m; i++) {
      const double dre = fabs(a1[i] - a2[i]);
      const double dim = fabs(a1[m + i] - a2[m + i]);
      if (dre > d) d = dre;
      if (dim > d) d = dim;
    }
    // Release everything before asserting: a failing ASSERT_* returns from the
    // test body immediately and would otherwise skip the cleanup below.
    spqlios_free(a);
    spqlios_free(a1);
    spqlios_free(a2);
    delete_reim_fft_precomp(reimtables);
    ASSERT_LE(d, nn * 1e-10) << nn;
  }
}
|
|
#endif
|
|
|
|
#ifdef __x86_64__
|
|
// Checks that reim_ifft_avx2_fma and reim_ifft_ref produce the same output (up to
// 1e-8 per coefficient) on identical random input, for several sizes nn.
// Both routines share the same precomputed tables built for m = nn / 2.
TEST(fft, reim_ifft_avx2_vs_reim_ifft_ref) {
  for (uint64_t nn : {16, 32, 64, 1024, 8192, 65536}) {
    const uint64_t m = nn / 2;
    const uint64_t nbytes = m * sizeof(CPLX);
    REIM_IFFT_PRECOMP* reimtables = new_reim_ifft_precomp(m, 0);
    CPLX* a = (CPLX*)spqlios_alloc_custom_align(32, nbytes);
    double* a1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* a2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    const int64_t p = 1 << 16;
    for (uint64_t i = 0; i < m; i++) {
      a[i][0] = (rand() % p) - p / 2;  // between -p/2 and p/2
      a[i][1] = (rand() % p) - p / 2;
    }
    // Both implementations start from byte-identical input.
    memcpy(a1, a, nbytes);
    memcpy(a2, a, nbytes);
    reim_ifft_ref(reimtables, a2);
    reim_ifft_avx2_fma(reimtables, a1);
    // d is the largest absolute difference over both halves of the buffers.
    double d = 0;
    for (uint64_t i = 0; i < m; i++) {
      const double dre = fabs(a1[i] - a2[i]);
      const double dim = fabs(a1[m + i] - a2[m + i]);
      if (dre > d) d = dre;
      if (dim > d) d = dim;
    }
    // Release everything before asserting: a failing ASSERT_* returns from the
    // test body immediately and would otherwise skip the cleanup below.
    spqlios_free(a);
    spqlios_free(a1);
    spqlios_free(a2);
    // The tables were created by new_reim_ifft_precomp, so use the matching
    // ifft release call (as the other ifft tests in this file do).
    delete_reim_ifft_precomp(reimtables);
    ASSERT_LE(d, 1e-8);
  }
}
|
|
#endif
|
|
|
|
#ifdef __x86_64__
|
|
// Checks that reim_fftvec_addmul_fma and reim_fftvec_addmul_ref give the same
// result (up to 1e-8 per entry) when run on identical random (r, a, b) triples.
TEST(fft, reim_vecfft_addmul_fma_vs_ref) {
  for (uint64_t nn : {16, 32, 64, 1024, 8192, 65536}) {
    const uint64_t m = nn / 2;
    const uint64_t nbytes = m * sizeof(CPLX);
    REIM_FFTVEC_ADDMUL_PRECOMP* tbl = new_reim_fftvec_addmul_precomp(m);
    ASSERT_TRUE(tbl != nullptr);
    double* a1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* a2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* b1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* b2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* r1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* r2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    const int64_t p = 1 << 16;
    for (uint64_t i = 0; i < nn; i++) {
      a1[i] = (rand() % p) - p / 2;  // between -p/2 and p/2
      b1[i] = (rand() % p) - p / 2;
      r1[i] = (rand() % p) - p / 2;
    }
    // The "2" buffers are exact copies, so both variants see the same operands.
    memcpy(a2, a1, nbytes);
    memcpy(b2, b1, nbytes);
    memcpy(r2, r1, nbytes);
    reim_fftvec_addmul_ref(tbl, r1, a1, b1);
    reim_fftvec_addmul_fma(tbl, r2, a2, b2);
    // d is the largest absolute difference between the two result vectors.
    double d = 0;
    for (uint64_t i = 0; i < nn; i++) {
      const double di = fabs(r1[i] - r2[i]);
      if (di > d) d = di;
    }
    // Release everything before asserting: a failing ASSERT_* returns from the
    // test body immediately and would otherwise skip the cleanup below.
    spqlios_free(a1);
    spqlios_free(a2);
    spqlios_free(b1);
    spqlios_free(b2);
    spqlios_free(r1);
    spqlios_free(r2);
    delete_reim_fftvec_addmul_precomp(tbl);
    ASSERT_LE(d, 1e-8);
  }
}
|
|
#endif
|
|
|
|
#ifdef __x86_64__
|
|
// Checks that reim_fftvec_mul_fma and reim_fftvec_mul_ref give the same result
// (up to 1e-8 per entry) when run on identical random (r, a, b) triples.
TEST(fft, reim_vecfft_mul_fma_vs_ref) {
  for (uint64_t nn : {16, 32, 64, 1024, 8192, 65536}) {
    const uint64_t m = nn / 2;
    const uint64_t nbytes = m * sizeof(CPLX);
    REIM_FFTVEC_MUL_PRECOMP* tbl = new_reim_fftvec_mul_precomp(m);
    double* a1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* a2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* b1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* b2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* r1 = (double*)spqlios_alloc_custom_align(32, nbytes);
    double* r2 = (double*)spqlios_alloc_custom_align(32, nbytes);
    const int64_t p = 1 << 16;
    for (uint64_t i = 0; i < nn; i++) {
      a1[i] = (rand() % p) - p / 2;  // between -p/2 and p/2
      b1[i] = (rand() % p) - p / 2;
      r1[i] = (rand() % p) - p / 2;
    }
    // The "2" buffers are exact copies, so both variants see the same operands.
    memcpy(a2, a1, nbytes);
    memcpy(b2, b1, nbytes);
    memcpy(r2, r1, nbytes);
    reim_fftvec_mul_ref(tbl, r1, a1, b1);
    reim_fftvec_mul_fma(tbl, r2, a2, b2);
    // d is the largest absolute difference between the two result vectors.
    double d = 0;
    for (uint64_t i = 0; i < nn; i++) {
      const double di = fabs(r1[i] - r2[i]);
      if (di > d) d = di;
    }
    // Release everything before asserting: a failing ASSERT_* returns from the
    // test body immediately and would otherwise skip the cleanup below.
    spqlios_free(a1);
    spqlios_free(a2);
    spqlios_free(b1);
    spqlios_free(b2);
    spqlios_free(r1);
    spqlios_free(r2);
    delete_reim_fftvec_mul_precomp(tbl);
    ASSERT_LE(d, 1e-8);
  }
}
|
|
#endif
|
|
|
|
// Signature of a routine that fills an omega table through a cursor passed by address.
using FILL_REIM_FFT_OMG_F = void (*)(const double entry_pwr, double** omg);
|
|
// Signature of a fixed-size FFT kernel: separate real/imaginary arrays plus an opaque omega table.
using REIM_FFT_F = void (*)(double* dre, double* dim, const void* omega);
|
|
|
|
// template to test a fixed-dimension fft vs. naive
|
|
template <uint64_t N>
|
|
void test_reim_fft_ref_vs_naive(FILL_REIM_FFT_OMG_F fill_omega_f, REIM_FFT_F reim_fft_f) {
|
|
double om[N];
|
|
double data[2 * N];
|
|
double datacopy[2 * N];
|
|
double* omg = om;
|
|
fill_omega_f(0.25, &omg);
|
|
ASSERT_EQ(omg - om, ptrdiff_t(N)); // it may depend on N
|
|
for (uint64_t i = 0; i < N; ++i) {
|
|
datacopy[i] = data[i] = (rand() % 100) - 50;
|
|
datacopy[N + i] = data[N + i] = (rand() % 100) - 50;
|
|
}
|
|
reim_fft_f(datacopy, datacopy + N, om);
|
|
reim_naive_fft(N, 0.25, data, data + N);
|
|
double d = 0;
|
|
for (uint64_t i = 0; i < 2 * N; ++i) {
|
|
d += fabs(datacopy[i] - data[i]);
|
|
}
|
|
ASSERT_LE(d, 1e-7);
|
|
}
|
|
|
|
template <uint64_t N>
|
|
void test_reim_fft_ref_vs_accel(REIM_FFT_F reim_fft_ref_f, REIM_FFT_F reim_fft_accel_f) {
|
|
double om[N];
|
|
double data[2 * N];
|
|
double datacopy[2 * N];
|
|
for (uint64_t i = 0; i < N; ++i) {
|
|
om[i] = (rand() % 100) - 50;
|
|
datacopy[i] = data[i] = (rand() % 100) - 50;
|
|
datacopy[N + i] = data[N + i] = (rand() % 100) - 50;
|
|
}
|
|
reim_fft_ref_f(datacopy, datacopy + N, om);
|
|
reim_fft_accel_f(data, data + N, om);
|
|
double d = 0;
|
|
for (uint64_t i = 0; i < 2 * N; ++i) {
|
|
d += fabs(datacopy[i] - data[i]);
|
|
}
|
|
if (d > 1e-15) {
|
|
for (uint64_t i = 0; i < N; ++i) {
|
|
printf("%" PRId64 " %lf %lf %lf %lf\n", i, data[i], data[N + i], datacopy[i], datacopy[N + i]);
|
|
}
|
|
ASSERT_LE(d, 0);
|
|
}
|
|
}
|
|
|
|
// 16-entry forward kernels: reference vs. naive, plus the per-architecture variants.
TEST(fft, reim_fft16_ref_vs_naive) {
  test_reim_fft_ref_vs_naive<16>(fill_reim_fft16_omegas, reim_fft16_ref);
}

#if defined(__aarch64__)
TEST(fft, reim_fft16_neon_vs_naive) {
  test_reim_fft_ref_vs_naive<16>(fill_reim_fft16_omegas_neon, reim_fft16_neon);
}
#endif

#if defined(__x86_64__)
TEST(fft, reim_fft16_ref_vs_fma) {
  test_reim_fft_ref_vs_accel<16>(reim_fft16_ref, reim_fft16_avx_fma);
}
#endif
|
|
|
|
#ifdef __aarch64__
|
|
// Adapter with the REIM_FFT_F signature: reorders the 16 omegas before handing
// them to reim_fft16_ref. The first 8 entries are kept as-is; the last 8 are
// de-interleaved into the even-indexed entries (8, 10, 12, 14) followed by the
// odd-indexed ones (9, 11, 13, 15).
static void reim_fft16_ref_neon_pom(double* dre, double* dim, const void* omega) {
  // omega points to const data: keep the const qualifier through the cast.
  const double* pom = static_cast<const double*>(omega);
  // put the omegas in neon order
  const double x_pom[] = {
      pom[0], pom[1],  pom[2],  pom[3],   //
      pom[4], pom[5],  pom[6],  pom[7],   //
      pom[8], pom[10], pom[12], pom[14],  //
      pom[9], pom[11], pom[13], pom[15]   //
  };
  reim_fft16_ref(dre, dim, x_pom);
}
|
|
// Reference kernel (fed through the omega-reordering adapter) vs. the NEON kernel.
TEST(fft, reim_fft16_ref_vs_neon) {
  test_reim_fft_ref_vs_accel<16>(reim_fft16_ref_neon_pom, reim_fft16_neon);
}
|
|
#endif
|
|
|
|
// 8-entry forward kernels.
TEST(fft, reim_fft8_ref_vs_naive) {
  test_reim_fft_ref_vs_naive<8>(fill_reim_fft8_omegas, reim_fft8_ref);
}

#if defined(__x86_64__)
TEST(fft, reim_fft8_ref_vs_fma) {
  test_reim_fft_ref_vs_accel<8>(reim_fft8_ref, reim_fft8_avx_fma);
}
#endif

// 4-entry forward kernels.
TEST(fft, reim_fft4_ref_vs_naive) {
  test_reim_fft_ref_vs_naive<4>(fill_reim_fft4_omegas, reim_fft4_ref);
}

#if defined(__x86_64__)
TEST(fft, reim_fft4_ref_vs_fma) {
  test_reim_fft_ref_vs_accel<4>(reim_fft4_ref, reim_fft4_avx_fma);
}
#endif

// 2-entry forward kernel (reference only).
TEST(fft, reim_fft2_ref_vs_naive) {
  test_reim_fft_ref_vs_naive<2>(fill_reim_fft2_omegas, reim_fft2_ref);
}
|
|
|
|
// Compares reim_fft_bfs_16_ref with reim_naive_fft on random input for each m
// in the list; the summed absolute difference must stay below 1e-7.
TEST(fft, reim_fft_bfs_16_ref_vs_naive) {
  for (const uint64_t m : {16, 32, 64, 128, 256, 512, 1024, 2048}) {
    const uint64_t len = 2 * m;
    std::vector<double> omegas(len);
    std::vector<double> expected(len);
    std::vector<double> actual(len);

    // The fill routine receives the write cursor by address; check that it
    // did not move past the end of the table.
    double* cursor = omegas.data();
    fill_reim_fft_bfs_16_omegas(m, 0.25, &cursor);
    ASSERT_LE(cursor - omegas.data(), ptrdiff_t(len));  // it may depend on m

    for (uint64_t k = 0; k < m; ++k) {
      const double re = (rand() % 100) - 50;
      const double im = (rand() % 100) - 50;
      actual[k] = expected[k] = re;
      actual[m + k] = expected[m + k] = im;
    }

    // Rewind the cursor to the start of the table before running the FFT.
    cursor = omegas.data();
    reim_fft_bfs_16_ref(m, actual.data(), actual.data() + m, &cursor);
    reim_naive_fft(m, 0.25, expected.data(), expected.data() + m);

    double total_err = 0;
    for (uint64_t k = 0; k < len; ++k) {
      total_err += fabs(actual[k] - expected[k]);
    }
    ASSERT_LE(total_err, 1e-7);
  }
}
|
|
|
|
// Compares reim_fft_rec_16_ref with reim_naive_fft on random input for each m
// in the list; the summed absolute difference must stay below 1e-5.
TEST(fft, reim_fft_rec_16_ref_vs_naive) {
  for (const uint64_t m : {2048, 4096, 8192, 32768, 65536}) {
    const uint64_t len = 2 * m;
    std::vector<double> omegas(len);
    std::vector<double> expected(len);
    std::vector<double> actual(len);

    // The fill routine receives the write cursor by address; check that it
    // did not move past the end of the table.
    double* cursor = omegas.data();
    fill_reim_fft_rec_16_omegas(m, 0.25, &cursor);
    ASSERT_LE(cursor - omegas.data(), ptrdiff_t(len));  // it may depend on m

    for (uint64_t k = 0; k < m; ++k) {
      const double re = (rand() % 100) - 50;
      const double im = (rand() % 100) - 50;
      actual[k] = expected[k] = re;
      actual[m + k] = expected[m + k] = im;
    }

    // Rewind the cursor to the start of the table before running the FFT.
    cursor = omegas.data();
    reim_fft_rec_16_ref(m, actual.data(), actual.data() + m, &cursor);
    reim_naive_fft(m, 0.25, expected.data(), expected.data() + m);

    double total_err = 0;
    for (uint64_t k = 0; k < len; ++k) {
      total_err += fabs(actual[k] - expected[k]);
    }
    ASSERT_LE(total_err, 1e-5);
  }
}
|
|
|
|
// Compares reim_fft_ref (driven by tables from new_reim_fft_precomp) with
// reim_naive_fft on random input for each m in the list; the summed absolute
// difference must stay below 1e-5.
TEST(fft, reim_fft_ref_vs_naive) {
  for (const uint64_t m : {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 32768, 65536}) {
    std::vector<double> data(2 * m);
    std::vector<double> datacopy(2 * m);
    REIM_FFT_PRECOMP* precomp = new_reim_fft_precomp(m, 0);
    for (uint64_t i = 0; i < m; ++i) {
      datacopy[i] = data[i] = (rand() % 100) - 50;
      datacopy[m + i] = data[m + i] = (rand() % 100) - 50;
    }
    reim_fft_ref(precomp, datacopy.data());
    // The tables are not needed past this point. Release them before asserting:
    // a failing ASSERT_* returns from the test body and would skip the cleanup.
    delete_reim_fft_precomp(precomp);
    reim_naive_fft(m, 0.25, data.data(), data.data() + m);
    double d = 0;
    for (uint64_t i = 0; i < 2 * m; ++i) {
      d += fabs(datacopy[i] - data[i]);
    }
    ASSERT_LE(d, 1e-5) << m;
  }
}
|
|
|
|
#ifdef __aarch64__
|
|
EXPORT REIM_FFT_PRECOMP* new_reim_fft_precomp_neon(uint32_t m, uint32_t num_buffers);
|
|
EXPORT void reim_fft_neon(const REIM_FFT_PRECOMP* precomp, double* d);
|
|
TEST(fft, reim_fft_neon_vs_naive) {
|
|
for (const uint64_t m : {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 32768, 65536}) {
|
|
std::vector<double> om(2 * m);
|
|
std::vector<double> data(2 * m);
|
|
std::vector<double> datacopy(2 * m);
|
|
REIM_FFT_PRECOMP* precomp = new_reim_fft_precomp_neon(m, 0);
|
|
for (uint64_t i = 0; i < m; ++i) {
|
|
datacopy[i] = data[i] = (rand() % 100) - 50;
|
|
datacopy[m + i] = data[m + i] = (rand() % 100) - 50;
|
|
}
|
|
reim_fft_neon(precomp, datacopy.data());
|
|
reim_naive_fft(m, 0.25, data.data(), data.data() + m);
|
|
double d = 0;
|
|
for (uint64_t i = 0; i < 2 * m; ++i) {
|
|
d += fabs(datacopy[i] - data[i]);
|
|
}
|
|
ASSERT_LE(d, 1e-5) << m;
|
|
delete_reim_fft_precomp(precomp);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// Signature of a routine that fills an inverse-FFT omega table through a cursor passed by address.
using FILL_REIM_IFFT_OMG_F = void (*)(const double entry_pwr, double** omg);
|
|
// Signature of a fixed-size inverse-FFT kernel: separate real/imaginary arrays plus an opaque omega table.
using REIM_IFFT_F = void (*)(double* dre, double* dim, const void* omega);
|
|
|
|
// template to test a fixed-dimension fft vs. naive
|
|
template <uint64_t N>
|
|
void test_reim_ifft_ref_vs_naive(FILL_REIM_IFFT_OMG_F fill_omega_f, REIM_IFFT_F reim_ifft_f) {
|
|
double om[N];
|
|
double data[2 * N];
|
|
double datacopy[2 * N];
|
|
double* omg = om;
|
|
fill_omega_f(0.25, &omg);
|
|
ASSERT_EQ(omg - om, ptrdiff_t(N)); // it may depend on N
|
|
for (uint64_t i = 0; i < N; ++i) {
|
|
datacopy[i] = data[i] = (rand() % 100) - 50;
|
|
datacopy[N + i] = data[N + i] = (rand() % 100) - 50;
|
|
}
|
|
reim_ifft_f(datacopy, datacopy + N, om);
|
|
reim_naive_ifft(N, 0.25, data, data + N);
|
|
double d = 0;
|
|
for (uint64_t i = 0; i < 2 * N; ++i) {
|
|
d += fabs(datacopy[i] - data[i]);
|
|
}
|
|
ASSERT_LE(d, 1e-7);
|
|
}
|
|
|
|
template <uint64_t N>
|
|
void test_reim_ifft_ref_vs_accel(REIM_IFFT_F reim_ifft_ref_f, REIM_IFFT_F reim_ifft_accel_f) {
|
|
double om[N];
|
|
double data[2 * N];
|
|
double datacopy[2 * N];
|
|
for (uint64_t i = 0; i < N; ++i) {
|
|
om[i] = (rand() % 100) - 50;
|
|
datacopy[i] = data[i] = (rand() % 100) - 50;
|
|
datacopy[N + i] = data[N + i] = (rand() % 100) - 50;
|
|
}
|
|
reim_ifft_ref_f(datacopy, datacopy + N, om);
|
|
reim_ifft_accel_f(data, data + N, om);
|
|
double d = 0;
|
|
for (uint64_t i = 0; i < 2 * N; ++i) {
|
|
d += fabs(datacopy[i] - data[i]);
|
|
}
|
|
if (d > 1e-15) {
|
|
for (uint64_t i = 0; i < N; ++i) {
|
|
printf("%" PRId64 " %lf %lf %lf %lf\n", i, data[i], data[N + i], datacopy[i], datacopy[N + i]);
|
|
}
|
|
ASSERT_LE(d, 0);
|
|
}
|
|
}
|
|
|
|
// 16-entry inverse kernels.
TEST(fft, reim_ifft16_ref_vs_naive) {
  test_reim_ifft_ref_vs_naive<16>(fill_reim_ifft16_omegas, reim_ifft16_ref);
}

#if defined(__x86_64__)
TEST(fft, reim_ifft16_ref_vs_fma) {
  test_reim_ifft_ref_vs_accel<16>(reim_ifft16_ref, reim_ifft16_avx_fma);
}
#endif

// 8-entry inverse kernels.
TEST(fft, reim_ifft8_ref_vs_naive) {
  test_reim_ifft_ref_vs_naive<8>(fill_reim_ifft8_omegas, reim_ifft8_ref);
}

#if defined(__x86_64__)
TEST(fft, reim_ifft8_ref_vs_fma) {
  test_reim_ifft_ref_vs_accel<8>(reim_ifft8_ref, reim_ifft8_avx_fma);
}
#endif

// 4-entry inverse kernels.
TEST(fft, reim_ifft4_ref_vs_naive) {
  test_reim_ifft_ref_vs_naive<4>(fill_reim_ifft4_omegas, reim_ifft4_ref);
}

#if defined(__x86_64__)
TEST(fft, reim_ifft4_ref_vs_fma) {
  test_reim_ifft_ref_vs_accel<4>(reim_ifft4_ref, reim_ifft4_avx_fma);
}
#endif

// 2-entry inverse kernel (reference only).
TEST(fft, reim_ifft2_ref_vs_naive) {
  test_reim_ifft_ref_vs_naive<2>(fill_reim_ifft2_omegas, reim_ifft2_ref);
}
|
|
|
|
// Compares reim_ifft_bfs_16_ref with reim_naive_ifft on random input for each
// m in the list; the summed absolute difference must stay below 1e-7.
TEST(fft, reim_ifft_bfs_16_ref_vs_naive) {
  for (const uint64_t m : {16, 32, 64, 128, 256, 512, 1024, 2048}) {
    const uint64_t len = 2 * m;
    std::vector<double> omegas(len);
    std::vector<double> expected(len);
    std::vector<double> actual(len);

    // The fill routine receives the write cursor by address; check that it
    // did not move past the end of the table.
    double* cursor = omegas.data();
    fill_reim_ifft_bfs_16_omegas(m, 0.25, &cursor);
    ASSERT_LE(cursor - omegas.data(), ptrdiff_t(len));  // it may depend on m

    for (uint64_t k = 0; k < m; ++k) {
      const double re = (rand() % 100) - 50;
      const double im = (rand() % 100) - 50;
      actual[k] = expected[k] = re;
      actual[m + k] = expected[m + k] = im;
    }

    // Rewind the cursor to the start of the table before running the inverse FFT.
    cursor = omegas.data();
    reim_ifft_bfs_16_ref(m, actual.data(), actual.data() + m, &cursor);
    reim_naive_ifft(m, 0.25, expected.data(), expected.data() + m);

    double total_err = 0;
    for (uint64_t k = 0; k < len; ++k) {
      total_err += fabs(actual[k] - expected[k]);
    }
    ASSERT_LE(total_err, 1e-7);
  }
}
|
|
|
|
// Compares reim_ifft_rec_16_ref with reim_naive_ifft on random input for each
// m in the list; the summed absolute difference must stay below 1e-5.
TEST(fft, reim_ifft_rec_16_ref_vs_naive) {
  for (const uint64_t m : {2048, 4096, 8192, 32768, 65536}) {
    const uint64_t len = 2 * m;
    std::vector<double> omegas(len);
    std::vector<double> expected(len);
    std::vector<double> actual(len);

    // The fill routine receives the write cursor by address; check that it
    // did not move past the end of the table.
    double* cursor = omegas.data();
    fill_reim_ifft_rec_16_omegas(m, 0.25, &cursor);
    ASSERT_LE(cursor - omegas.data(), ptrdiff_t(len));  // it may depend on m

    for (uint64_t k = 0; k < m; ++k) {
      const double re = (rand() % 100) - 50;
      const double im = (rand() % 100) - 50;
      actual[k] = expected[k] = re;
      actual[m + k] = expected[m + k] = im;
    }

    // Rewind the cursor to the start of the table before running the inverse FFT.
    cursor = omegas.data();
    reim_ifft_rec_16_ref(m, actual.data(), actual.data() + m, &cursor);
    reim_naive_ifft(m, 0.25, expected.data(), expected.data() + m);

    double total_err = 0;
    for (uint64_t k = 0; k < len; ++k) {
      total_err += fabs(actual[k] - expected[k]);
    }
    ASSERT_LE(total_err, 1e-5);
  }
}
|
|
|
|
// Compares reim_ifft_ref (driven by tables from new_reim_ifft_precomp) with
// reim_naive_ifft on random input for each m in the list; the summed absolute
// difference must stay below 1e-5.
TEST(fft, reim_ifft_ref_vs_naive) {
  for (const uint64_t m : {1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 32768, 65536}) {
    std::vector<double> data(2 * m);
    std::vector<double> datacopy(2 * m);
    REIM_IFFT_PRECOMP* precomp = new_reim_ifft_precomp(m, 0);
    for (uint64_t i = 0; i < m; ++i) {
      datacopy[i] = data[i] = (rand() % 100) - 50;
      datacopy[m + i] = data[m + i] = (rand() % 100) - 50;
    }
    reim_ifft_ref(precomp, datacopy.data());
    // The tables are not needed past this point. Release them before asserting:
    // a failing ASSERT_* returns from the test body and would skip the cleanup.
    delete_reim_ifft_precomp(precomp);
    reim_naive_ifft(m, 0.25, data.data(), data.data() + m);
    double d = 0;
    for (uint64_t i = 0; i < 2 * m; ++i) {
      d += fabs(datacopy[i] - data[i]);
    }
    ASSERT_LE(d, 1e-5) << m;
  }
}
|