/*
 * Mirror of https://github.com/arnaucube/miden-crypto.git
 * (synced 2026-01-12 00:51:29 +01:00); 583 lines, 14 KiB, C.
 */
/*
 * RPO implementation.
 */
|
|
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
#include <stdlib.h>
|
|
|
|
/* ================================================================================================
 * Modular Arithmetic
 */
|
|
|
|
/* Modulus of the field used by the arithmetic helpers: 2^64 - 2^32 + 1. */
#define P UINT64_C(0xFFFFFFFF00000001)
/* Modulus applied to every squeezed 64-bit word by the hash-to-point routine. */
#define M 12289
|
|
|
|
// From https://github.com/ncw/iprime/blob/master/mod_math_noasm.go
|
|
static uint64_t add_mod_p(uint64_t a, uint64_t b)
|
|
{
|
|
a = P - a;
|
|
uint64_t res = b - a;
|
|
if (b < a)
|
|
res += P;
|
|
return res;
|
|
}
|
|
|
|
static uint64_t sub_mod_p(uint64_t a, uint64_t b)
|
|
{
|
|
uint64_t r = a - b;
|
|
if (a < b)
|
|
r += P;
|
|
return r;
|
|
}
|
|
|
|
static uint64_t reduce_mod_p(uint64_t b, uint64_t a)
|
|
{
|
|
uint32_t d = b >> 32,
|
|
c = b;
|
|
if (a >= P)
|
|
a -= P;
|
|
a = sub_mod_p(a, c);
|
|
a = sub_mod_p(a, d);
|
|
a = add_mod_p(a, ((uint64_t)c) << 32);
|
|
return a;
|
|
}
|
|
|
|
/*
 * Returns (x * y) mod P.
 *
 * The full 128-bit product is assembled from four 32x32 -> 64 bit partial
 * products and then handed to reduce_mod_p as a (high, low) pair.
 */
static uint64_t mult_mod_p(uint64_t x, uint64_t y)
{
    const uint64_t x_lo = (uint32_t)x;
    const uint64_t x_hi = x >> 32;
    const uint64_t y_lo = (uint32_t)y;
    const uint64_t y_hi = y >> 32;

    uint64_t lo = x_lo * y_lo;                /* contributes at bit 0  */
    uint64_t hi = x_hi * y_hi;                /* contributes at bit 64 */
    const uint64_t cross_hl = x_hi * y_lo;    /* contributes at bit 32 */
    const uint64_t cross_lh = x_lo * y_hi;    /* contributes at bit 32 */

    /* Sum of the two cross terms; a wrap here is worth 2^64 << 32. */
    const uint64_t mid = cross_hl + cross_lh;
    if (mid < cross_lh) {
        hi += UINT64_C(1) << 32;
    }

    /* Fold the low half of the cross sum into the low word, tracking carry. */
    const uint64_t mid_low_part = mid << 32;
    lo += mid_low_part;
    if (lo < mid_low_part) {
        hi += 1;
    }

    /* Fold the high half of the cross sum into the high word. */
    hi += mid >> 32;

    return reduce_mod_p(hi, lo);
}
|
|
|
|
/* ================================================================================================
 * RPO128 Permutation
 */
|
|
|
|
/* Number of 64-bit words in the permutation state. */
#define STATE_WIDTH 12
/* Number of rounds executed by one call to apply_permutation. */
#define NUM_ROUNDS 7
|
|
|
|
/*
 * MDS matrix.
 *
 * The matrix is circulant: row i is row 0 rotated right by i positions.
 * apply_mds multiplies the state vector by it.
 */
static const uint64_t MDS[12][12] = {
    [0]  = {  7, 23,  8, 26, 13, 10,  9,  7,  6, 22, 21,  8 },
    [1]  = {  8,  7, 23,  8, 26, 13, 10,  9,  7,  6, 22, 21 },
    [2]  = { 21,  8,  7, 23,  8, 26, 13, 10,  9,  7,  6, 22 },
    [3]  = { 22, 21,  8,  7, 23,  8, 26, 13, 10,  9,  7,  6 },
    [4]  = {  6, 22, 21,  8,  7, 23,  8, 26, 13, 10,  9,  7 },
    [5]  = {  7,  6, 22, 21,  8,  7, 23,  8, 26, 13, 10,  9 },
    [6]  = {  9,  7,  6, 22, 21,  8,  7, 23,  8, 26, 13, 10 },
    [7]  = { 10,  9,  7,  6, 22, 21,  8,  7, 23,  8, 26, 13 },
    [8]  = { 13, 10,  9,  7,  6, 22, 21,  8,  7, 23,  8, 26 },
    [9]  = { 26, 13, 10,  9,  7,  6, 22, 21,  8,  7, 23,  8 },
    [10] = {  8, 26, 13, 10,  9,  7,  6, 22, 21,  8,  7, 23 },
    [11] = { 23,  8, 26, 13, 10,  9,  7,  6, 22, 21,  8,  7 },
};
|
|
|
|
/*
 * Round constants.
 *
 * ARK1[r] is the vector added to the state in the first half of round r,
 * between the MDS multiplication and the forward S-box (see apply_round).
 */
static const uint64_t ARK1[7][12] = {
    [0] = {
        5789762306288267392ULL,  6522564764413701783ULL,  17809893479458208203ULL,
        107145243989736508ULL,   6388978042437517382ULL,  15844067734406016715ULL,
        9975000513555218239ULL,  3344984123768313364ULL,  9959189626657347191ULL,
        12960773468763563665ULL, 9602914297752488475ULL,  16657542370200465908ULL,
    },
    [1] = {
        12987190162843096997ULL, 653957632802705281ULL,   4441654670647621225ULL,
        4038207883745915761ULL,  5613464648874830118ULL,  13222989726778338773ULL,
        3037761201230264149ULL,  16683759727265180203ULL, 8337364536491240715ULL,
        3227397518293416448ULL,  8110510111539674682ULL,  2872078294163232137ULL,
    },
    [2] = {
        18072785500942327487ULL, 6200974112677013481ULL,  17682092219085884187ULL,
        10599526828986756440ULL, 975003873302957338ULL,   8264241093196931281ULL,
        10065763900435475170ULL, 2181131744534710197ULL,  6317303992309418647ULL,
        1401440938888741532ULL,  8884468225181997494ULL,  13066900325715521532ULL,
    },
    [3] = {
        5674685213610121970ULL,  5759084860419474071ULL,  13943282657648897737ULL,
        1352748651966375394ULL,  17110913224029905221ULL, 1003883795902368422ULL,
        4141870621881018291ULL,  8121410972417424656ULL,  14300518605864919529ULL,
        13712227150607670181ULL, 17021852944633065291ULL, 6252096473787587650ULL,
    },
    [4] = {
        4887609836208846458ULL,  3027115137917284492ULL,  9595098600469470675ULL,
        10528569829048484079ULL, 7864689113198939815ULL,  17533723827845969040ULL,
        5781638039037710951ULL,  17024078752430719006ULL, 109659393484013511ULL,
        7158933660534805869ULL,  2955076958026921730ULL,  7433723648458773977ULL,
    },
    [5] = {
        16308865189192447297ULL, 11977192855656444890ULL, 12532242556065780287ULL,
        14594890931430968898ULL, 7291784239689209784ULL,  5514718540551361949ULL,
        10025733853830934803ULL, 7293794580341021693ULL,  6728552937464861756ULL,
        6332385040983343262ULL,  13277683694236792804ULL, 2600778905124452676ULL,
    },
    [6] = {
        7123075680859040534ULL,  1034205548717903090ULL,  7717824418247931797ULL,
        3019070937878604058ULL,  11403792746066867460ULL, 10280580802233112374ULL,
        337153209462421218ULL,   13333398568519923717ULL, 3596153696935337464ULL,
        8104208463525993784ULL,  14345062289456085693ULL, 17036731477169661256ULL,
    },
};
|
|
|
|
/*
 * Second set of round constants.
 *
 * ARK2[r] is the vector added to the state in the second half of round r,
 * between the MDS multiplication and the inverse S-box (see apply_round).
 *
 * NOTE(review): unlike ARK1 this table has external linkage; confirm that no
 * other translation unit refers to it before marking it static.
 */
const uint64_t ARK2[7][12] = {
    [0] = {
        6077062762357204287ULL,  15277620170502011191ULL, 5358738125714196705ULL,
        14233283787297595718ULL, 13792579614346651365ULL, 11614812331536767105ULL,
        14871063686742261166ULL, 10148237148793043499ULL, 4457428952329675767ULL,
        15590786458219172475ULL, 10063319113072092615ULL, 14200078843431360086ULL,
    },
    [1] = {
        6202948458916099932ULL,  17690140365333231091ULL, 3595001575307484651ULL,
        373995945117666487ULL,   1235734395091296013ULL,  14172757457833931602ULL,
        707573103686350224ULL,   15453217512188187135ULL, 219777875004506018ULL,
        17876696346199469008ULL, 17731621626449383378ULL, 2897136237748376248ULL,
    },
    [2] = {
        8023374565629191455ULL,  15013690343205953430ULL, 4485500052507912973ULL,
        12489737547229155153ULL, 9500452585969030576ULL,  2054001340201038870ULL,
        12420704059284934186ULL, 355990932618543755ULL,   9071225051243523860ULL,
        12766199826003448536ULL, 9045979173463556963ULL,  12934431667190679898ULL,
    },
    [3] = {
        18389244934624494276ULL, 16731736864863925227ULL, 4440209734760478192ULL,
        17208448209698888938ULL, 8739495587021565984ULL,  17000774922218161967ULL,
        13533282547195532087ULL, 525402848358706231ULL,   16987541523062161972ULL,
        5466806524462797102ULL,  14512769585918244983ULL, 10973956031244051118ULL,
    },
    [4] = {
        6982293561042362913ULL,  14065426295947720331ULL, 16451845770444974180ULL,
        7139138592091306727ULL,  9012006439959783127ULL,  14619614108529063361ULL,
        1394813199588124371ULL,  4635111139507788575ULL,  16217473952264203365ULL,
        10782018226466330683ULL, 6844229992533662050ULL,  7446486531695178711ULL,
    },
    [5] = {
        3736792340494631448ULL,  577852220195055341ULL,   6689998335515779805ULL,
        13886063479078013492ULL, 14358505101923202168ULL, 7744142531772274164ULL,
        16135070735728404443ULL, 12290902521256031137ULL, 12059913662657709804ULL,
        16456018495793751911ULL, 4571485474751953524ULL,  17200392109565783176ULL,
    },
    [6] = {
        17130398059294018733ULL, 519782857322261988ULL,   9625384390925085478ULL,
        1664893052631119222ULL,  7629576092524553570ULL,  3485239601103661425ULL,
        9755891797164033838ULL,  15218148195153269027ULL, 16460604813734957368ULL,
        9643968136937729763ULL,  3611348709641382851ULL,  18256379591337759196ULL,
    },
};
|
|
|
|
static void apply_sbox(uint64_t *const state)
|
|
{
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
uint64_t t2 = mult_mod_p(*(state + i), *(state + i));
|
|
uint64_t t4 = mult_mod_p(t2, t2);
|
|
|
|
*(state + i) = mult_mod_p(*(state + i), mult_mod_p(t2, t4));
|
|
}
|
|
}
|
|
|
|
static void apply_mds(uint64_t *state)
|
|
{
|
|
uint64_t res[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
res[i] = 0;
|
|
}
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
for (uint64_t j = 0; j < STATE_WIDTH; j++)
|
|
{
|
|
res[i] = add_mod_p(res[i], mult_mod_p(MDS[i][j], *(state + j)));
|
|
}
|
|
}
|
|
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
*(state + i) = res[i];
|
|
}
|
|
}
|
|
|
|
static void apply_constants(uint64_t *const state, const uint64_t *ark)
|
|
{
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
*(state + i) = add_mod_p(*(state + i), *(ark + i));
|
|
}
|
|
}
|
|
|
|
static void exp_acc(const uint64_t m, const uint64_t *base, const uint64_t *tail, uint64_t *const res)
|
|
{
|
|
for (uint64_t i = 0; i < m; i++)
|
|
{
|
|
for (uint64_t j = 0; j < STATE_WIDTH; j++)
|
|
{
|
|
if (i == 0)
|
|
{
|
|
*(res + j) = mult_mod_p(*(base + j), *(base + j));
|
|
}
|
|
else
|
|
{
|
|
*(res + j) = mult_mod_p(*(res + j), *(res + j));
|
|
}
|
|
}
|
|
}
|
|
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
*(res + i) = mult_mod_p(*(res + i), *(tail + i));
|
|
}
|
|
}
|
|
|
|
static void apply_inv_sbox(uint64_t *const state)
|
|
{
|
|
uint64_t t1[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t1[i] = 0;
|
|
}
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t1[i] = mult_mod_p(*(state + i), *(state + i));
|
|
}
|
|
|
|
uint64_t t2[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t2[i] = 0;
|
|
}
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t2[i] = mult_mod_p(t1[i], t1[i]);
|
|
}
|
|
|
|
uint64_t t3[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t3[i] = 0;
|
|
}
|
|
exp_acc(3, t2, t2, t3);
|
|
|
|
uint64_t t4[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t4[i] = 0;
|
|
}
|
|
exp_acc(6, t3, t3, t4);
|
|
|
|
uint64_t tmp[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
tmp[i] = 0;
|
|
}
|
|
exp_acc(12, t4, t4, tmp);
|
|
|
|
uint64_t t5[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t5[i] = 0;
|
|
}
|
|
exp_acc(6, tmp, t3, t5);
|
|
|
|
uint64_t t6[STATE_WIDTH];
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
t6[i] = 0;
|
|
}
|
|
exp_acc(31, t5, t5, t6);
|
|
|
|
for (uint64_t i = 0; i < STATE_WIDTH; i++)
|
|
{
|
|
uint64_t a = mult_mod_p(mult_mod_p(t6[i], t6[i]), t5[i]);
|
|
a = mult_mod_p(a, a);
|
|
a = mult_mod_p(a, a);
|
|
uint64_t b = mult_mod_p(mult_mod_p(t1[i], t2[i]), *(state + i));
|
|
|
|
*(state + i) = mult_mod_p(a, b);
|
|
}
|
|
}
|
|
|
|
static void apply_round(uint64_t *const state, const uint64_t round)
|
|
{
|
|
apply_mds(state);
|
|
apply_constants(state, ARK1[round]);
|
|
apply_sbox(state);
|
|
|
|
apply_mds(state);
|
|
apply_constants(state, ARK2[round]);
|
|
apply_inv_sbox(state);
|
|
}
|
|
|
|
static void apply_permutation(uint64_t *state)
|
|
{
|
|
for (uint64_t i = 0; i < NUM_ROUNDS; i++)
|
|
{
|
|
apply_round(state, i);
|
|
}
|
|
}
|
|
|
|
/* ================================================================================================
 * RPO128 implementation. This is intended to replace SHAKE256 in the hash-to-point algorithm.
 */
|
|
|
|
#include "rpo.h"
|
|
|
|
/*
 * Puts a context into its initial state: all state words zero and the data
 * pointer at byte offset 32, where absorbing and squeezing begin.
 *
 * rc: context to initialise.
 */
void rpo128_init(rpo128_context *rc)
{
    memset(rc->st.A, 0, sizeof rc->st.A);
    rc->dptr = 32;
}
|
|
|
|
/*
 * Absorbs len bytes from in into the context.
 *
 * Input bytes overwrite the state buffer from the current data pointer
 * onwards. Every time byte offset 96 is reached the permutation is applied
 * and writing resumes at offset 32.
 *
 * For comparison, SHAKE256 has a rate of 136 * 8 = 1088 bits with the
 * capacity at the end of the state; for RPO128 the rate is 64 * 8 = 512 bits
 * and the capacity sits at the beginning of the state.
 *
 * rc:  context being updated.
 * in:  input bytes.
 * len: number of bytes available at in.
 */
void rpo128_absorb(rpo128_context *rc, const uint8_t *in, size_t len)
{
    const size_t rate_begin = 32;   /* first byte after the capacity */
    const size_t state_end = 96;    /* one past the last state byte  */
    size_t pos = (size_t)rc->dptr;

    while (len > 0) {
        size_t chunk = state_end - pos;

        if (chunk > len) {
            chunk = len;
        }

        memcpy(rc->st.dbuf + pos, in, chunk);
        pos += chunk;
        in += chunk;
        len -= chunk;

        if (pos == state_end) {
            apply_permutation(rc->st.A);
            pos = rate_begin;
        }
    }

    rc->dptr = pos;
}
|
|
|
|
/*
 * Switches the context from absorbing to squeezing.
 *
 * rc: context to finalise.
 */
void rpo128_finalize(rpo128_context *rc)
{
    /*
     * Park the data pointer at the end of the buffer so that the first
     * squeeze call runs the permutation before producing any output.
     */
    rc->dptr = 96;
}
|
|
|
|
/*
 * Writes len output bytes to out.
 *
 * Bytes are copied from the state buffer between byte offsets 32 and 96.
 * Whenever the data pointer sits at offset 96 the permutation is applied
 * and reading restarts at offset 32.
 *
 * rc:  context being read; its data pointer is advanced.
 * out: destination buffer with room for len bytes.
 * len: number of bytes to produce.
 */
void rpo128_squeeze(rpo128_context *rc, uint8_t *out, size_t len)
{
    const size_t rate_begin = 32;   /* first byte after the capacity */
    const size_t state_end = 96;    /* one past the last state byte  */
    size_t pos = (size_t)rc->dptr;

    for (size_t remaining = len; remaining > 0;) {
        if (pos == state_end) {
            apply_permutation(rc->st.A);
            pos = rate_begin;
        }

        size_t chunk = state_end - pos;
        if (chunk > remaining) {
            chunk = remaining;
        }

        memcpy(out, rc->st.dbuf + pos, chunk);
        out += chunk;
        pos += chunk;
        remaining -= chunk;
    }

    rc->dptr = pos;
}
|
|
|
|
/*
 * Clears a context: zeroes the state words and moves the data pointer back
 * to byte offset 32, the same values rpo128_init sets.
 *
 * rc: context to clear.
 */
void rpo128_release(rpo128_context *rc)
{
    rc->dptr = 32;
    memset(rc->st.A, 0, sizeof rc->st.A);
}
|
|
|
|
/* ================================================================================================
 * Hash-to-Point algorithm implementation based on RPO128
 */
|
|
|
|
/*
 * Fills x with 2^logn values in [0, M), each derived from 8 squeezed bytes.
 *
 * rc:   finalised RPO128 context to squeeze from.
 * x:    output array with room for 2^logn entries.
 * logn: base-2 logarithm of the number of values to produce.
 *
 * This implementation avoids the rejection sampling step needed in the
 * per-the-spec implementation. It relies on a remark on page 31 of
 * https://falcon-sign.info/falcon.pdf, which argues that this variant is
 * secure for the parameters set by NIST. Avoiding the rejection-sampling
 * step leads to an implementation that is constant-time.
 * TODO: Check that the current implementation is indeed constant-time.
 */
void PQCLEAN_FALCON512_CLEAN_hash_to_point_rpo(rpo128_context *rc, uint16_t *x, unsigned logn)
{
    const size_t count = (size_t)1 << logn;

    for (size_t k = 0; k < count; k++) {
        uint8_t raw[8];
        uint64_t word = 0;

        rpo128_squeeze(rc, raw, sizeof raw);

        /* Assemble the bytes as a little-endian 64-bit integer. */
        for (size_t byte = sizeof raw; byte-- > 0;) {
            word = (word << 8) | (uint64_t)raw[byte];
        }

        x[k] = (uint16_t)(word % M);
    }
}
|