/* * RPO implementation. */ #include #include #include /* ================================================================================================ * Modular Arithmetic */ #define P 0xFFFFFFFF00000001 #define M 12289 // From https://github.com/ncw/iprime/blob/master/mod_math_noasm.go static uint64_t add_mod_p(uint64_t a, uint64_t b) { a = P - a; uint64_t res = b - a; if (b < a) res += P; return res; } static uint64_t sub_mod_p(uint64_t a, uint64_t b) { uint64_t r = a - b; if (a < b) r += P; return r; } static uint64_t reduce_mod_p(uint64_t b, uint64_t a) { uint32_t d = b >> 32, c = b; if (a >= P) a -= P; a = sub_mod_p(a, c); a = sub_mod_p(a, d); a = add_mod_p(a, ((uint64_t)c) << 32); return a; } static uint64_t mult_mod_p(uint64_t x, uint64_t y) { uint32_t a = x, b = x >> 32, c = y, d = y >> 32; /* first synthesize the product using 32*32 -> 64 bit multiplies */ x = b * (uint64_t)c; /* b*c */ y = a * (uint64_t)d; /* a*d */ uint64_t e = a * (uint64_t)c, /* a*c */ f = b * (uint64_t)d, /* b*d */ t; x += y; /* b*c + a*d */ /* carry? */ if (x < y) f += 1LL << 32; /* carry into upper 32 bits - can't overflow */ t = x << 32; e += t; /* a*c + LSW(b*c + a*d) */ /* carry? */ if (e < t) f += 1; /* carry into upper 64 bits - can't overflow*/ t = x >> 32; f += t; /* b*d + MSW(b*c + a*d) */ /* can't overflow */ /* now reduce: (b*d + MSW(b*c + a*d), a*c + LSW(b*c + a*d)) */ return reduce_mod_p(f, e); } /* ================================================================================================ * RPO128 Permutation */ #define STATE_WIDTH 12 #define NUM_ROUNDS 7 /* * MDS matrix */ static const uint64_t MDS[12][12] = { { 7, 23, 8, 26, 13, 10, 9, 7, 6, 22, 21, 8 }, { 8, 7, 23, 8, 26, 13, 10, 9, 7, 6, 22, 21 }, { 21, 8, 7, 23, 8, 26, 13, 10, 9, 7, 6, 22 }, { 22, 21, 8, 7, 23, 8, 26, 13, 10, 9, 7, 6 }, { 6, 22, 21, 8, 7, 23, 8, 26, 13, 10, 9, 7 }, { 7, 6, 22, 21, 8, 7, 23, 8, 26, 13, 10, 9 }, { 9, 7, 6, 22, 21, 8, 7, 23, 8, 26, 13, 10 }, { 10, 9, 7, 6, 22, 21, 8, 7, 23, 8, 26, 13 }, { 13, 10, 9, 7, 6, 22, 21, 8, 7, 23, 8, 26 }, { 26, 13, 10, 9, 7, 6, 22, 21, 8, 7, 23, 8 }, { 8, 26, 13, 10, 9, 7, 6, 22, 21, 8, 7, 23 }, { 23, 8, 26, 13, 10, 9, 7, 6, 22, 21, 8, 7 }, }; /* * Round constants. */ static const uint64_t ARK1[7][12] = { { 5789762306288267392ULL, 6522564764413701783ULL, 17809893479458208203ULL, 107145243989736508ULL, 6388978042437517382ULL, 15844067734406016715ULL, 9975000513555218239ULL, 3344984123768313364ULL, 9959189626657347191ULL, 12960773468763563665ULL, 9602914297752488475ULL, 16657542370200465908ULL, }, { 12987190162843096997ULL, 653957632802705281ULL, 4441654670647621225ULL, 4038207883745915761ULL, 5613464648874830118ULL, 13222989726778338773ULL, 3037761201230264149ULL, 16683759727265180203ULL, 8337364536491240715ULL, 3227397518293416448ULL, 8110510111539674682ULL, 2872078294163232137ULL, }, { 18072785500942327487ULL, 6200974112677013481ULL, 17682092219085884187ULL, 10599526828986756440ULL, 975003873302957338ULL, 8264241093196931281ULL, 10065763900435475170ULL, 2181131744534710197ULL, 6317303992309418647ULL, 1401440938888741532ULL, 8884468225181997494ULL, 13066900325715521532ULL, }, { 5674685213610121970ULL, 5759084860419474071ULL, 13943282657648897737ULL, 1352748651966375394ULL, 17110913224029905221ULL, 1003883795902368422ULL, 4141870621881018291ULL, 8121410972417424656ULL, 14300518605864919529ULL, 13712227150607670181ULL, 17021852944633065291ULL, 6252096473787587650ULL, }, { 4887609836208846458ULL, 3027115137917284492ULL, 9595098600469470675ULL, 10528569829048484079ULL, 7864689113198939815ULL, 17533723827845969040ULL, 5781638039037710951ULL, 17024078752430719006ULL, 109659393484013511ULL, 7158933660534805869ULL, 2955076958026921730ULL, 7433723648458773977ULL, }, { 16308865189192447297ULL, 11977192855656444890ULL, 12532242556065780287ULL, 14594890931430968898ULL, 7291784239689209784ULL, 5514718540551361949ULL, 10025733853830934803ULL, 7293794580341021693ULL, 6728552937464861756ULL, 6332385040983343262ULL, 13277683694236792804ULL, 2600778905124452676ULL, }, { 7123075680859040534ULL, 1034205548717903090ULL, 7717824418247931797ULL, 3019070937878604058ULL, 11403792746066867460ULL, 10280580802233112374ULL, 337153209462421218ULL, 13333398568519923717ULL, 3596153696935337464ULL, 8104208463525993784ULL, 14345062289456085693ULL, 17036731477169661256ULL, }}; const uint64_t ARK2[7][12] = { { 6077062762357204287ULL, 15277620170502011191ULL, 5358738125714196705ULL, 14233283787297595718ULL, 13792579614346651365ULL, 11614812331536767105ULL, 14871063686742261166ULL, 10148237148793043499ULL, 4457428952329675767ULL, 15590786458219172475ULL, 10063319113072092615ULL, 14200078843431360086ULL, }, { 6202948458916099932ULL, 17690140365333231091ULL, 3595001575307484651ULL, 373995945117666487ULL, 1235734395091296013ULL, 14172757457833931602ULL, 707573103686350224ULL, 15453217512188187135ULL, 219777875004506018ULL, 17876696346199469008ULL, 17731621626449383378ULL, 2897136237748376248ULL, }, { 8023374565629191455ULL, 15013690343205953430ULL, 4485500052507912973ULL, 12489737547229155153ULL, 9500452585969030576ULL, 2054001340201038870ULL, 12420704059284934186ULL, 355990932618543755ULL, 9071225051243523860ULL, 12766199826003448536ULL, 9045979173463556963ULL, 12934431667190679898ULL, }, { 18389244934624494276ULL, 16731736864863925227ULL, 4440209734760478192ULL, 17208448209698888938ULL, 8739495587021565984ULL, 17000774922218161967ULL, 13533282547195532087ULL, 525402848358706231ULL, 16987541523062161972ULL, 5466806524462797102ULL, 14512769585918244983ULL, 10973956031244051118ULL, }, { 6982293561042362913ULL, 14065426295947720331ULL, 16451845770444974180ULL, 7139138592091306727ULL, 9012006439959783127ULL, 14619614108529063361ULL, 1394813199588124371ULL, 4635111139507788575ULL, 16217473952264203365ULL, 10782018226466330683ULL, 6844229992533662050ULL, 7446486531695178711ULL, }, { 3736792340494631448ULL, 577852220195055341ULL, 6689998335515779805ULL, 13886063479078013492ULL, 14358505101923202168ULL, 7744142531772274164ULL, 16135070735728404443ULL, 12290902521256031137ULL, 12059913662657709804ULL, 16456018495793751911ULL, 4571485474751953524ULL, 17200392109565783176ULL, }, { 17130398059294018733ULL, 519782857322261988ULL, 9625384390925085478ULL, 1664893052631119222ULL, 7629576092524553570ULL, 3485239601103661425ULL, 9755891797164033838ULL, 15218148195153269027ULL, 16460604813734957368ULL, 9643968136937729763ULL, 3611348709641382851ULL, 18256379591337759196ULL, }, }; static void apply_sbox(uint64_t *const state) { for (uint64_t i = 0; i < STATE_WIDTH; i++) { uint64_t t2 = mult_mod_p(*(state + i), *(state + i)); uint64_t t4 = mult_mod_p(t2, t2); *(state + i) = mult_mod_p(*(state + i), mult_mod_p(t2, t4)); } } static void apply_mds(uint64_t *state) { uint64_t res[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { res[i] = 0; } for (uint64_t i = 0; i < STATE_WIDTH; i++) { for (uint64_t j = 0; j < STATE_WIDTH; j++) { res[i] = add_mod_p(res[i], mult_mod_p(MDS[i][j], *(state + j))); } } for (uint64_t i = 0; i < STATE_WIDTH; i++) { *(state + i) = res[i]; } } static void apply_constants(uint64_t *const state, const uint64_t *ark) { for (uint64_t i = 0; i < STATE_WIDTH; i++) { *(state + i) = add_mod_p(*(state + i), *(ark + i)); } } static void exp_acc(const uint64_t m, const uint64_t *base, const uint64_t *tail, uint64_t *const res) { for (uint64_t i = 0; i < m; i++) { for (uint64_t j = 0; j < STATE_WIDTH; j++) { if (i == 0) { *(res + j) = mult_mod_p(*(base + j), *(base + j)); } else { *(res + j) = mult_mod_p(*(res + j), *(res + j)); } } } for (uint64_t i = 0; i < STATE_WIDTH; i++) { *(res + i) = mult_mod_p(*(res + i), *(tail + i)); } } static void apply_inv_sbox(uint64_t *const state) { uint64_t t1[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { t1[i] = 0; } for (uint64_t i = 0; i < STATE_WIDTH; i++) { t1[i] = mult_mod_p(*(state + i), *(state + i)); } uint64_t t2[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { t2[i] = 0; } for (uint64_t i = 0; i < STATE_WIDTH; i++) { t2[i] = mult_mod_p(t1[i], t1[i]); } uint64_t t3[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { t3[i] = 0; } exp_acc(3, t2, t2, t3); uint64_t t4[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { t4[i] = 0; } exp_acc(6, t3, t3, t4); uint64_t tmp[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { tmp[i] = 0; } exp_acc(12, t4, t4, tmp); uint64_t t5[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { t5[i] = 0; } exp_acc(6, tmp, t3, t5); uint64_t t6[STATE_WIDTH]; for (uint64_t i = 0; i < STATE_WIDTH; i++) { t6[i] = 0; } exp_acc(31, t5, t5, t6); for (uint64_t i = 0; i < STATE_WIDTH; i++) { uint64_t a = mult_mod_p(mult_mod_p(t6[i], t6[i]), t5[i]); a = mult_mod_p(a, a); a = mult_mod_p(a, a); uint64_t b = mult_mod_p(mult_mod_p(t1[i], t2[i]), *(state + i)); *(state + i) = mult_mod_p(a, b); } } static void apply_round(uint64_t *const state, const uint64_t round) { apply_mds(state); apply_constants(state, ARK1[round]); apply_sbox(state); apply_mds(state); apply_constants(state, ARK2[round]); apply_inv_sbox(state); } static void apply_permutation(uint64_t *state) { for (uint64_t i = 0; i < NUM_ROUNDS; i++) { apply_round(state, i); } } /* ================================================================================================ * RPO128 implementation. This is supposed to substitute SHAKE256 in the hash-to-point algorithm. */ #include "rpo.h" void rpo128_init(rpo128_context *rc) { rc->dptr = 32; memset(rc->st.A, 0, sizeof rc->st.A); } void rpo128_absorb(rpo128_context *rc, const uint8_t *in, size_t len) { size_t dptr; dptr = (size_t)rc->dptr; while (len > 0) { size_t clen, u; /* 136 * 8 = 1088 bit for the rate portion in the case of SHAKE256 * For RPO, this is 64 * 8 = 512 bits * The capacity for SHAKE256 is at the end while for RPO128 it is at the beginning */ clen = 96 - dptr; if (clen > len) { clen = len; } for (u = 0; u < clen; u++) { rc->st.dbuf[dptr + u] = in[u]; } dptr += clen; in += clen; len -= clen; if (dptr == 96) { apply_permutation(rc->st.A); dptr = 32; } } rc->dptr = dptr; } void rpo128_finalize(rpo128_context *rc) { // Set dptr to the end of the buffer, so that first call to extract will call the permutation. rc->dptr = 96; } void rpo128_squeeze(rpo128_context *rc, uint8_t *out, size_t len) { size_t dptr; dptr = (size_t)rc->dptr; while (len > 0) { size_t clen; if (dptr == 96) { apply_permutation(rc->st.A); dptr = 32; } clen = 96 - dptr; if (clen > len) { clen = len; } len -= clen; memcpy(out, rc->st.dbuf + dptr, clen); dptr += clen; out += clen; } rc->dptr = dptr; } void rpo128_release(rpo128_context *rc) { memset(rc->st.A, 0, sizeof rc->st.A); rc->dptr = 32; } /* ================================================================================================ * Hash-to-Point algorithm implementation based on RPO128 */ void PQCLEAN_FALCON512_CLEAN_hash_to_point_rpo(rpo128_context *rc, uint16_t *x, unsigned logn) { /* * This implementation avoids the rejection sampling step needed in the * per-the-spec implementation. It uses a remark in https://falcon-sign.info/falcon.pdf * page 31, which argues that the current variant is secure for the parameters set by NIST. * Avoiding the rejection-sampling step leads to an implementation that is constant-time. * TODO: Check that the current implementation is indeed constant-time. */ size_t n; n = (size_t)1 << logn; while (n > 0) { uint8_t buf[8]; uint64_t w; rpo128_squeeze(rc, (void *)buf, sizeof buf); w = ((uint64_t)(buf[7]) << 56) | ((uint64_t)(buf[6]) << 48) | ((uint64_t)(buf[5]) << 40) | ((uint64_t)(buf[4]) << 32) | ((uint64_t)(buf[3]) << 24) | ((uint64_t)(buf[2]) << 16) | ((uint64_t)(buf[1]) << 8) | ((uint64_t)(buf[0])); w %= M; *x++ = (uint16_t)w; n--; } }