diff --git a/c/buildasm/copy.asm.ejs b/c/buildasm/copy.asm.ejs
index d8e3abe..d851c5f 100644
--- a/c/buildasm/copy.asm.ejs
+++ b/c/buildasm/copy.asm.ejs
@@ -1,3 +1,45 @@
+;;;;;;;;;;;;;;;;;;;;;;
+; copy
+;;;;;;;;;;;;;;;;;;;;;;
+; Copies one element (n64+1 quadwords) from the source to the destination
+; Params:
+;   rsi <= pointer to the source element
+;   rdi <= pointer to the destination element
+;
+; Modified registers:
+;    rax
+;;;;;;;;;;;;;;;;;;;;;;;
+<%=name%>_copy:
+<% for (let i=0; i<=n64; i++) { %>
+        mov rax, [rsi + <%= i*8 %>]
+        mov [rdi + <%= i*8 %>], rax
+<% } %>
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; copy an array of integers
+;;;;;;;;;;;;;;;;;;;;;;
+; Copies rdx consecutive elements of n64+1 quadwords each using rep movsq
+; Params:
+;   rsi <= the src
+;   rdi <= the dest
+;   rdx <= number of integers to copy
+;
+; Modified registers:
+;    rax, rcx, rdx, r8, r9 (rsi/rdi are saved in r8/r9 and restored; DF is cleared)
+;;;;;;;;;;;;;;;;;;;;;;;
+<%=name%>_copyn:
+<%=name%>_copyn_loop: ; NOTE(review): no branch in the visible code targets this label
+        mov r8, rsi ; save the source pointer, rep movsq advances rsi
+        mov r9, rdi ; save the destination pointer, rep movsq advances rdi
+        mov rax, <%= n64+1 %> ; quadwords per element
+        mul rdx ; rdx:rax = quadwords per element * element count (overwrites rdx)
+        mov rcx, rax ; repeat count for rep movsq (low 64 bits of the product)
+        cld ; copy towards increasing addresses
+        rep movsq
+        mov rsi, r8
+        mov rdi, r9
+        ret
 
 ;;;;;;;;;;;;;;;;;;;;;;
 ; rawCopyS2L
diff --git a/c/buildasm/fr.asm b/c/buildasm/fr.asm
new file mode 100644
index 0000000..258fe13
--- /dev/null
+++ b/c/buildasm/fr.asm
@@ -0,0 +1,5713 @@
+
+
+        global Fr_copy
+        global Fr_copyn
+        global Fr_add
+        global Fr_sub
+        global Fr_neg
+        global Fr_mul
+        global Fr_square
+        global Fr_band
+        global Fr_bor
+        global Fr_bxor
+        global Fr_bnot
+        global Fr_eq
+        global Fr_neq
+        global Fr_lt
+        global Fr_gt
+        global Fr_leq
+        global Fr_geq
+        global Fr_land
+        global Fr_lor
+        global Fr_lnot
+        global Fr_toNormal
+        global Fr_toLongNormal
+        global Fr_toMontgomery
+        global Fr_q
+        DEFAULT REL
+
+        section .text
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; copy
+;;;;;;;;;;;;;;;;;;;;;;
+; Copies one element (5 quadwords, offsets 0..32) from the source to the destination
+; Params:
+;   rsi <= pointer to the source element
+;   rdi <= pointer to the destination element
+;
+; Modified registers:
+;    rax
+;;;;;;;;;;;;;;;;;;;;;;;
+Fr_copy:
+
+        mov rax, [rsi + 0]
+        mov [rdi + 0], rax
+
+        mov rax, [rsi + 8]
+        mov [rdi + 8], rax
+
+        mov rax, [rsi + 16]
+        mov [rdi + 16], rax
+
+        mov rax, [rsi + 24]
+        mov [rdi + 24], rax
+
+        mov rax, [rsi + 32]
+        mov [rdi + 32], rax
+
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; copy an 
array of integers
+;;;;;;;;;;;;;;;;;;;;;;
+; Copies rdx consecutive elements of 5 quadwords each using rep movsq
+; Params:
+;   rsi <= the src
+;   rdi <= the dest
+;   rdx <= number of integers to copy
+;
+; Modified registers:
+;    rax, rcx, rdx, r8, r9 (rsi/rdi are saved in r8/r9 and restored; DF is cleared)
+;;;;;;;;;;;;;;;;;;;;;;;
+Fr_copyn:
+Fr_copyn_loop: ; NOTE(review): no branch in the visible code targets this label
+        mov r8, rsi ; save the source pointer, rep movsq advances rsi
+        mov r9, rdi ; save the destination pointer, rep movsq advances rdi
+        mov rax, 5 ; quadwords per element
+        mul rdx ; rdx:rax = 5 * element count (overwrites rdx)
+        mov rcx, rax ; repeat count for rep movsq (low 64 bits of the product)
+        cld ; copy towards increasing addresses
+        rep movsq
+        mov rsi, r8
+        mov rdi, r9
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawCopyS2L
+;;;;;;;;;;;;;;;;;;;;;;
+; Convert a signed 64 bit integer to a long format field element
+; Params:
+;   rsi <= the integer (a negative value is stored as q + value)
+;   rdi <= Pointer to the overwritten element (header word at [rdi], data at [rdi + 8])
+;
+; Modified registers:
+;    rax, and rsi when the integer is negative
+;;;;;;;;;;;;;;;;;;;;;;;
+
+rawCopyS2L:
+        mov al, 0x80
+        shl rax, 56 ; only the low byte survives the shift: rax = 0x80 << 56
+        mov [rdi], rax ; set the result to LONG normal
+
+        cmp rsi, 0
+        js u64toLong_adjust_neg ; negative input: store q + rsi instead
+
+        mov [rdi + 8], rsi
+        xor rax, rax
+
+        mov [rdi + 16], rax
+
+        mov [rdi + 24], rax
+
+        mov [rdi + 32], rax
+
+        ret
+
+u64toLong_adjust_neg:
+        add rsi, [q] ; Set the first digit
+        mov [rdi + 8], rsi ;
+
+        mov rsi, -1 ; all ones (upper limbs of the negative value); mov leaves the carry untouched
+
+        mov rax, rsi ; Add to q
+        adc rax, [q + 8 ]
+        mov [rdi + 16], rax
+
+        mov rax, rsi ; Add to q
+        adc rax, [q + 16 ]
+        mov [rdi + 24], rax
+
+        mov rax, rsi ; Add to q
+        adc rax, [q + 24 ]
+        mov [rdi + 32], rax
+
+        ret
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawMontgomeryMul
+;;;;;;;;;;;;;;;;;;;;;;
+; Multiply two elements in montgomery form
+; Params:
+;   rsi <= Pointer to the long data of element 1
+;   rdx <= Pointer to the long data of element 2
+;   rdi <= Pointer to the long data of result
+; Modified registers:
+;    r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+
+rawMontgomeryMul:
+        sub rsp, 32 ; Reserve space for ms
+        mov rcx, rdx ; rdx is needed for multiplications so keep it in cx
+        mov r11, 0xc2e1f593efffffff ; np
+        xor r8,r8
+        xor r9,r9
+        xor r10,r10
+
+        mov rax, [rsi + 0]
+        mul qword [rcx + 0]
+        add r8, rax
+        adc r9, rdx
+        adc r10, 0x0
+
+
+
+
+
+        mov rax, r8
+        mul r11
+        mov [rsp + 0], rax ; keep this multiplier, it is reloaded for later columns
+        mul qword [q]
+        add r8, rax
+        adc r9, rdx
+        adc r10, 0x0
+
+
+
+        mov rax, [rsi + 0]
+        mul qword [rcx + 8]
+        add r9, rax 
+ adc r10, rdx + adc r8, 0x0 + + mov rax, [rsi + 8] + mul qword [rcx + 0] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsp + 0] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, r9 + mul r11 + mov [rsp + 8], rax + mul qword [q] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsi + 0] + mul qword [rcx + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsi + 8] + mul qword [rcx + 8] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsi + 16] + mul qword [rcx + 0] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsp + 8] + mul qword [q + 8] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, r10 + mul r11 + mov [rsp + 16], rax + mul qword [q] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsi + 0] + mul qword [rcx + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsi + 8] + mul qword [rcx + 16] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsi + 16] + mul qword [rcx + 8] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsi + 24] + mul qword [rcx + 0] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, [rsp + 16] + mul qword [q + 8] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 16] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, r8 + mul r11 + mov [rsp + 24], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, [rsi + 8] + mul qword [rcx + 24] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsi + 16] + mul qword [rcx + 16] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsi + 24] + mul qword [rcx + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsp + 24] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 16] 
+ mul qword [q + 16] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 24] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov [rdi + 0 ], r9 + xor r9,r9 + + + + mov rax, [rsi + 16] + mul qword [rcx + 24] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsi + 24] + mul qword [rcx + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsp + 24] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 24] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov [rdi + 8 ], r10 + xor r10,r10 + + + + mov rax, [rsi + 24] + mul qword [rcx + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, [rsp + 24] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov [rdi + 16 ], r8 + xor r8,r8 + + + + + + + + mov [rdi + 24 ], r9 + xor r9,r9 + + + + test r10, r10 + jnz rawMontgomeryMul_mulM_sq + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawMontgomeryMul_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul_mulM_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawMontgomeryMul_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul_mulM_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawMontgomeryMul_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul_mulM_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawMontgomeryMul_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul_mulM_sq ; q is lower + + ; If equal substract q + +rawMontgomeryMul_mulM_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + + +rawMontgomeryMul_mulM_done: + mov rdx, rcx ; recover rdx to its original place. 
+ add rsp, 32 ; recover rsp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawMontgomerySquare +;;;;;;;;;;;;;;;;;;;;;; +; Square an element +; Params: +; rsi <= Pointer to the long data of element 1 +; rdi <= Pointer to the long data of result +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; + +rawMontgomerySquare: + sub rsp, 32 ; Reserve space for ms + mov rcx, rdx ; rdx is needed for multiplications so keep it in cx + mov r11, 0xc2e1f593efffffff ; np + xor r8,r8 + xor r9,r9 + xor r10,r10 + + + + mov rax, [rsi + 0] + mul rax + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + + + + + mov rax, r8 + mul r11 + mov [rsp + 0], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, [rsi + 0] + mul qword [rsi + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + + + + + mov rax, [rsp + 0] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, r9 + mul r11 + mov [rsp + 8], rax + mul qword [q] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsi + 0] + mul qword [rsi + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsi + 8] + mul rax + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + + + mov rax, [rsp + 8] + mul qword [q + 8] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, r10 + mul r11 + mov [rsp + 16], rax + mul qword [q] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsi + 0] + mul qword [rsi + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsi + 8] + mul qword [rsi + 16] + add r8, rax + adc r9, rdx + adc r10, 0x0 + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + + + + + mov rax, [rsp + 16] + mul qword [q + 8] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 16] + add r8, rax + adc r9, 
rdx + adc r10, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, r8 + mul r11 + mov [rsp + 24], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, [rsi + 8] + mul qword [rsi + 24] + add r9, rax + adc r10, rdx + adc r8, 0x0 + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsi + 16] + mul rax + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + + + mov rax, [rsp + 24] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 16] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 24] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov [rdi + 0 ], r9 + xor r9,r9 + + + + mov rax, [rsi + 16] + mul qword [rsi + 24] + add r10, rax + adc r8, rdx + adc r9, 0x0 + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + + + + + mov rax, [rsp + 24] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 24] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov [rdi + 8 ], r10 + xor r10,r10 + + + + + + mov rax, [rsi + 24] + mul rax + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + + + mov rax, [rsp + 24] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov [rdi + 16 ], r8 + xor r8,r8 + + + + + + + + + + + + mov [rdi + 24 ], r9 + xor r9,r9 + + + + test r10, r10 + jnz rawMontgomerySquare_mulM_sq + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawMontgomerySquare_mulM_done ; q is bigget so done. + jnz rawMontgomerySquare_mulM_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawMontgomerySquare_mulM_done ; q is bigget so done. + jnz rawMontgomerySquare_mulM_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawMontgomerySquare_mulM_done ; q is bigget so done. 
+ jnz rawMontgomerySquare_mulM_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawMontgomerySquare_mulM_done ; q is bigget so done. + jnz rawMontgomerySquare_mulM_sq ; q is lower + + ; If equal substract q + +rawMontgomerySquare_mulM_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + + +rawMontgomerySquare_mulM_done: + mov rdx, rcx ; recover rdx to its original place. + add rsp, 32 ; recover rsp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawMontgomeryMul1 +;;;;;;;;;;;;;;;;;;;;;; +; Multiply two elements in montgomery form +; Params: +; rsi <= Pointer to the long data of element 1 +; rdx <= second operand +; rdi <= Pointer to the long data of result +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; + +rawMontgomeryMul1: + sub rsp, 32 ; Reserve space for ms + mov rcx, rdx ; rdx is needed for multiplications so keep it in cx + mov r11, 0xc2e1f593efffffff ; np + xor r8,r8 + xor r9,r9 + xor r10,r10 + + mov rax, [rsi + 0] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + + + mov rax, r8 + mul r11 + mov [rsp + 0], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, [rsi + 8] + mul rcx + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsp + 0] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, r9 + mul r11 + mov [rsp + 8], rax + mul qword [q] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, [rsi + 16] + mul rcx + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsp + 8] + mul qword [q + 8] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, r10 + mul r11 + mov [rsp + 16], rax + mul qword [q] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, [rsi + 24] + mul rcx + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + 
+ mov rax, [rsp + 16] + mul qword [q + 8] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 16] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, r8 + mul r11 + mov [rsp + 24], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + + + mov rax, [rsp + 24] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 16] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 24] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov [rdi + 0 ], r9 + xor r9,r9 + + + + + + mov rax, [rsp + 24] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 24] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov [rdi + 8 ], r10 + xor r10,r10 + + + + + + mov rax, [rsp + 24] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov [rdi + 16 ], r8 + xor r8,r8 + + + + + + + + mov [rdi + 24 ], r9 + xor r9,r9 + + + + test r10, r10 + jnz rawMontgomeryMul1_mulM_sq + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawMontgomeryMul1_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul1_mulM_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawMontgomeryMul1_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul1_mulM_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawMontgomeryMul1_mulM_done ; q is bigget so done. + jnz rawMontgomeryMul1_mulM_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawMontgomeryMul1_mulM_done ; q is bigget so done. 
+ jnz rawMontgomeryMul1_mulM_sq ; q is lower + + ; If equal substract q + +rawMontgomeryMul1_mulM_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + + +rawMontgomeryMul1_mulM_done: + mov rdx, rcx ; recover rdx to its original place. + add rsp, 32 ; recover rsp + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawFromMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Multiply two elements in montgomery form +; Params: +; rsi <= Pointer to the long data of element 1 +; rdi <= Pointer to the long data of result +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; + +rawFromMontgomery: + sub rsp, 32 ; Reserve space for ms + mov rcx, rdx ; rdx is needed for multiplications so keep it in cx + mov r11, 0xc2e1f593efffffff ; np + xor r8,r8 + xor r9,r9 + xor r10,r10 + + add r8, [rdi + 0] + adc r9, 0x0 + adc r10, 0x0 + + + + + + mov rax, r8 + mul r11 + mov [rsp + 0], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + add r9, [rdi + 8] + adc r10, 0x0 + adc r8, 0x0 + + + + mov rax, [rsp + 0] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov rax, r9 + mul r11 + mov [rsp + 8], rax + mul qword [q] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + add r10, [rdi + 16] + adc r8, 0x0 + adc r9, 0x0 + + + + mov rax, [rsp + 8] + mul qword [q + 8] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov rax, r10 + mul r11 + mov [rsp + 16], rax + mul qword [q] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + add r8, [rdi + 24] + adc r9, 0x0 + adc r10, 0x0 + + + + mov rax, [rsp + 16] + mul qword [q + 8] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 16] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + mov rax, [rsp + 0] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov rax, 
r8 + mul r11 + mov [rsp + 24], rax + mul qword [q] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + + + mov rax, [rsp + 24] + mul qword [q + 8] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 16] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + mov rax, [rsp + 8] + mul qword [q + 24] + add r9, rax + adc r10, rdx + adc r8, 0x0 + + + + mov [rdi + 0 ], r9 + xor r9,r9 + + + + + + mov rax, [rsp + 24] + mul qword [q + 16] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + mov rax, [rsp + 16] + mul qword [q + 24] + add r10, rax + adc r8, rdx + adc r9, 0x0 + + + + mov [rdi + 8 ], r10 + xor r10,r10 + + + + + + mov rax, [rsp + 24] + mul qword [q + 24] + add r8, rax + adc r9, rdx + adc r10, 0x0 + + + + mov [rdi + 16 ], r8 + xor r8,r8 + + + + + + + + mov [rdi + 24 ], r9 + xor r9,r9 + + + + test r10, r10 + jnz rawFromMontgomery_mulM_sq + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawFromMontgomery_mulM_done ; q is bigget so done. + jnz rawFromMontgomery_mulM_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawFromMontgomery_mulM_done ; q is bigget so done. + jnz rawFromMontgomery_mulM_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawFromMontgomery_mulM_done ; q is bigget so done. + jnz rawFromMontgomery_mulM_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawFromMontgomery_mulM_done ; q is bigget so done. + jnz rawFromMontgomery_mulM_sq ; q is lower + + ; If equal substract q + +rawFromMontgomery_mulM_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + + +rawFromMontgomery_mulM_done: + mov rdx, rcx ; recover rdx to its original place. 
+ add rsp, 32 ; recover rsp + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; toMontgomery +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to Montgomery +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toMontgomery: + mov rax, [rdi] + bts rax, 62 ; check if montgomery + jc toMontgomery_doNothing + bts rax, 63 + jc toMontgomeryLong + +toMontgomeryShort: + mov [rdi], rax + add rdi, 8 + push rsi + lea rsi, [R2] + movsx rdx, eax + cmp rdx, 0 + js negMontgomeryShort +posMontgomeryShort: + call rawMontgomeryMul1 + pop rsi + sub rdi, 8 + ret + +negMontgomeryShort: + neg rdx ; Do the multiplication positive and then negate the result. + call rawMontgomeryMul1 + mov rsi, rdi + call rawNegL + pop rsi + sub rdi, 8 + ret + + +toMontgomeryLong: + mov [rdi], rax + add rdi, 8 + push rsi + mov rdx, rdi + lea rsi, [R2] + call rawMontgomeryMul + pop rsi + sub rdi, 8 + +toMontgomery_doNothing: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number from Montgomery +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toNormal: + mov rax, [rdi] + btc rax, 62 ; check if montgomery + jnc toNormal_doNothing + bt rax, 63 ; if short, it means it's converted + jnc toNormal_doNothing + +toNormalLong: + mov [rdi], rax + add rdi, 8 + call rawFromMontgomery + sub rdi, 8 + +toNormal_doNothing: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; toLongNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to long normal +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +Fr_toLongNormal: + mov rax, [rdi] + bt rax, 62 ; check if montgomery + jc toLongNormal_fromMontgomery + bt rax, 63 ; check if long + jnc toLongNormal_fromShort + ret ; It is already long + +toLongNormal_fromMontgomery: + add rdi, 8 + call rawFromMontgomery + sub rdi, 8 + ret + +toLongNormal_fromShort: + mov r8, rsi ; save rsi + movsx rsi, eax + call 
rawCopyS2L
+        mov rsi, r8 ; recover rsi
+        ret ; conversion finished: without this the code fell through into Fr_add
+
+
+
+
+
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; add
+;;;;;;;;;;;;;;;;;;;;;;
+; Adds two elements of any kind (header bit 63 = long, header bit 62 = montgomery)
+; Params:
+;   rsi <= Pointer to element 1
+;   rdx <= Pointer to element 2
+;   rdi <= Pointer to result
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx, rdx (rsi is not preserved on the short + long normal path)
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_add:
+        mov rax, [rsi] ; header word of element 1
+        mov rcx, [rdx] ; header word of element 2
+        bt rax, 63 ; Check if the first operand is long
+        jc add_l1
+        bt rcx, 63 ; Check if the second operand is long
+        jc add_s1l2
+
+add_s1s2: ; Both operands are short
+
+        xor rdx, rdx
+        mov edx, eax
+        add edx, ecx
+        jo add_manageOverflow ; the 32 bit sum overflowed, redo it in 64 bits
+
+        mov [rdi], rdx ; not necessary to adjust so just save and return
+        ret
+
+add_manageOverflow: ; Do the operation in 64 bits
+        push rsi
+        movsx rsi, eax ; sign extend both 32 bit values
+        movsx rdx, ecx
+        add rsi, rdx
+        call rawCopyS2L ; store the 64 bit sum as a long normal element at [rdi]
+        pop rsi
+        ret
+
+add_l1:
+        bt rcx, 63 ; Check if the second operand is long
+        jc add_l1l2
+
+;;;;;;;;
+add_l1s2:
+        bt rax, 62 ; check if montgomery first
+        jc add_l1ms2
+add_l1ns2:
+        mov r11b, 0x80
+        shl r11, 56 ; only the low byte survives the shift: r11 = 0x80 << 56
+        mov [rdi], r11 ; result header: long normal
+
+        add rsi, 8 ; skip the headers, the raw routines work on the long data
+        movsx rdx, ecx ; sign extended short value of element 2
+        add rdi, 8
+        cmp rdx, 0
+
+        jns tmp_1
+        neg rdx ; negative short: subtract its magnitude instead
+        call rawSubLS
+        sub rdi, 8
+        sub rsi, 8
+        ret
+tmp_1:
+        call rawAddLS
+        sub rdi, 8
+        sub rsi, 8
+        ret
+
+
+
+add_l1ms2:
+        bt rcx, 62 ; check if montgomery second
+        jc add_l1ms2m
+add_l1ms2n:
+        mov r11b, 0xC0
+        shl r11, 56
+        mov [rdi], r11 ; result header: long montgomery
+        push rdi
+        mov rdi, rdx
+        call Fr_toMontgomery ; converts element 2 in place
+        mov rdx, rdi
+        pop rdi
+
+        add rdi, 8
+        add rsi, 8
+        add rdx, 8
+        call rawAddLL
+        sub rdi, 8
+        sub rsi, 8
+        ret
+
+
+add_l1ms2m:
+        mov r11b, 0xC0
+        shl r11, 56
+        mov [rdi], r11 ; result header: long montgomery
+
+        add rdi, 8
+        add rsi, 8
+        add rdx, 8
+        call rawAddLL
+        sub rdi, 8
+        sub rsi, 8
+        ret
+
+
+
+;;;;;;;;
+add_s1l2:
+        bt rcx, 62 ; check if montgomery second
+        jc add_s1l2m
+add_s1l2n:
+        mov r11b, 0x80
+        shl r11, 56
+        mov [rdi], r11 ; result header: long normal
+
+        lea rsi, [rdx + 8] ; the long operand (element 2) becomes the raw source
+        movsx rdx, eax ; sign extended short value of element 1
+        add rdi, 8
+        cmp rdx, 0
+
+        jns tmp_2
+        neg rdx ; negative short: subtract its magnitude instead
+        call rawSubLS
+        sub rdi, 8
+        sub rsi, 8
+        ret
+tmp_2:
+ call rawAddLS + sub rdi, 8 + sub rsi, 8 + ret + + +add_s1l2m: + bt rax, 62 ; check if montgomery second + jc add_s1ml2m +add_s1nl2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toMontgomery + mov rdx, rsi + mov rsi, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + ret + + +add_s1ml2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + ret + + +;;;; +add_l1l2: + bt rax, 62 ; check if montgomery first + jc add_l1ml2 +add_l1nl2: + bt rcx, 62 ; check if montgomery second + jc add_l1nl2m +add_l1nl2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + ret + + +add_l1nl2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toMontgomery + mov rdx, rsi + mov rsi, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + ret + + +add_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc add_l1ml2m +add_l1ml2n: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rdx + call Fr_toMontgomery + mov rdx, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + ret + + +add_l1ml2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawAddLL + sub rdi, 8 + sub rsi, 8 + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLL +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rsi <= Pointer to the long data of element 1 +; rdx <= Pointer to the long data of element 2 +; rdi <= Pointer to the long data of result +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawAddLL: + ; Add component by component with carry + + mov rax, [rsi + 0] + add rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + adc rax, 
[rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + adc rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + adc rax, [rdx + 24] + mov [rdi + 24], rax + + jc rawAddLL_sq ; if overflow, substract q + + ; Compare with q + + + cmp rax, [q + 24] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 16] + + cmp rax, [q + 16] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 8] + + cmp rax, [q + 8] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + + mov rax, [rdi + 0] + + cmp rax, [q + 0] + jc rawAddLL_done ; q is bigget so done. + jnz rawAddLL_sq ; q is lower + + ; If equal substract q +rawAddLL_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLL_done: + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; rawAddLS +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of type long +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to the long data of element 1 +; rdx <= Value to be added +;;;;;;;;;;;;;;;;;;;;;; +rawAddLS: + ; Add component by component with carry + + add rdx, [rsi] + mov [rdi] ,rdx + + mov rdx, 0 + adc rdx, [rsi + 8] + mov [rdi + 8], rdx + + mov rdx, 0 + adc rdx, [rsi + 16] + mov [rdi + 16], rdx + + mov rdx, 0 + adc rdx, [rsi + 24] + mov [rdi + 24], rdx + + jc rawAddLS_sq ; if overflow, substract q + + ; Compare with q + + mov rax, [rdi + 24] + cmp rax, [q + 24] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 16] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 8] + jc rawAddLS_done ; q is bigget so done. + jnz rawAddLS_sq ; q is lower + + mov rax, [rdi + 0] + cmp rax, [q + 0] + jc rawAddLS_done ; q is bigget so done. 
+ jnz rawAddLS_sq ; q is lower + + ; If equal substract q +rawAddLS_sq: + + mov rax, [q + 0] + sub [rdi + 0], rax + + mov rax, [q + 8] + sbb [rdi + 8], rax + + mov rax, [q + 16] + sbb [rdi + 16], rax + + mov rax, [q + 24] + sbb [rdi + 24], rax + +rawAddLS_done: + ret + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; sub +;;;;;;;;;;;;;;;;;;;;;; +; Substracts two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_sub: + mov rax, [rsi] + mov rcx, [rdx] + bt rax, 63 ; Check if is long first operand + jc sub_l1 + bt rcx, 63 ; Check if is long second operand + jc sub_s1l2 + +sub_s1s2: ; Both operands are short + + xor rdx, rdx + mov edx, eax + sub edx, ecx + jo sub_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rdx ; not necessary to adjust so just save and return + ret + +sub_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rsi, eax + movsx rdx, ecx + sub rsi, rdx + call rawCopyS2L + pop rsi + ret + +sub_l1: + bt rcx, 63 ; Check if is short second operand + jc sub_l1l2 + +;;;;;;;; +sub_l1s2: + bt rax, 62 ; check if montgomery first + jc sub_l1ms2 +sub_l1ns2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rsi, 8 + movsx rdx, ecx + add rdi, 8 + cmp rdx, 0 + + jns tmp_3 + neg rdx + call rawAddLS + sub rdi, 8 + sub rsi, 8 + ret +tmp_3: + call rawSubLS + sub rdi, 8 + sub rsi, 8 + ret + + +sub_l1ms2: + bt rcx, 62 ; check if montgomery second + jc sub_l1ms2m +sub_l1ms2n: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rdx + call Fr_toMontgomery + mov rdx, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + +sub_l1ms2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + + +;;;;;;;; +sub_s1l2: + bt rcx, 62 ; check if 
montgomery first + jc sub_s1l2m +sub_s1l2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp eax, 0 + + js tmp_4 + + ; First Operand is positive + push rsi + add rdi, 8 + movsx rsi, eax + add rdx, 8 + call rawSubSL + sub rdi, 8 + pop rsi + ret + +tmp_4: ; First operand is negative + push rsi + lea rsi, [rdx + 8] + movsx rdx, eax + add rdi, 8 + neg rdx + call rawNegLS + sub rdi, 8 + pop rsi + ret + + +sub_s1l2m: + bt rax, 62 ; check if montgomery second + jc sub_s1ml2m +sub_s1nl2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toMontgomery + mov rdx, rsi + mov rsi, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + +sub_s1ml2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + +;;;; +sub_l1l2: + bt rax, 62 ; check if montgomery first + jc sub_l1ml2 +sub_l1nl2: + bt rcx, 62 ; check if montgomery second + jc sub_l1nl2m +sub_l1nl2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + +sub_l1nl2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toMontgomery + mov rdx, rsi + mov rsi, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + +sub_l1ml2: + bt rcx, 62 ; check if montgomery seconf + jc sub_l1ml2m +sub_l1ml2n: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rdx + call Fr_toMontgomery + mov rdx, rdi + pop rdi + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + +sub_l1ml2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawSubLL + sub rdi, 8 + sub rsi, 8 + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; rawSubLS +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a short element from the 
long element
+; Params:
+;   rdi <= Pointer to the long data of result
+;   rsi <= Pointer to the long data of element 1, the value that is subtracted from
+;   rdx <= Value to be subtracted
+;   [rdi] = [rsi] - rdx
+; Modified Registers:
+;    rax, rdx (rdx is zeroed after the first digit)
+;;;;;;;;;;;;;;;;;;;;;;
+rawSubLS:
+        ; Subtract first digit
+
+        mov rax, [rsi]
+        sub rax, rdx
+        mov [rdi] ,rax
+        mov rdx, 0 ; mov leaves the borrow untouched; the remaining digits subtract only the borrow
+
+        mov rax, [rsi + 8]
+        sbb rax, rdx
+        mov [rdi + 8], rax
+
+        mov rax, [rsi + 16]
+        sbb rax, rdx
+        mov [rdi + 16], rax
+
+        mov rax, [rsi + 24]
+        sbb rax, rdx
+        mov [rdi + 24], rax
+
+        jnc rawSubLS_done ; no borrow out of the top digit: done, otherwise add q
+
+        ; Add q
+rawSubLS_aq:
+
+        mov rax, [q + 0]
+        add [rdi + 0], rax
+
+        mov rax, [q + 8]
+        adc [rdi + 8], rax
+
+        mov rax, [q + 16]
+        adc [rdi + 16], rax
+
+        mov rax, [q + 24]
+        adc [rdi + 24], rax
+
+rawSubLS_done:
+        ret
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawSubSL
+;;;;;;;;;;;;;;;;;;;;;;
+; Subtracts a long element from a short value
+; Params:
+;   rdi <= Pointer to the long data of result
+;   rsi <= Value that is subtracted from
+;   rdx <= Pointer to long of the value to be subtracted
+;
+;   [rdi] = rsi - [rdx]
+; Modified Registers:
+;    rax, rsi
+;;;;;;;;;;;;;;;;;;;;;;
+rawSubSL:
+        ; Subtract first digit
+        sub rsi, [rdx]
+        mov [rdi] ,rsi
+
+
+        mov rax, 0 ; upper digits of the short value are zero; mov leaves the borrow untouched
+        sbb rax, [rdx + 8]
+        mov [rdi + 8], rax
+
+        mov rax, 0
+        sbb rax, [rdx + 16]
+        mov [rdi + 16], rax
+
+        mov rax, 0
+        sbb rax, [rdx + 24]
+        mov [rdi + 24], rax
+
+        jnc rawSubSL_done ; no borrow out of the top digit: done, otherwise add q
+
+        ; Add q
+rawSubSL_aq:
+
+        mov rax, [q + 0]
+        add [rdi + 0], rax
+
+        mov rax, [q + 8]
+        adc [rdi + 8], rax
+
+        mov rax, [q + 16]
+        adc [rdi + 16], rax
+
+        mov rax, [q + 24]
+        adc [rdi + 24], rax
+
+rawSubSL_done:
+        ret
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawSubLL
+;;;;;;;;;;;;;;;;;;;;;;
+; Subtracts a long element from a long element
+; Params:
+;   rdi <= Pointer to the long data of result
+;   rsi <= Pointer to long of the value that is subtracted from
+;   rdx <= Pointer to long of the value to be subtracted
+;
+;   [rdi] = [rsi] - [rdx]
+; Modified Registers:
+;    rax
+;;;;;;;;;;;;;;;;;;;;;; +rawSubLL: + ; Substract first digit + + mov rax, [rsi + 0] + sub rax, [rdx + 0] + mov [rdi + 0], rax + + mov rax, [rsi + 8] + sbb rax, [rdx + 8] + mov [rdi + 8], rax + + mov rax, [rsi + 16] + sbb rax, [rdx + 16] + mov [rdi + 16], rax + + mov rax, [rsi + 24] + sbb rax, [rdx + 24] + mov [rdi + 24], rax + + jnc rawSubLL_done ; if overflow, add q + + ; Add q +rawSubLL_aq: + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + +rawSubLL_done: + ret + +;;;;;;;;;;;;;;;;;;;;;; +; rawNegLS +;;;;;;;;;;;;;;;;;;;;;; +; Substracts a long element and a short element form 0 +; Params: +; rdi <= Pointer to the long data of result +; rsi <= Pointer to long from where substracted +; rdx <= short value to be substracted too +; +; [rdi] = -[rsi] - rdx +; Modified Registers: +; rax +;;;;;;;;;;;;;;;;;;;;;; +rawNegLS: + mov rax, [q] + sub rax, rdx + mov [rdi], rax + + mov rax, [q + 8 ] + sbb rax, 0 + mov [rdi + 8], rax + + mov rax, [q + 16 ] + sbb rax, 0 + mov [rdi + 16], rax + + mov rax, [q + 24 ] + sbb rax, 0 + mov [rdi + 24], rax + + setc dl + + + mov rax, [rdi + 0 ] + sub rax, [rsi + 0] + mov [rdi + 0], rax + + mov rax, [rdi + 8 ] + sbb rax, [rsi + 8] + mov [rdi + 8], rax + + mov rax, [rdi + 16 ] + sbb rax, [rsi + 16] + mov [rdi + 16], rax + + mov rax, [rdi + 24 ] + sbb rax, [rsi + 24] + mov [rdi + 24], rax + + + setc dh + or dl, dh + jz rawNegSL_done + + ; it is a negative value, so add q + + mov rax, [q + 0] + add [rdi + 0], rax + + mov rax, [q + 8] + adc [rdi + 8], rax + + mov rax, [q + 16] + adc [rdi + 16], rax + + mov rax, [q + 24] + adc [rdi + 24], rax + + +rawNegSL_done: + ret + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; neg +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element to be negated +; rdi <= Pointer to result +; [rdi] = -[rsi] +;;;;;;;;;;;;;;;;;;;;;; +Fr_neg: + mov rax, [rsi] + bt rax, 
63 ; Check if is short first operand (bit 63 set => long)
+    jc neg_l
+
+neg_s: ; Operand is short
+
+    neg eax ; writing eax clears the upper half of rax, so the stored word stays short
+    jo neg_manageOverflow ; Check if overflow. (0x80000000 is the only case)
+
+    mov [rdi], rax ; not necessary to adjust so just save and return
+    ret
+
+neg_manageOverflow: ; Do the operation in 64 bits
+    push rsi
+    movsx rsi, eax
+    neg rsi
+    call rawCopyS2L ; store the 64-bit value as a long element at [rdi]
+    pop rsi
+    ret
+
+
+
+neg_l:
+    mov [rdi], rax ; Copy the type
+
+    add rdi, 8 ; skip the type word: rawNegL works on the long data
+    add rsi, 8
+    call rawNegL
+    sub rdi, 8 ; restore the caller's pointers
+    sub rsi, 8
+    ret
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; rawNegL
+;;;;;;;;;;;;;;;;;;;;;;
+; Negates a value: 0 stays 0, anything else becomes q - value
+; Params:
+;   rdi <= Pointer to the long data of result
+;   rsi <= Pointer to the long data of element 1
+;
+;   [rdi] = - [rsi]
+;;;;;;;;;;;;;;;;;;;;;;
+rawNegL:
+    ; Check whether the operand is zero
+
+    xor rax, rax
+
+    cmp [rsi + 0], rax
+    jnz doNegate
+
+    cmp [rsi + 8], rax
+    jnz doNegate
+
+    cmp [rsi + 16], rax
+    jnz doNegate
+
+    cmp [rsi + 24], rax
+    jnz doNegate
+
+    ; it's zero so just set to zero
+
+    mov [rdi + 0], rax
+
+    mov [rdi + 8], rax
+
+    mov [rdi + 16], rax
+
+    mov [rdi + 24], rax
+
+    ret
+doNegate:
+
+    mov rax, [q + 0] ; [rdi] = q - [rsi], limb by limb with borrow
+    sub rax, [rsi + 0]
+    mov [rdi + 0], rax
+
+    mov rax, [q + 8]
+    sbb rax, [rsi + 8]
+    mov [rdi + 8], rax
+
+    mov rax, [q + 16]
+    sbb rax, [rsi + 16]
+    mov [rdi + 16], rax
+
+    mov rax, [q + 24]
+    sbb rax, [rsi + 24]
+    mov [rdi + 24], rax
+
+    ret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; square
+;;;;;;;;;;;;;;;;;;;;;;
+; Squares a field element
+; Params:
+;   rsi <= Pointer to element 1
+;   rdi <= Pointer to result
+;   [rdi] = [rsi] * [rsi]
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx (the short path also clobbers rdx via imul)
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_square:
+    mov r8, [rsi]
+    bt r8, 63 ; Check if is short first operand
+    jc square_l1
+
+square_s1: ; Operand is short
+
+    xor rax, rax
+    mov eax, r8d
+    imul eax ; edx:eax = eax * eax; OF set when the product does not fit in 32 bits
+    jo square_manageOverflow ; too large for a short: redo the product in 64 bits
+
+    mov [rdi], rax ; fits in a short: save it and return
+    ret ; previously fell through and overwrote the short result with a long one
+square_manageOverflow: ; Do the operation in 64 bits
+    push rsi
+    movsx rax, r8d
+    imul rax ; rdx:rax = rax * rax, the square of a 32-bit value fits in rax
+    mov rsi, rax
+    call rawCopyS2L ; store the 64-bit product as a long element at [rdi]
+    pop rsi
+
+    ret
+
+square_l1:
+    bt r8, 62 ; check if montgomery first
+    jc square_l1m
+square_l1n:
+    mov r11b, 0xC0 ; type byte 0xC0: bits 63 and 62 set (long + montgomery)
+    shl r11, 56
+    mov [rdi], r11
+
+    add rdi, 8 ; skip the type words: the raw routines work on the long data
+    add rsi, 8
+    call rawMontgomerySquare
+    sub rdi, 8
+    sub rsi, 8
+
+
+    push rsi
+    add rdi, 8
+    mov rsi, rdi
+    lea rdx, [R3] ; NOTE(review): R3 is defined elsewhere; presumably the correction factor for a non-montgomery input - confirm
+    call rawMontgomeryMul
+    sub rdi, 8
+    pop rsi
+
+    ret
+
+square_l1m:
+    mov r11b, 0xC0
+    shl r11, 56
+    mov [rdi], r11
+
+    add rdi, 8
+    add rsi, 8
+    call rawMontgomerySquare
+    sub rdi, 8
+    sub rsi, 8
+
+    ret
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; mul
+;;;;;;;;;;;;;;;;;;;;;;
+; Multiplies two elements of any kind
+; Params:
+;   rsi <= Pointer to element 1
+;   rdx <= Pointer to element 2
+;   rdi <= Pointer to result
+;   [rdi] = [rsi] * [rdx]
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx (rdx is also overwritten by imul / the short-operand paths)
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_mul:
+    mov r8, [rsi]
+    mov r9, [rdx]
+    bt r8, 63 ; Check if is short first operand
+    jc mul_l1
+    bt r9, 63 ; Check if is short second operand
+    jc mul_s1l2
+
+mul_s1s2: ; Both operands are short
+
+    xor rax, rax
+    mov eax, r8d
+    imul r9d ; edx:eax = eax * r9d; OF set when the product does not fit in 32 bits
+    jo mul_manageOverflow ; too large for a short: redo the product in 64 bits
+
+    mov [rdi], rax ; fits in a short: save it and return
+    ret ; previously fell through and overwrote the short result with a long one
+mul_manageOverflow: ; Do the operation in 64 bits
+    push rsi
+    movsx rax, r8d
+    movsx rcx, r9d
+    imul rcx ; product of two sign-extended 32-bit values fits in rax
+    mov rsi, rax
+    call rawCopyS2L ; store the 64-bit product as a long element at [rdi]
+    pop rsi
+
+    ret
+
+mul_l1:
+    bt r9, 63 ; Check if is short second operand
+    jc mul_l1l2
+
+;;;;;;;;
+mul_l1s2:
+    bt r8, 62 ; check if montgomery first
+    jc mul_l1ms2
+mul_l1ns2:
+    bt r9, 62 ; check if montgomery second
+    jc mul_l1ns2m
+mul_l1ns2n:
+    mov r11b, 0xC0 ; type byte 0xC0: bits 63 and 62 set (long + montgomery)
+    shl r11, 56
+    mov [rdi], r11
+
+    push rsi
+    add rsi, 8
+    movsx rdx, r9d ; short second operand, sign-extended
+    add rdi, 8
+    cmp rdx, 0
+
+    jns tmp_5
+    neg rdx ; negative short: multiply by |value| and negate the result afterwards
+    call rawMontgomeryMul1
+    mov rsi, rdi
+    call rawNegL
+    sub rdi, 8
+    pop rsi
+
+    jmp tmp_6
+tmp_5:
+    call rawMontgomeryMul1
+    sub rdi, 8
+    pop rsi
+tmp_6:
+
+
+
+    push rsi
+    add rdi, 8
+    mov rsi, rdi
+    lea rdx, [R3]
+    call
rawMontgomeryMul + sub rdi, 8 + pop rsi + + ret + + +mul_l1ns2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + + +mul_l1ms2: + bt r9, 62 ; check if montgomery second + jc mul_l1ms2m +mul_l1ms2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + push rsi + add rsi, 8 + movsx rdx, r9d + add rdi, 8 + cmp rdx, 0 + + jns tmp_7 + neg rdx + call rawMontgomeryMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_8 +tmp_7: + call rawMontgomeryMul1 + sub rdi, 8 + pop rsi +tmp_8: + + + ret + +mul_l1ms2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + + +;;;;;;;; +mul_s1l2: + bt r8, 62 ; check if montgomery first + jc mul_s1ml2 +mul_s1nl2: + bt r9, 62 ; check if montgomery first + jc mul_s1nl2m +mul_s1nl2n: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_9 + neg rdx + call rawMontgomeryMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_10 +tmp_9: + call rawMontgomeryMul1 + sub rdi, 8 + pop rsi +tmp_10: + + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call rawMontgomeryMul + sub rdi, 8 + pop rsi + + ret + +mul_s1nl2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + push rsi + lea rsi, [rdx + 8] + movsx rdx, r8d + add rdi, 8 + cmp rdx, 0 + + jns tmp_11 + neg rdx + call rawMontgomeryMul1 + mov rsi, rdi + call rawNegL + sub rdi, 8 + pop rsi + + jmp tmp_12 +tmp_11: + call rawMontgomeryMul1 + sub rdi, 8 + pop rsi +tmp_12: + + + ret + +mul_s1ml2: + bt r9, 62 ; check if montgomery first + jc mul_s1ml2m +mul_s1ml2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_s1ml2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add 
rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + +;;;; +mul_l1l2: + bt r8, 62 ; check if montgomery first + jc mul_l1ml2 +mul_l1nl2: + bt r9, 62 ; check if montgomery second + jc mul_l1nl2m +mul_l1nl2n: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + + push rsi + add rdi, 8 + mov rsi, rdi + lea rdx, [R3] + call rawMontgomeryMul + sub rdi, 8 + pop rsi + + ret + +mul_l1nl2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2: + bt r9, 62 ; check if montgomery seconf + jc mul_l1ml2m +mul_l1ml2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + +mul_l1ml2m: + mov r11b, 0xC0 + shl r11, 56 + mov [rdi], r11 + + add rdi, 8 + add rsi, 8 + add rdx, 8 + call rawMontgomeryMul + sub rdi, 8 + sub rsi, 8 + + ret + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; band +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_band: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc and_l1 + bt r9, 63 ; Check if is short second operand + jc and_s1l2 + +and_s1s2: + + cmp r8d, 0 + + js tmp_13 + + cmp r9d, 0 + js tmp_13 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, r8d + and edx, r9d + mov [rdi], rdx ; not necessary to adjust so just save and return + ret + +tmp_13: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop 
rdx + pop rdi + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + + +and_l1: + bt r9, 63 ; Check if is short second operand + jc and_l1l2 + + +and_l1s2: + bt r8, 62 ; check if montgomery first + jc and_l1ms2 +and_l1ns2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp r9d, 0 + + js tmp_14 + movsx rax, r9d + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + ret + +tmp_14: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +and_l1ms2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push r9 ; r9 is used in montgomery so we need to save it + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + pop r9 + + cmp r9d, 0 + + js tmp_15 + movsx rax, r9d + and rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + and rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + and rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + ret + +tmp_15: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + 
pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + +and_s1l2: + bt r9, 62 ; check if montgomery first + jc and_s1l2m +and_s1l2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp r8d, 0 + + js tmp_16 + movsx rax, r8d + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + +tmp_16: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +and_s1l2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push r8 ; r8 is used in montgomery so we need to save it + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + pop r8 + + cmp r8d, 0 + + js tmp_17 + movsx rax, r8d + and rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + +tmp_17: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] 
+ and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + +and_l1l2: + bt r8, 62 ; check if montgomery first + jc and_l1ml2 + bt r9, 62 ; check if montgomery first + jc and_l1nl2m +and_l1nl2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +and_l1nl2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +and_l1ml2: + bt r9, 62 ; check if montgomery first + jc and_l1ml2m +and_l1ml2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +and_l1ml2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + push rdi + mov rdi, rdx + call 
Fr_toNormal + mov rdx, rdi + pop rdi + + + mov rax, [rsi + 8] + and rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + and rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + and rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + and rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bor: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc or_l1 + bt r9, 63 ; Check if is short second operand + jc or_s1l2 + +or_s1s2: + + cmp r8d, 0 + + js tmp_18 + + cmp r9d, 0 + js tmp_18 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, r8d + or edx, r9d + mov [rdi], rdx ; not necessary to adjust so just save and return + ret + +tmp_18: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + + +or_l1: + bt r9, 63 ; Check if is short second operand + jc or_l1l2 + + +or_l1s2: + bt r8, 62 ; check if montgomery first + jc or_l1ms2 +or_l1ns2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp r9d, 0 + + js tmp_19 + movsx rax, r9d + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov 
[rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + ret + +tmp_19: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +or_l1ms2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push r9 ; r9 is used in montgomery so we need to save it + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + pop r9 + + cmp r9d, 0 + + js tmp_20 + movsx rax, r9d + or rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + or rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + or rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + ret + +tmp_20: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + +or_s1l2: + bt r9, 62 ; check if montgomery first + jc or_s1l2m +or_s1l2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp r8d, 0 + + js tmp_21 + movsx rax, r8d + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + 
and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + +tmp_21: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +or_s1l2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push r8 ; r8 is used in montgomery so we need to save it + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + pop r8 + + cmp r8d, 0 + + js tmp_22 + movsx rax, r8d + or rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + +tmp_22: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + +or_l1l2: + bt r8, 62 ; check if montgomery first + jc or_l1ml2 + bt r9, 62 ; check if montgomery first + jc or_l1nl2m +or_l1nl2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], 
rax + + ret + + +or_l1nl2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +or_l1ml2: + bt r9, 62 ; check if montgomery first + jc or_l1ml2m +or_l1ml2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +or_l1ml2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + + + mov rax, [rsi + 8] + or rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + or rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + or rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + or rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + +;;;;;;;;;;;;;;;;;;;;;; +; bxor +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bxor: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc xor_l1 + bt r9, 63 ; Check if is short second operand + jc xor_s1l2 + +xor_s1s2: + + cmp r8d, 
0 + + js tmp_23 + + cmp r9d, 0 + js tmp_23 + xor rdx, rdx ; both ops are positive so do the op and return + mov edx, r8d + xor edx, r9d + mov [rdi], rdx ; not necessary to adjust so just save and return + ret + +tmp_23: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + + +xor_l1: + bt r9, 63 ; Check if is short second operand + jc xor_l1l2 + + +xor_l1s2: + bt r8, 62 ; check if montgomery first + jc xor_l1ms2 +xor_l1ns2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp r9d, 0 + + js tmp_24 + movsx rax, r9d + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + ret + +tmp_24: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +xor_l1ms2: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push r9 ; r9 is used in montgomery so we need to save it + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov 
rdx, rsi + mov rsi, rdi + pop rdi + pop r9 + + cmp r9d, 0 + + js tmp_25 + movsx rax, r9d + xor rax, [rsi +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rsi + 16]; + + mov [rdi + 16 ], rax; + + xor rax, rax + xor rax, [rsi + 24]; + + mov [rdi + 24 ], rax; + + xor rax, rax + xor rax, [rsi + 32]; + + and rax, [lboMask] ; + + mov [rdi + 32 ], rax; + + ret + +tmp_25: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + +xor_s1l2: + bt r9, 62 ; check if montgomery first + jc xor_s1l2m +xor_s1l2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + cmp r8d, 0 + + js tmp_26 + movsx rax, r8d + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + +tmp_26: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +xor_s1l2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push r8 ; r8 is used in montgomery so we need to save it + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + pop r8 + + cmp r8d, 0 + + js tmp_27 + 
movsx rax, r8d + xor rax, [rdx +8] + mov [rdi+8], rax + + xor rax, rax + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + xor rax, rax + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + xor rax, rax + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + +tmp_27: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + + +xor_l1l2: + bt r8, 62 ; check if montgomery first + jc xor_l1ml2 + bt r9, 62 ; check if montgomery first + jc xor_l1nl2m +xor_l1nl2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +xor_l1nl2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +xor_l1ml2: + bt r9, 62 ; check if montgomery first + jc xor_l1ml2m +xor_l1ml2n: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov 
[rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + +xor_l1ml2m: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; bnot +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bnot: + mov r11b, 0x80 + shl r11, 56 + mov [rdi], r11 + + mov r8, [rsi] + bt r8, 63 ; Check if is long operand + jc bnot_l1 +bnot_s: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + jmp bnot_l1n + +bnot_l1: + bt r8, 62 ; check if montgomery first + jnc bnot_l1n + +bnot_l1m: + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + +bnot_l1n: + + mov rax, [rsi + 8] + not rax + + mov [rdi + 8], rax + + mov rax, [rsi + 16] + not rax + + mov [rdi + 16], rax + + mov rax, [rsi + 24] + not rax + + mov [rdi + 24], rax + + mov rax, [rsi + 32] + not rax + + and rax, [lboMask] + + mov [rdi + 32], rax + + ret + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; eq +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. 
+; Modified Registers:
+;    r8, r9, r10, r11, rax, rcx
+;;;;;;;;;;;;;;;;;;;;;;
+Fr_eq: ; [rdi] = 1 when ([rsi] - [rdx]) normalises to zero, otherwise 0
+    sub rsp, 40 ; Reserve 40 bytes of stack for the result of the subtraction
+    push rdi ; Save rdi
+    lea rdi, [rsp+8] ; We pushed rdi so we need to add 8
+    call Fr_sub ; Do a subtraction (rsi, rdx are the caller's operands)
+    call Fr_toNormal ; Convert it to normal; assumes Fr_sub left rdi untouched - defined outside this chunk
+    pop rdi
+
+    mov rax, [rsp] ; We already popped, so no need to add 8
+    bt rax, 63 ; check if result is long
+    jc eq_longCmp
+
+eq_shortCmp:
+    cmp eax, 0
+    je eq_s_eq
+    js eq_s_lt ; je does not change flags, so SF still comes from the cmp
+eq_s_gt:
+
+    mov qword [rdi], 0
+    add rsp, 40
+    ret
+
+eq_s_lt:
+
+    mov qword [rdi], 0
+    add rsp, 40
+    ret
+
+eq_s_eq:
+
+    mov qword [rdi], 1
+    add rsp, 40
+    ret
+
+
+eq_longCmp:
+    ; long result: equal only when all four data limbs are zero
+
+    cmp qword [rsp + 32], 0
+    jnz eq_neq
+
+    cmp qword [rsp + 24], 0
+    jnz eq_neq
+
+    cmp qword [rsp + 16], 0
+    jnz eq_neq
+
+    cmp qword [rsp + 8], 0
+    jnz eq_neq
+
+eq_eq:
+
+    mov qword [rdi], 1
+    add rsp, 40
+    ret
+
+eq_neq:
+
+    mov qword [rdi], 0
+    add rsp, 40
+    ret
+
+
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;
+; neq
+;;;;;;;;;;;;;;;;;;;;;;
+; Checks whether two elements of any kind are different
+; Params:
+;   rsi <= Pointer to element 1
+;   rdx <= Pointer to element 2
+;   rdi <= Pointer to result can be zero or one.
+; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_neq: + sub rsp, 40 ; Save space for the result of the subtraction + push rdi ; Save rdi + lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 + call Fr_sub ; Do a subtraction + call Fr_toNormal ; Convert it to normal + pop rdi + + mov rax, [rsp] ; We already popped, so no need to add 8 + bt rax, 63 ; check if result is long + jc neq_longCmp + +neq_shortCmp: + cmp eax, 0 + je neq_s_eq + js neq_s_lt +neq_s_gt: + + mov qword [rdi], 1 + add rsp, 40 + ret + +neq_s_lt: + + mov qword [rdi], 1 + add rsp, 40 + ret + +neq_s_eq: + + mov qword [rdi], 0 + add rsp, 40 + ret + + +neq_longCmp: + + + cmp qword [rsp + 32], 0 + jnz neq_neq + + cmp qword [rsp + 24], 0 + jnz neq_neq + + cmp qword [rsp + 16], 0 + jnz neq_neq + + cmp qword [rsp + 8], 0 + jnz neq_neq + +neq_eq: + + mov qword [rdi], 0 + add rsp, 40 + ret + +neq_neq: + + mov qword [rdi], 1 + add rsp, 40 + ret + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; lt +;;;;;;;;;;;;;;;;;;;;;; +; Tests whether element 1 is less than element 2 +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result; can be zero or one.
+; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_lt: + sub rsp, 40 ; Save space for the result of the subtraction + push rdi ; Save rdi + lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 + call Fr_sub ; Do a subtraction + call Fr_toNormal ; Convert it to normal + pop rdi + + mov rax, [rsp] ; We already popped, so no need to add 8 + bt rax, 63 ; check if result is long + jc lt_longCmp + +lt_shortCmp: + cmp eax, 0 + je lt_s_eq + js lt_s_lt +lt_s_gt: + + mov qword [rdi], 0 + add rsp, 40 + ret + +lt_s_lt: + + mov qword [rdi], 1 + add rsp, 40 + ret + +lt_s_eq: + + mov qword [rdi], 0 + add rsp, 40 + ret + + +lt_longCmp: + + + cmp qword [rsp + 32], 0 + jnz lt_neq + + cmp qword [rsp + 24], 0 + jnz lt_neq + + cmp qword [rsp + 16], 0 + jnz lt_neq + + cmp qword [rsp + 8], 0 + jnz lt_neq + +lt_eq: + + + + mov qword [rdi], 0 + add rsp, 40 + ret + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; compare with (q-1)/2 + jc tmp_29 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_28 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; compare with (q-1)/2 + jc tmp_29 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_28 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half + 8], rax ; compare with (q-1)/2 + jc tmp_29 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_28 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; compare with (q-1)/2 + jc tmp_29 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_28 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_28: + + mov qword [rdi], 0 + add rsp, 40 + ret + +tmp_29: + + mov qword [rdi], 1 + add rsp, 40 + ret + + +lt_neq: + + + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; compare with (q-1)/2 + jc tmp_31 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_30 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; compare with (q-1)/2 + jc tmp_31 ; half < rax => e1-e2 is neg => e1 < e2 +
jnz tmp_30 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half + 8], rax ; compare with (q-1)/2 + jc tmp_31 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_30 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; compare with (q-1)/2 + jc tmp_31 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_30 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_30: + + mov qword [rdi], 0 + add rsp, 40 + ret + +tmp_31: + + mov qword [rdi], 1 + add rsp, 40 + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; gt +;;;;;;;;;;;;;;;;;;;;;; +; Tests whether element 1 is greater than element 2 +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result; can be zero or one. +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_gt: + sub rsp, 40 ; Save space for the result of the subtraction + push rdi ; Save rdi + lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 + call Fr_sub ; Do a subtraction + call Fr_toNormal ; Convert it to normal + pop rdi + + mov rax, [rsp] ; We already popped, so no need to add 8 + bt rax, 63 ; check if result is long + jc gt_longCmp + +gt_shortCmp: + cmp eax, 0 + je gt_s_eq + js gt_s_lt +gt_s_gt: + + mov qword [rdi], 1 + add rsp, 40 + ret + +gt_s_lt: + + mov qword [rdi], 0 + add rsp, 40 + ret + +gt_s_eq: + + mov qword [rdi], 0 + add rsp, 40 + ret + + +gt_longCmp: + + + cmp qword [rsp + 32], 0 + jnz gt_neq + + cmp qword [rsp + 24], 0 + jnz gt_neq + + cmp qword [rsp + 16], 0 + jnz gt_neq + + cmp qword [rsp + 8], 0 + jnz gt_neq + +gt_eq: + + + + mov qword [rdi], 0 + add rsp, 40 + ret + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; compare with (q-1)/2 + jc tmp_33 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_32 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; compare with (q-1)/2 + jc tmp_33 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_32 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half
+ 8], rax ; compare with (q-1)/2 + jc tmp_33 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_32 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; compare with (q-1)/2 + jc tmp_33 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_32 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_32: + + mov qword [rdi], 1 + add rsp, 40 + ret + +tmp_33: + + mov qword [rdi], 0 + add rsp, 40 + ret + + +gt_neq: + + + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; compare with (q-1)/2 + jc tmp_35 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_34 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; compare with (q-1)/2 + jc tmp_35 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_34 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half + 8], rax ; compare with (q-1)/2 + jc tmp_35 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_34 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; compare with (q-1)/2 + jc tmp_35 ; half < rax => e1-e2 is neg => e1 < e2 + jnz tmp_34 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_34: + + mov qword [rdi], 1 + add rsp, 40 + ret + +tmp_35: + + mov qword [rdi], 0 + add rsp, 40 + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; leq +;;;;;;;;;;;;;;;;;;;;;; +; Tests whether element 1 is less than or equal to element 2 +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result; can be zero or one.
+; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_leq: + sub rsp, 40 ; Save space for the result of the substraction + push rdi ; Save rdi + lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 + call Fr_sub ; Do a substraction + call Fr_toNormal ; Convert it to normal + pop rdi + + mov rax, [rsp] ; We already poped do no need to add 8 + bt rax, 63 ; check is result is long + jc leq_longCmp + +leq_shortCmp: + cmp eax, 0 + je leq_s_eq + js leq_s_lt +leq_s_gt: + + mov qword [rdi], 0 + add rsp, 40 + ret + +leq_s_lt: + + mov qword [rdi], 1 + add rsp, 40 + ret + +leq_s_eq: + + mov qword [rdi], 1 + add rsp, 40 + ret + + +leq_longCmp: + + + cmp qword [rsp + 32], 0 + jnz leq_neq + + cmp qword [rsp + 24], 0 + jnz leq_neq + + cmp qword [rsp + 16], 0 + jnz leq_neq + + cmp qword [rsp + 8], 0 + jnz leq_neq + +leq_eq: + + + + mov qword [rdi], 1 + add rsp, 40 + ret + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc tmp_37 ; half e1-e2 is neg => e1 < e2 + jnz tmp_36 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc tmp_37 ; half e1-e2 is neg => e1 < e2 + jnz tmp_36 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc tmp_37 ; half e1-e2 is neg => e1 < e2 + jnz tmp_36 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc tmp_37 ; half e1-e2 is neg => e1 < e2 + jnz tmp_36 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_36: + + mov qword [rdi], 0 + add rsp, 40 + ret + +tmp_37: + + mov qword [rdi], 1 + add rsp, 40 + ret + + +leq_neq: + + + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc tmp_39 ; half e1-e2 is neg => e1 < e2 + jnz tmp_38 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc tmp_39 ; half e1-e2 is neg 
=> e1 < e2 + jnz tmp_38 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc tmp_39 ; half e1-e2 is neg => e1 < e2 + jnz tmp_38 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc tmp_39 ; half e1-e2 is neg => e1 < e2 + jnz tmp_38 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_38: + + mov qword [rdi], 0 + add rsp, 40 + ret + +tmp_39: + + mov qword [rdi], 1 + add rsp, 40 + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; geq +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_geq: + sub rsp, 40 ; Save space for the result of the substraction + push rdi ; Save rdi + lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 + call Fr_sub ; Do a substraction + call Fr_toNormal ; Convert it to normal + pop rdi + + mov rax, [rsp] ; We already poped do no need to add 8 + bt rax, 63 ; check is result is long + jc geq_longCmp + +geq_shortCmp: + cmp eax, 0 + je geq_s_eq + js geq_s_lt +geq_s_gt: + + mov qword [rdi], 1 + add rsp, 40 + ret + +geq_s_lt: + + mov qword [rdi], 0 + add rsp, 40 + ret + +geq_s_eq: + + mov qword [rdi], 1 + add rsp, 40 + ret + + +geq_longCmp: + + + cmp qword [rsp + 32], 0 + jnz geq_neq + + cmp qword [rsp + 24], 0 + jnz geq_neq + + cmp qword [rsp + 16], 0 + jnz geq_neq + + cmp qword [rsp + 8], 0 + jnz geq_neq + +geq_eq: + + + + mov qword [rdi], 1 + add rsp, 40 + ret + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc tmp_41 ; half e1-e2 is neg => e1 < e2 + jnz tmp_40 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc tmp_41 ; half e1-e2 is neg => e1 < e2 + jnz tmp_40 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov 
rax, [rsp + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc tmp_41 ; half e1-e2 is neg => e1 < e2 + jnz tmp_40 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc tmp_41 ; half e1-e2 is neg => e1 < e2 + jnz tmp_40 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_40: + + mov qword [rdi], 1 + add rsp, 40 + ret + +tmp_41: + + mov qword [rdi], 0 + add rsp, 40 + ret + + +geq_neq: + + + + + + + + + mov rax, [rsp + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc tmp_43 ; half e1-e2 is neg => e1 < e2 + jnz tmp_42 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc tmp_43 ; half e1-e2 is neg => e1 < e2 + jnz tmp_42 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc tmp_43 ; half e1-e2 is neg => e1 < e2 + jnz tmp_42 ; half>rax => e1 -e2 is pos => e1 > e2 + + mov rax, [rsp + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc tmp_43 ; half e1-e2 is neg => e1 < e2 + jnz tmp_42 ; half>rax => e1 -e2 is pos => e1 > e2 + + ; half == rax => e1-e2 is pos => e1 > e2 +tmp_42: + + mov qword [rdi], 1 + add rsp, 40 + ret + +tmp_43: + + mov qword [rdi], 0 + add rsp, 40 + ret + + + + + + + + + + + + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; land +;;;;;;;;;;;;;;;;;;;;;; +; Logical and between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_land: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_44 + + test eax, eax + jz retZero_46 + jmp retOne_45 + +tmp_44: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_45 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_45 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_45 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_45 + + +retZero_46: + mov qword r8, 0 + jmp done_47 
+ +retOne_45: + mov qword r8, 1 + +done_47: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_48 + + test eax, eax + jz retZero_50 + jmp retOne_49 + +tmp_48: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_49 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_49 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_49 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_49 + + +retZero_50: + mov qword rcx, 0 + jmp done_51 + +retOne_49: + mov qword rcx, 1 + +done_51: + + and rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lor +;;;;;;;;;;;;;;;;;;;;;; +; Logical or between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lor: + + + + + + + mov rax, [rsi] + bt rax, 63 + jc tmp_52 + + test eax, eax + jz retZero_54 + jmp retOne_53 + +tmp_52: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_53 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_53 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_53 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_53 + + +retZero_54: + mov qword r8, 0 + jmp done_55 + +retOne_53: + mov qword r8, 1 + +done_55: + + + + + + + + mov rax, [rdx] + bt rax, 63 + jc tmp_56 + + test eax, eax + jz retZero_58 + jmp retOne_57 + +tmp_56: + + mov rax, [rdx + 8] + test rax, rax + jnz retOne_57 + + mov rax, [rdx + 16] + test rax, rax + jnz retOne_57 + + mov rax, [rdx + 24] + test rax, rax + jnz retOne_57 + + mov rax, [rdx + 32] + test rax, rax + jnz retOne_57 + + +retZero_58: + mov qword rcx, 0 + jmp done_59 + +retOne_57: + mov qword rcx, 1 + +done_59: + + or rcx, r8 + mov [rdi], rcx + ret + + +;;;;;;;;;;;;;;;;;;;;;; +; lnot +;;;;;;;;;;;;;;;;;;;;;; +; Do the logical not of an element +; Params: +; rsi <= Pointer to element to be tested +; rdi <= Pointer to result one if element1 is zero and zero otherwise +; Modified Registers: +; rax, rax, r8 +;;;;;;;;;;;;;;;;;;;;;; +Fr_lnot: + + + 
+ + + + mov rax, [rsi] + bt rax, 63 + jc tmp_60 + + test eax, eax + jz retZero_62 + jmp retOne_61 + +tmp_60: + + mov rax, [rsi + 8] + test rax, rax + jnz retOne_61 + + mov rax, [rsi + 16] + test rax, rax + jnz retOne_61 + + mov rax, [rsi + 24] + test rax, rax + jnz retOne_61 + + mov rax, [rsi + 32] + test rax, rax + jnz retOne_61 + + +retZero_62: + mov qword rcx, 0 + jmp done_63 + +retOne_61: + mov qword rcx, 1 + +done_63: + + test rcx, rcx + + jz lnot_retOne +lnot_retZero: + mov qword [rdi], 0 + ret +lnot_retOne: + mov qword [rdi], 1 + ret + + + + + section .data +Fr_q: + dd 0 + dd 0x80000000 +q dq 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029 +half dq 0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014 +R2 dq 0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5 +R3 dq 0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c +lboMask dq 0x1fffffffffffffff + diff --git a/c/buildasm/fr.asm.ejs b/c/buildasm/fr.asm.ejs index 16066d1..83c76a9 100644 --- a/c/buildasm/fr.asm.ejs +++ b/c/buildasm/fr.asm.ejs @@ -1,5 +1,7 @@ + global <%=name%>_copy + global <%=name%>_copyn global <%=name%>_add global <%=name%>_sub global <%=name%>_neg @@ -19,6 +21,7 @@ global <%=name%>_lor global <%=name%>_lnot global <%=name%>_toNormal + global <%=name%>_toLongNormal global <%=name%>_toMontgomery global <%=name%>_q DEFAULT REL diff --git a/c/buildasm/fr.c b/c/buildasm/fr.c new file mode 100644 index 0000000..37daa3a --- /dev/null +++ b/c/buildasm/fr.c @@ -0,0 +1,210 @@ +#include "fr.h" +#include +#include +#include +#include + +mpz_t q; +mpz_t zero; +mpz_t one; +mpz_t mask; +size_t nBits; + + +void Fr_toMpz(mpz_t r, PFrElement pE) { + Fr_toNormal(pE); + if (!(pE->type & Fr_LONG)) { + mpz_set_si(r, pE->shortVal); + if (pE->shortVal<0) { + mpz_add(r, r, q); + } + } else { + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)pE->longVal); + } +} + +void Fr_fromMpz(PFrElement pE, mpz_t v) { + if 
(mpz_fits_sint_p(v)) { + pE->type = Fr_SHORT; + pE->shortVal = mpz_get_si(v); + } else { + pE->type = Fr_LONG; + for (int i=0; i < Fr_N64; i++) pE->longVal[i] = 0; + mpz_export((void *)(pE->longVal), NULL, -1, 8, -1, 0, v); + } +} + + +void Fr_init() { + mpz_init(q); + mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_init_set_ui(zero, 0); + mpz_init_set_ui(one, 1); + nBits = mpz_sizeinbase (q, 2); + mpz_init(mask); + mpz_mul_2exp(mask, one, nBits-1); + mpz_sub(mask, mask, one); + +} + +void Fr_str2element(PFrElement pE, char const *s) { + mpz_t mr; + mpz_init_set_str(mr, s, 10); + Fr_fromMpz(pE, mr); +} + +char *Fr_element2str(PFrElement pE) { + mpz_t r; + if (!(pE->type & Fr_LONG)) { + if (pE->shortVal>=0) { + char *r = new char[32]; + sprintf(r, "%d", pE->shortVal); + return r; + } else { + mpz_init_set_si(r, pE->shortVal); + mpz_add(r, r, q); + mpz_clear(q); + } + } else { + Fr_toNormal(pE); + mpz_init(r); + mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)pE->longVal); + } + char *res = mpz_get_str (0, 10, r); + mpz_clear(r); + return res; +} + +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + // char *s1 = mpz_get_str (0, 10, ma); + // printf("s1 %s\n", s1); + Fr_toMpz(mb, b); + // char *s2 = mpz_get_str (0, 10, mb); + // printf("s2 %s\n", s2); + mpz_fdiv_q(mr, ma, mb); + // char *sr = mpz_get_str (0, 10, mr); + // printf("r %s\n", sr); + Fr_fromMpz(r, mr); +} + +void Fr_mod(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_fdiv_r(mr, ma, mb); + Fr_fromMpz(r, mr); +} + +void Fr_shl(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + if (mpz_cmp_ui(mb, nBits) >= 0) { + mpz_set(mr, zero); + } else { +
mpz_mul_2exp(mr, ma, mpz_get_ui(mb)); + mpz_and(mr, mr, mask); + } + Fr_fromMpz(r, mr); +} + +void Fr_shr(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + if (mpz_cmp_ui(mb, nBits) >= 0) { + mpz_set(mr, zero); + } else { + mpz_tdiv_q_2exp(mr, ma, mpz_get_ui(mb)); + mpz_and(mr, mr, mask); + } + Fr_fromMpz(r, mr); +} + + +void Fr_pow(PFrElement r, PFrElement a, PFrElement b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + Fr_toMpz(ma, a); + Fr_toMpz(mb, b); + mpz_powm(mr, ma, mb, q); + Fr_fromMpz(r, mr); +} + +void Fr_inv(PFrElement r, PFrElement a) { + mpz_t ma; + mpz_t mr; + mpz_init(ma); + mpz_init(mr); + + Fr_toMpz(ma, a); + mpz_invert(mr, ma, q); + Fr_fromMpz(r, mr); +} + +void Fr_div(PFrElement r, PFrElement a, PFrElement b) { + FrElement tmp; + Fr_inv(&tmp, b); + Fr_mul(r, a, &tmp); +} + +int Fr_isTrue(PFrElement pE) { + if (!(pE->type & Fr_LONG)) return pE->shortVal != 0; + for (int i=0; i< Fr_N64; i++) { + if (pE->longVal[i]) return 1; + } + return 0; +} + +int Fr_toInt(PFrElement pE) { + Fr_toNormal(pE); + if (!(pE->type & Fr_LONG)) { + return pE->shortVal; + } else { + mpz_t ma; + mpz_init(ma); + Fr_toMpz(ma, pE); + if (mpz_fits_sint_p(ma)) { + return mpz_get_si(ma); + } + + mpz_sub(ma, ma, q); + if (mpz_fits_sint_p(ma)) { + return mpz_get_si(ma); + } else { + assert(false); + } + } +} + diff --git a/c/buildasm/fr.c.ejs b/c/buildasm/fr.c.ejs index 5ff7cfe..4ab9200 100644 --- a/c/buildasm/fr.c.ejs +++ b/c/buildasm/fr.c.ejs @@ -2,49 +2,20 @@ #include #include #include +#include -void <%=name%>_str2element(P<%=name%>Element pE, char *s) { - mpz_t r; - mpz_init(r); - mpz_set_str(r, s, 10); - pE->type = <%=name%>_LONG; - for (int i=0; i<<%=name%>_N64; i++) pE->longVal[i] = 0; - mpz_export((void *)pE->longVal, NULL, -1, 8, -1, 0, r); -} +mpz_t q; +mpz_t zero; +mpz_t one; +mpz_t mask; +size_t nBits; 
-char *<%=name%>_element2str(P<%=name%>Element pE) { - mpz_t r; - mpz_t q; - if (!(pE->type & <%=name%>_LONG)) { - if (pE->shortVal>=0) { - char *r = new char[32]; - sprintf(r, "%d", pE->shortVal); - return r; - } else { - mpz_init(q); - mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); - mpz_init_set_si(r, pE->shortVal); - mpz_add(r, r, q); - mpz_clear(q); - } - } else { - <%=name%>_toNormal(pE); - mpz_init(r); - mpz_import(r, <%=name%>_N64, -1, 8, -1, 0, (const void *)pE->longVal); - } - char *res = mpz_get_str (0, 10, r); - mpz_clear(r); - return res; -} void <%=name%>_toMpz(mpz_t r, P<%=name%>Element pE) { - mpz_t q; <%=name%>_toNormal(pE); if (!(pE->type & <%=name%>_LONG)) { mpz_set_si(r, pE->shortVal); if (pE->shortVal<0) { - mpz_init(q); - mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); mpz_add(r, r, q); } } else { @@ -63,6 +34,47 @@ void <%=name%>_fromMpz(P<%=name%>Element pE, mpz_t v) { } } + +void <%=name%>_init() { + mpz_init(q); + mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_init_set_ui(zero, 0); + mpz_init_set_ui(one, 1); + nBits = mpz_sizeinbase (q, 2); + mpz_init(mask); + mpz_mul_2exp(mask, one, nBits-1); + mpz_sub(mask, mask, one); + +} + +void <%=name%>_str2element(P<%=name%>Element pE, char const *s) { + mpz_t mr; + mpz_init_set_str(mr, s, 10); + <%=name%>_fromMpz(pE, mr); +} + +char *<%=name%>_element2str(P<%=name%>Element pE) { + mpz_t r; + if (!(pE->type & <%=name%>_LONG)) { + if (pE->shortVal>=0) { + char *r = new char[32]; + sprintf(r, "%d", pE->shortVal); + return r; + } else { + mpz_init_set_si(r, pE->shortVal); + mpz_add(r, r, q); + mpz_clear(q); + } + } else { + <%=name%>_toNormal(pE); + mpz_init(r); + mpz_import(r, <%=name%>_N64, -1, 8, -1, 0, (const void *)pE->longVal); + } + char *res = mpz_get_str (0, 10, r); + mpz_clear(r); + return res; +} + void <%=name%>_idiv(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { mpz_t ma; mpz_t mb; @@ -83,15 
+95,78 @@ void <%=name%>_idiv(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element <%=name%>_fromMpz(r, mr); } -void <%=name%>_inv(P<%=name%>Element r, P<%=name%>Element a) { +void <%=name%>_mod(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { mpz_t ma; + mpz_t mb; mpz_t mr; - mpz_t q; mpz_init(ma); + mpz_init(mb); mpz_init(mr); - mpz_init(q); - mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + <%=name%>_toMpz(ma, a); + <%=name%>_toMpz(mb, b); + mpz_fdiv_r(mr, ma, mb); + <%=name%>_fromMpz(r, mr); +} + +void <%=name%>_shl(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + <%=name%>_toMpz(ma, a); + <%=name%>_toMpz(mb, b); + if (mpz_cmp_ui(mb, nBits) >= 0) { + mpz_set(mr, zero); + } else { + mpz_mul_2exp(mr, ma, mpz_get_ui(mb)); + mpz_and(mr, mr, mask); + } + <%=name%>_fromMpz(r, mr); +} + +void <%=name%>_shr(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + <%=name%>_toMpz(ma, a); + <%=name%>_toMpz(mb, b); + if (mpz_cmp_ui(mb, nBits) >= 0) { + mpz_set(mr, zero); + } else { + mpz_tdiv_q_2exp(mr, ma, mpz_get_ui(mb)); + mpz_and(mr, mr, mask); + } + <%=name%>_fromMpz(r, mr); +} + + +void <%=name%>_pow(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + <%=name%>_toMpz(ma, a); + <%=name%>_toMpz(mb, b); + mpz_powm(mr, ma, mb, q); + <%=name%>_fromMpz(r, mr); +} + +void <%=name%>_inv(P<%=name%>Element r, P<%=name%>Element a) { + mpz_t ma; + mpz_t mr; + mpz_init(ma); + mpz_init(mr); <%=name%>_toMpz(ma, a); mpz_invert(mr, ma, q); @@ -104,3 +179,32 @@ void <%=name%>_div(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b <%=name%>_mul(r, a, &tmp); } +int <%=name%>_isTrue(P<%=name%>Element pE) { + if (!(pE->type & 
<%=name%>_LONG)) return pE->shortVal != 0; + for (int i=0; i< <%=name%>_N64; i++) { + if (pE->longVal[i]) return 1; + } + return 0; +} + +int <%=name%>_toInt(P<%=name%>Element pE) { + Fr_toNormal(pE); + if (!(pE->type & <%=name%>_LONG)) { + return pE->shortVal; + } else { + mpz_t ma; + mpz_init(ma); + <%=name%>_toMpz(ma, pE); + if (mpz_fits_sint_p(ma)) { + return mpz_get_si(ma); + } + + mpz_sub(ma, ma, q); + if (mpz_fits_sint_p(ma)) { + return mpz_get_si(ma); + } else { + assert(false); + } + } +} + diff --git a/c/buildasm/fr.h b/c/buildasm/fr.h new file mode 100644 index 0000000..6bb0760 --- /dev/null +++ b/c/buildasm/fr.h @@ -0,0 +1,60 @@ +#ifndef __FR_H +#define __FR_H + +#include +#define Fr_N64 4 +#define Fr_SHORT 0x00000000 +#define Fr_LONG 0x80000000 +#define Fr_LONGMONTGOMERY 0xC0000000 +typedef struct __attribute__((__packed__)) { + int32_t shortVal; + u_int32_t type; + u_int64_t longVal[Fr_N64]; +} FrElement; +typedef FrElement *PFrElement; +extern FrElement Fr_q; +extern "C" void Fr_copy(PFrElement r, PFrElement a); +extern "C" void Fr_copyn(PFrElement r, PFrElement a, int n); +extern "C" void Fr_add(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_sub(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neg(PFrElement r, PFrElement a); +extern "C" void Fr_mul(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_square(PFrElement r, PFrElement a); +extern "C" void Fr_band(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bxor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_bnot(PFrElement r, PFrElement a); +extern "C" void Fr_eq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_neq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_gt(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_leq(PFrElement r, PFrElement a, PFrElement 
b); +extern "C" void Fr_geq(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_land(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lor(PFrElement r, PFrElement a, PFrElement b); +extern "C" void Fr_lnot(PFrElement r, PFrElement a); +extern "C" void Fr_toNormal(PFrElement pE); +extern "C" void Fr_toLongNormal(PFrElement pE); +extern "C" void Fr_toMontgomery(PFrElement pE); +void Fr_str2element(PFrElement pE, char const*s); +char *Fr_element2str(PFrElement pE); +void Fr_idiv(PFrElement r, PFrElement a, PFrElement b); +void Fr_mod(PFrElement r, PFrElement a, PFrElement b); +void Fr_inv(PFrElement r, PFrElement a); +void Fr_div(PFrElement r, PFrElement a, PFrElement b); +void Fr_shl(PFrElement r, PFrElement a, PFrElement b); +void Fr_shr(PFrElement r, PFrElement a, PFrElement b); +void Fr_pow(PFrElement r, PFrElement a, PFrElement b); + +int Fr_isTrue(PFrElement pE); +int Fr_toInt(PFrElement pE); + +void Fr_init(); + +extern FrElement Fr_q; + + +#endif // __FR_H + + + diff --git a/c/buildasm/fr.h.ejs b/c/buildasm/fr.h.ejs index 3352b8f..ad5fd0f 100644 --- a/c/buildasm/fr.h.ejs +++ b/c/buildasm/fr.h.ejs @@ -1,3 +1,6 @@ +#ifndef __<%=name.toUpperCase()%>_H +#define __<%=name.toUpperCase()%>_H + #include #define <%=name%>_N64 <%= n64 %> #define <%=name%>_SHORT 0x00000000 @@ -10,6 +13,8 @@ typedef struct __attribute__((__packed__)) { } <%=name%>Element; typedef <%=name%>Element *P<%=name%>Element; extern <%=name%>Element <%=name%>_q; +extern "C" void <%=name%>_copy(P<%=name%>Element r, P<%=name%>Element a); +extern "C" void <%=name%>_copyn(P<%=name%>Element r, P<%=name%>Element a, int n); extern "C" void <%=name%>_add(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_sub(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_neg(P<%=name%>Element r, P<%=name%>Element a); @@ -29,12 +34,27 @@ extern "C" void <%=name%>_land(P<%=name%>Element r, P<%=name%>Element a, P<%=nam 
extern "C" void <%=name%>_lor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_lnot(P<%=name%>Element r, P<%=name%>Element a); extern "C" void <%=name%>_toNormal(P<%=name%>Element pE); +extern "C" void <%=name%>_toLongNormal(P<%=name%>Element pE); extern "C" void <%=name%>_toMontgomery(P<%=name%>Element pE); -void <%=name%>_str2element(P<%=name%>Element pE, char *s); +void <%=name%>_str2element(P<%=name%>Element pE, char const*s); char *<%=name%>_element2str(P<%=name%>Element pE); void <%=name%>_idiv(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_mod(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); void <%=name%>_inv(P<%=name%>Element r, P<%=name%>Element a); void <%=name%>_div(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_shl(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_shr(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_pow(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + +int <%=name%>_isTrue(P<%=name%>Element pE); +int <%=name%>_toInt(P<%=name%>Element pE); + +void <%=name%>_init(); extern <%=name%>Element <%=name%>_q; + +#endif // __<%=name.toUpperCase()%>_H + + + diff --git a/c/buildasm/fr.o b/c/buildasm/fr.o new file mode 100644 index 0000000..3255705 Binary files /dev/null and b/c/buildasm/fr.o differ diff --git a/c/buildasm/main.c b/c/buildasm/main.c index 62d3123..7eb607f 100644 --- a/c/buildasm/main.c +++ b/c/buildasm/main.c @@ -1,4 +1,5 @@ -#include "stdio.h" +#include +#include #include "fr.h" int main() { @@ -9,16 +10,39 @@ int main() { /* FrElement a={0x43e1f593f0000000ULL,0x2833e84879b97091ULL,0xb85045b68181585dULL,0x30644e72e131a029ULL}; FrElement b = {3,0,0,0}; -*/ - FrElement c; + FrElement c; +*/ // Fr_add(&(c[0]), a, a); // Fr_add(&(c[0]), c, b); +/* for (int i=0; i<1000000000; i++) { Fr_mul(&c, &a, &b); } Fr_mul(&c,&a, &b); - printf("%llu, 
%llu, %llu, %llu\n", c.longVal[0], c.longVal[1], c.longVal[2], c.longVal[3]); +*/ + + FrElement a1[10]; + FrElement a2[10]; + for (int i=0; i<10; i++) { + a1[i].type = Fr_LONGMONTGOMERY; + a1[i].shortVal =0; + for (int j=0; j_toNormal: mov rax, [rdi] btc rax, 62 ; check if montgomery - jnc fromMontgomery_doNothing + jnc toNormal_doNothing bt rax, 63 ; if short, it means it's converted - jnc fromMontgomery_doNothing + jnc toNormal_doNothing -fromMontgomeryLong: +toNormalLong: mov [rdi], rax add rdi, 8 call rawFromMontgomery sub rdi, 8 -fromMontgomery_doNothing: +toNormal_doNothing: ret +;;;;;;;;;;;;;;;;;;;;;; +; toLongNormal +;;;;;;;;;;;;;;;;;;;;;; +; Convert a number to long normal +; rdi <= Pointer element to convert +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;; +<%=name%>_toLongNormal: + mov rax, [rdi] + bt rax, 62 ; check if montgomery + jc toLongNormal_fromMontgomery + bt rax, 63 ; check if long + jnc toLongNormal_fromShort + ret ; It is already long + +toLongNormal_fromMontgomery: + add rdi, 8 + call rawFromMontgomery + sub rdi, 8 + ret + +toLongNormal_fromShort: + mov r8, rsi ; save rsi + movsx rsi, eax + call rawCopyS2L + mov rsi, r8 ; recover rsi diff --git a/c/calcwit.cpp b/c/calcwit.cpp index 0c50fe5..04c77a1 100644 --- a/c/calcwit.cpp +++ b/c/calcwit.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include "calcwit.h" @@ -21,25 +20,16 @@ Circom_CalcWit::Circom_CalcWit(Circom_Circuit *aCircuit) { mutexes = new std::mutex[NMUTEXES]; cvs = new std::condition_variable[NMUTEXES]; inputSignalsToTrigger = new int[circuit->NComponents]; - signalValues = new BigInt[circuit->NSignals]; + signalValues = new FrElement[circuit->NSignals]; // Set one signal - mpz_init_set_ui(signalValues[0], 1); - - // Initialize remaining signals - for (int i=1; iNSignals; i++) mpz_init2(signalValues[i], 256); - - BigInt p; - mpz_init_set_str(p, circuit->P, 10); - field = new ZqField(&p); - mpz_clear(p); + 
Fr_copy(&signalValues[0], circuit->constants + 1); reset(); } Circom_CalcWit::~Circom_CalcWit() { - delete field; #ifdef SANITY_CHECK delete signalAssigned; @@ -48,8 +38,6 @@ Circom_CalcWit::~Circom_CalcWit() { delete[] cvs; delete[] mutexes; - for (int i=0; iNSignals; i++) mpz_clear(signalValues[i]); - delete[] signalValues; delete[] inputSignalsToTrigger; @@ -128,18 +116,7 @@ Circom_Sizes Circom_CalcWit::getSignalSizes(int cIdx, u64 hash) { return circuit->components[cIdx].entries[entryPos].sizes; } -PBigInt Circom_CalcWit::allocBigInts(int n) { - PBigInt res = new BigInt[n]; - for (int i=0; icomponents[cIdx].newThread)&&(currentComponentIdx != cIdx)) { std::unique_lock lk(mutexes[cIdx % NMUTEXES]); @@ -155,7 +132,7 @@ void Circom_CalcWit::getSignal(int currentComponentIdx, int cIdx, int sIdx, PBig assert(false); } #endif - mpz_set(*value, signalValues[sIdx]); + Fr_copy(value, signalValues + sIdx); /* char *valueStr = mpz_get_str(0, 10, *value); syncPrintf("%d, Get %d --> %s\n", currentComponentIdx, sIdx, valueStr); @@ -172,7 +149,7 @@ void Circom_CalcWit::finished(int cIdx) { cvs[cIdx % NMUTEXES].notify_all(); } -void Circom_CalcWit::setSignal(int currentComponentIdx, int cIdx, int sIdx, PBigInt value) { +void Circom_CalcWit::setSignal(int currentComponentIdx, int cIdx, int sIdx, PFrElement value) { // syncPrintf("setSignal: %d\n", sIdx); #ifdef SANITY_CHECK @@ -188,7 +165,7 @@ void Circom_CalcWit::setSignal(int currentComponentIdx, int cIdx, int sIdx, PBig syncPrintf("%d, Set %d --> %s\n", currentComponentIdx, sIdx, valueStr); free(valueStr); */ - mpz_set(signalValues[sIdx], *value); + Fr_copy(signalValues + sIdx, value); if ( BITMAP_ISSET(circuit->mapIsInput, sIdx) ) { if (inputSignalsToTrigger[cIdx]>0) { inputSignalsToTrigger[cIdx]--; @@ -198,11 +175,13 @@ void Circom_CalcWit::setSignal(int currentComponentIdx, int cIdx, int sIdx, PBig } -void Circom_CalcWit::checkConstraint(int currentComponentIdx, PBigInt value1, PBigInt value2, char const *err) { +void 
Circom_CalcWit::checkConstraint(int currentComponentIdx, PFrElement value1, PFrElement value2, char const *err) { #ifdef SANITY_CHECK - if (mpz_cmp(*value1, *value2) != 0) { - char *pcV1 = mpz_get_str(0, 10, *value1); - char *pcV2 = mpz_get_str(0, 10, *value2); + FrElement tmp; + Fr_eq(&tmp, value1, value2); + if (!Fr_isTrue(&tmp)) { + char *pcV1 = Fr_element2str(value1); + char *pcV2 = Fr_element2str(value2); // throw std::runtime_error(std::to_string(currentComponentIdx) + std::string(", Constraint doesn't match, ") + err + ". " + sV1 + " != " + sV2 ); fprintf(stderr, "Constraint doesn't match, %s: %s != %s", err, pcV1, pcV2); free(pcV1); @@ -227,8 +206,8 @@ void Circom_CalcWit::triggerComponent(int newCIdx) { // cIdx = oldCIdx; } -void Circom_CalcWit::log(PBigInt value) { - char *pcV = mpz_get_str(0, 10, *value); +void Circom_CalcWit::log(PFrElement value) { + char *pcV = Fr_element2str(value); syncPrintf("Log: %s\n", pcV); free(pcV); } diff --git a/c/calcwit.h b/c/calcwit.h index 1a02ee6..00f0445 100644 --- a/c/calcwit.h +++ b/c/calcwit.h @@ -2,7 +2,7 @@ #define CIRCOM_CALCWIT_H #include "circom.h" -#include "zqfield.h" +#include "fr.h" #include #include @@ -24,9 +24,8 @@ class Circom_CalcWit { std::mutex printf_mutex; - BigInt *signalValues; + FrElement *signalValues; - Circom_Circuit *circuit; void triggerComponent(int newCIdx); void calculateWitness(void *input, void *output); @@ -35,7 +34,8 @@ class Circom_CalcWit { public: - ZqField *field; + Circom_Circuit *circuit; + // Functions called by the circuit Circom_CalcWit(Circom_Circuit *aCircuit); ~Circom_CalcWit(); @@ -45,26 +45,23 @@ public: int getSignalOffset(int cIdx, u64 hash); Circom_Sizes getSignalSizes(int cIdx, u64 hash); - PBigInt allocBigInts(int n); - void freeBigInts(PBigInt bi, int n); - - void getSignal(int currentComponentIdx, int cIdx, int sIdx, PBigInt value); - void setSignal(int currentComponentIdx, int cIdx, int sIdx, PBigInt value); + void getSignal(int currentComponentIdx, int cIdx, 
int sIdx, PFrElement value); + void setSignal(int currentComponentIdx, int cIdx, int sIdx, PFrElement value); - void checkConstraint(int currentComponentIdx, PBigInt value1, PBigInt value2, char const *err); + void checkConstraint(int currentComponentIdx, PFrElement value1, PFrElement value2, char const *err); - void log(PBigInt value); + void log(PFrElement value); void finished(int cIdx); void join(); // Public functions - inline void setInput(int idx, PBigInt val) { + inline void setInput(int idx, PFrElement val) { setSignal(0, 0, circuit->wit2sig[idx], val); } - inline void getWitness(int idx, PBigInt val) { - mpz_set(*val, signalValues[circuit->wit2sig[idx]]); + inline void getWitness(int idx, PFrElement val) { + Fr_copy(val, &signalValues[circuit->wit2sig[idx]]); } void reset(); diff --git a/c/circom.h b/c/circom.h index da48033..258c02e 100644 --- a/c/circom.h +++ b/c/circom.h @@ -3,13 +3,12 @@ #include #include +#include "fr.h" class Circom_CalcWit; typedef unsigned long long u64; typedef uint32_t u32; typedef uint8_t u8; -typedef mpz_t BigInt; -typedef BigInt *PBigInt; typedef int Circom_Size; typedef Circom_Size *Circom_Sizes; @@ -49,6 +48,7 @@ public: int *wit2sig; Circom_Component *components; u32 *mapIsInput; + PFrElement constants; const char *P; }; diff --git a/c/fr.c b/c/fr.c new file mode 120000 index 0000000..b3a3f61 --- /dev/null +++ b/c/fr.c @@ -0,0 +1 @@ +buildasm/fr.c \ No newline at end of file diff --git a/c/fr.h b/c/fr.h new file mode 120000 index 0000000..f682bb8 --- /dev/null +++ b/c/fr.h @@ -0,0 +1 @@ +buildasm/fr.h \ No newline at end of file diff --git a/c/fr.o b/c/fr.o new file mode 120000 index 0000000..14a80cb --- /dev/null +++ b/c/fr.o @@ -0,0 +1 @@ +buildasm/fr.o \ No newline at end of file diff --git a/c/main.cpp b/c/main.cpp index 55ef180..37f9587 100644 --- a/c/main.cpp +++ b/c/main.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #include #include using json = nlohmann::json; @@ -40,14 +39,14 @@ void 
loadBin(Circom_CalcWit *ctx, std::string filename) { close(fd); - BigInt v; - mpz_init2(v, 256); + FrElement v; u8 *p = in; for (int i=0; i<_circuit.NInputs; i++) { - int len = *(u8 *)p; - p++; - mpz_import(v,len , -1 , 1, 0, 0, p); - p+=len; + v.type = Fr_LONG; + for (int j=0; jsetSignal(0, 0, _circuit.wit2sig[1 + _circuit.NOutputs + i], &v); } } @@ -69,8 +68,7 @@ void iterateArr(Circom_CalcWit *ctx, int o, Circom_Sizes sizes, json jarr, ItFun void itFunc(Circom_CalcWit *ctx, int o, json val) { - BigInt v; - mpz_init2(v, 256); + FrElement v; std::string s; @@ -86,7 +84,7 @@ void itFunc(Circom_CalcWit *ctx, int o, json val) { handle_error("Invalid JSON type"); } - mpz_set_str (v, s.c_str(), 10); + Fr_str2element (&v, s.c_str()); ctx->setSignal(0, 0, o, &v); } @@ -120,16 +118,14 @@ void writeOutBin(Circom_CalcWit *ctx, std::string filename) { write_ptr = fopen(filename.c_str(),"wb"); - BigInt v; - mpz_init2(v, 256); + FrElement v; u8 buffOut[256]; for (int i=0;i<_circuit.NVars;i++) { size_t size=256; ctx->getWitness(i, &v); - mpz_export(buffOut+1, &size, -1, 1, -1, 0, v); - *buffOut = (u8)size; - fwrite(buffOut, size+1, 1, write_ptr); + Fr_toLongNormal(&v); + fwrite(v.longVal, Fr_N64*8, 1, write_ptr); } fclose(write_ptr); @@ -143,16 +139,14 @@ void writeOutJson(Circom_CalcWit *ctx, std::string filename) { outFile << "[\n"; - BigInt v; - mpz_init2(v, 256); - - char pcV[256]; + FrElement v; for (int i=0;i<_circuit.NVars;i++) { ctx->getWitness(i, &v); - mpz_get_str(pcV, 10, v); + char *pcV = Fr_element2str(&v); std::string sV = std::string(pcV); outFile << (i ? 
"," : " ") << "\"" << sV << "\"\n"; + free(pcV); } outFile << "]\n"; @@ -168,6 +162,7 @@ bool hasEnding (std::string const &fullString, std::string const &ending) { } int main(int argc, char *argv[]) { + Fr_init(); if (argc!=3) { std::string cl = argv[0]; std::string base_filename = cl.substr(cl.find_last_of("/\\") + 1); diff --git a/src/c_build.js b/src/c_build.js index d302909..12d6ea2 100644 --- a/src/c_build.js +++ b/src/c_build.js @@ -35,6 +35,11 @@ function buildC(ctx) { ctx.codes_sizes = []; ctx.definedSizes = {}; ctx.addSizes = addSizes; + ctx.constants = []; + ctx.constantsMap = {}; + ctx.addConstant = addConstant; + ctx.addConstant(bigInt.zero); + ctx.addConstant(bigInt.one); const entryTables = buildEntryTables(ctx); ctx.globalNames = ctx.uniqueNames; @@ -45,6 +50,7 @@ function buildC(ctx) { const headder = buildHeader(ctx); const sizes = buildSizes(ctx); + const constants = buildConstants(ctx); const mapIsInput = buildMapIsInput(ctx); const wit2Sig = buildWit2Sig(ctx); const circuitVar = buildCircuitVar(ctx); @@ -52,6 +58,7 @@ function buildC(ctx) { return "" + headder + "\n" + sizes + "\n" + + constants + "\n" + entryTables + "\n" + functions + "\n" + code + "\n" + @@ -244,6 +251,69 @@ function buildSizes(ctx) { ctx.codes_sizes.join("\n"); } +function buildConstants(ctx) { + const n64 = Math.floor((ctx.field.p.bitLength() - 1) / 64)+1; + const R = bigInt.one.shiftLeft(n64*64); + + const lines = []; + lines.push("// Constants"); + lines.push(`FrElement _constants[${ctx.constants.length}] = {`); + for (let i=0; i0 ? 
"," : " ") + "{" + number2Code(ctx.constants[i]) + "}"); + } + lines.push("};"); + + return lines.join("\n"); + + function number2Code(n) { + if (n.lt(bigInt("80000000", 16)) ) { + return addShortMontgomeryPositive(n); + } + if (n.geq(ctx.field.p.minus(bigInt("80000000", 16))) ) { + return addShortMontgomeryNegative(n); + } + return addLongMontgomery(n); + + + function addShortMontgomeryPositive(a) { + return `${a.toString()}, 0x40000000, { ${getLongString(toMontgomery(a))} }`; + } + + + function addShortMontgomeryNegative(a) { + const b = a.minus(ctx.field.p); + return `${b.toString()}, 0x40000000, { ${getLongString(toMontgomery(a))} }`; + } + + function addLongMontgomery(a) { + return `0, 0xC0000000, { ${getLongString(toMontgomery(a))} }`; + } + + function getLongString(a) { + let r = bigInt(a); + let S = ""; + let i = 0; + while (!r.isZero()) { + if (S!= "") S = S+","; + S += "0x" + r.and(bigInt("FFFFFFFFFFFFFFFF", 16)).toString(16) + "LL"; + i++; + r = r.shiftRight(64); + } + while (iallocBigInts(${v.sizes[0]});\n`; - ctx.codeFooter += `ctx->freeBigInts(${v.label}, ${v.sizes[0]});\n`; + ctx.codeHeader += `FrElement ${v.label}[${v.sizes[0]}];\n`; } else if (v.type=="INT") { ctx.codeHeader += `int ${v.label};\n`; } else if (v.type=="SIZES") { @@ -55,8 +54,8 @@ function instantiateRef(ctx, refId, initValue) { if (v.type == "BIGINT") { for (let i=0; icircuit->constants +${idConstant});\n`; } } } @@ -67,12 +66,11 @@ function instantiateConstant(ctx, value) { const sizes = utils.accSizes(utils.extractSizes(value)); const flatedValue = utils.flatArray(value); const label = ctx.getUniqueName("_const"); - ctx.codeHeader += `PBigInt ${label};\n`; - ctx.codeHeader += `${label} = ctx->allocBigInts(${sizes[0]});\n`; + ctx.codeHeader += `FrElement ${label}[${sizes[0]}];\n`; for (let i=0; icircuit->constants +${idConstant});\n`; } - ctx.codeFooter += `ctx->freeBigInts(${label}, ${sizes[0]});\n`; const refId = ctx.refs.length; ctx.refs.push({ type: "BIGINT", @@ -392,7 +390,7 
@@ function genGetOffset(ctx, refOffset, vSizes, sels) { if (rStr != "") rStr += " + "; if (iIdx.used) { - rStr += `ctx->field->toInt(${iIdx.label})`; + rStr += `Fr_toInt(${iIdx.label})`; } else { rStr += iIdx.value[0].toString(); } @@ -451,14 +449,14 @@ function genVariable(ctx, ast) { const refRes = newRef(ctx, "BIGINT", "_v", null, v.sizes.slice(l)); const res = ctx.refs[refRes]; res.used = true; - ctx.codeHeader += `PBigInt ${res.label};\n`; + ctx.codeHeader += `PFrElement ${res.label};\n`; ctx.code += `${res.label} = ${v.label} + ${offset.label};\n`; return refRes; } else if ((offset.value[0])||(l>0)) { const refRes = newRef(ctx, "BIGINT", "_v", null, v.sizes.slice(l)); const res = ctx.refs[refRes]; res.used = true; - ctx.codeHeader += `PBigInt ${res.label};\n`; + ctx.codeHeader += `PFrElement ${res.label};\n`; ctx.code += `${res.label} = ${v.label} + ${offset.value[0]};\n`; return refRes; } else { @@ -470,7 +468,7 @@ function genVariable(ctx, ast) { const resRef = newRef(ctx, "BIGINT", "_v", null, v.sizes.slice(l)); const res = ctx.refs[resRef]; res.used = true; - ctx.codeHeader += `PBigInt ${res.label};\n`; + ctx.codeHeader += `PFrElement ${res.label};\n`; ctx.code += `${res.label} = ${v.label} + ${offset.label};\n`; return resRef; } else { @@ -685,12 +683,12 @@ function genVarAssignment(ctx, ast, lRef, sels, rRef) { if (instantiated) { if (offset.used) { - ctx.code += `ctx->field->copyn(${left.label} + ${offset.label}, ${right.label}, ${right.sizes[0]});\n`; + ctx.code += `Fr_copyn(${left.label} + ${offset.label}, ${right.label}, ${right.sizes[0]});\n`; } else { if (offset.value[0]>0) { - ctx.code += `ctx->field->copyn(${left.label} + ${offset.value[0]}, ${right.label}, ${right.sizes[0]});\n`; + ctx.code += `Fr_copyn(${left.label} + ${offset.value[0]}, ${right.label}, ${right.sizes[0]});\n`; } else { - ctx.code += `ctx->field->copyn(${left.label}, ${right.label}, ${right.sizes[0]});\n`; + ctx.code += `Fr_copyn(${left.label}, ${right.label}, 
${right.sizes[0]});\n`; } } } else { @@ -783,7 +781,7 @@ function genArray(ctx, ast) { for (let i=0; ifield->copyn(${r.label}+${i*subSizes[0]}, ${v.label}, ${subSizes[0]});\n`; + ctx.code += `Fr_copyn(${r.label}+${i*subSizes[0]}, ${v.label}, ${subSizes[0]});\n`; } return rRef; } else { @@ -883,7 +881,7 @@ function genLoop(ctx, ast) { instantiateRef(ctx, condVarRef); ctx.code += - `${condVar.label} = ctx->field->isTrue(${cond.label});\n` + + `${condVar.label} = Fr_isTrue(${cond.label});\n` + `while (${condVar.label}) {\n`; } else { if (!utils.isDefined(cond.value)) return ctx.throwError(ast, "condition value not assigned"); @@ -924,7 +922,7 @@ function genLoop(ctx, ast) { instantiateRef(ctx, condVarRef); ctx.code += - `${condVar.label} = ctx->field->isTrue(${cond2.label});\n` + + `${condVar.label} = Fr_isTrue(${cond2.label});\n` + `while (${condVar.label}) {\n`; } else { ctx.code = oldCode + ctx.code; @@ -935,7 +933,7 @@ function genLoop(ctx, ast) { oldCode + utils.ident( ctx.code + - `${condVar.label} = ctx->field->isTrue(${cond2.label});\n`); + `${condVar.label} = Fr_isTrue(${cond2.label});\n`); end=true; } } @@ -960,7 +958,7 @@ function genIf(ctx, ast) { if (cond.used) { enterConditionalCode(ctx, ast); - ctx.code += `if (ctx->field->isTrue(${cond.label})) {\n`; + ctx.code += `if (Fr_isTrue(${cond.label})) {\n`; const oldCode = ctx.code; ctx.code = ""; @@ -1007,7 +1005,7 @@ function genReturn(ctx, ast) { instantiateRef(ctx, vRef, v.value); } if (v.used) { - ctx.code += `ctx->field->copyn(__retValue, ${v.label}, ${v.sizes[0]});\n`; + ctx.code += `Fr_copyn(__retValue, ${v.label}, ${v.sizes[0]});\n`; } else { if (!utils.isDefined(v.value)) return ctx.throwError(ast, "Returning an unknown value"); if (!utils.isDefined(ctx.returnValue)) { @@ -1051,7 +1049,7 @@ function genOpOp(ctx, ast, op, lr) { const res = ctx.refs[resRef]; if (veval.used) { instantiateRef(ctx, resRef); - ctx.code += `ctx->field->${op}(${res.label}, ${veval.label}, &(ctx->field->one));\n`; + ctx.code 
+= `Fr_${op}(${res.label}, ${veval.label}, ctx->circuit->constants + ${ctx.addConstant(bigInt.one)});\n`; } else { res.value = [ctx.field[op](veval.value[0], bigInt(1))]; } @@ -1097,7 +1095,7 @@ function genOp(ctx, ast, op, nOps) { rRef = newRef(ctx, "BIGINT", "_tmp"); instantiateRef(ctx, rRef); const r = ctx.refs[rRef]; - let c = `ctx->field->${op}(${r.label}`; + let c = `Fr_${op}(${r.label}`; for (let i=0; ifield->isTrue(${cond.label})) {\n`; + ctx.codeHeader += `PFrElement ${rLabel};\n`; + ctx.code += `if (Fr_isTrue(${cond.label})) {\n`; oldCode = ctx.code; ctx.code = ""; diff --git a/src/c_tester.js b/src/c_tester.js index dfadafb..0cc1b46 100644 --- a/src/c_tester.js +++ b/src/c_tester.js @@ -23,6 +23,8 @@ async function c_tester(circomFile, _options) { const dir = await tmp.dir({prefix: "circom_", unsafeCleanup: true }); + // console.log(dir.path); + const baseName = path.basename(circomFile, ".circom"); const options = Object.assign({}, _options); @@ -40,7 +42,8 @@ async function c_tester(circomFile, _options) { ` ${path.join(cdir, "main.cpp")}` + ` ${path.join(cdir, "calcwit.cpp")}` + ` ${path.join(cdir, "utils.cpp")}` + - ` ${path.join(cdir, "zqfield.cpp")}` + + ` ${path.join(cdir, "fr.c")}` + + ` ${path.join(cdir, "fr.o")}` + ` ${path.join(dir.path, baseName + ".cpp")} ` + ` -o ${path.join(dir.path, baseName)}` + ` -I ${cdir}` + diff --git a/test/basiccases.js b/test/basiccases.js index b5619e7..edd3224 100644 --- a/test/basiccases.js +++ b/test/basiccases.js @@ -44,7 +44,6 @@ async function doTest(circuit, testVectors) { describe("basic cases", function () { this.timeout(100000); -/* it("inout", async () => { await doTest( "inout.circom", @@ -112,6 +111,7 @@ describe("basic cases", function () { ] ); }); + it("function1", async () => { await doTest( "function1.circom", @@ -239,7 +239,7 @@ describe("basic cases", function () { [{in: [ 8, 9]}, {lt: 1, leq: 1, eq:0, neq:1, geq: 0, gt:0}], [{in: [-2,-2]}, {lt: 0, leq: 1, eq:1, neq:0, geq: 1, gt:0}], [{in: 
[-1,-2]}, {lt: 0, leq: 0, eq:0, neq:1, geq: 1, gt:1}], - [{in: [ 1,-1]}, {lt: 1, leq: 1, eq:0, neq:1, geq: 0, gt:0}], // In mod, negative values are higher than positive. + [{in: [ 1,-1]}, {lt: 0, leq: 0, eq:0, neq:1, geq: 1, gt:1}], // In mod, negative values are higher than positive. ] ); }); @@ -299,7 +299,6 @@ describe("basic cases", function () { ] ); }); -*/ it("Component array 2d", async () => { await doTest( "componentarray2.circom", @@ -309,7 +308,6 @@ describe("basic cases", function () { ] ); }); -/* it("Constant circuit", async () => { await doTest( "constantcircuit.circom", @@ -339,5 +337,4 @@ describe("basic cases", function () { ] ); }); -*/ }); diff --git a/test/circuits/inc.json b/test/circuits/inc.json new file mode 100644 index 0000000..04cb7f5 --- /dev/null +++ b/test/circuits/inc.json @@ -0,0 +1 @@ +{"x": "3"}