diff --git a/c/buildasm/binops.asm.ejs b/c/buildasm/binops.asm.ejs index 1ca5068..43a06fe 100644 --- a/c/buildasm/binops.asm.ejs +++ b/c/buildasm/binops.asm.ejs @@ -85,7 +85,7 @@ <% function binOp(op) { %> ;;;;;;;;;;;;;;;;;;;;;; -; <%= op %> +; b<%= op %> ;;;;;;;;;;;;;;;;;;;;;; ; Adds two elements of any kind ; Params: @@ -176,3 +176,42 @@ <%= binOp("xor") %> +;;;;;;;;;;;;;;;;;;;;;; +; bnot +;;;;;;;;;;;;;;;;;;;;;; +; Bitwise NOT of an element of any kind (top limb is masked with lboMask) +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, r10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +<%=name%>_bnot: + <%= global.setTypeDest("0x80"); %> + + mov r8, [rsi] + bt r8, 63 ; Check if is long operand + jc bnot_l1 +bnot_s: + <%= global.toLong_a() %> + jmp bnot_l1n + +bnot_l1: + bt r8, 62 ; check if montgomery first + jnc bnot_l1n + +bnot_l1m: +<%= global.fromMont_a() %> + +bnot_l1n: +<% for (let i=0; i<n64; i++) { %> + mov rax, [rsi + <%= i*8 + 8 %>] + not rax ; invert limb <%= "i" %> of the long data +<% if (i== n64-1) { %> + and rax, [lboMask] ; mask applied only to the last limb (i == n64-1) +<% } %> + mov [rdi + <%= i*8 + 8 %>], rax +<% } %> + ret + + diff --git a/c/buildasm/buildzqfield.js b/c/buildasm/buildzqfield.js index 9a2aeaf..e223bac 100644 --- a/c/buildasm/buildzqfield.js +++ b/c/buildasm/buildzqfield.js @@ -15,9 +15,10 @@ class ZqBuilder { this.bigInt = bigInt; this.lastTmp=0; this.global = {}; - this.global.tmpLabel = function() { + this.global.tmpLabel = function(label) { self.lastTmp++; - return "tmp"+self.lastTmp; + label = label || "tmp"; + return label+"_"+self.lastTmp; }; } diff --git a/c/buildasm/fr.asm b/c/buildasm/fr.asm deleted file mode 100644 index 201a730..0000000 --- a/c/buildasm/fr.asm +++ /dev/null @@ -1,4854 +0,0 @@ - - - global Fr_add - global Fr_sub - global Fr_neg - global Fr_mul - global Fr_band - global Fr_bor - global Fr_bxor - global Fr_eq - global Fr_neq - global Fr_lt - global Fr_gt - global Fr_leq - global Fr_geq - global Fr_toNormal - global Fr_toMontgomery - global Fr_q - DEFAULT REL - - section .text - - - - - - - - - - - - - - 
- - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawCopyS2L -;;;;;;;;;;;;;;;;;;;;;; -; Convert a 64 bit integer to a long format field element -; Params: -; rsi <= the integer -; rdi <= Pointer to the overwritted element -; -; Nidified registers: -; rax -;;;;;;;;;;;;;;;;;;;;;;; - -rawCopyS2L: - mov al, 0x80 - shl rax, 56 - mov [rdi], rax ; set the result to LONG normal - - cmp rsi, 0 - js u64toLong_adjust_neg - - mov [rdi + 8], rsi - xor rax, rax - - mov [rdi + 16], rax - - mov [rdi + 24], rax - - mov [rdi + 32], rax - - ret - -u64toLong_adjust_neg: - add rsi, [q] ; Set the first digit - mov [rdi + 8], rsi ; - - mov rsi, -1 ; all ones - - mov rax, rsi ; Add to q - adc rax, [q + 8 ] - mov [rdi + 16], rax - - mov rax, rsi ; Add to q - adc rax, [q + 16 ] - mov [rdi + 24], rax - - mov rax, rsi ; Add to q - adc rax, [q + 24 ] - mov [rdi + 32], rax - - ret - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawMontgomeryMul -;;;;;;;;;;;;;;;;;;;;;; -; Multiply two elements in montgomery form -; Params: -; rsi <= Pointer to the long data of element 1 -; rdx <= Pointer to the long data of element 2 -; rdi <= Pointer to the long data of result -; Modified registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; - -rawMontgomeryMul: - sub rsp, 32 ; Reserve space for ms - mov rcx, rdx ; rdx is needed for multiplications so keep it in cx - mov r11, 0xc2e1f593efffffff ; np - xor r8,r8 - xor r9,r9 - xor r10,r10 - - mov rax, [rsi + 0] - mul qword [rcx + 0] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - - - mov rax, r8 - mul r11 - mov [rsp + 0], rax - mul qword [q] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, [rsi + 0] - mul qword [rcx + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsi + 8] - mul qword [rcx + 0] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, [rsp + 0] - mul qword [q + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, r9 - mul r11 - mov [rsp + 8], rax - mul qword [q] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, 
[rsi + 0] - mul qword [rcx + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsi + 8] - mul qword [rcx + 8] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsi + 16] - mul qword [rcx + 0] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, [rsp + 8] - mul qword [q + 8] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsp + 0] - mul qword [q + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, r10 - mul r11 - mov [rsp + 16], rax - mul qword [q] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, [rsi + 0] - mul qword [rcx + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsi + 8] - mul qword [rcx + 16] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsi + 16] - mul qword [rcx + 8] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsi + 24] - mul qword [rcx + 0] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, [rsp + 16] - mul qword [q + 8] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsp + 8] - mul qword [q + 16] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsp + 0] - mul qword [q + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, r8 - mul r11 - mov [rsp + 24], rax - mul qword [q] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, [rsi + 8] - mul qword [rcx + 24] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsi + 16] - mul qword [rcx + 16] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsi + 24] - mul qword [rcx + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, [rsp + 24] - mul qword [q + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsp + 16] - mul qword [q + 16] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsp + 8] - mul qword [q + 24] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov [rdi + 0 ], r9 - xor r9,r9 - - - - mov rax, [rsi + 16] - mul qword [rcx + 24] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsi + 24] - mul qword [rcx + 16] - 
add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, [rsp + 24] - mul qword [q + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsp + 16] - mul qword [q + 24] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov [rdi + 8 ], r10 - xor r10,r10 - - - - mov rax, [rsi + 24] - mul qword [rcx + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, [rsp + 24] - mul qword [q + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov [rdi + 16 ], r8 - xor r8,r8 - - - - - - - - mov [rdi + 24 ], r9 - xor r9,r9 - - - - test r10, r10 - jnz rawMontgomeryMul_mulM_sq - ; Compare with q - - mov rax, [rdi + 24] - cmp rax, [q + 24] - jc rawMontgomeryMul_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul_mulM_sq ; q is lower - - mov rax, [rdi + 16] - cmp rax, [q + 16] - jc rawMontgomeryMul_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul_mulM_sq ; q is lower - - mov rax, [rdi + 8] - cmp rax, [q + 8] - jc rawMontgomeryMul_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul_mulM_sq ; q is lower - - mov rax, [rdi + 0] - cmp rax, [q + 0] - jc rawMontgomeryMul_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul_mulM_sq ; q is lower - - ; If equal substract q - -rawMontgomeryMul_mulM_sq: - - mov rax, [q + 0] - sub [rdi + 0], rax - - mov rax, [q + 8] - sbb [rdi + 8], rax - - mov rax, [q + 16] - sbb [rdi + 16], rax - - mov rax, [q + 24] - sbb [rdi + 24], rax - - -rawMontgomeryMul_mulM_done: - mov rdx, rcx ; recover rdx to its original place. 
- add rsp, 32 ; recover rsp - ret - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawMontgomeryMul1 -;;;;;;;;;;;;;;;;;;;;;; -; Multiply two elements in montgomery form -; Params: -; rsi <= Pointer to the long data of element 1 -; rdx <= second operand -; rdi <= Pointer to the long data of result -; Modified registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; - -rawMontgomeryMul1: - sub rsp, 32 ; Reserve space for ms - mov rcx, rdx ; rdx is needed for multiplications so keep it in cx - mov r11, 0xc2e1f593efffffff ; np - xor r8,r8 - xor r9,r9 - xor r10,r10 - - mov rax, [rsi + 0] - mul rcx - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - - - mov rax, r8 - mul r11 - mov [rsp + 0], rax - mul qword [q] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, [rsi + 8] - mul rcx - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, [rsp + 0] - mul qword [q + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, r9 - mul r11 - mov [rsp + 8], rax - mul qword [q] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, [rsi + 16] - mul rcx - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, [rsp + 8] - mul qword [q + 8] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsp + 0] - mul qword [q + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, r10 - mul r11 - mov [rsp + 16], rax - mul qword [q] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, [rsi + 24] - mul rcx - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, [rsp + 16] - mul qword [q + 8] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsp + 8] - mul qword [q + 16] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsp + 0] - mul qword [q + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, r8 - mul r11 - mov [rsp + 24], rax - mul qword [q] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - - - mov rax, [rsp + 24] - mul qword [q + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsp + 16] - mul qword [q + 16] - 
add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsp + 8] - mul qword [q + 24] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov [rdi + 0 ], r9 - xor r9,r9 - - - - - - mov rax, [rsp + 24] - mul qword [q + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsp + 16] - mul qword [q + 24] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov [rdi + 8 ], r10 - xor r10,r10 - - - - - - mov rax, [rsp + 24] - mul qword [q + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov [rdi + 16 ], r8 - xor r8,r8 - - - - - - - - mov [rdi + 24 ], r9 - xor r9,r9 - - - - test r10, r10 - jnz rawMontgomeryMul1_mulM_sq - ; Compare with q - - mov rax, [rdi + 24] - cmp rax, [q + 24] - jc rawMontgomeryMul1_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul1_mulM_sq ; q is lower - - mov rax, [rdi + 16] - cmp rax, [q + 16] - jc rawMontgomeryMul1_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul1_mulM_sq ; q is lower - - mov rax, [rdi + 8] - cmp rax, [q + 8] - jc rawMontgomeryMul1_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul1_mulM_sq ; q is lower - - mov rax, [rdi + 0] - cmp rax, [q + 0] - jc rawMontgomeryMul1_mulM_done ; q is bigget so done. - jnz rawMontgomeryMul1_mulM_sq ; q is lower - - ; If equal substract q - -rawMontgomeryMul1_mulM_sq: - - mov rax, [q + 0] - sub [rdi + 0], rax - - mov rax, [q + 8] - sbb [rdi + 8], rax - - mov rax, [q + 16] - sbb [rdi + 16], rax - - mov rax, [q + 24] - sbb [rdi + 24], rax - - -rawMontgomeryMul1_mulM_done: - mov rdx, rcx ; recover rdx to its original place. 
- add rsp, 32 ; recover rsp - ret - - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawFromMontgomery -;;;;;;;;;;;;;;;;;;;;;; -; Multiply two elements in montgomery form -; Params: -; rsi <= Pointer to the long data of element 1 -; rdi <= Pointer to the long data of result -; Modified registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; - -rawFromMontgomery: - sub rsp, 32 ; Reserve space for ms - mov rcx, rdx ; rdx is needed for multiplications so keep it in cx - mov r11, 0xc2e1f593efffffff ; np - xor r8,r8 - xor r9,r9 - xor r10,r10 - - add r8, [rdi + 0] - adc r9, 0x0 - adc r10, 0x0 - - - - - - mov rax, r8 - mul r11 - mov [rsp + 0], rax - mul qword [q] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - add r9, [rdi + 8] - adc r10, 0x0 - adc r8, 0x0 - - - - mov rax, [rsp + 0] - mul qword [q + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - mov rax, r9 - mul r11 - mov [rsp + 8], rax - mul qword [q] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - - - add r10, [rdi + 16] - adc r8, 0x0 - adc r9, 0x0 - - - - mov rax, [rsp + 8] - mul qword [q + 8] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsp + 0] - mul qword [q + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov rax, r10 - mul r11 - mov [rsp + 16], rax - mul qword [q] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - add r8, [rdi + 24] - adc r9, 0x0 - adc r10, 0x0 - - - - mov rax, [rsp + 16] - mul qword [q + 8] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsp + 8] - mul qword [q + 16] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - mov rax, [rsp + 0] - mul qword [q + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov rax, r8 - mul r11 - mov [rsp + 24], rax - mul qword [q] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - - - mov rax, [rsp + 24] - mul qword [q + 8] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsp + 16] - mul qword [q + 16] - add r9, rax - adc r10, rdx - adc r8, 0x0 - - mov rax, [rsp + 8] - mul qword [q + 24] - add r9, rax - adc r10, rdx - adc r8, 
0x0 - - - - mov [rdi + 0 ], r9 - xor r9,r9 - - - - - - mov rax, [rsp + 24] - mul qword [q + 16] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - mov rax, [rsp + 16] - mul qword [q + 24] - add r10, rax - adc r8, rdx - adc r9, 0x0 - - - - mov [rdi + 8 ], r10 - xor r10,r10 - - - - - - mov rax, [rsp + 24] - mul qword [q + 24] - add r8, rax - adc r9, rdx - adc r10, 0x0 - - - - mov [rdi + 16 ], r8 - xor r8,r8 - - - - - - - - mov [rdi + 24 ], r9 - xor r9,r9 - - - - test r10, r10 - jnz rawFromMontgomery_mulM_sq - ; Compare with q - - mov rax, [rdi + 24] - cmp rax, [q + 24] - jc rawFromMontgomery_mulM_done ; q is bigget so done. - jnz rawFromMontgomery_mulM_sq ; q is lower - - mov rax, [rdi + 16] - cmp rax, [q + 16] - jc rawFromMontgomery_mulM_done ; q is bigget so done. - jnz rawFromMontgomery_mulM_sq ; q is lower - - mov rax, [rdi + 8] - cmp rax, [q + 8] - jc rawFromMontgomery_mulM_done ; q is bigget so done. - jnz rawFromMontgomery_mulM_sq ; q is lower - - mov rax, [rdi + 0] - cmp rax, [q + 0] - jc rawFromMontgomery_mulM_done ; q is bigget so done. - jnz rawFromMontgomery_mulM_sq ; q is lower - - ; If equal substract q - -rawFromMontgomery_mulM_sq: - - mov rax, [q + 0] - sub [rdi + 0], rax - - mov rax, [q + 8] - sbb [rdi + 8], rax - - mov rax, [q + 16] - sbb [rdi + 16], rax - - mov rax, [q + 24] - sbb [rdi + 24], rax - - -rawFromMontgomery_mulM_done: - mov rdx, rcx ; recover rdx to its original place. 
- add rsp, 32 ; recover rsp - ret - - - -;;;;;;;;;;;;;;;;;;;;;; -; toMontgomery -;;;;;;;;;;;;;;;;;;;;;; -; Convert a number to Montgomery -; rdi <= Pointer element to convert -; Modified registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;; -Fr_toMontgomery: - mov rax, [rdi] - bts rax, 62 ; check if montgomery - jc toMontgomery_doNothing - bts rax, 63 - jc toMontgomeryLong - -toMontgomeryShort: - mov [rdi], rax - add rdi, 8 - push rsi - lea rsi, [R2] - movsx rdx, eax - cmp rdx, 0 - js negMontgomeryShort -posMontgomeryShort: - call rawMontgomeryMul1 - pop rsi - sub rdi, 8 - ret - -negMontgomeryShort: - neg rdx ; Do the multiplication positive and then negate the result. - call rawMontgomeryMul1 - mov rsi, rdi - call rawNegL - pop rsi - sub rdi, 8 - ret - - -toMontgomeryLong: - mov [rdi], rax - add rdi, 8 - push rsi - mov rdx, rdi - lea rsi, [R2] - call rawMontgomeryMul - pop rsi - sub rdi, 8 - -toMontgomery_doNothing: - ret - -;;;;;;;;;;;;;;;;;;;;;; -; toNormal -;;;;;;;;;;;;;;;;;;;;;; -; Convert a number from Montgomery -; rdi <= Pointer element to convert -; Modified registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;; -Fr_toNormal: - mov rax, [rdi] - btc rax, 62 ; check if montgomery - jnc fromMontgomery_doNothing - bt rax, 63 ; if short, it means it's converted - jnc fromMontgomery_doNothing - -fromMontgomeryLong: - mov [rdi], rax - add rdi, 8 - call rawFromMontgomery - sub rdi, 8 - -fromMontgomery_doNothing: - ret - - - - - - - - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; add -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_add: - mov rax, [rsi] - mov rcx, [rdx] - bt rax, 63 ; Check if is short first operand - jc add_l1 - bt rcx, 63 ; Check if is short second operand - jc add_s1l2 - -add_s1s2: ; Both operands are short - - xor rdx, rdx - mov edx, eax - add edx, ecx 
- jo add_manageOverflow ; rsi already is the 64bits result - - mov [rdi], rdx ; not necessary to adjust so just save and return - ret - -add_manageOverflow: ; Do the operation in 64 bits - push rsi - movsx rsi, eax - movsx rdx, ecx - add rsi, rdx - call rawCopyS2L - pop rsi - ret - -add_l1: - bt rcx, 63 ; Check if is short second operand - jc add_l1l2 - -;;;;;;;; -add_l1s2: - bt rax, 62 ; check if montgomery first - jc add_l1ms2 -add_l1ns2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rsi, 8 - movsx rdx, ecx - add rdi, 8 - cmp rdx, 0 - - jns tmp1 - neg rdx - call rawSubLS - sub rdi, 8 - sub rsi, 8 - ret -tmp1: - call rawAddLS - sub rdi, 8 - sub rsi, 8 - ret - - - -add_l1ms2: - bt rcx, 62 ; check if montgomery second - jc add_l1ms2m -add_l1ms2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toMontgomery - mov rdx, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - -add_l1ms2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - - -;;;;;;;; -add_s1l2: - bt rcx, 62 ; check if montgomery first - jc add_s1l2m -add_s1l2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - lea rsi, [rdx + 8] - movsx rdx, eax - add rdi, 8 - cmp rdx, 0 - - jns tmp2 - neg rdx - call rawSubLS - sub rdi, 8 - sub rsi, 8 - ret -tmp2: - call rawAddLS - sub rdi, 8 - sub rsi, 8 - ret - - -add_s1l2m: - bt rax, 62 ; check if montgomery second - jc add_s1ml2m -add_s1nl2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toMontgomery - mov rdx, rsi - mov rsi, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - -add_s1ml2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - -;;;; -add_l1l2: - bt rax, 62 ; check if 
montgomery first - jc add_l1ml2 -add_l1nl2: - bt rcx, 62 ; check if montgomery second - jc add_l1nl2m -add_l1nl2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - -add_l1nl2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toMontgomery - mov rdx, rsi - mov rsi, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - -add_l1ml2: - bt rcx, 62 ; check if montgomery seconf - jc add_l1ml2m -add_l1ml2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toMontgomery - mov rdx, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - -add_l1ml2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawAddLL - sub rdi, 8 - sub rsi, 8 - ret - - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawAddLL -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of type long -; Params: -; rsi <= Pointer to the long data of element 1 -; rdx <= Pointer to the long data of element 2 -; rdi <= Pointer to the long data of result -; Modified Registers: -; rax -;;;;;;;;;;;;;;;;;;;;;; -rawAddLL: - ; Add component by component with carry - - mov rax, [rsi + 0] - add rax, [rdx + 0] - mov [rdi + 0], rax - - mov rax, [rsi + 8] - adc rax, [rdx + 8] - mov [rdi + 8], rax - - mov rax, [rsi + 16] - adc rax, [rdx + 16] - mov [rdi + 16], rax - - mov rax, [rsi + 24] - adc rax, [rdx + 24] - mov [rdi + 24], rax - - jc rawAddLL_sq ; if overflow, substract q - - ; Compare with q - - - cmp rax, [q + 24] - jc rawAddLL_done ; q is bigget so done. - jnz rawAddLL_sq ; q is lower - - - mov rax, [rdi + 16] - - cmp rax, [q + 16] - jc rawAddLL_done ; q is bigget so done. - jnz rawAddLL_sq ; q is lower - - - mov rax, [rdi + 8] - - cmp rax, [q + 8] - jc rawAddLL_done ; q is bigget so done. 
- jnz rawAddLL_sq ; q is lower - - - mov rax, [rdi + 0] - - cmp rax, [q + 0] - jc rawAddLL_done ; q is bigget so done. - jnz rawAddLL_sq ; q is lower - - ; If equal substract q -rawAddLL_sq: - - mov rax, [q + 0] - sub [rdi + 0], rax - - mov rax, [q + 8] - sbb [rdi + 8], rax - - mov rax, [q + 16] - sbb [rdi + 16], rax - - mov rax, [q + 24] - sbb [rdi + 24], rax - -rawAddLL_done: - ret - - -;;;;;;;;;;;;;;;;;;;;;; -; rawAddLS -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of type long -; Params: -; rdi <= Pointer to the long data of result -; rsi <= Pointer to the long data of element 1 -; rdx <= Value to be added -;;;;;;;;;;;;;;;;;;;;;; -rawAddLS: - ; Add component by component with carry - - add rdx, [rsi] - mov [rdi] ,rdx - - mov rdx, 0 - adc rdx, [rsi + 8] - mov [rdi + 8], rdx - - mov rdx, 0 - adc rdx, [rsi + 16] - mov [rdi + 16], rdx - - mov rdx, 0 - adc rdx, [rsi + 24] - mov [rdi + 24], rdx - - jc rawAddLS_sq ; if overflow, substract q - - ; Compare with q - - mov rax, [rdi + 24] - cmp rax, [q + 24] - jc rawAddLS_done ; q is bigget so done. - jnz rawAddLS_sq ; q is lower - - mov rax, [rdi + 16] - cmp rax, [q + 16] - jc rawAddLS_done ; q is bigget so done. - jnz rawAddLS_sq ; q is lower - - mov rax, [rdi + 8] - cmp rax, [q + 8] - jc rawAddLS_done ; q is bigget so done. - jnz rawAddLS_sq ; q is lower - - mov rax, [rdi + 0] - cmp rax, [q + 0] - jc rawAddLS_done ; q is bigget so done. 
- jnz rawAddLS_sq ; q is lower - - ; If equal substract q -rawAddLS_sq: - - mov rax, [q + 0] - sub [rdi + 0], rax - - mov rax, [q + 8] - sbb [rdi + 8], rax - - mov rax, [q + 16] - sbb [rdi + 16], rax - - mov rax, [q + 24] - sbb [rdi + 24], rax - -rawAddLS_done: - ret - - - - - - - - - - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; sub -;;;;;;;;;;;;;;;;;;;;;; -; Substracts two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_sub: - mov rax, [rsi] - mov rcx, [rdx] - bt rax, 63 ; Check if is long first operand - jc sub_l1 - bt rcx, 63 ; Check if is long second operand - jc sub_s1l2 - -sub_s1s2: ; Both operands are short - - xor rdx, rdx - mov edx, eax - sub edx, ecx - jo sub_manageOverflow ; rsi already is the 64bits result - - mov [rdi], rdx ; not necessary to adjust so just save and return - ret - -sub_manageOverflow: ; Do the operation in 64 bits - push rsi - movsx rsi, eax - movsx rdx, ecx - sub rsi, rdx - call rawCopyS2L - pop rsi - ret - -sub_l1: - bt rcx, 63 ; Check if is short second operand - jc sub_l1l2 - -;;;;;;;; -sub_l1s2: - bt rax, 62 ; check if montgomery first - jc sub_l1ms2 -sub_l1ns2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rsi, 8 - movsx rdx, ecx - add rdi, 8 - cmp rdx, 0 - - jns tmp3 - neg rdx - call rawAddLS - sub rdi, 8 - sub rsi, 8 - ret -tmp3: - call rawSubLS - sub rdi, 8 - sub rsi, 8 - ret - - -sub_l1ms2: - bt rcx, 62 ; check if montgomery second - jc sub_l1ms2m -sub_l1ms2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toMontgomery - mov rdx, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - -sub_l1ms2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - - -;;;;;;;; -sub_s1l2: - bt rcx, 62 ; check if 
montgomery first - jc sub_s1l2m -sub_s1l2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp eax, 0 - - js tmp4 - - ; First Operand is positive - push rsi - add rdi, 8 - movsx rsi, eax - add rdx, 8 - call rawSubSL - sub rdi, 8 - pop rsi - ret - -tmp4: ; First operand is negative - push rsi - lea rsi, [rdx + 8] - movsx rdx, eax - add rdi, 8 - neg rdx - call rawNegLS - sub rdi, 8 - pop rsi - ret - - -sub_s1l2m: - bt rax, 62 ; check if montgomery second - jc sub_s1ml2m -sub_s1nl2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toMontgomery - mov rdx, rsi - mov rsi, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - -sub_s1ml2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - -;;;; -sub_l1l2: - bt rax, 62 ; check if montgomery first - jc sub_l1ml2 -sub_l1nl2: - bt rcx, 62 ; check if montgomery second - jc sub_l1nl2m -sub_l1nl2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - -sub_l1nl2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toMontgomery - mov rdx, rsi - mov rsi, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - -sub_l1ml2: - bt rcx, 62 ; check if montgomery seconf - jc sub_l1ml2m -sub_l1ml2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toMontgomery - mov rdx, rdi - pop rdi - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - -sub_l1ml2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawSubLL - sub rdi, 8 - sub rsi, 8 - ret - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawSubLS -;;;;;;;;;;;;;;;;;;;;;; -; Substracts a short element from the 
long element -; Params: -; rdi <= Pointer to the long data of result -; rsi <= Pointer to the long data of element 1 where will be substracted -; rdx <= Value to be substracted -; [rdi] = [rsi] - rdx -; Modified Registers: -; rax -;;;;;;;;;;;;;;;;;;;;;; -rawSubLS: - ; Substract first digit - - mov rax, [rsi] - sub rax, rdx - mov [rdi] ,rax - mov rdx, 0 - - mov rax, [rsi + 8] - sbb rax, rdx - mov [rdi + 8], rax - - mov rax, [rsi + 16] - sbb rax, rdx - mov [rdi + 16], rax - - mov rax, [rsi + 24] - sbb rax, rdx - mov [rdi + 24], rax - - jnc rawSubLS_done ; if overflow, add q - - ; Add q -rawSubLS_aq: - - mov rax, [q + 0] - add [rdi + 0], rax - - mov rax, [q + 8] - adc [rdi + 8], rax - - mov rax, [q + 16] - adc [rdi + 16], rax - - mov rax, [q + 24] - adc [rdi + 24], rax - -rawSubLS_done: - ret - - -;;;;;;;;;;;;;;;;;;;;;; -; rawSubSL -;;;;;;;;;;;;;;;;;;;;;; -; Substracts a long element from a short element -; Params: -; rdi <= Pointer to the long data of result -; rsi <= Value from where will bo substracted -; rdx <= Pointer to long of the value to be substracted -; -; [rdi] = rsi - [rdx] -; Modified Registers: -; rax -;;;;;;;;;;;;;;;;;;;;;; -rawSubSL: - ; Substract first digit - sub rsi, [rdx] - mov [rdi] ,rsi - - - mov rax, 0 - sbb rax, [rdx + 8] - mov [rdi + 8], rax - - mov rax, 0 - sbb rax, [rdx + 16] - mov [rdi + 16], rax - - mov rax, 0 - sbb rax, [rdx + 24] - mov [rdi + 24], rax - - jnc rawSubSL_done ; if overflow, add q - - ; Add q -rawSubSL_aq: - - mov rax, [q + 0] - add [rdi + 0], rax - - mov rax, [q + 8] - adc [rdi + 8], rax - - mov rax, [q + 16] - adc [rdi + 16], rax - - mov rax, [q + 24] - adc [rdi + 24], rax - -rawSubSL_done: - ret - -;;;;;;;;;;;;;;;;;;;;;; -; rawSubLL -;;;;;;;;;;;;;;;;;;;;;; -; Substracts a long element from a short element -; Params: -; rdi <= Pointer to the long data of result -; rsi <= Pointer to long from where substracted -; rdx <= Pointer to long of the value to be substracted -; -; [rdi] = [rsi] - [rdx] -; Modified Registers: -; rax 
-;;;;;;;;;;;;;;;;;;;;;; -rawSubLL: - ; Substract first digit - - mov rax, [rsi + 0] - sub rax, [rdx + 0] - mov [rdi + 0], rax - - mov rax, [rsi + 8] - sbb rax, [rdx + 8] - mov [rdi + 8], rax - - mov rax, [rsi + 16] - sbb rax, [rdx + 16] - mov [rdi + 16], rax - - mov rax, [rsi + 24] - sbb rax, [rdx + 24] - mov [rdi + 24], rax - - jnc rawSubLL_done ; if overflow, add q - - ; Add q -rawSubLL_aq: - - mov rax, [q + 0] - add [rdi + 0], rax - - mov rax, [q + 8] - adc [rdi + 8], rax - - mov rax, [q + 16] - adc [rdi + 16], rax - - mov rax, [q + 24] - adc [rdi + 24], rax - -rawSubLL_done: - ret - -;;;;;;;;;;;;;;;;;;;;;; -; rawNegLS -;;;;;;;;;;;;;;;;;;;;;; -; Substracts a long element and a short element form 0 -; Params: -; rdi <= Pointer to the long data of result -; rsi <= Pointer to long from where substracted -; rdx <= short value to be substracted too -; -; [rdi] = -[rsi] - rdx -; Modified Registers: -; rax -;;;;;;;;;;;;;;;;;;;;;; -rawNegLS: - mov rax, [q] - sub rax, rdx - mov [rdi], rax - - mov rax, [q + 8 ] - sbb rax, 0 - mov [rdi + 8], rax - - mov rax, [q + 16 ] - sbb rax, 0 - mov [rdi + 16], rax - - mov rax, [q + 24 ] - sbb rax, 0 - mov [rdi + 24], rax - - setc dl - - - mov rax, [rdi + 0 ] - sub rax, [rsi + 0] - mov [rdi + 0], rax - - mov rax, [rdi + 8 ] - sbb rax, [rsi + 8] - mov [rdi + 8], rax - - mov rax, [rdi + 16 ] - sbb rax, [rsi + 16] - mov [rdi + 16], rax - - mov rax, [rdi + 24 ] - sbb rax, [rsi + 24] - mov [rdi + 24], rax - - - setc dh - or dl, dh - jz rawNegSL_done - - ; it is a negative value, so add q - - mov rax, [q + 0] - add [rdi + 0], rax - - mov rax, [q + 8] - adc [rdi + 8], rax - - mov rax, [q + 16] - adc [rdi + 16], rax - - mov rax, [q + 24] - adc [rdi + 24], rax - - -rawNegSL_done: - ret - - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; neg -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element to be negated -; rdi <= Pointer to result -; [rdi] = -[rsi] -;;;;;;;;;;;;;;;;;;;;;; -Fr_neg: - mov rax, [rsi] - bt rax, 
63 ; Check if is short first operand - jc neg_l - -neg_s: ; Operand is short - - neg eax - jo neg_manageOverflow ; Check if overflow. (0x80000000 is the only case) - - mov [rdi], rax ; not necessary to adjust so just save and return - ret - -neg_manageOverflow: ; Do the operation in 64 bits - push rsi - movsx rsi, eax - neg rsi - call rawCopyS2L - pop rsi - ret - - - -neg_l: - mov [rdi], rax ; Copy the type - - add rdi, 8 - add rsi, 8 - call rawNegL - sub rdi, 8 - sub rsi, 8 - ret - - - -;;;;;;;;;;;;;;;;;;;;;; -; rawNeg -;;;;;;;;;;;;;;;;;;;;;; -; Negates a value -; Params: -; rdi <= Pointer to the long data of result -; rsi <= Pointer to the long data of element 1 -; -; [rdi] = - [rsi] -;;;;;;;;;;;;;;;;;;;;;; -rawNegL: - ; Compare is zero - - xor rax, rax - - cmp [rsi + 0], rax - jnz doNegate - - cmp [rsi + 8], rax - jnz doNegate - - cmp [rsi + 16], rax - jnz doNegate - - cmp [rsi + 24], rax - jnz doNegate - - ; it's zero so just set to zero - - mov [rdi + 0], rax - - mov [rdi + 8], rax - - mov [rdi + 16], rax - - mov [rdi + 24], rax - - ret -doNegate: - - mov rax, [q + 0] - sub rax, [rsi + 0] - mov [rdi + 0], rax - - mov rax, [q + 8] - sbb rax, [rsi + 8] - mov [rdi + 8], rax - - mov rax, [q + 16] - sbb rax, [rsi + 16] - mov [rdi + 16], rax - - mov rax, [q + 24] - sbb rax, [rsi + 24] - mov [rdi + 24], rax - - ret - - - - - - - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; mul -;;;;;;;;;;;;;;;;;;;;;; -; Multiplies two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result -; [rdi] = [rsi] * [rdi] -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_mul: - mov r8, [rsi] - mov r9, [rdx] - bt r8, 63 ; Check if is short first operand - jc mul_l1 - bt r9, 63 ; Check if is short second operand - jc mul_s1l2 - -mul_s1s2: ; Both operands are short - - xor rax, rax - mov eax, r8d - imul r9d - jo mul_manageOverflow ; rsi already is the 64bits result - - mov [rdi], rax ; not necessary to 
adjust so just save and return - -mul_manageOverflow: ; Do the operation in 64 bits - push rsi - movsx rax, r8d - movsx rcx, r9d - imul rcx - mov rsi, rax - call rawCopyS2L - pop rsi - - ret - -mul_l1: - bt r9, 63 ; Check if is short second operand - jc mul_l1l2 - -;;;;;;;; -mul_l1s2: - bt r8, 62 ; check if montgomery first - jc mul_l1ms2 -mul_l1ns2: - bt r9, 62 ; check if montgomery first - jc mul_l1ns2m -mul_l1ns2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - push rsi - add rsi, 8 - movsx rdx, r9d - add rdi, 8 - cmp rdx, 0 - - jns tmp5 - neg rdx - call rawMontgomeryMul1 - mov rsi, rdi - call rawNegL - sub rdi, 8 - pop rsi - - jmp tmp6 -tmp5: - call rawMontgomeryMul1 - sub rdi, 8 - pop rsi -tmp6: - - - - push rsi - add rdi, 8 - mov rsi, rdi - lea rdx, [R3] - call rawMontgomeryMul - sub rdi, 8 - pop rsi - - ret - - -mul_l1ns2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - - -mul_l1ms2: - bt r9, 62 ; check if montgomery second - jc mul_l1ms2m -mul_l1ms2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - push rsi - add rsi, 8 - movsx rdx, r9d - add rdi, 8 - cmp rdx, 0 - - jns tmp7 - neg rdx - call rawMontgomeryMul1 - mov rsi, rdi - call rawNegL - sub rdi, 8 - pop rsi - - jmp tmp8 -tmp7: - call rawMontgomeryMul1 - sub rdi, 8 - pop rsi -tmp8: - - - ret - -mul_l1ms2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - - -;;;;;;;; -mul_s1l2: - bt r8, 62 ; check if montgomery first - jc mul_s1ml2 -mul_s1nl2: - bt r9, 62 ; check if montgomery first - jc mul_s1nl2m -mul_s1nl2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - push rsi - lea rsi, [rdx + 8] - movsx rdx, r8d - add rdi, 8 - cmp rdx, 0 - - jns tmp9 - neg rdx - call rawMontgomeryMul1 - mov rsi, rdi - call rawNegL - sub rdi, 8 - pop rsi - - jmp tmp10 -tmp9: - call rawMontgomeryMul1 - sub rdi, 8 - pop 
rsi -tmp10: - - - - push rsi - add rdi, 8 - mov rsi, rdi - lea rdx, [R3] - call rawMontgomeryMul - sub rdi, 8 - pop rsi - - ret - -mul_s1nl2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - push rsi - lea rsi, [rdx + 8] - movsx rdx, r8d - add rdi, 8 - cmp rdx, 0 - - jns tmp11 - neg rdx - call rawMontgomeryMul1 - mov rsi, rdi - call rawNegL - sub rdi, 8 - pop rsi - - jmp tmp12 -tmp11: - call rawMontgomeryMul1 - sub rdi, 8 - pop rsi -tmp12: - - - ret - -mul_s1ml2: - bt r9, 62 ; check if montgomery first - jc mul_s1ml2m -mul_s1ml2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - -mul_s1ml2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - -;;;; -mul_l1l2: - bt r8, 62 ; check if montgomery first - jc mul_l1ml2 -mul_l1nl2: - bt r9, 62 ; check if montgomery second - jc mul_l1nl2m -mul_l1nl2n: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - - push rsi - add rdi, 8 - mov rsi, rdi - lea rdx, [R3] - call rawMontgomeryMul - sub rdi, 8 - pop rsi - - ret - -mul_l1nl2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - -mul_l1ml2: - bt r9, 62 ; check if montgomery seconf - jc mul_l1ml2m -mul_l1ml2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - -mul_l1ml2m: - mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 - - add rdi, 8 - add rsi, 8 - add rdx, 8 - call rawMontgomeryMul - sub rdi, 8 - sub rsi, 8 - - ret - - - - - - - - - - - - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; and -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to 
element 2 -; rdi <= Pointer to result -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_band: - mov r8, [rsi] - mov r9, [rdx] - bt r8, 63 ; Check if is short first operand - jc and_l1 - bt r9, 63 ; Check if is short second operand - jc and_s1l2 - -and_s1s2: - - cmp r8d, 0 - - js tmp13 - - cmp r9d, 0 - js tmp13 - xor rdx, rdx ; both ops are positive so do the op and return - mov edx, r8d - and edx, r9d - mov [rdi], rdx ; not necessary to adjust so just save and return - ret - -tmp13: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - - -and_l1: - bt r9, 63 ; Check if is short second operand - jc and_l1l2 - - -and_l1s2: - bt r8, 62 ; check if montgomery first - jc and_l1ms2 -and_l1ns2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp r9d, 0 - - js tmp14 - movsx rax, r9d - and rax, [rsi +8] - mov [rdi+8], rax - - xor rax, rax - and rax, [rsi + 16]; - - mov [rdi + 16 ], rax; - - xor rax, rax - and rax, [rsi + 24]; - - mov [rdi + 24 ], rax; - - xor rax, rax - and rax, [rsi + 32]; - - and rax, [lboMask] ; - - mov [rdi + 32 ], rax; - - ret - -tmp14: - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - 
- mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - -and_l1ms2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push r9 ; r9 is used in montgomery so we need to save it - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - pop r9 - - cmp r9d, 0 - - js tmp15 - movsx rax, r9d - and rax, [rsi +8] - mov [rdi+8], rax - - xor rax, rax - and rax, [rsi + 16]; - - mov [rdi + 16 ], rax; - - xor rax, rax - and rax, [rsi + 24]; - - mov [rdi + 24 ], rax; - - xor rax, rax - and rax, [rsi + 32]; - - and rax, [lboMask] ; - - mov [rdi + 32 ], rax; - - ret - -tmp15: - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - -and_s1l2: - bt r9, 62 ; check if montgomery first - jc and_s1l2m -and_s1l2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp r8d, 0 - - js tmp16 - movsx rax, r8d - and rax, [rdx +8] - mov [rdi+8], rax - - xor rax, rax - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - xor rax, rax - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - xor rax, rax - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - -tmp16: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - 
and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - -and_s1l2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push r8 ; r8 is used in montgomery so we need to save it - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - pop r8 - - cmp r8d, 0 - - js tmp17 - movsx rax, r8d - and rax, [rdx +8] - mov [rdi+8], rax - - xor rax, rax - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - xor rax, rax - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - xor rax, rax - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - -tmp17: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - -and_l1l2: - bt r8, 62 ; check if montgomery first - jc and_l1ml2 - bt r9, 62 ; check if montgomery first - jc and_l1nl2m -and_l1nl2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -and_l1nl2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret 
- - -and_l1ml2: - bt r9, 62 ; check if montgomery first - jc and_l1ml2m -and_l1ml2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -and_l1ml2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - - - mov rax, [rsi + 8] - and rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - and rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - and rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - and rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - -;;;;;;;;;;;;;;;;;;;;;; -; or -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_bor: - mov r8, [rsi] - mov r9, [rdx] - bt r8, 63 ; Check if is short first operand - jc or_l1 - bt r9, 63 ; Check if is short second operand - jc or_s1l2 - -or_s1s2: - - cmp r8d, 0 - - js tmp18 - - cmp r9d, 0 - js tmp18 - xor rdx, rdx ; both ops are positive so do the op and return - mov edx, r8d - or edx, r9d - mov [rdi], rdx ; not necessary to adjust so just save and return - ret - -tmp18: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov 
rsi, rdi - pop rdx - pop rdi - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - - -or_l1: - bt r9, 63 ; Check if is short second operand - jc or_l1l2 - - -or_l1s2: - bt r8, 62 ; check if montgomery first - jc or_l1ms2 -or_l1ns2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp r9d, 0 - - js tmp19 - movsx rax, r9d - or rax, [rsi +8] - mov [rdi+8], rax - - xor rax, rax - or rax, [rsi + 16]; - - mov [rdi + 16 ], rax; - - xor rax, rax - or rax, [rsi + 24]; - - mov [rdi + 24 ], rax; - - xor rax, rax - or rax, [rsi + 32]; - - and rax, [lboMask] ; - - mov [rdi + 32 ], rax; - - ret - -tmp19: - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - -or_l1ms2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push r9 ; r9 is used in montgomery so we need to save it - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - pop r9 - - cmp r9d, 0 - - js tmp20 - movsx rax, r9d - or rax, [rsi +8] - mov [rdi+8], rax - - xor rax, rax - or rax, [rsi + 16]; - - mov [rdi + 16 ], rax; - - xor rax, rax - or rax, [rsi + 24]; - - mov [rdi + 24 ], rax; - - xor rax, rax - or rax, [rsi + 32]; - - and rax, [lboMask] ; - - mov [rdi + 32 ], rax; - - ret - -tmp20: - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - 
mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - -or_s1l2: - bt r9, 62 ; check if montgomery first - jc or_s1l2m -or_s1l2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp r8d, 0 - - js tmp21 - movsx rax, r8d - or rax, [rdx +8] - mov [rdi+8], rax - - xor rax, rax - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - xor rax, rax - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - xor rax, rax - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - -tmp21: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - -or_s1l2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push r8 ; r8 is used in montgomery so we need to save it - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - pop r8 - - cmp r8d, 0 - - js tmp22 - movsx rax, r8d - or rax, [rdx +8] - mov [rdi+8], rax - - xor rax, rax - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - xor rax, rax - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - xor rax, rax - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - -tmp22: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 
8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - -or_l1l2: - bt r8, 62 ; check if montgomery first - jc or_l1ml2 - bt r9, 62 ; check if montgomery first - jc or_l1nl2m -or_l1nl2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -or_l1nl2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -or_l1ml2: - bt r9, 62 ; check if montgomery first - jc or_l1ml2m -or_l1ml2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - - - mov rax, [rsi + 8] - or rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -or_l1ml2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - - - mov rax, [rsi + 8] - or 
rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - or rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - or rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - or rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - -;;;;;;;;;;;;;;;;;;;;;; -; xor -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_bxor: - mov r8, [rsi] - mov r9, [rdx] - bt r8, 63 ; Check if is short first operand - jc xor_l1 - bt r9, 63 ; Check if is short second operand - jc xor_s1l2 - -xor_s1s2: - - cmp r8d, 0 - - js tmp23 - - cmp r9d, 0 - js tmp23 - xor rdx, rdx ; both ops are positive so do the op and return - mov edx, r8d - xor edx, r9d - mov [rdi], rdx ; not necessary to adjust so just save and return - ret - -tmp23: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - - -xor_l1: - bt r9, 63 ; Check if is short second operand - jc xor_l1l2 - - -xor_l1s2: - bt r8, 62 ; check if montgomery first - jc xor_l1ms2 -xor_l1ns2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp r9d, 0 - - js tmp24 - movsx rax, r9d - xor rax, [rsi +8] - mov [rdi+8], rax - - xor rax, rax - xor rax, [rsi + 16]; - - mov [rdi + 16 ], rax; - - xor rax, rax - xor rax, [rsi + 24]; - - mov [rdi + 24 ], rax; - - xor rax, rax - xor rax, [rsi + 32]; 
- - and rax, [lboMask] ; - - mov [rdi + 32 ], rax; - - ret - -tmp24: - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - -xor_l1ms2: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push r9 ; r9 is used in montgomery so we need to save it - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - pop r9 - - cmp r9d, 0 - - js tmp25 - movsx rax, r9d - xor rax, [rsi +8] - mov [rdi+8], rax - - xor rax, rax - xor rax, [rsi + 16]; - - mov [rdi + 16 ], rax; - - xor rax, rax - xor rax, [rsi + 24]; - - mov [rdi + 24 ], rax; - - xor rax, rax - xor rax, [rsi + 32]; - - and rax, [lboMask] ; - - mov [rdi + 32 ], rax; - - ret - -tmp25: - push rdi - push rsi - mov rdi, rdx - movsx rsi, r9d - call rawCopyS2L - mov rdx, rdi - pop rsi - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - -xor_s1l2: - bt r9, 62 ; check if montgomery first - jc xor_s1l2m -xor_s1l2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - cmp r8d, 0 - - js tmp26 - movsx rax, r8d - xor rax, [rdx +8] - mov [rdi+8], rax - - xor rax, rax - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - xor rax, rax - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - xor rax, rax - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], 
rax - - ret - -tmp26: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - -xor_s1l2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push r8 ; r8 is used in montgomery so we need to save it - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - pop r8 - - cmp r8d, 0 - - js tmp27 - movsx rax, r8d - xor rax, [rdx +8] - mov [rdi+8], rax - - xor rax, rax - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - xor rax, rax - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - xor rax, rax - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - -tmp27: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - -xor_l1l2: - bt r8, 62 ; check if montgomery first - jc xor_l1ml2 - bt r9, 62 ; check if montgomery first - jc xor_l1nl2m -xor_l1nl2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - 
-xor_l1nl2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -xor_l1ml2: - bt r9, 62 ; check if montgomery first - jc xor_l1ml2m -xor_l1ml2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - -xor_l1ml2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - pop rdi - - - mov rax, [rsi + 8] - xor rax, [rdx + 8] - - mov [rdi + 8 ], rax - - mov rax, [rsi + 16] - xor rax, [rdx + 16] - - mov [rdi + 16 ], rax - - mov rax, [rsi + 24] - xor rax, [rdx + 24] - - mov [rdi + 24 ], rax - - mov rax, [rsi + 32] - xor rax, [rdx + 32] - - and rax, [lboMask] - - mov [rdi + 32 ], rax - - ret - - - - - - - - - - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; eq -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result can be zero or one. 
-; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_eq: - sub rsp, 40 ; Save space for the result of the substraction - push rdi ; Save rdi - lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 - call Fr_sub ; Do a substraction - call Fr_toNormal ; Convert it to normal - pop rdi - - mov rax, [rsp] ; We already poped do no need to add 8 - bt rax, 63 ; check is result is long - jc eq_longCmp - -eq_shortCmp: - cmp eax, 0 - je eq_s_eq - js eq_s_lt -eq_s_gt: - - mov qword [rdi], 0 - add rsp, 40 - ret - -eq_s_lt: - - mov qword [rdi], 0 - add rsp, 40 - ret - -eq_s_eq: - - mov qword [rdi], 1 - add rsp, 40 - ret - - -eq_longCmp: - - - cmp qword [rsp + 32], 0 - jnz eq_neq - - cmp qword [rsp + 24], 0 - jnz eq_neq - - cmp qword [rsp + 16], 0 - jnz eq_neq - - cmp qword [rsp + 8], 0 - jnz eq_neq - -eq_eq: - - mov qword [rdi], 1 - add rsp, 40 - ret - -eq_neq: - - mov qword [rdi], 0 - add rsp, 40 - ret - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; neq -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result can be zero or one. 
-; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_neq: - sub rsp, 40 ; Save space for the result of the substraction - push rdi ; Save rdi - lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 - call Fr_sub ; Do a substraction - call Fr_toNormal ; Convert it to normal - pop rdi - - mov rax, [rsp] ; We already poped do no need to add 8 - bt rax, 63 ; check is result is long - jc neq_longCmp - -neq_shortCmp: - cmp eax, 0 - je neq_s_eq - js neq_s_lt -neq_s_gt: - - mov qword [rdi], 1 - add rsp, 40 - ret - -neq_s_lt: - - mov qword [rdi], 1 - add rsp, 40 - ret - -neq_s_eq: - - mov qword [rdi], 0 - add rsp, 40 - ret - - -neq_longCmp: - - - cmp qword [rsp + 32], 0 - jnz neq_neq - - cmp qword [rsp + 24], 0 - jnz neq_neq - - cmp qword [rsp + 16], 0 - jnz neq_neq - - cmp qword [rsp + 8], 0 - jnz neq_neq - -neq_eq: - - mov qword [rdi], 0 - add rsp, 40 - ret - -neq_neq: - - mov qword [rdi], 1 - add rsp, 40 - ret - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; lt -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result can be zero or one. 
-; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_lt: - sub rsp, 40 ; Save space for the result of the substraction - push rdi ; Save rdi - lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 - call Fr_sub ; Do a substraction - call Fr_toNormal ; Convert it to normal - pop rdi - - mov rax, [rsp] ; We already poped do no need to add 8 - bt rax, 63 ; check is result is long - jc lt_longCmp - -lt_shortCmp: - cmp eax, 0 - je lt_s_eq - js lt_s_lt -lt_s_gt: - - mov qword [rdi], 0 - add rsp, 40 - ret - -lt_s_lt: - - mov qword [rdi], 1 - add rsp, 40 - ret - -lt_s_eq: - - mov qword [rdi], 0 - add rsp, 40 - ret - - -lt_longCmp: - - - cmp qword [rsp + 32], 0 - jnz lt_neq - - cmp qword [rsp + 24], 0 - jnz lt_neq - - cmp qword [rsp + 16], 0 - jnz lt_neq - - cmp qword [rsp + 8], 0 - jnz lt_neq - -lt_eq: - - - - mov qword [rdi], 0 - add rsp, 40 - ret - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp29 ; half e1-e2 is neg => e1 < e2 - jnz tmp28 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp29 ; half e1-e2 is neg => e1 < e2 - jnz tmp28 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with (q-1)/2 - jc tmp29 ; half e1-e2 is neg => e1 < e2 - jnz tmp28 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp29 ; half e1-e2 is neg => e1 < e2 - jnz tmp28 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp28: - - mov qword [rdi], 0 - add rsp, 40 - ret - -tmp29: - - mov qword [rdi], 1 - add rsp, 40 - ret - - -lt_neq: - - - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp31 ; half e1-e2 is neg => e1 < e2 - jnz tmp30 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp31 ; half e1-e2 is neg => e1 < e2 - jnz tmp30 ; 
half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with (q-1)/2 - jc tmp31 ; half e1-e2 is neg => e1 < e2 - jnz tmp30 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp31 ; half e1-e2 is neg => e1 < e2 - jnz tmp30 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp30: - - mov qword [rdi], 0 - add rsp, 40 - ret - -tmp31: - - mov qword [rdi], 1 - add rsp, 40 - ret - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; gt -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result can be zero or one. -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_gt: - sub rsp, 40 ; Save space for the result of the substraction - push rdi ; Save rdi - lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 - call Fr_sub ; Do a substraction - call Fr_toNormal ; Convert it to normal - pop rdi - - mov rax, [rsp] ; We already poped do no need to add 8 - bt rax, 63 ; check is result is long - jc gt_longCmp - -gt_shortCmp: - cmp eax, 0 - je gt_s_eq - js gt_s_lt -gt_s_gt: - - mov qword [rdi], 1 - add rsp, 40 - ret - -gt_s_lt: - - mov qword [rdi], 0 - add rsp, 40 - ret - -gt_s_eq: - - mov qword [rdi], 0 - add rsp, 40 - ret - - -gt_longCmp: - - - cmp qword [rsp + 32], 0 - jnz gt_neq - - cmp qword [rsp + 24], 0 - jnz gt_neq - - cmp qword [rsp + 16], 0 - jnz gt_neq - - cmp qword [rsp + 8], 0 - jnz gt_neq - -gt_eq: - - - - mov qword [rdi], 0 - add rsp, 40 - ret - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp33 ; half e1-e2 is neg => e1 < e2 - jnz tmp32 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp33 ; half e1-e2 is neg => e1 < e2 - jnz tmp32 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with 
(q-1)/2 - jc tmp33 ; half e1-e2 is neg => e1 < e2 - jnz tmp32 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp33 ; half e1-e2 is neg => e1 < e2 - jnz tmp32 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp32: - - mov qword [rdi], 1 - add rsp, 40 - ret - -tmp33: - - mov qword [rdi], 0 - add rsp, 40 - ret - - -gt_neq: - - - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp35 ; half e1-e2 is neg => e1 < e2 - jnz tmp34 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp35 ; half e1-e2 is neg => e1 < e2 - jnz tmp34 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with (q-1)/2 - jc tmp35 ; half e1-e2 is neg => e1 < e2 - jnz tmp34 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp35 ; half e1-e2 is neg => e1 < e2 - jnz tmp34 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp34: - - mov qword [rdi], 1 - add rsp, 40 - ret - -tmp35: - - mov qword [rdi], 0 - add rsp, 40 - ret - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; leq -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result can be zero or one. 
-; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_leq: - sub rsp, 40 ; Save space for the result of the substraction - push rdi ; Save rdi - lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 - call Fr_sub ; Do a substraction - call Fr_toNormal ; Convert it to normal - pop rdi - - mov rax, [rsp] ; We already poped do no need to add 8 - bt rax, 63 ; check is result is long - jc leq_longCmp - -leq_shortCmp: - cmp eax, 0 - je leq_s_eq - js leq_s_lt -leq_s_gt: - - mov qword [rdi], 0 - add rsp, 40 - ret - -leq_s_lt: - - mov qword [rdi], 1 - add rsp, 40 - ret - -leq_s_eq: - - mov qword [rdi], 1 - add rsp, 40 - ret - - -leq_longCmp: - - - cmp qword [rsp + 32], 0 - jnz leq_neq - - cmp qword [rsp + 24], 0 - jnz leq_neq - - cmp qword [rsp + 16], 0 - jnz leq_neq - - cmp qword [rsp + 8], 0 - jnz leq_neq - -leq_eq: - - - - mov qword [rdi], 1 - add rsp, 40 - ret - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp37 ; half e1-e2 is neg => e1 < e2 - jnz tmp36 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp37 ; half e1-e2 is neg => e1 < e2 - jnz tmp36 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with (q-1)/2 - jc tmp37 ; half e1-e2 is neg => e1 < e2 - jnz tmp36 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp37 ; half e1-e2 is neg => e1 < e2 - jnz tmp36 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp36: - - mov qword [rdi], 0 - add rsp, 40 - ret - -tmp37: - - mov qword [rdi], 1 - add rsp, 40 - ret - - -leq_neq: - - - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp39 ; half e1-e2 is neg => e1 < e2 - jnz tmp38 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp39 ; half e1-e2 is neg => e1 < e2 - 
jnz tmp38 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with (q-1)/2 - jc tmp39 ; half e1-e2 is neg => e1 < e2 - jnz tmp38 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp39 ; half e1-e2 is neg => e1 < e2 - jnz tmp38 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp38: - - mov qword [rdi], 0 - add rsp, 40 - ret - -tmp39: - - mov qword [rdi], 1 - add rsp, 40 - ret - - - - - - -;;;;;;;;;;;;;;;;;;;;;; -; geq -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdx <= Pointer to element 2 -; rdi <= Pointer to result can be zero or one. -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_geq: - sub rsp, 40 ; Save space for the result of the substraction - push rdi ; Save rdi - lea rdi, [rsp+8] ; We pushed rdi so we need to add 8 - call Fr_sub ; Do a substraction - call Fr_toNormal ; Convert it to normal - pop rdi - - mov rax, [rsp] ; We already poped do no need to add 8 - bt rax, 63 ; check is result is long - jc geq_longCmp - -geq_shortCmp: - cmp eax, 0 - je geq_s_eq - js geq_s_lt -geq_s_gt: - - mov qword [rdi], 1 - add rsp, 40 - ret - -geq_s_lt: - - mov qword [rdi], 0 - add rsp, 40 - ret - -geq_s_eq: - - mov qword [rdi], 1 - add rsp, 40 - ret - - -geq_longCmp: - - - cmp qword [rsp + 32], 0 - jnz geq_neq - - cmp qword [rsp + 24], 0 - jnz geq_neq - - cmp qword [rsp + 16], 0 - jnz geq_neq - - cmp qword [rsp + 8], 0 - jnz geq_neq - -geq_eq: - - - - mov qword [rdi], 1 - add rsp, 40 - ret - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp41 ; half e1-e2 is neg => e1 < e2 - jnz tmp40 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp41 ; half e1-e2 is neg => e1 < e2 - jnz tmp40 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp 
[half + 8], rax ; comare with (q-1)/2 - jc tmp41 ; half e1-e2 is neg => e1 < e2 - jnz tmp40 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp41 ; half e1-e2 is neg => e1 < e2 - jnz tmp40 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp40: - - mov qword [rdi], 1 - add rsp, 40 - ret - -tmp41: - - mov qword [rdi], 0 - add rsp, 40 - ret - - -geq_neq: - - - - - - - - - mov rax, [rsp + 32] - cmp [half + 24], rax ; comare with (q-1)/2 - jc tmp43 ; half e1-e2 is neg => e1 < e2 - jnz tmp42 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 24] - cmp [half + 16], rax ; comare with (q-1)/2 - jc tmp43 ; half e1-e2 is neg => e1 < e2 - jnz tmp42 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 16] - cmp [half + 8], rax ; comare with (q-1)/2 - jc tmp43 ; half e1-e2 is neg => e1 < e2 - jnz tmp42 ; half>rax => e1 -e2 is pos => e1 > e2 - - mov rax, [rsp + 8] - cmp [half + 0], rax ; comare with (q-1)/2 - jc tmp43 ; half e1-e2 is neg => e1 < e2 - jnz tmp42 ; half>rax => e1 -e2 is pos => e1 > e2 - - ; half == rax => e1-e2 is pos => e1 > e2 -tmp42: - - mov qword [rdi], 1 - add rsp, 40 - ret - -tmp43: - - mov qword [rdi], 0 - add rsp, 40 - ret - - - - - - - - - section .data -Fr_q: - dd 0 - dd 0x80000000 -q dq 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029 -half dq 0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014 -R2 dq 0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5 -R3 dq 0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c -lboMask dq 0x1fffffffffffffff - diff --git a/c/buildasm/fr.asm.ejs b/c/buildasm/fr.asm.ejs index b46b618..16066d1 100644 --- a/c/buildasm/fr.asm.ejs +++ b/c/buildasm/fr.asm.ejs @@ -4,15 +4,20 @@ global <%=name%>_sub global <%=name%>_neg global <%=name%>_mul + global <%=name%>_square global <%=name%>_band global <%=name%>_bor global 
<%=name%>_bxor + global <%=name%>_bnot global <%=name%>_eq global <%=name%>_neq global <%=name%>_lt global <%=name%>_gt global <%=name%>_leq global <%=name%>_geq + global <%=name%>_land + global <%=name%>_lor + global <%=name%>_lnot global <%=name%>_toNormal global <%=name%>_toMontgomery global <%=name%>_q @@ -28,6 +33,7 @@ <%- include('mul.asm.ejs'); %> <%- include('binops.asm.ejs'); %> <%- include('cmpops.asm.ejs'); %> +<%- include('logicalops.asm.ejs'); %> section .data <%=name%>_q: diff --git a/c/buildasm/fr.c b/c/buildasm/fr.c deleted file mode 100644 index bfd54cb..0000000 --- a/c/buildasm/fr.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "fr.h" -#include -#include -#include - -void Fr_str2element(PFrElement pE, char *s) { - mpz_t r; - mpz_init(r); - mpz_set_str(r, s, 10); - pE->type = Fr_LONG; - for (int i=0; ilongVal[i] = 0; - mpz_export((void *)pE->longVal, NULL, -1, 8, -1, 0, r); -} - -char *Fr_element2str(PFrElement pE) { - mpz_t r; - mpz_t q; - if (pE->type == Fr_SHORT) { - if (pE->shortVal>=0) { - char *r = new char[32]; - sprintf(r, "%d", pE->shortVal); - return r; - } else { - mpz_init(q); - mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); - mpz_init_set_si(r, pE->shortVal); - mpz_add(r, r, q); - mpz_clear(q); - } - } else { - Fr_toNormal(pE); - mpz_init(r); - mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)pE->longVal); - } - char *res = mpz_get_str (0, 10, r); - mpz_clear(r); - return res; -} - diff --git a/c/buildasm/fr.c.ejs b/c/buildasm/fr.c.ejs index 8149ba8..5ff7cfe 100644 --- a/c/buildasm/fr.c.ejs +++ b/c/buildasm/fr.c.ejs @@ -15,7 +15,7 @@ void <%=name%>_str2element(P<%=name%>Element pE, char *s) { char *<%=name%>_element2str(P<%=name%>Element pE) { mpz_t r; mpz_t q; - if (pE->type == <%=name%>_SHORT) { + if (!(pE->type & <%=name%>_LONG)) { if (pE->shortVal>=0) { char *r = new char[32]; sprintf(r, "%d", pE->shortVal); @@ -37,3 +37,70 @@ char *<%=name%>_element2str(P<%=name%>Element pE) { return res; } +void 
<%=name%>_toMpz(mpz_t r, P<%=name%>Element pE) { + mpz_t q; + <%=name%>_toNormal(pE); + if (!(pE->type & <%=name%>_LONG)) { + mpz_set_si(r, pE->shortVal); + if (pE->shortVal<0) { + mpz_init(q); + mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + mpz_add(r, r, q); + } + } else { + mpz_import(r, <%=name%>_N64, -1, 8, -1, 0, (const void *)pE->longVal); + } +} + +void <%=name%>_fromMpz(P<%=name%>Element pE, mpz_t v) { + if (mpz_fits_sint_p(v)) { + pE->type = <%=name%>_SHORT; + pE->shortVal = mpz_get_si(v); + } else { + pE->type = <%=name%>_LONG; + for (int i=0; i<<%=name%>_N64; i++) pE->longVal[i] = 0; + mpz_export((void *)(pE->longVal), NULL, -1, 8, -1, 0, v); + } +} + +void <%=name%>_idiv(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { + mpz_t ma; + mpz_t mb; + mpz_t mr; + mpz_init(ma); + mpz_init(mb); + mpz_init(mr); + + <%=name%>_toMpz(ma, a); + // char *s1 = mpz_get_str (0, 10, ma); + // printf("s1 %s\n", s1); + <%=name%>_toMpz(mb, b); + // char *s2 = mpz_get_str (0, 10, mb); + // printf("s2 %s\n", s2); + mpz_fdiv_q(mr, ma, mb); + // char *sr = mpz_get_str (0, 10, mr); + // printf("r %s\n", sr); + <%=name%>_fromMpz(r, mr); +} + +void <%=name%>_inv(P<%=name%>Element r, P<%=name%>Element a) { + mpz_t ma; + mpz_t mr; + mpz_t q; + mpz_init(ma); + mpz_init(mr); + + mpz_init(q); + mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal); + + <%=name%>_toMpz(ma, a); + mpz_invert(mr, ma, q); + <%=name%>_fromMpz(r, mr); +} + +void <%=name%>_div(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b) { + <%=name%>Element tmp; + <%=name%>_inv(&tmp, b); + <%=name%>_mul(r, a, &tmp); +} + diff --git a/c/buildasm/fr.h.ejs b/c/buildasm/fr.h.ejs index eda5d73..3352b8f 100644 --- a/c/buildasm/fr.h.ejs +++ b/c/buildasm/fr.h.ejs @@ -14,18 +14,27 @@ extern "C" void <%=name%>_add(P<%=name%>Element r, P<%=name%>Element a, P<%=name extern "C" void <%=name%>_sub(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); 
extern "C" void <%=name%>_neg(P<%=name%>Element r, P<%=name%>Element a); extern "C" void <%=name%>_mul(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +extern "C" void <%=name%>_square(P<%=name%>Element r, P<%=name%>Element a); extern "C" void <%=name%>_band(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_bor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_bxor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +extern "C" void <%=name%>_bnot(P<%=name%>Element r, P<%=name%>Element a); extern "C" void <%=name%>_eq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_neq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_lt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_gt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_leq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); extern "C" void <%=name%>_geq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +extern "C" void <%=name%>_land(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +extern "C" void <%=name%>_lor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +extern "C" void <%=name%>_lnot(P<%=name%>Element r, P<%=name%>Element a); extern "C" void <%=name%>_toNormal(P<%=name%>Element pE); extern "C" void <%=name%>_toMontgomery(P<%=name%>Element pE); void <%=name%>_str2element(P<%=name%>Element pE, char *s); char *<%=name%>_element2str(P<%=name%>Element pE); +void <%=name%>_idiv(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); +void <%=name%>_inv(P<%=name%>Element r, P<%=name%>Element a); +void <%=name%>_div(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b); + extern <%=name%>Element <%=name%>_q; diff --git a/c/buildasm/fr.o b/c/buildasm/fr.o deleted file mode 
100644 index 3b81456..0000000 Binary files a/c/buildasm/fr.o and /dev/null differ diff --git a/c/buildasm/logicalops.asm.ejs b/c/buildasm/logicalops.asm.ejs new file mode 100644 index 0000000..1fbbed3 --- /dev/null +++ b/c/buildasm/logicalops.asm.ejs @@ -0,0 +1,82 @@ + + +<% function isTrue(resReg, srcPtrReg) { %> +<% const longIsZero = global.tmpLabel() %> +<% const retOne = global.tmpLabel("retOne") %> +<% const retZero = global.tmpLabel("retZero") %> +<% const done = global.tmpLabel("done") %> + + mov rax, [<%=srcPtrReg%>] + bt rax, 63 + jc <%= longIsZero %> + + test eax, eax + jz <%= retZero %> + jmp <%= retOne %> + +<%= longIsZero %>: +<% for (let i=0; i + mov rax, [<%= srcPtrReg + " + " +(i*8+8) %>] + test rax, rax + jnz <%= retOne %> +<% } %> + +<%= retZero %>: + mov qword <%=resReg%>, 0 + jmp <%= done %> + +<%= retOne %>: + mov qword <%=resReg%>, 1 + +<%= done %>: +<% } %> + + + + +<% function logicalOp(op) { %> +;;;;;;;;;;;;;;;;;;;;;; +; l<%= op %> +;;;;;;;;;;;;;;;;;;;;;; +; Logical <%= op %> between two elements +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result zero or one +; Modified Registers: +; rax, rcx, r8 +;;;;;;;;;;;;;;;;;;;;;; +<%=name%>_l<%=op%>: +<%= isTrue("r8", "rsi") %> +<%= isTrue("rcx", "rdx") %> + <%=op%> rcx, r8 + mov [rdi], rcx + ret +<% } %> + +<% logicalOp("and"); %> +<% logicalOp("or"); %> + +;;;;;;;;;;;;;;;;;;;;;; +; lnot +;;;;;;;;;;;;;;;;;;;;;; +; Do the logical not of an element +; Params: +; rsi <= Pointer to element to be tested +; rdi <= Pointer to result one if element1 is zero and zero otherwise +; Modified Registers: +; rax, rax, r8 +;;;;;;;;;;;;;;;;;;;;;; +<%=name%>_lnot: +<%= isTrue("rcx", "rsi") %> + test rcx, rcx + + jz lnot_retOne +lnot_retZero: + mov qword [rdi], 0 + ret +lnot_retOne: + mov qword [rdi], 1 + ret + + diff --git a/c/buildasm/main b/c/buildasm/main deleted file mode 100755 index 129bb83..0000000 Binary files a/c/buildasm/main and /dev/null differ diff --git 
a/c/buildasm/main.dSYM/Contents/Info.plist b/c/buildasm/main.dSYM/Contents/Info.plist deleted file mode 100644 index fe7fecd..0000000 --- a/c/buildasm/main.dSYM/Contents/Info.plist +++ /dev/null @@ -1,20 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleIdentifier - com.apple.xcode.dsym.main - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - dSYM - CFBundleSignature - ???? - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - - diff --git a/c/buildasm/main.dSYM/Contents/Resources/DWARF/main b/c/buildasm/main.dSYM/Contents/Resources/DWARF/main deleted file mode 100644 index c6a803b..0000000 Binary files a/c/buildasm/main.dSYM/Contents/Resources/DWARF/main and /dev/null differ diff --git a/c/buildasm/montgomery.asm.ejs b/c/buildasm/montgomery.asm.ejs index 1652c7a..48e7a1a 100644 --- a/c/buildasm/montgomery.asm.ejs +++ b/c/buildasm/montgomery.asm.ejs @@ -144,6 +144,47 @@ montgomeryTemplate("rawMontgomeryMul", function(i, r0, r1, r2) { }) %> +;;;;;;;;;;;;;;;;;;;;;; +; rawMontgomerySquare +;;;;;;;;;;;;;;;;;;;;;; +; Square an element +; Params: +; rsi <= Pointer to the long data of element 1 +; rdi <= Pointer to the long data of result +; Modified registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +<% +montgomeryTemplate("rawMontgomerySquare", function(i, r0, r1, r2) { + // Same Digit + for (let o1=Math.max(0, i-n64+1); (o1<((i+1)>>1) )&&(o1 + mov rax, [rsi + <%= 8*o1 %>] + mul qword [rsi + <%= 8*o2 %>] + add <%= r0 %>, rax + adc <%= r1 %>, rdx + adc <%= r2 %>, 0x0 + add <%= r0 %>, rax + adc <%= r1 %>, rdx + adc <%= r2 %>, 0x0 +<% + } // Same digit +%> + +<% if (i%2 == 0) { %> + mov rax, [rsi + <%= 8*(i/2) %>] + mul rax + add <%= r0 %>, rax + adc <%= r1 %>, rdx + adc <%= r2 %>, 0x0 +<% } %> + +<% +}) +%> + + ;;;;;;;;;;;;;;;;;;;;;; ; rawMontgomeryMul1 ;;;;;;;;;;;;;;;;;;;;;; diff --git a/c/buildasm/mul.asm.ejs b/c/buildasm/mul.asm.ejs index 88ecd48..fca655d 100644 --- a/c/buildasm/mul.asm.ejs +++ b/c/buildasm/mul.asm.ejs @@ -16,6 
+16,24 @@ mul_manageOverflow: ; Do the operation in 64 bits pop rsi <% } %> +<% function squareS1() { %> + xor rax, rax + mov eax, r8d + imul eax + jo square_manageOverflow ; rsi already is the 64bits result + + mov [rdi], rax ; not necessary to adjust so just save and return + +square_manageOverflow: ; Do the operation in 64 bits + push rsi + movsx rax, r8d + imul rax + mov rsi, rax + call rawCopyS2L + pop rsi +<% } %> + + <% function mulL1S2(t) { %> push rsi add rsi, 8 @@ -73,6 +91,15 @@ mul_manageOverflow: ; Do the operation in 64 bits sub rsi, 8 <% } %> + +<% function squareL1() { %> + add rdi, 8 + add rsi, 8 + call rawMontgomerySquare + sub rdi, 8 + sub rsi, 8 +<% } %> + <% function mulR3() { %> push rsi add rdi, 8 @@ -84,6 +111,43 @@ mul_manageOverflow: ; Do the operation in 64 bits <% } %> + +;;;;;;;;;;;;;;;;;;;;;; +; square +;;;;;;;;;;;;;;;;;;;;;; +; Squares a field element +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; [rdi] = [rsi] * [rsi] +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +<%=name%>_square: + mov r8, [rsi] + bt r8, 63 ; Check if is short first operand + jc square_l1 + +square_s1: ; Both operands are short +<%= squareS1() %> + ret + +square_l1: + bt r8, 62 ; check if montgomery first + jc square_l1m +square_l1n: +<%= global.setTypeDest("0xC0"); %> +<%= squareL1() %> +<%= mulR3() %> + ret + +square_l1m: +<%= global.setTypeDest("0xC0"); %> +<%= squareL1() %> + ret + + + ;;;;;;;;;;;;;;;;;;;;;; ; mul ;;;;;;;;;;;;;;;;;;;;;; diff --git a/c/buildasm/out.map b/c/buildasm/out.map deleted file mode 100644 index 0792914..0000000 --- a/c/buildasm/out.map +++ /dev/null @@ -1,219 +0,0 @@ -# Path: main -# Arch: x86_64 -# Object files: -[ 0] linker synthesized -[ 1] /var/folders/g_/74y0ll3503d4sm0c64jw432r0000gn/T//cczqYl2H.o -[ 2] fr.o -[ 3] /var/folders/g_/74y0ll3503d4sm0c64jw432r0000gn/T//cc5nHggh.o -[ 4] /usr/local/lib/libgmp.dylib -[ 5] /usr/local/Cellar/gcc/9.2.0_2/lib/gcc/9/libstdc++.dylib -[ 6] 
/Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/lib/libSystem.tbd -# Sections: -# Address Size Segment Section -0x1000011BD 0x00001C99 __TEXT __text -0x100002E56 0x00000042 __TEXT __stubs -0x100002E98 0x00000074 __TEXT __stub_helper -0x100002F0C 0x0000001B __TEXT __cstring -0x100002F28 0x000000D8 __TEXT __eh_frame -0x100003000 0x00000008 __DATA_CONST __got -0x100004000 0x00000058 __DATA __la_symbol_ptr -0x100004058 0x00000078 __DATA __data -# Symbols: -# Address Size File Name -0x1000011BD 0x000000E2 [ 1] _main -0x10000129F 0x00000023 [ 2] rawCopyS2L -0x1000012C2 0x0000003D [ 2] u64toLong_adjust_neg -0x1000012FF 0x00000301 [ 2] rawMontgomeryMul -0x100001600 0x0000002B [ 2] rawMontgomeryMul_mulM_sq -0x10000162B 0x00000005 [ 2] rawMontgomeryMul_mulM_done -0x100001630 0x0000022C [ 2] rawMontgomeryMul1 -0x10000185C 0x0000002B [ 2] rawMontgomeryMul1_mulM_sq -0x100001887 0x00000005 [ 2] rawMontgomeryMul1_mulM_done -0x10000188C 0x00000218 [ 2] rawFromMontgomery -0x100001AA4 0x0000002B [ 2] rawFromMontgomery_mulM_sq -0x100001ACF 0x00000005 [ 2] rawFromMontgomery_mulM_done -0x100001AD4 0x00000011 [ 2] _Fr_toMontgomery -0x100001AE5 0x00000018 [ 2] toMontgomeryShort -0x100001AFD 0x0000000B [ 2] posMontgomeryShort -0x100001B08 0x00000016 [ 2] negMontgomeryShort -0x100001B1E 0x0000001C [ 2] toMontgomeryLong -0x100001B3A 0x00000001 [ 2] toMontgomery_doNothing -0x100001B3B 0x00000011 [ 2] _Fr_toNormal -0x100001B4C 0x00000010 [ 2] fromMontgomeryLong -0x100001B5C 0x00000001 [ 2] fromMontgomery_doNothing -0x100001B5D 0x00000018 [ 2] _Fr_add -0x100001B75 0x0000000D [ 2] add_s1s2 -0x100001B82 0x00000011 [ 2] add_manageOverflow -0x100001B93 0x0000000B [ 2] add_l1 -0x100001B9E 0x00000007 [ 2] add_l1s2 -0x100001BA5 0x0000002C [ 2] add_l1ns2 -0x100001BD1 0x0000000E [ 2] tmp1 -0x100001BDF 0x00000007 [ 2] add_l1ms2 -0x100001BE6 0x00000031 [ 2] add_l1ms2n -0x100001C17 0x00000024 [ 2] add_l1ms2m -0x100001C3B 0x00000007 [ 2] add_s1l2 -0x100001C42 0x0000002C [ 2] add_s1l2n 
-0x100001C6E 0x0000000E [ 2] tmp2 -0x100001C7C 0x00000007 [ 2] add_s1l2m -0x100001C83 0x00000037 [ 2] add_s1nl2m -0x100001CBA 0x00000024 [ 2] add_s1ml2m -0x100001CDE 0x00000007 [ 2] add_l1l2 -0x100001CE5 0x00000007 [ 2] add_l1nl2 -0x100001CEC 0x00000024 [ 2] add_l1nl2n -0x100001D10 0x00000037 [ 2] add_l1nl2m -0x100001D47 0x00000007 [ 2] add_l1ml2 -0x100001D4E 0x00000031 [ 2] add_l1ml2n -0x100001D7F 0x00000024 [ 2] add_l1ml2m -0x100001DA3 0x00000066 [ 2] rawAddLL -0x100001E09 0x0000002B [ 2] rawAddLL_sq -0x100001E34 0x00000001 [ 2] rawAddLL_done -0x100001E35 0x0000006A [ 2] rawAddLS -0x100001E9F 0x0000002B [ 2] rawAddLS_sq -0x100001ECA 0x00000001 [ 2] rawAddLS_done -0x100001ECB 0x00000018 [ 2] _Fr_sub -0x100001EE3 0x0000000D [ 2] sub_s1s2 -0x100001EF0 0x00000011 [ 2] sub_manageOverflow -0x100001F01 0x0000000B [ 2] sub_l1 -0x100001F0C 0x00000007 [ 2] sub_l1s2 -0x100001F13 0x0000002C [ 2] sub_l1ns2 -0x100001F3F 0x0000000E [ 2] tmp3 -0x100001F4D 0x00000007 [ 2] sub_l1ms2 -0x100001F54 0x00000031 [ 2] sub_l1ms2n -0x100001F85 0x00000024 [ 2] sub_l1ms2m -0x100001FA9 0x00000007 [ 2] sub_s1l2 -0x100001FB0 0x00000026 [ 2] sub_s1l2n -0x100001FD6 0x0000001A [ 2] tmp4 -0x100001FF0 0x00000007 [ 2] sub_s1l2m -0x100001FF7 0x00000037 [ 2] sub_s1nl2m -0x10000202E 0x00000024 [ 2] sub_s1ml2m -0x100002052 0x00000007 [ 2] sub_l1l2 -0x100002059 0x00000007 [ 2] sub_l1nl2 -0x100002060 0x00000024 [ 2] sub_l1nl2n -0x100002084 0x00000037 [ 2] sub_l1nl2m -0x1000020BB 0x00000007 [ 2] sub_l1ml2 -0x1000020C2 0x00000031 [ 2] sub_l1ml2n -0x1000020F3 0x00000024 [ 2] sub_l1ml2m -0x100002117 0x00000031 [ 2] rawSubLS -0x100002148 0x0000002B [ 2] rawSubLS_aq -0x100002173 0x00000001 [ 2] rawSubLS_done -0x100002174 0x0000002F [ 2] rawSubSL -0x1000021A3 0x0000002B [ 2] rawSubSL_aq -0x1000021CE 0x00000001 [ 2] rawSubSL_done -0x1000021CF 0x0000002F [ 2] rawSubLL -0x1000021FE 0x0000002B [ 2] rawSubLL_aq -0x100002229 0x00000001 [ 2] rawSubLL_done -0x10000222A 0x0000009C [ 2] rawNegLS -0x1000022C6 0x00000001 [ 
2] rawNegSL_done -0x1000022C7 0x0000000A [ 2] _Fr_neg -0x1000022D1 0x00000008 [ 2] neg_s -0x1000022D9 0x0000000E [ 2] neg_manageOverflow -0x1000022E7 0x00000019 [ 2] neg_l -0x100002300 0x0000002A [ 2] rawNegL -0x10000232A 0x0000003B [ 2] doNegate -0x100002365 0x00000018 [ 2] _Fr_mul -0x10000237D 0x0000000E [ 2] mul_s1s2 -0x10000238B 0x00000014 [ 2] mul_manageOverflow -0x10000239F 0x0000000B [ 2] mul_l1 -0x1000023AA 0x0000000B [ 2] mul_l1s2 -0x1000023B5 0x00000007 [ 2] mul_l1ns2 -0x1000023BC 0x00000033 [ 2] mul_l1ns2n -0x1000023EF 0x0000000A [ 2] tmp5 -0x1000023F9 0x0000001A [ 2] tmp6 -0x100002413 0x00000024 [ 2] mul_l1ns2m -0x100002437 0x00000007 [ 2] mul_l1ms2 -0x10000243E 0x00000033 [ 2] mul_l1ms2n -0x100002471 0x0000000A [ 2] tmp7 -0x10000247B 0x00000001 [ 2] tmp8 -0x10000247C 0x00000024 [ 2] mul_l1ms2m -0x1000024A0 0x0000000B [ 2] mul_s1l2 -0x1000024AB 0x00000007 [ 2] mul_s1nl2 -0x1000024B2 0x00000033 [ 2] mul_s1nl2n -0x1000024E5 0x0000000A [ 2] tmp9 -0x1000024EF 0x0000001A [ 2] tmp10 -0x100002509 0x00000033 [ 2] mul_s1nl2m -0x10000253C 0x0000000A [ 2] tmp11 -0x100002546 0x00000001 [ 2] tmp12 -0x100002547 0x00000007 [ 2] mul_s1ml2 -0x10000254E 0x00000024 [ 2] mul_s1ml2n -0x100002572 0x00000024 [ 2] mul_s1ml2m -0x100002596 0x00000007 [ 2] mul_l1l2 -0x10000259D 0x00000007 [ 2] mul_l1nl2 -0x1000025A4 0x0000003D [ 2] mul_l1nl2n -0x1000025E1 0x00000024 [ 2] mul_l1nl2m -0x100002605 0x00000007 [ 2] mul_l1ml2 -0x10000260C 0x00000024 [ 2] mul_l1ml2n -0x100002630 0x00000024 [ 2] mul_l1ml2m -0x100002654 0x0000001C [ 2] _Fr_band -0x100002670 0x00000019 [ 2] and_s1s2 -0x100002689 0x00000012 [ 2] tmp13 -0x10000269B 0x00000054 [ 2] tmp14 -0x1000026EF 0x0000000B [ 2] and_l1 -0x1000026FA 0x0000000B [ 2] and_l1s2 -0x100002705 0x00000044 [ 2] and_l1ns2 -0x100002749 0x00000054 [ 2] tmp15 -0x10000279D 0x00000059 [ 2] and_l1ms2 -0x1000027F6 0x00000054 [ 2] tmp16 -0x10000284A 0x0000000B [ 2] and_s1l2 -0x100002855 0x00000044 [ 2] and_s1l2n -0x100002899 0x00000054 [ 2] tmp17 
-0x1000028ED 0x00000053 [ 2] and_s1l2m -0x100002940 0x00000054 [ 2] tmp18 -0x100002994 0x00000016 [ 2] and_l1l2 -0x1000029AA 0x00000044 [ 2] and_l1nl2n -0x1000029EE 0x00000054 [ 2] tmp19 -0x100002A42 0x00000053 [ 2] and_l1nl2m -0x100002A95 0x00000054 [ 2] tmp20 -0x100002AE9 0x0000000B [ 2] and_l1ml2 -0x100002AF4 0x00000059 [ 2] and_l1ml2n -0x100002B4D 0x00000054 [ 2] tmp21 -0x100002BA1 0x00000068 [ 2] and_l1ml2m -0x100002C09 0x00000054 [ 2] tmp22 -0x100002C5D 0x0000009F [ 3] __Z14Fr_str2elementP9FrElementPc -0x100002CFC 0x0000015A [ 3] __Z14Fr_element2strP9FrElement -0x100002E56 0x00000006 [ 5] __Znam -0x100002E5C 0x00000006 [ 4] ___gmpz_add -0x100002E62 0x00000006 [ 4] ___gmpz_clear -0x100002E68 0x00000006 [ 4] ___gmpz_export -0x100002E6E 0x00000006 [ 4] ___gmpz_get_str -0x100002E74 0x00000006 [ 4] ___gmpz_import -0x100002E7A 0x00000006 [ 4] ___gmpz_init -0x100002E80 0x00000006 [ 4] ___gmpz_init_set_si -0x100002E86 0x00000006 [ 4] ___gmpz_set_str -0x100002E8C 0x00000006 [ 6] _printf -0x100002E92 0x00000006 [ 6] _sprintf -0x100002E98 0x00000010 [ 0] helper helper -0x100002EA8 0x0000000A [ 4] ___gmpz_add -0x100002EB2 0x0000000A [ 4] ___gmpz_clear -0x100002EBC 0x0000000A [ 4] ___gmpz_export -0x100002EC6 0x0000000A [ 4] ___gmpz_get_str -0x100002ED0 0x0000000A [ 4] ___gmpz_import -0x100002EDA 0x0000000A [ 4] ___gmpz_init -0x100002EE4 0x0000000A [ 4] ___gmpz_init_set_si -0x100002EEE 0x0000000A [ 4] ___gmpz_set_str -0x100002EF8 0x0000000A [ 6] _printf -0x100002F02 0x0000000A [ 6] _sprintf -0x100002F0C 0x00000018 [ 1] literal string: %llu, %llu, %llu, %llu\n -0x100002F24 0x00000003 [ 3] literal string: %d -0x100002F28 0x00000018 [ 1] CIE -0x100002F40 0x00000038 [ 1] FDE for: _main -0x100002F78 0x00000018 [ 3] CIE -0x100002F90 0x00000038 [ 3] FDE for: __Z14Fr_str2elementP9FrElementPc -0x100002FC8 0x00000038 [ 3] FDE for: __Z14Fr_element2strP9FrElement -0x100003000 0x00000008 [ 0] non-lazy-pointer-to-local: dyld_stub_binder -0x100004000 0x00000008 [ 5] __Znam -0x100004008 
0x00000008 [ 4] ___gmpz_add -0x100004010 0x00000008 [ 4] ___gmpz_clear -0x100004018 0x00000008 [ 4] ___gmpz_export -0x100004020 0x00000008 [ 4] ___gmpz_get_str -0x100004028 0x00000008 [ 4] ___gmpz_import -0x100004030 0x00000008 [ 4] ___gmpz_init -0x100004038 0x00000008 [ 4] ___gmpz_init_set_si -0x100004040 0x00000008 [ 4] ___gmpz_set_str -0x100004048 0x00000008 [ 6] _printf -0x100004050 0x00000008 [ 6] _sprintf -0x100004058 0x00000008 [ 0] __dyld_private -0x100004060 0x00000008 [ 2] _Fr_q -0x100004068 0x00000020 [ 2] q -0x100004088 0x00000020 [ 2] R2 -0x1000040A8 0x00000020 [ 2] R3 -0x1000040C8 0x00000008 [ 2] lboMask diff --git a/c/buildasm/tester b/c/buildasm/tester deleted file mode 100755 index 93428e6..0000000 Binary files a/c/buildasm/tester and /dev/null differ diff --git a/c/buildasm/tester.cpp b/c/buildasm/tester.cpp index 04d2d83..07792ce 100644 --- a/c/buildasm/tester.cpp +++ b/c/buildasm/tester.cpp @@ -39,15 +39,23 @@ void fillMap() { addFunction("sub", (FuncAny)Fr_sub, 2); addFunction("neg", (FuncAny)Fr_neg, 1); addFunction("mul", (FuncAny)Fr_mul, 2); + addFunction("square", (FuncAny)Fr_square, 1); + addFunction("idiv", (FuncAny)Fr_idiv, 2); + addFunction("inv", (FuncAny)Fr_inv, 1); + addFunction("div", (FuncAny)Fr_div, 2); addFunction("band", (FuncAny)Fr_band, 2); addFunction("bor", (FuncAny)Fr_bor, 2); addFunction("bxor", (FuncAny)Fr_bxor, 2); + addFunction("bnot", (FuncAny)Fr_bnot, 1); addFunction("eq", (FuncAny)Fr_eq, 2); addFunction("neq", (FuncAny)Fr_neq, 2); addFunction("lt", (FuncAny)Fr_lt, 2); addFunction("gt", (FuncAny)Fr_gt, 2); addFunction("leq", (FuncAny)Fr_leq, 2); addFunction("geq", (FuncAny)Fr_geq, 2); + addFunction("land", (FuncAny)Fr_land, 2); + addFunction("lor", (FuncAny)Fr_lor, 2); + addFunction("lnot", (FuncAny)Fr_lnot, 1); } u_int64_t readInt(std::string &s) { diff --git a/c/buildasm/tester.dSYM/Contents/Info.plist b/c/buildasm/tester.dSYM/Contents/Info.plist deleted file mode 100644 index c78a483..0000000 --- 
a/c/buildasm/tester.dSYM/Contents/Info.plist +++ /dev/null @@ -1,20 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleIdentifier - com.apple.xcode.dsym.tester - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - dSYM - CFBundleSignature - ???? - CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - - diff --git a/c/buildasm/tester.dSYM/Contents/Resources/DWARF/tester b/c/buildasm/tester.dSYM/Contents/Resources/DWARF/tester deleted file mode 100644 index 1f72fd4..0000000 Binary files a/c/buildasm/tester.dSYM/Contents/Resources/DWARF/tester and /dev/null differ diff --git a/test/fieldasm.js b/test/fieldasm.js index 2f9437b..20d5a75 100644 --- a/test/fieldasm.js +++ b/test/fieldasm.js @@ -79,7 +79,6 @@ describe("field asm test", function () { const tv = buildTestVector2(bn128r, "bor"); await tester(bn128r, tv); }); - it("secp256k1q binary or", async () => { const tv = buildTestVector2(secp256k1q, "bor"); await tester(secp256k1q, tv); @@ -100,6 +99,18 @@ describe("field asm test", function () { const tv = buildTestVector2(mnt6753q, "bxor"); await tester(mnt6753q, tv); }); + it("bn128r binary not", async () => { + const tv = buildTestVector1(bn128r, "bnot"); + await tester(bn128r, tv); + }); + it("secp256k1q binary not", async () => { + const tv = buildTestVector1(secp256k1q, "bnot"); + await tester(secp256k1q, tv); + }); + it("mnt6753q binary not", async () => { + const tv = buildTestVector1(mnt6753q, "bnot"); + await tester(mnt6753q, tv); + }); it("bn128r eq", async () => { const tv = buildTestVector2(bn128r, "eq"); await tester(bn128r, tv); @@ -108,12 +119,10 @@ describe("field asm test", function () { const tv = buildTestVector2(secp256k1q, "eq"); await tester(secp256k1q, tv); }); -*/ it("mnt6753q eq", async () => { const tv = buildTestVector2(mnt6753q, "eq"); await tester(mnt6753q, tv); }); -/* it("bn128r neq", async () => { const tv = buildTestVector2(bn128r, "neq"); await tester(bn128r, tv); @@ -122,12 +131,10 @@ describe("field asm 
test", function () { const tv = buildTestVector2(secp256k1q, "neq"); await tester(secp256k1q, tv); }); -*/ it("mnt6753q neq", async () => { const tv = buildTestVector2(mnt6753q, "neq"); await tester(mnt6753q, tv); }); -/* it("bn128r lt", async () => { const tv = buildTestVector2(bn128r, "lt"); await tester(bn128r, tv); @@ -136,12 +143,10 @@ describe("field asm test", function () { const tv = buildTestVector2(secp256k1q, "lt"); await tester(secp256k1q, tv); }); -*/ it("mnt6753q lt", async () => { const tv = buildTestVector2(mnt6753q, "lt"); await tester(mnt6753q, tv); }); -/* it("bn128r gt", async () => { const tv = buildTestVector2(bn128r, "gt"); await tester(bn128r, tv); @@ -150,12 +155,10 @@ describe("field asm test", function () { const tv = buildTestVector2(secp256k1q, "gt"); await tester(secp256k1q, tv); }); -*/ it("mnt6753q gt", async () => { const tv = buildTestVector2(mnt6753q, "gt"); await tester(mnt6753q, tv); }); -/* it("bn128r leq", async () => { const tv = buildTestVector2(bn128r, "leq"); await tester(bn128r, tv); @@ -164,12 +167,10 @@ describe("field asm test", function () { const tv = buildTestVector2(secp256k1q, "leq"); await tester(secp256k1q, tv); }); -*/ it("mnt6753q leq", async () => { const tv = buildTestVector2(mnt6753q, "leq"); await tester(mnt6753q, tv); }); -/* it("bn128r geq", async () => { const tv = buildTestVector2(bn128r, "geq"); await tester(bn128r, tv); @@ -178,11 +179,96 @@ describe("field asm test", function () { const tv = buildTestVector2(secp256k1q, "geq"); await tester(secp256k1q, tv); }); -*/ it("mnt6753q geq", async () => { const tv = buildTestVector2(mnt6753q, "geq"); await tester(mnt6753q, tv); }); + it("bn128r logical and", async () => { + const tv = buildTestVector2(bn128r, "land"); + await tester(bn128r, tv); + }); + it("secp256k1q logical and", async () => { + const tv = buildTestVector2(secp256k1q, "land"); + await tester(secp256k1q, tv); + }); + it("mnt6753q logical and", async () => { + const tv = 
buildTestVector2(mnt6753q, "land"); + await tester(mnt6753q, tv); + }); + it("bn128r logical or", async () => { + const tv = buildTestVector2(bn128r, "lor"); + await tester(bn128r, tv); + }); + it("secp256k1q logical or", async () => { + const tv = buildTestVector2(secp256k1q, "lor"); + await tester(secp256k1q, tv); + }); + it("mnt6753q logical or", async () => { + const tv = buildTestVector2(mnt6753q, "lor"); + await tester(mnt6753q, tv); + }); + it("bn128r logical not", async () => { + const tv = buildTestVector1(bn128r, "lnot"); + await tester(bn128r, tv); + }); + it("secp256k1q logical not", async () => { + const tv = buildTestVector1(secp256k1q, "lnot"); + await tester(secp256k1q, tv); + }); + it("mnt6753q logical not", async () => { + const tv = buildTestVector1(mnt6753q, "lnot"); + await tester(mnt6753q, tv); + }); + it("bn128r idiv", async () => { + const tv = buildTestVector2(bn128r, "idiv"); + await tester(bn128r, tv); + }); + it("secp256k1q idiv", async () => { + const tv = buildTestVector2(secp256k1q, "idiv"); + await tester(secp256k1q, tv); + }); + it("mnt6753q idiv", async () => { + const tv = buildTestVector2(mnt6753q, "idiv"); + await tester(mnt6753q, tv); + }); + it("bn128r inv", async () => { + const tv = buildTestVector1(bn128r, "inv"); + await tester(bn128r, tv); + }); + it("secp256k1q inv", async () => { + const tv = buildTestVector1(secp256k1q, "inv"); + await tester(secp256k1q, tv); + }); + it("mnt6753q inv", async () => { + const tv = buildTestVector1(mnt6753q, "inv"); + await tester(mnt6753q, tv); + }); + it("bn128r div", async () => { + const tv = buildTestVector2(bn128r, "div"); + await tester(bn128r, tv); + }); + it("secp256k1q div", async () => { + const tv = buildTestVector2(secp256k1q, "div"); + await tester(secp256k1q, tv); + }); + it("mnt6753q div", async () => { + const tv = buildTestVector2(mnt6753q, "div"); + await tester(mnt6753q, tv); + }); +*/ + it("bn128r square", async () => { + const tv = buildTestVector1(bn128r, "square"); 
+ await tester(bn128r, tv); + }); + it("secp256k1q square", async () => { + const tv = buildTestVector1(secp256k1q, "square"); + await tester(secp256k1q, tv); + }); + it("mnt6753q square", async () => { + const tv = buildTestVector1(mnt6753q, "square"); + await tester(mnt6753q, tv); + }); + }); function buildTestVector2(p, op) { @@ -190,7 +276,7 @@ function buildTestVector2(p, op) { const tv = []; const nums = getCriticalNumbers(p, 2); - const excludeZero = ["div", "mod"].indexOf(op) >= 0; + const excludeZero = ["div", "mod", "idiv"].indexOf(op) >= 0; for (let i=0; i