@ -0,0 +1,245 @@ |
|||
<% function addS1S2() { %>
        ; Short + short: eax and ecx carry the two signed 32-bit values
        ; (low halves of the first quadword of each element).
        mov     edx, eax                ; 32-bit write also clears the upper half of rdx
        add     edx, ecx
        jo      add_manageOverflow      ; the sum does not fit in 32 bits

        mov     [rdi], rdx              ; low dword = value, high dword = 0 (short type)
        ret

add_manageOverflow:                     ; redo the sum with 64-bit operands
        push    rsi                     ; rsi is used to pass the 64-bit sum
        movsx   rsi, eax
        movsx   rdx, ecx
        add     rsi, rdx
        call    rawCopyS2L              ; stores rsi at [rdi] in long format
        pop     rsi
        ret
<% } %>
|||
|
|||
|
|||
|
|||
<% function addL1S2() { %>
<%  const nonNegative = global.tmpLabel() %>
        ; Long (element 1) + short (element 2, value in ecx).
        add     rsi, 8                  ; step over the 8-byte header of element 1
        add     rdi, 8                  ; same for the result
        movsx   rdx, ecx                ; sign-extended short operand
        test    rdx, rdx
        jns     <%= nonNegative %>

        neg     rdx                     ; negative short: subtract its magnitude instead
        call    rawSubLS
        sub     rdi, 8
        sub     rsi, 8
        ret

<%= nonNegative %>:
        call    rawAddLS
        sub     rdi, 8
        sub     rsi, 8
        ret
<% } %>
|||
|
|||
<% function addS1L2() { %>
<%  const nonNegative = global.tmpLabel() %>
        ; Short (element 1, value in eax) + long (element 2, pointed to by rdx).
        ; rsi is repointed at element 2 for the raw routines, so the caller's
        ; value is saved and restored (the previous "sub rsi, 8" left rsi
        ; holding the element-2 pointer instead).
        push    rsi
        lea     rsi, [rdx + 8]          ; limbs of the long operand
        movsx   rdx, eax                ; sign-extended short operand
        add     rdi, 8                  ; step over the result header
        test    rdx, rdx
        jns     <%= nonNegative %>

        neg     rdx                     ; negative short: subtract its magnitude instead
        call    rawSubLS
        sub     rdi, 8
        pop     rsi
        ret

<%= nonNegative %>:
        call    rawAddLS
        sub     rdi, 8
        pop     rsi
        ret
<% } %>
|||
|
|||
<% function addL1L2() { %>
        ; Long + long: point all three registers past the 8-byte headers.
        lea     rdx, [rdx + 8]
        lea     rsi, [rsi + 8]
        lea     rdi, [rdi + 8]
        call    rawAddLL
        sub     rdi, 8                  ; rdi and rsi are moved back; rdx is left advanced
        sub     rsi, 8
        ret
<% } %>
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; add
;;;;;;;;;;;;;;;;;;;;;;
; Adds two elements of any kind.
; Bit 63 of an element's first quadword marks it as long, bit 62 as Montgomery;
; the routine branches on those bits and emits one specialised path per case.
; Params:
;   rsi <= Pointer to element 1
;   rdx <= Pointer to element 2
;   rdi <= Pointer to result
; Modified Registers:
;    r8, r9, r10, r11, rax, rcx
;    (the short paths also overwrite rdx)
;;;;;;;;;;;;;;;;;;;;;;
<%=name%>_add:
        mov     rax, [rsi]
        mov     rcx, [rdx]
        bt      rax, 63                 ; is element 1 long?
        jc      add_l1
        bt      rcx, 63                 ; is element 2 long?
        jc      add_s1l2

add_s1s2:                               ; both operands are short
<%= addS1S2() %>

add_l1:
        bt      rcx, 63                 ; is element 2 long?
        jc      add_l1l2

;;;;;;;; element 1 long, element 2 short
add_l1s2:
        bt      rax, 62                 ; is element 1 Montgomery?
        jc      add_l1ms2
add_l1ns2:
<%= global.setTypeDest("0x80"); %>
<%= addL1S2(); %>

add_l1ms2:
        bt      rcx, 62                 ; is element 2 Montgomery?
        jc      add_l1ms2m
add_l1ms2n:
<%= global.setTypeDest("0xC0"); %>
<%= global.toMont_b() %>
<%= addL1L2() %>

add_l1ms2m:
<%= global.setTypeDest("0xC0"); %>
<%= addL1L2() %>

;;;;;;;; element 1 short, element 2 long
add_s1l2:
        bt      rcx, 62                 ; is element 2 Montgomery?
        jc      add_s1l2m
add_s1l2n:
<%= global.setTypeDest("0x80"); %>
<%= addS1L2(); %>

add_s1l2m:
        bt      rax, 62                 ; is element 1 Montgomery?
        jc      add_s1ml2m
add_s1nl2m:
<%= global.setTypeDest("0xC0"); %>
<%= global.toMont_a() %>
<%= addL1L2() %>

add_s1ml2m:
<%= global.setTypeDest("0xC0"); %>
<%= addL1L2() %>

;;;;;;;; both long
add_l1l2:
        bt      rax, 62                 ; is element 1 Montgomery?
        jc      add_l1ml2
add_l1nl2:
        bt      rcx, 62                 ; is element 2 Montgomery?
        jc      add_l1nl2m
add_l1nl2n:
<%= global.setTypeDest("0x80"); %>
<%= addL1L2() %>

add_l1nl2m:
<%= global.setTypeDest("0xC0"); %>
<%= global.toMont_a(); %>
<%= addL1L2() %>

add_l1ml2:
        bt      rcx, 62                 ; is element 2 Montgomery?
        jc      add_l1ml2m
add_l1ml2n:
<%= global.setTypeDest("0xC0"); %>
<%= global.toMont_b(); %>
<%= addL1L2() %>

add_l1ml2m:
<%= global.setTypeDest("0xC0"); %>
<%= addL1L2() %>
|
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawAddLL
;;;;;;;;;;;;;;;;;;;;;;
; Adds the limb arrays of two long elements and reduces the sum by q once
; when it carried out of the top limb or is not below q.
; Params:
;   rsi <= Pointer to the long data of element 1
;   rdx <= Pointer to the long data of element 2
;   rdi <= Pointer to the long data of result
; Modified Registers:
;    rax
;;;;;;;;;;;;;;;;;;;;;;
rawAddLL:
        ; limb-by-limb sum, carry chained through adc
<% for (let limb=0; limb<n64; limb++) { const off = limb*8; %>
        mov     rax, [rsi + <%= off %>]
        <%= limb==0 ? "add" : "adc" %>     rax, [rdx + <%= off %>]
        mov     [rdi + <%= off %>], rax
<% } %>
        jc      rawAddLL_sq             ; carry out of the top limb: subtract q

        ; compare with q from the most significant limb down;
        ; rax still holds the top limb of the sum on the first comparison
<% for (let limb=n64-1; limb>=0; limb--) { %>
<%   if (limb != n64-1) { %>
        mov     rax, [rdi + <%= limb*8 %>]
<%   } %>
        cmp     rax, [q + <%= limb*8 %>]
        jc      rawAddLL_done           ; sum limb < q limb: already reduced
        jnz     rawAddLL_sq             ; sum limb > q limb: subtract q
<% } %>
        ; every limb equal (sum == q): fall through and subtract q
rawAddLL_sq:
<% for (let limb=0; limb<n64; limb++) { %>
        mov     rax, [q + <%= limb*8 %>]
        <%= limb==0 ? "sub" : "sbb" %>     [rdi + <%= limb*8 %>], rax
<% } %>
rawAddLL_done:
        ret
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawAddLS
;;;;;;;;;;;;;;;;;;;;;;
; Adds a 64-bit value to the limb array of a long element and reduces the
; sum by q once when it carried out of the top limb or is not below q.
; Params:
;   rdi <= Pointer to the long data of result
;   rsi <= Pointer to the long data of element 1
;   rdx <= Value to be added
; Modified Registers:
;    rax, rdx
;;;;;;;;;;;;;;;;;;;;;;
rawAddLS:
        add     rdx, [rsi]
        mov     [rdi], rdx
<% for (let limb=1; limb<n64; limb++) { %>
        mov     rdx, 0                  ; mov leaves CF intact for the adc below (xor would clear it)
        adc     rdx, [rsi + <%= limb*8 %>]
        mov     [rdi + <%= limb*8 %>], rdx
<% } %>
        jc      rawAddLS_sq             ; carry out of the top limb: subtract q

        ; compare with q from the most significant limb down
<% for (let limb=n64-1; limb>=0; limb--) { %>
        mov     rax, [rdi + <%= limb*8 %>]
        cmp     rax, [q + <%= limb*8 %>]
        jc      rawAddLS_done           ; sum limb < q limb: already reduced
        jnz     rawAddLS_sq             ; sum limb > q limb: subtract q
<% } %>
        ; every limb equal (sum == q): fall through and subtract q
rawAddLS_sq:
<% for (let limb=0; limb<n64; limb++) { %>
        mov     rax, [q + <%= limb*8 %>]
        <%= limb==0 ? "sub" : "sbb" %>     [rdi + <%= limb*8 %>], rax
<% } %>
rawAddLS_done:
        ret
|||
|
|||
|
|||
|
|||
|
@ -0,0 +1,178 @@ |
|||
<% function binOpS1S2(op) { %>
<%  const viaLong = global.tmpLabel() %>
        ; Short <op> short (values in r8d / r9d).
        ; A negative operand sends the operation down the long path.
        test    r8d, r8d
        js      <%=viaLong%>
        test    r9d, r9d
        js      <%=viaLong%>

        mov     edx, r8d                ; both non-negative: operate on 32 bits
        <%=op%>     edx, r9d
        mov     [rdi], rdx              ; low dword = value, high dword = 0
        ret

<%=viaLong%>:
<%= global.setTypeDest("0x80"); %>
<%= global.toLong_b() %>
<%= global.toLong_a() %>
<%= binOpL1L2(op) %>
<% } %>
|||
|
|||
<% function binOpS1L2(op) { %>
<%  const s1l2_solveNeg = global.tmpLabel() %>
        ; Short (element 1, value in r8d) <op> long (element 2 at rdx).
        test    r8d, r8d
        js      <%=s1l2_solveNeg%>      ; negative short: convert it to long first

        ; Limb 0 uses the short value, higher limbs use zero. The top limb is
        ; masked with lboMask -- also when n64 == 1, where limb 0 is the top limb
        ; (the mask used to be skipped in that case).
<% for (let i=0; i<n64; i++) { %>
<%   if (i == 0) { %>
        movsx   rax, r8d
<%   } else { %>
        xor     rax, rax
<%   } %>
        <%=op%>     rax, [rdx + <%= (i*8)+8 %>]
<%   if (i == n64-1) { %>
        and     rax, [lboMask]
<%   } %>
        mov     [rdi + <%= (i*8)+8 %>], rax
<% } %>
        ret

<%=s1l2_solveNeg%>:
<%= global.toLong_a() %>
<%= global.setTypeDest("0x80"); %>
<%= binOpL1L2(op) %>
<% } %>
|||
|
|||
<% function binOpL1S2(op) { %>
<%  const l1s2_solveNeg = global.tmpLabel() %>
        ; Long (element 1 at rsi) <op> short (element 2, value in r9d).
        test    r9d, r9d
        js      <%=l1s2_solveNeg%>      ; negative short: convert it to long first

        ; Limb 0 uses the short value, higher limbs use zero. The top limb is
        ; masked with lboMask -- also when n64 == 1, where limb 0 is the top limb
        ; (the mask used to be skipped in that case).
<% for (let i=0; i<n64; i++) { %>
<%   if (i == 0) { %>
        movsx   rax, r9d
<%   } else { %>
        xor     rax, rax
<%   } %>
        <%=op%>     rax, [rsi + <%= (i*8)+8 %>]
<%   if (i == n64-1) { %>
        and     rax, [lboMask]
<%   } %>
        mov     [rdi + <%= (i*8)+8 %>], rax
<% } %>
        ret

<%=l1s2_solveNeg%>:
<%= global.toLong_b() %>
<%= global.setTypeDest("0x80"); %>
<%= binOpL1L2(op) %>
<% } %>
|||
|
|||
<% function binOpL1L2(op) { %>
        ; Long <op> long, limb by limb; limbs start 8 bytes into each element.
<% for (let limb=0; limb<n64; limb++) { const off = (limb*8)+8; %>
        mov     rax, [rsi + <%= off %>]
        <%=op%>     rax, [rdx + <%= off %>]
<%   if (limb == n64-1) { %>
        and     rax, [lboMask]          ; top limb is masked with lboMask
<%   } %>
        mov     [rdi + <%= off %>], rax
<% } %>
        ret
<% } %>
|||
|
|||
|
|||
|
|||
|
|||
<% function binOp(op) { %>
<%  const lbl = (suffix) => op + "_" + suffix %>
;;;;;;;;;;;;;;;;;;;;;;
; <%= op %>
;;;;;;;;;;;;;;;;;;;;;;
; Bitwise <%= op %> of two elements of any kind.
; Montgomery operands are converted with fromMont_a / fromMont_b first and
; every long path sets the destination type to 0x80.
; Params:
;   rsi <= Pointer to element 1
;   rdx <= Pointer to element 2
;   rdi <= Pointer to result
; Modified Registers:
;    r8, r9, r10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%=name%>_b<%=op%>:
        mov     r8, [rsi]
        mov     r9, [rdx]
        bt      r8, 63                  ; is element 1 long?
        jc      <%= lbl("l1") %>
        bt      r9, 63                  ; is element 2 long?
        jc      <%= lbl("s1l2") %>

<%= lbl("s1s2") %>:
<%= binOpS1S2(op) %>

<%= lbl("l1") %>:
        bt      r9, 63                  ; is element 2 long?
        jc      <%= lbl("l1l2") %>

<%= lbl("l1s2") %>:
        bt      r8, 62                  ; is element 1 Montgomery?
        jc      <%= lbl("l1ms2") %>
<%= lbl("l1ns2") %>:
<%= global.setTypeDest("0x80"); %>
<%= binOpL1S2(op) %>

<%= lbl("l1ms2") %>:
<%= global.setTypeDest("0x80"); %>
        push    r9                      ; r9 is used in montgomery so we need to save it
<%= global.fromMont_a() %>
        pop     r9
<%= binOpL1S2(op) %>

<%= lbl("s1l2") %>:
        bt      r9, 62                  ; is element 2 Montgomery?
        jc      <%= lbl("s1l2m") %>
<%= lbl("s1l2n") %>:
<%= global.setTypeDest("0x80"); %>
<%= binOpS1L2(op) %>

<%= lbl("s1l2m") %>:
<%= global.setTypeDest("0x80"); %>
        push    r8                      ; r8 is used in montgomery so we need to save it
<%= global.fromMont_b() %>
        pop     r8
<%= binOpS1L2(op) %>

<%= lbl("l1l2") %>:
        bt      r8, 62                  ; is element 1 Montgomery?
        jc      <%= lbl("l1ml2") %>
        bt      r9, 62                  ; is element 2 Montgomery?
        jc      <%= lbl("l1nl2m") %>
<%= lbl("l1nl2n") %>:
<%= global.setTypeDest("0x80"); %>
<%= binOpL1L2(op) %>

<%= lbl("l1nl2m") %>:
<%= global.setTypeDest("0x80"); %>
<%= global.fromMont_b() %>
<%= binOpL1L2(op) %>

<%= lbl("l1ml2") %>:
        bt      r9, 62                  ; is element 2 Montgomery?
        jc      <%= lbl("l1ml2m") %>
<%= lbl("l1ml2n") %>:
<%= global.setTypeDest("0x80"); %>
<%= global.fromMont_a() %>
<%= binOpL1L2(op) %>

<%= lbl("l1ml2m") %>:
<%= global.setTypeDest("0x80"); %>
<%= global.fromMont_a() %>
<%= global.fromMont_b() %>
<%= binOpL1L2(op) %>
<% } %>
|||
|
|||
<%= binOp("and") %> |
|||
<%= binOp("or") %> |
|||
<%= binOp("xor") %> |
|||
|
|||
|
@ -0,0 +1,71 @@ |
|||
const bigInt=require("big-integer"); |
|||
const path = require("path"); |
|||
const util = require("util"); |
|||
const renderFile = util.promisify(require("ejs").renderFile); |
|||
|
|||
const runningAsScript = !module.parent; |
|||
|
|||
|
|||
class ZqBuilder {
    /**
     * Rendering context handed to the EJS templates for one prime field.
     * @param q    Field modulus, in any form accepted by bigInt().
     * @param name Name passed through to the templates.
     */
    constructor(q, name) {
        this.q = bigInt(q);
        // Number of 64-bit limbs needed to hold q.
        this.n64 = Math.floor((this.q.bitLength() - 1) / 64) + 1;
        this.name = name;
        this.bigInt = bigInt;
        this.lastTmp = 0;
        this.global = {
            // Hands out "tmp1", "tmp2", ... so templates can create fresh labels.
            tmpLabel: () => {
                this.lastTmp++;
                return "tmp" + this.lastTmp;
            }
        };
    }

    /**
     * Formats v as n64 comma-separated 64-bit hex words, least significant
     * word first, each zero-padded to 16 hex digits.
     */
    constantElement(v) {
        const mask = bigInt("FFFFFFFFFFFFFFFF", 16);
        const words = [];
        for (let i = 0; i < this.n64; i++) {
            const hex = v.shiftRight(i * 64).and(mask).toString(16);
            words.push("0x" + hex.padStart(16, "0"));
        }
        return words.join(",");
    }

}
|||
|
|||
/**
 * Renders the assembly, C and header templates for the field defined by q.
 * Resolves to {asm, h, c} with the three generated sources as strings.
 */
async function buildField(q, name) {
    const builder = new ZqBuilder(q, name);
    const render = (template) => renderFile(path.join(__dirname, template), builder);

    // Rendered one after another, in the original order; the templates share the builder.
    const asm = await render("fr.asm.ejs");
    const c = await render("fr.c.ejs");
    const h = await render("fr.h.ejs");

    return { asm, h, c };
}
|||
|
|||
if (runningAsScript) {
    const fs = require("fs");
    // Multi-letter options need a double dash: yargs reads "-oc" as the grouped
    // short flags -o and -c.
    const argv = require("yargs")
        .usage("Usage: $0 -q [primeNum] -n [name] --oa [out .asm file] --oc [out .c file] --oh [out .h file]")
        .demandOption(["q","n"])
        .alias("q", "prime")
        .alias("n", "name")
        .string("q")    // keep every digit of the prime; it does not fit a JS number
        .argv;

    const q = bigInt(argv.q);

    // Each output has its own option. Previously all three names were taken
    // from argv.oc, so the three writes landed on the same file.
    const baseName = argv.name.toLowerCase();
    const asmFileName = (argv.oa) ? argv.oa : baseName + ".asm";
    const hFileName = (argv.oh) ? argv.oh : baseName + ".h";
    const cFileName = (argv.oc) ? argv.oc : baseName + ".c";

    buildField(q, argv.name).then( (res) => {
        fs.writeFileSync(asmFileName, res.asm, "utf8");
        fs.writeFileSync(hFileName, res.h, "utf8");
        fs.writeFileSync(cFileName, res.c, "utf8");
    }).catch( (err) => {
        // Report template/IO failures instead of leaving an unhandled rejection.
        console.error(err);
        process.exitCode = 1;
    });

} else {
    module.exports = buildField;
}
@ -0,0 +1,75 @@ |
|||
const chai = require("chai"); |
|||
const assert = chai.assert; |
|||
|
|||
const fs = require("fs"); |
|||
var tmp = require("tmp-promise"); |
|||
const path = require("path"); |
|||
const util = require("util"); |
|||
const exec = util.promisify(require("child_process").exec); |
|||
|
|||
const BuildZqField = require("./buildzqfield"); |
|||
|
|||
module.exports = testField; |
|||
|
|||
/**
 * Generates the field sources for `prime`, builds them together with
 * tester.cpp in a temporary directory, feeds the test inputs to the tester
 * and asserts every output line.
 *
 * @param prime Field modulus passed to BuildZqField.
 * @param test  Array of [inputLines, expected]; inputLines are written to the
 *              tester's stdin, expected is compared with the matching output line.
 */
async function testField(prime, test) {
    tmp.setGracefulCleanup();

    const dir = await tmp.dir({prefix: "circom_", unsafeCleanup: true });
    const inDir = (fileName) => path.join(dir.path, fileName);

    const source = await BuildZqField(prime, "Fr");

    await fs.promises.writeFile(inDir("fr.asm"), source.asm, "utf8");
    await fs.promises.writeFile(inDir("fr.h"), source.h, "utf8");
    await fs.promises.writeFile(inDir("fr.c"), source.c, "utf8");

    // Copy with the fs API rather than spawning `cp` through a shell.
    await fs.promises.copyFile(path.join(__dirname, "tester.cpp"), inDir("tester.cpp"));

    // The object format depends on the host: Mach-O (with the leading
    // underscore on symbols) on macOS, ELF on Linux.
    let nasmFlags;
    if (process.platform === "darwin") {
        nasmFlags = "-fmacho64 --prefix _";
    } else if (process.platform === "linux") {
        nasmFlags = "-felf64";
    } else {
        throw new Error("Unsupported platform: " + process.platform);
    }

    await exec(`nasm ${nasmFlags} ${inDir("fr.asm")}`);

    await exec("g++" +
               ` ${inDir("tester.cpp")}` +
               ` ${inDir("fr.o")}` +
               ` ${inDir("fr.c")}` +
               ` -o ${inDir("tester")}` +
               " -lgmp"
    );

    const inLines = [];
    for (let i=0; i<test.length; i++) {
        for (let j=0; j<test[i][0].length; j++) {
            inLines.push(test[i][0][j]);
        }
    }
    inLines.push("");

    await fs.promises.writeFile(inDir("in.tst"), inLines.join("\n"), "utf8");

    await exec(`${inDir("tester")}` +
               ` <${inDir("in.tst")}` +
               ` >${inDir("out.tst")}`);

    const res = await fs.promises.readFile(inDir("out.tst"), "utf8");
    const resLines = res.split("\n");

    for (let i=0; i<test.length; i++) {
        const expected = test[i][1].toString();
        const calculated = resLines[i];

        if (calculated != expected) {
            console.log("FAILED");
            for (let j=0; j<test[i][0].length; j++) {
                console.log(test[i][0][j]);
            }
            console.log("Should Return: " + expected);
            console.log("But Returns: " + calculated);
        }

        assert.equal(calculated, expected);
    }

}
|||
|
@ -0,0 +1,108 @@ |
|||
|
|||
<% function retOne() { %>
<%  const frameBytes = (n64+1)*8 %>
        mov     qword [rdi], 1          ; result = 1
        add     rsp, <%= frameBytes %>                 ; release the scratch element reserved at entry
        ret
<% } %>
|||
|
|||
<% function retZero() { %>
<%  const frameBytes = (n64+1)*8 %>
        mov     qword [rdi], 0          ; result = 0
        add     rsp, <%= frameBytes %>                 ; release the scratch element reserved at entry
        ret
<% } %>
|||
|
|||
<% function cmpLong(op, eq) { %>
<%
    // Emits the decision for a long difference e1-e2 stored at [rsp + 8 ...].
    //   eq == true : the difference is zero, so the answer is known at once.
    //   otherwise  : compare the difference with half = (q-1)/2; a value above
    //                half is treated as negative (e1 < e2).
    const inclusive = ["leq","geq"].indexOf(op) >= 0;
    const strict = ["lt","gt"].indexOf(op) >= 0;
    if (eq==true && (inclusive || strict)) {
        if (inclusive) retOne();
        if (strict) retZero();
        // The return has been emitted; the limb comparison that used to
        // follow here was unreachable, so it is no longer generated.
        return;
    }
%>
<%  const label_gt = global.tmpLabel() %>
<%  const label_lt = global.tmpLabel() %>
<% for (let i=n64-1; i>=0; i--) { %>
        mov     rax, [rsp + <%= 8+(i*8) %>]
        cmp     [half + <%= (i*8) %>], rax      ; compare with (q-1)/2
        jc      <%=label_lt%>                   ; half<rax => e1-e2 is neg => e1 < e2
        jnz     <%=label_gt%>                   ; half>rax => e1-e2 is pos => e1 > e2
<% } %>
        ; half == rax => e1-e2 is pos => e1 > e2
<%=label_gt%>:
<% if (["geq","gt"].indexOf(op) >= 0) retOne(); else retZero(); %>
<%=label_lt%>:
<% if (["leq","lt"].indexOf(op) >= 0) retOne(); else retZero(); %>
<% } // cmpLong%>
|||
|
|||
<% function cmpOp(op) { %>
<%  const frameBytes = (n64+1)*8 %>
<%  const answer = (trueFor) => { if (trueFor.indexOf(op) >= 0) retOne(); else retZero(); } %>
;;;;;;;;;;;;;;;;;;;;;;
; <%= op %>
;;;;;;;;;;;;;;;;;;;;;;
; Compares two elements of any kind: computes e1 - e2 into a scratch element
; on the stack, converts it to normal form and decides from that difference.
; Params:
;   rsi <= Pointer to element 1
;   rdx <= Pointer to element 2
;   rdi <= Pointer to result can be zero or one.
; Modified Registers:
;    r8, r9, r10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%=name%>_<%=op%>:
        sub     rsp, <%= frameBytes %>                 ; scratch element for the difference
        push    rdi                     ; keep the caller's result pointer
        lea     rdi, [rsp+8]            ; +8 skips the rdi just pushed
        call    <%=name%>_sub
        call    <%=name%>_toNormal
        pop     rdi

        mov     rax, [rsp]              ; first quadword of the difference (rdi already popped)
        bt      rax, 63                 ; is the difference long?
        jc      <%=op%>_longCmp

<%=op%>_shortCmp:
        test    eax, eax
        je      <%=op%>_s_eq
        js      <%=op%>_s_lt
<%=op%>_s_gt:
<%  answer(["geq","gt", "neq"]) %>
<%=op%>_s_lt:
<%  answer(["leq","lt", "neq"]) %>
<%=op%>_s_eq:
<%  answer(["eq","geq", "leq"]) %>

<%=op%>_longCmp:
        ; any non-zero limb means the operands differ
<% for (let i=n64-1; i>=0; i--) { %>
        cmp     qword [rsp + <%= 8+(i*8) %>], 0
        jnz     <%=op%>_neq
<% } %>
<%=op%>_eq:
<%
    if (op == "eq") {
        retOne();
    } else if (op == "neq") {
        retZero();
    } else {
        cmpLong(op, true);
    }
%>
<%=op%>_neq:
<%
    if (op == "neq") {
        retOne();
    } else if (op == "eq") {
        retZero();
    } else {
        cmpLong(op, false);
    }
%>
<% } %>
|||
|
|||
<%= cmpOp("eq") %> |
|||
<%= cmpOp("neq") %> |
|||
<%= cmpOp("lt") %> |
|||
<%= cmpOp("gt") %> |
|||
<%= cmpOp("leq") %> |
|||
<%= cmpOp("geq") %> |
|||
|
@ -0,0 +1,39 @@ |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawCopyS2L
;;;;;;;;;;;;;;;;;;;;;;
; Convert a signed 64 bit integer to a long format field element.
; A negative integer is stored as q + value.
; Params:
;   rsi <= the integer
;   rdi <= Pointer to the element that is overwritten
;
; Modified registers:
;    rax (and rsi on the negative path)
;;;;;;;;;;;;;;;;;;;;;;;

rawCopyS2L:
        mov     rax, 0x8000000000000000
        mov     [rdi], rax              ; header: long, normal form

        test    rsi, rsi
        js      u64toLong_adjust_neg

        mov     [rdi + 8], rsi          ; limb 0 = value, remaining limbs = 0
        xor     rax, rax
<% for (let limb=1; limb<n64; limb++) { %>
        mov     [rdi + <%= 8+limb*8 %>], rax
<% } %>
        ret

u64toLong_adjust_neg:
        add     rsi, [q]                ; limb 0 of q + value; CF feeds the adc chain below
        mov     [rdi + 8], rsi

        mov     rsi, -1                 ; upper limbs of the sign-extended value (mov keeps CF)
<% for (let limb=1; limb<n64; limb++) { %>
        mov     rax, rsi
        adc     rax, [q + <%= limb*8 %>]
        mov     [rdi + <%= (limb+1)*8 %>], rax
<% } %>
        ret
@ -0,0 +1,41 @@ |
|||
|
|||
|
|||
global <%=name%>_add |
|||
global <%=name%>_sub |
|||
global <%=name%>_neg |
|||
global <%=name%>_mul |
|||
global <%=name%>_band |
|||
global <%=name%>_bor |
|||
global <%=name%>_bxor |
|||
global <%=name%>_eq |
|||
global <%=name%>_neq |
|||
global <%=name%>_lt |
|||
global <%=name%>_gt |
|||
global <%=name%>_leq |
|||
global <%=name%>_geq |
|||
global <%=name%>_toNormal |
|||
global <%=name%>_toMontgomery |
|||
global <%=name%>_q |
|||
DEFAULT REL |
|||
|
|||
section .text |
|||
<%- include('utils.asm.ejs'); %> |
|||
<%- include('copy.asm.ejs'); %> |
|||
<%- include('montgomery.asm.ejs'); %> |
|||
<%- include('add.asm.ejs'); %> |
|||
<%- include('sub.asm.ejs'); %> |
|||
<%- include('neg.asm.ejs'); %> |
|||
<%- include('mul.asm.ejs'); %> |
|||
<%- include('binops.asm.ejs'); %> |
|||
<%- include('cmpops.asm.ejs'); %> |
|||
|
|||
section .data |
|||
<%=name%>_q: |
|||
dd 0 |
|||
dd 0x80000000 |
|||
q dq <%= constantElement(q) %> |
|||
half dq <%= constantElement(q.shiftRight(1)) %> |
|||
R2 dq <%= constantElement(bigInt.one.shiftLeft(n64*64*2).mod(q)) %> |
|||
R3 dq <%= constantElement(bigInt.one.shiftLeft(n64*64*3).mod(q)) %> |
|||
lboMask dq 0x<%= bigInt("8000000000000000",16).shiftRight(n64*64 - q.bitLength()).minus(bigInt.one).toString(16) %> |
|||
|
@ -0,0 +1,39 @@ |
|||
#include "fr.h" |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <gmp.h> |
|||
|
|||
// Parses the decimal string s into *pE as a long element.
// The limbs are zeroed first because mpz_export only writes the words the
// number actually needs.
void Fr_str2element(PFrElement pE, char *s) {
    mpz_t value;
    mpz_init(value);
    mpz_set_str(value, s, 10);
    pE->type = Fr_LONG;
    for (int i=0; i<Fr_N64; i++) pE->longVal[i] = 0;
    mpz_export((void *)pE->longVal, NULL, -1, 8, -1, 0, value);
    mpz_clear(value);   // release the temporary (previously leaked)
}
|||
|
|||
// Returns the decimal representation of *pE in a heap buffer that the caller
// releases with free() on every path. A long element is converted to normal
// form in place (Fr_toNormal) before printing. Returns NULL if the buffer for
// a non-negative short value cannot be allocated.
char *Fr_element2str(PFrElement pE) {
    mpz_t r;
    mpz_t q;
    if (pE->type == Fr_SHORT) {
        if (pE->shortVal>=0) {
            // malloc, not new[]: the other paths return a malloc-style buffer
            // from mpz_get_str, so callers need a single way to release it.
            char *buf = (char *)malloc(32);
            if (buf == NULL) return NULL;
            snprintf(buf, 32, "%d", pE->shortVal);
            return buf;
        } else {
            // negative short: print q + shortVal
            mpz_init(q);
            mpz_import(q, Fr_N64, -1, 8, -1, 0, (const void *)Fr_q.longVal);
            mpz_init_set_si(r, pE->shortVal);
            mpz_add(r, r, q);
            mpz_clear(q);
        }
    } else {
        Fr_toNormal(pE);
        mpz_init(r);
        mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)pE->longVal);
    }
    char *res = mpz_get_str (0, 10, r);
    mpz_clear(r);
    return res;
}
|||
|
@ -0,0 +1,39 @@ |
|||
#include "<%=name.toLowerCase()+".h"%>" |
|||
#include <stdio.h> |
|||
#include <stdlib.h> |
|||
#include <gmp.h> |
|||
|
|||
// Parses the decimal string s into *pE as a long element.
// The limbs are zeroed first because mpz_export only writes the words the
// number actually needs.
void <%=name%>_str2element(P<%=name%>Element pE, char *s) {
    mpz_t value;
    mpz_init(value);
    mpz_set_str(value, s, 10);
    pE->type = <%=name%>_LONG;
    for (int i=0; i<<%=name%>_N64; i++) pE->longVal[i] = 0;
    mpz_export((void *)pE->longVal, NULL, -1, 8, -1, 0, value);
    mpz_clear(value);   // release the temporary (previously leaked)
}
|||
|
|||
// Returns the decimal representation of *pE in a heap buffer that the caller
// releases with free() on every path. A long element is converted to normal
// form in place before printing. Returns NULL if the buffer for a non-negative
// short value cannot be allocated.
char *<%=name%>_element2str(P<%=name%>Element pE) {
    mpz_t r;
    mpz_t q;
    if (pE->type == <%=name%>_SHORT) {
        if (pE->shortVal>=0) {
            // malloc, not new[]: the other paths return a malloc-style buffer
            // from mpz_get_str, so callers need a single way to release it.
            char *buf = (char *)malloc(32);
            if (buf == NULL) return NULL;
            snprintf(buf, 32, "%d", pE->shortVal);
            return buf;
        } else {
            // negative short: print q + shortVal. The modulus symbol follows the
            // template name (it used to be hard-coded as Fr_q).
            mpz_init(q);
            mpz_import(q, <%=name%>_N64, -1, 8, -1, 0, (const void *)<%=name%>_q.longVal);
            mpz_init_set_si(r, pE->shortVal);
            mpz_add(r, r, q);
            mpz_clear(q);
        }
    } else {
        <%=name%>_toNormal(pE);
        mpz_init(r);
        mpz_import(r, <%=name%>_N64, -1, 8, -1, 0, (const void *)pE->longVal);
    }
    char *res = mpz_get_str (0, 10, r);
    mpz_clear(r);
    return res;
}
|||
|
@ -0,0 +1,31 @@ |
|||
#ifndef <%=name.toUpperCase()%>_ELEMENT_H
#define <%=name.toUpperCase()%>_ELEMENT_H

#include <stdint.h>

// Number of 64-bit limbs of a long element.
#define <%=name%>_N64 <%= n64 %>
// Values of the `type` field.
#define <%=name%>_SHORT 0x00000000
#define <%=name%>_LONG 0x80000000
#define <%=name%>_LONGMONTGOMERY 0xC0000000

// Field element: a 32-bit short value, a 32-bit type word and the limbs.
// Fixed-width types come from <stdint.h> (uint32_t/uint64_t instead of the
// BSD-only u_int32_t/u_int64_t, which <stdint.h> does not declare).
typedef struct __attribute__((__packed__)) {
    int32_t shortVal;
    uint32_t type;
    uint64_t longVal[<%=name%>_N64];
} <%=name%>Element;
typedef <%=name%>Element *P<%=name%>Element;

// The modulus q stored as an element (defined in the generated assembly).
extern <%=name%>Element <%=name%>_q;

// Routines implemented in the generated assembly.
extern "C" void <%=name%>_add(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_sub(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_neg(P<%=name%>Element r, P<%=name%>Element a);
extern "C" void <%=name%>_mul(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_band(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_bor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_bxor(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_eq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_neq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_lt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_gt(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_leq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_geq(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b);
extern "C" void <%=name%>_toNormal(P<%=name%>Element pE);
extern "C" void <%=name%>_toMontgomery(P<%=name%>Element pE);

// Decimal string conversion helpers from the generated C source.
void <%=name%>_str2element(P<%=name%>Element pE, char *s);
char *<%=name%>_element2str(P<%=name%>Element pE);

#endif // <%=name.toUpperCase()%>_ELEMENT_H
|||
|
@ -0,0 +1,24 @@ |
|||
#include "stdio.h" |
|||
#include "fr.h" |
|||
|
|||
int main() { |
|||
|
|||
FrElement a = { 0, Fr_LONGMONTGOMERY, {1,1,1,1}}; |
|||
FrElement b = { 0, Fr_LONGMONTGOMERY, {2,2,2,2}}; |
|||
|
|||
/* |
|||
FrElement a={0x43e1f593f0000000ULL,0x2833e84879b97091ULL,0xb85045b68181585dULL,0x30644e72e131a029ULL}; |
|||
FrElement b = {3,0,0,0}; |
|||
*/ |
|||
FrElement c; |
|||
|
|||
// Fr_add(&(c[0]), a, a); |
|||
// Fr_add(&(c[0]), c, b); |
|||
|
|||
for (int i=0; i<1000000000; i++) { |
|||
Fr_mul(&c, &a, &b); |
|||
} |
|||
|
|||
Fr_mul(&c,&a, &b); |
|||
printf("%llu, %llu, %llu, %llu\n", c.longVal[0], c.longVal[1], c.longVal[2], c.longVal[3]); |
|||
} |
@ -0,0 +1,20 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> |
|||
<plist version="1.0"> |
|||
<dict> |
|||
<key>CFBundleDevelopmentRegion</key> |
|||
<string>English</string> |
|||
<key>CFBundleIdentifier</key> |
|||
<string>com.apple.xcode.dsym.main</string> |
|||
<key>CFBundleInfoDictionaryVersion</key> |
|||
<string>6.0</string> |
|||
<key>CFBundlePackageType</key> |
|||
<string>dSYM</string> |
|||
<key>CFBundleSignature</key> |
|||
<string>????</string> |
|||
<key>CFBundleShortVersionString</key> |
|||
<string>1.0</string> |
|||
<key>CFBundleVersion</key> |
|||
<string>1</string> |
|||
</dict> |
|||
</plist> |
@ -0,0 +1,273 @@ |
|||
|
|||
|
|||
|
|||
<%
//////////////////////
// montgomeryTemplate
//////////////////////
// Emits a routine named fnName that accumulates products column by column and
// applies the Montgomery reduction. `round(i, r0, r1, r2)` is a hook called at
// the start of every column i so each caller can add its own terms to the
// accumulator (r0 = low word, r1 = middle, r2 = high).
//
// All the generated routines modify:
//    r8, r9, r10, r11, rax, rcx
//////////////////////
function montgomeryTemplate(fnName, round) {
    // The three accumulator registers rotate roles by one position per column.
    const ACC = ["r8", "r9", "r10"];
    let r0, r1, r2;

    const base = bigInt.one.shiftLeft(64);
    const np64 = base.minus(q.modInv(base));
    const frameBytes = n64*8;
%>
<%=fnName%>:
        sub     rsp, <%= frameBytes %>                 ; reserve space for the ms
        mov     rcx, rdx                ; rdx is needed by mul, keep its value in rcx
        mov     r11, 0x<%= np64.toString(16) %> ; np
        xor     r8,r8
        xor     r9,r9
        xor     r10,r10
<%
    for (let i=0; i<n64*2; i++) {
        r0 = ACC[i % 3];
        r1 = ACC[(i+1) % 3];
        r2 = ACC[(i+2) % 3];
        round(i, r0, r1, r2);

        // Add m[j] * q[i-j] for every stored m[j] with 0 < i-j < n64.
        for (let j=Math.min(i-1, n64-1); j>=Math.max(0, i-n64+1); j--) {
%>
        mov     rax, [rsp + <%= j*8 %>]
        mul     qword [q + <%= (i-j)*8 %>]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
        }

        if (i<n64) {
            // Lower half: compute m[i] = r0 * np, store it, and add m[i] * q[0].
%>
        mov     rax, <%= r0 %>
        mul     r11
        mov     [rsp + <%= i*8 %>], rax
        mul     qword [q]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
        } else {
            // Upper half: the low accumulator word is a finished result limb.
%>
        mov     [rdi + <%= (i-n64)*8 %> ], <%= r0 %>
        xor     <%= r0 %>,<%= r0 %>
<%
        }
    } // columns
%>
        test    <%= r1 %>, <%= r1 %>
        jnz     <%=fnName%>_mulM_sq
        ; Compare with q, most significant limb first
<%
    for (let limb=n64-1; limb>=0; limb--) {
%>
        mov     rax, [rdi + <%= limb*8 %>]
        cmp     rax, [q + <%= limb*8 %>]
        jc      <%=fnName%>_mulM_done        ; q is bigger so done.
        jnz     <%=fnName%>_mulM_sq          ; q is lower
<%
    }
%>
        ; If equal subtract q

<%=fnName%>_mulM_sq:
<%
    for (let limb=0; limb<n64; limb++) {
%>
        mov     rax, [q + <%= limb*8 %>]
        <%= limb==0 ? "sub" : "sbb" %>     [rdi + <%= limb*8 %>], rax
<%
    }
%>

<%=fnName%>_mulM_done:
        mov     rdx, rcx                ; put the saved value back in rdx
        add     rsp, <%= frameBytes %>                 ; release the ms
        ret

<%
} // Template
%>
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawMontgomeryMul
;;;;;;;;;;;;;;;;;;;;;;
; Multiply two elements in montgomery form
; Params:
;   rsi <= Pointer to the long data of element 1
;   rdx <= Pointer to the long data of element 2
;   rdi <= Pointer to the long data of result
; Modified registers:
;    r8, r9, r10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawMontgomeryMul", function(i, r0, r1, r2) {
    // Column i: every a[ia] * b[ib] with ia + ib == i (b is read through rcx).
    const first = Math.max(0, i-n64+1);
    const last = Math.min(i, n64-1);
    for (let ia=first; ia<=last; ia++) {
        const ib = i-ia;
%>
        mov     rax, [rsi + <%= 8*ia %>]
        mul     qword [rcx + <%= 8*ib %>]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
    }
})
%>
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawMontgomeryMul1
;;;;;;;;;;;;;;;;;;;;;;
; Montgomery multiplication of a long element by a single 64-bit value
; Params:
;   rsi <= Pointer to the long data of element 1
;   rdx <= second operand (a 64-bit value, not a pointer)
;   rdi <= Pointer to the long data of result
; Modified registers:
;    r8, r9, r10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawMontgomeryMul1", function(i, r0, r1, r2) {
    // Only the first n64 columns receive a product: a[i] * value (value lives in rcx).
    if (i >= n64) return;
%>
        mov     rax, [rsi + <%= 8*i %>]
        mul     rcx
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
})
%>
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawFromMontgomery
;;;;;;;;;;;;;;;;;;;;;;
; Runs the Montgomery reduction over the limbs at rdi, in place
; (the hook adds limb i of [rdi] to column i; no multiplication by a second operand).
; Params:
;   rdi <= Pointer to the long data that is read and overwritten
; Modified registers:
;    r8, r9, r10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;;;
<%
montgomeryTemplate("rawFromMontgomery", function(i, r0, r1, r2) {
    // Only the first n64 columns receive an input limb.
    if (i >= n64) return;
%>
        add     <%= r0 %>, [rdi + <%= 8*i %>]
        adc     <%= r1 %>, 0x0
        adc     <%= r2 %>, 0x0
<%
})
%>
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; toMontgomery
;;;;;;;;;;;;;;;;;;;;;;
; Convert a number to Montgomery, in place.
; An element whose Montgomery bit (62) is already set is left untouched;
; otherwise the value is Montgomery-multiplied by R2 and the header is written
; back with bits 62 and 63 set.
;   rdi <= Pointer element to convert
; Modified registers:
;    r8, r9, r10, r11, rax, rcx
;    (rdx is also overwritten)
;;;;;;;;;;;;;;;;;;;;
<%=name%>_toMontgomery:
        mov     rax, [rdi]
        bts     rax, 62                 ; CF = old Montgomery bit, then set it
        jc      toMontgomery_doNothing
        bts     rax, 63                 ; CF = old long bit, then set it
        jc      toMontgomeryLong

toMontgomeryShort:
        mov     [rdi], rax              ; write the updated header
        add     rdi, 8                  ; point at the limbs
        push    rsi
        lea     rsi, [R2]
        movsx   rdx, eax                ; the short value, sign-extended
        test    rdx, rdx
        js      negMontgomeryShort
posMontgomeryShort:
        call    rawMontgomeryMul1
        pop     rsi
        sub     rdi, 8
        ret

negMontgomeryShort:
        neg     rdx                     ; multiply the magnitude, then negate the result
        call    rawMontgomeryMul1
        mov     rsi, rdi
        call    rawNegL
        pop     rsi
        sub     rdi, 8
        ret


toMontgomeryLong:
        mov     [rdi], rax              ; write the updated header
        add     rdi, 8
        push    rsi
        mov     rdx, rdi                ; second operand = the element's own limbs
        lea     rsi, [R2]
        call    rawMontgomeryMul
        pop     rsi
        sub     rdi, 8

toMontgomery_doNothing:
        ret
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; toNormal
;;;;;;;;;;;;;;;;;;;;;;
; Convert a number from Montgomery, in place.
; Only an element with both the Montgomery bit (62) and the long bit (63) set
; is changed; anything else returns untouched.
;   rdi <= Pointer element to convert
; Modified registers:
;    r8, r9, r10, r11, rax, rcx
;;;;;;;;;;;;;;;;;;;;
<%=name%>_toNormal:
        mov     rax, [rdi]
        btc     rax, 62                 ; CF = old Montgomery bit, bit flipped in rax
        jnc     fromMontgomery_doNothing
        bt      rax, 63                 ; if short, it means it's converted
        jnc     fromMontgomery_doNothing

fromMontgomeryLong:
        mov     [rdi], rax              ; header with the Montgomery bit cleared
        add     rdi, 8                  ; point at the limbs
        call    rawFromMontgomery
        sub     rdi, 8

fromMontgomery_doNothing:
        ret
|||
|
|||
|
@ -0,0 +1,211 @@ |
|||
<% function mulS1S2() { %>
        ; Short * short. r8d / r9d hold the two 32-bit signed operands,
        ; rdi points at the result element.
        mov     eax, r8d                    ; 32-bit write already zeroes the upper half of rax
        imul    r9d                         ; edx:eax = eax * r9d (signed); OF set when it does not fit in 32 bits
        jo      mul_manageOverflow

        mov     [rdi], rax                  ; fits: upper half of rax is zero, store as a short and finish
        ret                                 ; without this the code fell through and redid the product as a long

mul_manageOverflow:                         ; redo the product in 64 bits and store it as a long
        push    rsi
        movsx   rax, r8d
        movsx   rcx, r9d
        imul    rcx                         ; rdx:rax = rax * rcx; two 32-bit values always fit in rax
        mov     rsi, rax
        call    rawCopyS2L                  ; receives the 64-bit value in rsi and the destination in rdi
        pop     rsi
                                            ; this path returns through the caller's ret after the expansion
<% } %>
|||
|
|||
<% function mulL1S2(t) { %>
<% const positiveLabel = global.tmpLabel() %>
<% const doneLabel = global.tmpLabel() %>
        ; Long (rsi) * short (r9d) -> long data at rdi. rsi and rdi are restored.
        push    rsi
        add     rsi, 8                      ; rsi -> data of element 1
        movsx   rdx, r9d                    ; rdx = sign-extended short operand
        add     rdi, 8                      ; rdi -> data of the result
        test    rdx, rdx
        jns     <%= positiveLabel %>

        neg     rdx                         ; multiply by |short| ...
        call    rawMontgomeryMul1
        mov     rsi, rdi                    ; ... then negate the product in place
        call    rawNegL
        sub     rdi, 8
        pop     rsi
        jmp     <%= doneLabel %>

<%= positiveLabel %>:
        call    rawMontgomeryMul1
        sub     rdi, 8
        pop     rsi

<%= doneLabel %>:

<% } %>
|||
|
|||
<% function mulS1L2() { %>
<% const positiveLabel = global.tmpLabel() %>
<% const doneLabel = global.tmpLabel() %>
        ; Short (r8d) * long (rdx) -> long data at rdi. rsi and rdi are restored.
        push    rsi
        lea     rsi, [rdx + 8]              ; rsi -> data of element 2
        movsx   rdx, r8d                    ; rdx = sign-extended short operand
        add     rdi, 8                      ; rdi -> data of the result
        test    rdx, rdx
        jns     <%= positiveLabel %>

        neg     rdx                         ; multiply by |short| ...
        call    rawMontgomeryMul1
        mov     rsi, rdi                    ; ... then negate the product in place
        call    rawNegL
        sub     rdi, 8
        pop     rsi
        jmp     <%= doneLabel %>

<%= positiveLabel %>:
        call    rawMontgomeryMul1
        sub     rdi, 8
        pop     rsi

<%= doneLabel %>:

<% } %>
|||
|
|||
<% function mulL1L2() { %>
        ; Long * long: advance all three pointers past their type words,
        ; multiply, then restore rdi and rsi (rdx is left advanced).
        add     rdi, 8
        add     rsi, 8
        add     rdx, 8
        call    rawMontgomeryMul
        sub     rdi, 8
        sub     rsi, 8
<% } %>
|||
|
|||
<% function mulR3() { %>
        ; Multiplies the result element in place by the constant R3:
        ; data(rdi) = rawMontgomeryMul(data(rdi), R3). rsi and rdi are preserved.
        push    rsi
        add     rdi, 8                      ; rdi -> data of the result
        mov     rsi, rdi                    ; first operand = the result itself
        lea     rdx, [R3]                   ; second operand = constant R3
        call    rawMontgomeryMul
        sub     rdi, 8
        pop     rsi
<% } %>
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; mul |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Multiplies two elements of any kind |
|||
; Params: |
|||
; rsi <= Pointer to element 1 |
|||
; rdx <= Pointer to element 2 |
|||
; rdi <= Pointer to result |
|||
; [rdi] = [rsi] * [rdx] |
|||
; Modified Registers: |
|||
; r8, r9, r10, r11, rax, rcx |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Dispatcher: reads both type words (r8 = element 1, r9 = element 2) and
; branches on bit 63 (long) and bit 62 (Montgomery) of each one.
; Label scheme: l/s = long/short, n/m = normal/Montgomery, 1/2 = operand.
<%=name%>_mul:
        mov     r8, [rsi]
        mov     r9, [rdx]
        bt      r8, 63                      ; is the first operand long?
        jc      mul_l1
        bt      r9, 63                      ; is the second operand long?
        jc      mul_s1l2

mul_s1s2:                                   ; both operands are short
<%= mulS1S2() %>
        ret

mul_l1:
        bt      r9, 63                      ; is the second operand long?
        jc      mul_l1l2

;;;;;;;; first long, second short
mul_l1s2:
        bt      r8, 62                      ; is the first operand Montgomery?
        jc      mul_l1ms2
mul_l1ns2:
        bt      r9, 62                      ; is the second operand Montgomery?
        jc      mul_l1ns2m
mul_l1ns2n:
<%= global.setTypeDest("0xC0"); %>
<%= mulL1S2() %>
<%= mulR3() %>
        ret

mul_l1ns2m:
<%= global.setTypeDest("0x80"); %>
<%= mulL1L2() %>
        ret

mul_l1ms2:
        bt      r9, 62                      ; is the second operand Montgomery?
        jc      mul_l1ms2m
mul_l1ms2n:
<%= global.setTypeDest("0x80"); %>
<%= mulL1S2() %>
        ret

mul_l1ms2m:
<%= global.setTypeDest("0xC0"); %>
<%= mulL1L2() %>
        ret

;;;;;;;; first short, second long
mul_s1l2:
        bt      r8, 62                      ; is the first operand Montgomery?
        jc      mul_s1ml2
mul_s1nl2:
        bt      r9, 62                      ; is the second operand Montgomery?
        jc      mul_s1nl2m
mul_s1nl2n:
<%= global.setTypeDest("0xC0"); %>
<%= mulS1L2() %>
<%= mulR3() %>
        ret

mul_s1nl2m:
<%= global.setTypeDest("0x80"); %>
<%= mulS1L2(); %>
        ret

mul_s1ml2:
        bt      r9, 62                      ; is the second operand Montgomery?
        jc      mul_s1ml2m
mul_s1ml2n:
<%= global.setTypeDest("0x80"); %>
<%= mulL1L2() %>
        ret

mul_s1ml2m:
<%= global.setTypeDest("0xC0"); %>
<%= mulL1L2() %>
        ret

;;;; both long
mul_l1l2:
        bt      r8, 62                      ; is the first operand Montgomery?
        jc      mul_l1ml2
mul_l1nl2:
        bt      r9, 62                      ; is the second operand Montgomery?
        jc      mul_l1nl2m
mul_l1nl2n:
<%= global.setTypeDest("0xC0"); %>
<%= mulL1L2() %>
<%= mulR3() %>
        ret

mul_l1nl2m:
<%= global.setTypeDest("0x80"); %>
<%= mulL1L2() %>
        ret

mul_l1ml2:
        bt      r9, 62                      ; is the second operand Montgomery?
        jc      mul_l1ml2m
mul_l1ml2n:
<%= global.setTypeDest("0x80"); %>
<%= mulL1L2() %>
        ret

mul_l1ml2m:
<%= global.setTypeDest("0xC0"); %>
<%= mulL1L2() %>
        ret
|||
|
|||
|
@ -0,0 +1,78 @@ |
|||
<% function negS() { %>
        ; Negate a short. rax holds the operand's type word, rdi the result pointer.
        neg     eax                         ; OF is set only for 0x80000000, whose negation needs 33 bits
        jo      neg_manageOverflow

        mov     [rdi], rax                  ; the 32-bit write zeroed the upper half: store as short
        ret

neg_manageOverflow:                         ; redo the negation in 64 bits and store a long
        push    rsi
        movsx   rsi, eax                    ; eax is still 0x80000000 here
        neg     rsi
        call    rawCopyS2L
        pop     rsi
        ret
<% } %>
|||
|
|||
<% function negL() { %>
        ; Negate a long: step both pointers past the type word, negate the
        ; data with rawNegL, then restore rdi and rsi.
        add     rsi, 8
        add     rdi, 8
        call    rawNegL
        sub     rdi, 8
        sub     rsi, 8
        ret
<% } %>
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; neg |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Negates an element of any kind |
|||
; Params: |
|||
; rsi <= Pointer to element to be negated |
|||
; rdi <= Pointer to result |
|||
; [rdi] = -[rsi] |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; In:  rsi = pointer to the element to negate, rdi = pointer to the result
<%=name%>_neg:
        mov     rax, [rsi]                  ; rax = type word of the operand
        bt      rax, 63                     ; bit 63 set -> long operand
        jc      neg_l

neg_s:                                      ; short operand
<%= negS() %>

neg_l:                                      ; long operand
        mov     [rdi], rax                  ; the result keeps the operand's type word
<%= negL() %>
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; rawNegL |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Negates a value |
|||
; Params: |
|||
; rdi <= Pointer to the long data of result |
|||
; rsi <= Pointer to the long data of element 1 |
|||
; |
|||
; [rdi] = - [rsi] |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; In:  rsi = pointer to the long data of the operand
;      rdi = pointer to the long data of the result
; Out: [rdi] = 0 when [rsi] is zero, otherwise q - [rsi]
rawNegL:
        xor     eax, eax                    ; rax = 0, used for both the compare and the zero fill

        ; Any non-zero limb means a real negation is needed.
<% for (let limb=0; limb<n64; limb++) { %>
        cmp     [rsi + <%=limb*8%>], rax
        jnz     doNegate
<% } %>

        ; Operand is zero: the result is zero as well.
<% for (let limb=0; limb<n64; limb++) { %>
        mov     [rdi + <%=limb*8%>], rax
<% } %>
        ret

doNegate:                                   ; [rdi] = q - [rsi], limb by limb with borrow
<% for (let limb=0; limb<n64; limb++) { %>
        mov     rax, [q + <%=limb*8%>]
        <%= limb==0 ? "sub" : "sbb" %>     rax, [rsi + <%=limb*8%>]
        mov     [rdi + <%=limb*8%>], rax
<% } %>
        ret
@ -0,0 +1,33 @@ |
|||
const tester = require("../c/buildasm/buildzqfieldtester2.js"); |
|||
|
|||
const bigInt = require("big-integer"); |
|||
|
|||
const __P__ = new bigInt("21888242871839275222246405745257275088548364400416034343698204186575808495617"); |
|||
|
|||
|
|||
describe("basic cases", function () {
    this.timeout(100000);

    it("should do basic tests", async () => {
        // A value close to half the prime, so that adding or multiplying it
        // with itself goes through the long code paths.
        const big = "10604728079509999371218483608188593244163417117449316147628604036713980815027";

        // Every operand pair is run through each operation, in this order.
        const operandPairs = [
            [0, 0],
            [0, 1],
            [1, 0],
            [1, 1],
            [2, 1],
            [2, 10],
            [-1, -1],
            [-20, -10],
            [big, big],
        ];

        const cases = [];
        for (const op of ["add", "mul"]) {
            for (const [a, b] of operandPairs) {
                cases.push([op, a, b]);
            }
        }

        await tester(__P__, cases);
    });
});
@ -0,0 +1,209 @@ |
|||
const bigInt=require("big-integer"); |
|||
|
|||
|
|||
|
|||
|
|||
|
|||
/**
 * Generates a C header and a C source file (GCC inline assembly, x86-64)
 * implementing add and mul for the field of integers modulo q.
 *
 * Generated lines accumulate in `this.h` (header) and `this.c` (source).
 */
class ZqBuilder {

    /**
     * @param {*} q Modulus; anything accepted by bigInt().
     * @param {string} name Prefix for every generated C identifier.
     */
    constructor(q, name) {
        this.q = bigInt(q);
        this.h = [];
        this.c = [];
        this.name = name;
    }

    /**
     * Runs all generation steps.
     * @returns {string[]} [headerSource, cSource]
     */
    build() {
        this._buildHeaders();
        this._buildAdd();
        this._buildMul();

        this.c.push(""); this.h.push("");
        return [this.h.join("\n"), this.c.join("\n")];
    }

    /** Emits the typedefs, the limb count and the definition of the modulus constant. */
    _buildHeaders() {
        // Number of 64-bit limbs needed to hold q.
        this.n64 = Math.floor((this.q.bitLength() - 1) / 64)+1;
        this.h.push("typedef unsigned long long u64;");
        this.h.push(`typedef u64 ${this.name}Element[${this.n64}];`);
        this.h.push(`typedef u64 *P${this.name}Element;`);
        this.h.push(`extern ${this.name}Element ${this.name}_q;`);
        this.h.push(`#define ${this.name}_N64 ${this.n64}`);
        this.c.push(`#include "${this.name.toLowerCase()}.h"`);
        this._defineConstant(`${this.name}_q`, this.q);
        this.c.push(""); this.h.push("");
    }

    /**
     * Emits `<name>Element n={...};` with v split into n64 little-endian
     * 64-bit limbs, each written as a 16-digit hexadecimal literal.
     */
    _defineConstant(n, v) {
        const mask = bigInt("FFFFFFFFFFFFFFFF", 16);
        const limbs = [];
        for (let i=0; i<this.n64; i++) {
            let shex = v.shiftRight(i*64).and(mask).toString(16);
            // Pad on the string LENGTH. The previous `shex < 16` coerced the hex
            // string to a number: it never padded limbs containing a-f and looped
            // forever on limbs such as "0" or "10".
            while (shex.length < 16) shex = "0" + shex;
            limbs.push("0x" + shex + "ULL");
        }
        this.c.push(`${this.name}Element ${n}={` + limbs.join(",") + "};");
    }

    /**
     * Emits the final "if (r >= q) r -= q" step shared by add and mul.
     * Operand %0 is the result, %3 the modulus.
     *
     * @param {string} subLabel asm label of the subtraction code
     * @param {string} doneLabel asm label placed after the subtraction
     * @param {boolean} topLimbInRax true when %rax already holds the top result limb
     */
    _pushReduce(subLabel, doneLabel, topLimbInRax) {
        // Compare from the most significant limb down. Limbs are unsigned, so the
        // unsigned conditions ja/jb are required (jg/jl mis-order limbs >= 2^63).
        for (let i=0; i<this.n64; i++) {
            const off = (this.n64 - i-1)*8;
            if ((i>0) || !topLimbInRax) {
                this.c.push(` "movq ${off}(%0), %%rax;"`);
            }
            this.c.push(` "cmp ${off}(%3), %%rax;"`);
            this.c.push(` "ja ${subLabel};"`);
            this.c.push(` "jb ${doneLabel};"`);
        }
        // All limbs equal: fall through and subtract q as well.
        this.c.push(` "${subLabel}:"`);
        for (let i=0; i<this.n64; i++) {
            this.c.push(` "movq ${i*8}(%3), %%rax;"`);
            this.c.push(` "${i==0 ? "subq" : "sbbq"} %%rax, ${i*8}(%0);"`);
        }
        this.c.push(` "${doneLabel}:"`);
    }

    /** Emits `<name>_add(r, a, b)`: r = a + b, reduced once by q. */
    _buildAdd() {
        this.h.push(`void ${this.name}_add(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b);`);
        this.c.push(`void ${this.name}_add(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b) {`);
        this.c.push(" __asm__ __volatile__ (");
        // Limb-wise addition with carry propagation.
        for (let i=0; i<this.n64; i++) {
            this.c.push(` "movq ${i*8}(%2), %%rax;"`);
            this.c.push(` "${i==0 ? "addq" : "adcq"} ${i*8}(%1), %%rax;"`);
            this.c.push(` "movq %%rax, ${i*8}(%0);"`);
        }
        // A carry out of the top limb means the sum exceeded q for sure.
        this.c.push(` "jc SQ;"`);
        // %rax still holds the top limb of the sum at this point.
        this._pushReduce("SQ", "DONE", true);
        this.c.push(` :: "r" (r), "r" (a), "r" (b), "r" (${this.name}_q) : "%rax", "memory");`);
        this.c.push("}\n");
    }

    /**
     * Emits `<name>_mul(r, a, b)`: a column-wise multiplication interleaved
     * with a Montgomery-style reduction. The n64 reduction multipliers are
     * kept in a scratch area reserved on the stack.
     */
    _buildMul() {

        // Three registers form a rotating accumulator: r0 is the current
        // column, r1 and r2 receive its carries.
        const regs = ["%%r8", "%%r9", "%%r10"];
        let r0, r1, r2;
        const setR = (step) => {
            r0 = regs[step % 3];
            r1 = regs[(step + 1) % 3];
            r2 = regs[(step + 2) % 3];
        };
        // Adds the 128-bit product in rdx:rax into the accumulator.
        const pushAccumulate = () => {
            this.c.push(` "addq %%rax, ${r0};"`);
            this.c.push(` "adcq %%rdx, ${r1};"`);
            this.c.push(` "adcq $0x0, ${r2};"`);
        };

        const base = bigInt.one.shiftLeft(64);
        const np64 = base.minus(this.q.modInv(base));   // -q^-1 mod 2^64

        this.h.push(`void ${this.name}_mul(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b);`);
        this.c.push(`void ${this.name}_mul(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b) {`);
        this.c.push(" __asm__ __volatile__ (");

        this.c.push(` "subq $${this.n64*8}, %%rsp;"`);
        this.c.push(` "movq $0x${np64.toString(16)}, %%r11;"`);
        this.c.push(` "movq $0x0, %%r8;"`);
        this.c.push(` "movq $0x0, %%r9;"`);
        this.c.push(` "movq $0x0, %%r10;"`);

        for (let i=0; i<this.n64*2; i++) {
            setR(i);

            // Partial products a[o1]*b[o2] with o1+o2 == i.
            for (let o1=Math.max(0, i-this.n64+1); (o1<=i)&&(o1<this.n64); o1++) {
                const o2= i-o1;
                this.c.push(` "movq ${o1*8}(%1), %%rax;"`);
                this.c.push(` "mulq ${o2*8}(%2);"`);
                pushAccumulate();
            }

            // Previously computed multipliers m[j] times q[i-j].
            for (let j=i-1; j>=0; j--) {
                if (((i-j)<this.n64)&&(j<this.n64)) {
                    this.c.push(` "movq ${j*8}(%%rsp), %%rax;"`);
                    this.c.push(` "mulq ${(i-j)*8}(%3);"`);
                    pushAccumulate();
                }
            }

            if (i<this.n64) {
                // m[i] = column * np64 mod 2^64; adding m[i]*q[0] zeroes the column.
                this.c.push(` "movq ${r0}, %%rax;"`);
                this.c.push(` "mulq %%r11;"`);
                this.c.push(` "movq %%rax, ${i*8}(%%rsp);"`);
                this.c.push(` "mulq (%3);"`);
                pushAccumulate();
            } else {
                // Upper columns are the result limbs.
                this.c.push(` "movq ${r0}, ${(i-this.n64)*8}(%0);"`);
                this.c.push(` "movq $0, ${r0};"`);
            }
        }

        // A leftover carry means the result exceeds q for sure.
        this.c.push(` "cmp $0, ${r1};"`);
        this.c.push(` "jne SQ2;"`);
        this._pushReduce("SQ2", "DONE2", false);
        this.c.push(` "addq $${this.n64*8}, %%rsp;"`);

        this.c.push(` :: "r" (r), "r" (a), "r" (b), "r" (${this.name}_q) : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "memory");`);
        this.c.push("}\n");
    }

    /**
     * NOTE(review): unfinished and not called from build(). The emitted `mov`
     * lines have an immediate as destination, which is not valid AT&T syntax;
     * left as found until the routine is actually implemented.
     */
    _buildIDiv() {
        this.h.push(`void ${this.name}_idiv(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b);`);
        this.c.push(`void ${this.name}_idiv(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b) {`);
        this.c.push(" __asm__ __volatile__ (");
        this.c.push(` "pxor %%xmm0, %%xmm0;"`); // Comparison Register

        if (this.n64 == 1) {
            this.c.push(` "mov %%rax, $${this.n64 - 8};"`);

        } else {
            this.c.push(` "mov %%rax, $${this.n64 -16};"`);
        }

        this.c.push(` :: "r" (r), "r" (a), "r" (b), "r" (${this.name}_q) : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "memory");`);
        this.c.push("}\n");
    }
}
|||
|
|||
var runningAsScript = !module.parent; |
|||
|
|||
if (runningAsScript) { |
|||
const fs = require("fs"); |
|||
var argv = require("yargs") |
|||
.usage("Usage: $0 -q [primeNum] -n [name] -oc [out .c file] -oh [out .h file]") |
|||
.demandOption(["q","n"]) |
|||
.alias("q", "prime") |
|||
.alias("n", "name") |
|||
.argv; |
|||
|
|||
const q = bigInt(argv.q); |
|||
|
|||
const cFileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + ".c"; |
|||
const hFileName = (argv.oh) ? argv.oh : argv.name.toLowerCase() + ".h"; |
|||
|
|||
const builder = new ZqBuilder(q, argv.name); |
|||
|
|||
const res = builder.build(); |
|||
|
|||
fs.writeFileSync(hFileName, res[0], "utf8"); |
|||
fs.writeFileSync(cFileName, res[1], "utf8"); |
|||
} else { |
|||
module.exports = function(q, name) { |
|||
const builder = new ZqBuilder(q, name); |
|||
return builder.build(); |
|||
}; |
|||
} |
|||
|
@ -0,0 +1,68 @@ |
|||
const chai = require("chai"); |
|||
const assert = chai.assert; |
|||
|
|||
const fs = require("fs"); |
|||
var tmp = require("tmp-promise"); |
|||
const path = require("path"); |
|||
const util = require("util"); |
|||
const exec = util.promisify(require("child_process").exec); |
|||
|
|||
const bigInt = require("big-integer"); |
|||
const BuildZqField = require("./buildzqfield"); |
|||
const ZqField = require("fflib").ZqField; |
|||
|
|||
module.exports = testField; |
|||
|
|||
/**
 * Returns (a * 2^(64*n64)) mod prime as a decimal string, where n64 is the
 * number of 64-bit limbs needed to hold prime.
 */
function toMontgomeryStr(a, prime) {
    const limbCount = Math.floor((prime.bitLength() - 1) / 64) + 1;
    const shifted = a.shiftLeft(limbCount * 64);
    return shifted.mod(prime).toString(10);
}
|||
|
|||
/**
 * Inverse of toMontgomeryStr: parses `a` and returns
 * (a * R^-1) mod prime as a bigInt, with R = 2^(64*n64) mod prime.
 */
function fromMontgomeryStr(a, prime) {
    const limbCount = Math.floor((prime.bitLength() - 1) / 64) + 1;
    const rInverse = bigInt.one
        .shiftLeft(limbCount * 64)
        .mod(prime)
        .modInv(prime);
    return bigInt(a).times(rInverse).mod(prime);
}
|||
|
|||
|
|||
/**
 * Builds the generated field code for `prime` in a temporary directory,
 * compiles it with tester.c and checks every case against fflib's ZqField.
 *
 * @param prime bigInt modulus
 * @param test array of [operationName, a, b] cases
 */
async function testField(prime, test) {
    tmp.setGracefulCleanup();

    const F = new ZqField(prime);

    const dir = await tmp.dir({prefix: "circom_", unsafeCleanup: true });
    const headerPath = path.join(dir.path, "fr.h");
    const sourcePath = path.join(dir.path, "fr.c");
    const binaryPath = path.join(dir.path, "tester");

    const [hSource, cSource] = BuildZqField(prime, "Fr");

    await fs.promises.writeFile(headerPath, hSource, "utf8");
    await fs.promises.writeFile(sourcePath, cSource, "utf8");

    const compileCommand = [
        "g++",
        path.join(__dirname, "tester.c"),
        sourcePath,
        `-o ${binaryPath}`,
        "-lgmp",
    ].join(" ");
    await exec(compileCommand);

    // Maps any integer-like value to its representative in [0, prime).
    const normalize = (value) => {
        const reduced = bigInt(value).mod(prime);
        return reduced.isNegative() ? prime.add(reduced) : reduced;
    };

    for (const [op, rawA, rawB] of test) {
        const a = normalize(rawA);
        const b = normalize(rawB);
        const expected = F[op](a, b);

        // The binary takes and prints values in Montgomery form.
        const res = await exec(
            `${binaryPath} ${op} ${toMontgomeryStr(a, prime)} ${toMontgomeryStr(b, prime)}`
        );
        const actual = fromMontgomeryStr(res.stdout, prime);

        assert.equal(expected.toString(), actual.toString());
    }

}
|||
|
@ -0,0 +1,302 @@ |
|||
|
|||
|
|||
global <%=name%>_add |
|||
global <%=name%>_mul |
|||
global <%=name%>_q |
|||
DEFAULT REL |
|||
|
|||
section .text |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; add |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; In:  rsi = pointer to operand 1, rdx = pointer to operand 2,
;      rdi = pointer to the result
; Out: [rdi] = [rsi] + [rdx], with q subtracted once when the sum is >= q
; Clobbers rax and flags; neither operand is modified.
<%=name%>_add:
        ; Add limb by limb with carry
<% for (let i=0; i<n64; i++) { %>
        mov     rax, [rsi + <%=i*8%>]
        <%= i==0 ? "add" : "adc" %>     rax, [rdx + <%=i*8%>]
        mov     [rdi + <%=i*8%>], rax
<% } %>
        jc      add_sq                      ; carry out of the top limb: subtract q

        ; Compare with q from the most significant limb down. Limbs are
        ; unsigned, so ja/jb are used (jg/jl mis-order limbs >= 2^63).
<% for (let i=0; i<n64; i++) { %>
<% if (i>0) { %>
        mov     rax, [rdi + <%= (n64-i-1)*8 %>]
<% } %>
        cmp     rax, [q + <%= (n64-i-1)*8 %>]   ; on the first pass rax still holds the top limb
        ja      add_sq
        jb      add_done
<% } %>
        ; All limbs equal: subtract q as well

add_sq:
<% for (let i=0; i<n64; i++) { %>
        mov     rax, [q + <%=i*8%>]
        <%= i==0 ? "sub" : "sbb" %>     [rdi + <%=i*8%>], rax
<% } %>
        ; The former `mov [rdx + ...], rax` here overwrote operand 2 with q.

add_done:
        ret
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; mul Montgomery |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; In:  rsi = pointer to operand 1 limbs, rdx = pointer to operand 2 limbs,
;      rdi = pointer to the result limbs
; Column-wise multiplication interleaved with a Montgomery-style reduction;
; the n64 reduction multipliers live in a scratch area on the stack.
; Clobbers rax, rcx, rdx, r8, r9, r10, r11 and flags.
mulM:
<%
let r0, r1, r2;
// r8/r9/r10 form a rotating accumulator: r0 is the current column,
// r1 and r2 receive its carries.
function setR(step) {
    if ((step % 3) == 0) {
        r0 = "r8";
        r1 = "r9";
        r2 = "r10";
    } else if ((step % 3) == 1) {
        r0 = "r9";
        r1 = "r10";
        r2 = "r8";
    } else {
        r0 = "r10";
        r1 = "r8";
        r2 = "r9";
    }
}

const base = bigInt.one.shiftLeft(64);
const np64 = base.minus(q.modInv(base));
%>
        sub     rsp, <%= n64*8 %>           ; reserve space for the multipliers
        mov     rcx, rdx                    ; mul overwrites rdx, so keep operand 2 in rcx
        mov     r11, 0x<%= np64.toString(16) %> ; np = -q^-1 mod 2^64
        xor     r8,r8
        xor     r9,r9
        xor     r10,r10
<%
// Main loop: one pass per column
for (let i=0; i<n64*2; i++) {
    setR(i);
    // Partial products of the operands for this column
    for (let o1=Math.max(0, i-n64+1); (o1<=i)&&(o1<n64); o1++) {
        const o2= i-o1;
%>
        mov     rax, [rsi + <%= 8*o1 %>]
        mul     qword [rcx + <%= 8*o2 %>]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
    }
    // Stored multipliers times the matching limb of q
    for (let j=i-1; j>=0; j--) {
        if (((i-j)<n64)&&(j<n64)) {
%>
        mov     rax, [rsp + <%= j*8 %>]
        mul     qword [q + <%= (i-j)*8 %>]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
        }
    }
    if (i<n64) {
%>
        mov     rax, <%= r0 %>
        mul     r11                         ; rax = multiplier for this column
        mov     [rsp + <%= i*8 %>], rax
        mul     qword [q]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
    } else {
%>
        mov     [rdi + <%= (i-n64)*8 %> ], <%= r0 %>
        xor     <%= r0 %>,<%= r0 %>
<%
    }
}
%>
        cmp     <%= r1 %>, 0x0              ; leftover carry: subtract q
        jne     mulM_sq
        ; Compare with q, most significant limb first (unsigned: ja/jb)
<%
for (let i=0; i<n64; i++) {
%>
        mov     rax, [rdi + <%= (n64-i-1)*8 %>]
        cmp     rax, [q + <%= (n64-i-1)*8 %>]
        ja      mulM_sq
        jb      mulM_done
<%
}
%>
        ; All limbs equal: subtract q as well

mulM_sq:
<%
for (let i=0; i<n64; i++) {
%>
        mov     rax, [q + <%= i*8 %>]
        <%= i==0 ? "sub" : "sbb" %>     [rdi + <%= i*8 %>], rax
<%
}
%>
        ; The former `mov [rdx + ...], rax` stored through rdx, which holds
        ; the high half of the last mul at this point, not a valid pointer.

mulM_done:
        add     rsp, <%= n64*8 %>           ; release the scratch area
        ret
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; mul MontgomeryShort |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; NOTE(review): mulSM has no body, so `call mulSM` lands on the entry of
; <%=name%>_mul below. The long*short routine still has to be written.
mulSM:

;;;;;;;;;;;;;;;;;;;;;;
; mul
;;;;;;;;;;;;;;;;;;;;;;
; In:  rsi = pointer to element 1, rdx = pointer to element 2,
;      rdi = pointer to the result
; The top byte of each element's first qword is its type: bit 63 = long,
; bit 62 = Montgomery.
<%=name%>_mul:
        mov     rax, [rsi]
        bt      rax, 63
        jc      l1
        mov     rcx, [rdx]
        bt      rcx, 63
        jc      s1l2

s1s2:                                       ; short first and second
        imul    ecx                         ; shorts are signed: edx:eax = eax * ecx
        jo      rs2l                        ; does not fit in 32 bits: store the result as long

        ; The 32-bit write zeroed the upper half of rax: store as short
        mov     [rdi], rax
        ret

rs2l:                                       ; product is in edx:eax, convert it to long
        shl     rdx, 32
        mov     eax, eax                    ; zero-extend the low half
        or      rdx, rax                    ; rdx = 64-bit signed product
                                            ; (`mov edx, eax` wiped the shifted high half)

        xor     rax, rax                    ; type word: long, normal
        bts     rax, 63
        mov     [rdi], rax

        cmp     rdx, 0
        jl      rs2ln

        ; Positive product: first limb is the value, the rest are zero
        mov     [rdi + 8], rdx
        xor     rax, rax
<% for (let i=1; i<n64; i++) { %>
        mov     [rdi + <%= (i+1)*8 %>], rax
<% } %>
        ret

        ; Negative product: store q + product
rs2ln:
        add     rdx, [q]
        mov     [rdi + 8], rdx

        mov     rdx, -1                     ; sign extension of the product; mov keeps CF
<% for (let i=1; i<n64; i++) { %>
        mov     rax, rdx
        adc     rax, [q + <%= i*8 %> ]
        mov     [rdi + <%= (i+1)*8 %>], rax
<% } %>
        ret

l1:
        mov     rcx, [rdx]
        bt      rcx, 63
        jc      ll

l1s2:
        xor     rdx, rdx
        mov     edx, ecx                    ; NOTE(review): zero-extends the short; confirm once mulSM exists
        bt      rax, 62
        jc      lsM
        jmp     lsN

s1l2:
        mov     rsi, rdx
        xor     rdx, rdx
        mov     edx, eax
        bt      rcx, 62
        jc      lsM
        jmp     lsN

lsN:
        mov     byte [rdi + 7], 0xC0        ; type lives in the top byte (bits 63/62): long montgomery
        add     rsi, 8
        add     rdi, 8
        call    mulSM
        mov     rsi, rdi                    ; multiply the partial result by R3 in place
        lea     rdx, [R3]
        call    mulM
        ret

lsM:
        mov     byte [rdi + 7], 0x80        ; long normal
        add     rsi, 8
        add     rdi, 8
        call    mulSM
        ret

ll:
        bt      rax, 62
        jc      lml
        bt      rcx, 62
        jc      lnlm

lnln:
        mov     byte [rdi + 7], 0xC0        ; long montgomery
        add     rsi, 8
        add     rdi, 8
        add     rdx, 8
        call    mulM
        mov     rsi, rdi                    ; multiply the partial result by R3 in place
        lea     rdx, [R3]
        call    mulM
        ret

lml:
        bt      rcx, 62
        jc      lmlm

lnlm:
        mov     byte [rdi + 7], 0x80        ; long normal
        add     rsi, 8
        add     rdi, 8
        add     rdx, 8
        call    mulM
        ret

lmlm:
        mov     byte [rdi + 7], 0xC0        ; long montgomery
        add     rsi, 8
        add     rdi, 8
        add     rdx, 8
        call    mulM
        ret
|||
|
|||
|
|||
section .data |
|||
<%=name%>_q: |
|||
dd 0 |
|||
dd 0x80000000 |
|||
q dq <%= constantElement(q) %> |
|||
R3 dq <%= constantElement(bigInt.one.shiftLeft(n64*64*3).mod(q)) %> |
|||
|
|||
|
@ -0,0 +1,251 @@ |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; mul Montgomery |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; In:  rsi = pointer to operand 1 limbs, rdx = pointer to operand 2 limbs,
;      rdi = pointer to the result limbs
; Column-wise multiplication interleaved with a Montgomery-style reduction;
; the n64 reduction multipliers live in a scratch area on the stack.
; Clobbers rax, rcx, rdx, r8, r9, r10, r11 and flags.
mulM:
<%
let r0, r1, r2;
// r8/r9/r10 form a rotating accumulator: r0 is the current column,
// r1 and r2 receive its carries.
function setR(step) {
    if ((step % 3) == 0) {
        r0 = "r8";
        r1 = "r9";
        r2 = "r10";
    } else if ((step % 3) == 1) {
        r0 = "r9";
        r1 = "r10";
        r2 = "r8";
    } else {
        r0 = "r10";
        r1 = "r8";
        r2 = "r9";
    }
}

const base = bigInt.one.shiftLeft(64);
const np64 = base.minus(q.modInv(base));
%>
        sub     rsp, <%= n64*8 %>           ; reserve space for the multipliers
        mov     rcx, rdx                    ; mul overwrites rdx, so keep operand 2 in rcx
        mov     r11, 0x<%= np64.toString(16) %> ; np = -q^-1 mod 2^64
        xor     r8,r8
        xor     r9,r9
        xor     r10,r10
<%
// Main loop: one pass per column
for (let i=0; i<n64*2; i++) {
    setR(i);
    // Partial products of the operands for this column
    for (let o1=Math.max(0, i-n64+1); (o1<=i)&&(o1<n64); o1++) {
        const o2= i-o1;
%>
        mov     rax, [rsi + <%= 8*o1 %>]
        mul     qword [rcx + <%= 8*o2 %>]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
    }
    // Stored multipliers times the matching limb of q
    for (let j=i-1; j>=0; j--) {
        if (((i-j)<n64)&&(j<n64)) {
%>
        mov     rax, [rsp + <%= j*8 %>]
        mul     qword [q + <%= (i-j)*8 %>]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
        }
    }
    if (i<n64) {
%>
        mov     rax, <%= r0 %>
        mul     r11                         ; rax = multiplier for this column
        mov     [rsp + <%= i*8 %>], rax
        mul     qword [q]
        add     <%= r0 %>, rax
        adc     <%= r1 %>, rdx
        adc     <%= r2 %>, 0x0
<%
    } else {
%>
        mov     [rdi + <%= (i-n64)*8 %> ], <%= r0 %>
        xor     <%= r0 %>,<%= r0 %>
<%
    }
}
%>
        cmp     <%= r1 %>, 0x0              ; leftover carry: subtract q
        jne     mulM_sq
        ; Compare with q, most significant limb first. Limbs are unsigned,
        ; so ja/jb are used (jg/jl mis-order limbs >= 2^63).
<%
for (let i=0; i<n64; i++) {
%>
        mov     rax, [rdi + <%= (n64-i-1)*8 %>]
        cmp     rax, [q + <%= (n64-i-1)*8 %>]
        ja      mulM_sq
        jb      mulM_done
<%
}
%>
        ; All limbs equal: subtract q as well

mulM_sq:
<%
for (let i=0; i<n64; i++) {
%>
        mov     rax, [q + <%= i*8 %>]
        <%= i==0 ? "sub" : "sbb" %>     [rdi + <%= i*8 %>], rax
<%
}
%>

mulM_done:
        add     rsp, <%= n64*8 %>           ; release the scratch area
        ret
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; mul MontgomeryShort |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; NOTE(review): mulSM has no body, so `call mulSM` lands on the entry of
; <%=name%>_mul below. The long*short routine still has to be written.
mulSM:

;;;;;;;;;;;;;;;;;;;;;;
; mul
;;;;;;;;;;;;;;;;;;;;;;
; In:  rsi = pointer to element 1, rdx = pointer to element 2,
;      rdi = pointer to the result
; The top byte of each element's first qword is its type: bit 63 = long,
; bit 62 = Montgomery.
<%=name%>_mul:
        mov     rax, [rsi]
        bt      rax, 63
        jc      l1
        mov     rcx, [rdx]
        bt      rcx, 63
        jc      s1l2

s1s2:                                       ; short first and second
        imul    ecx                         ; shorts are signed: edx:eax = eax * ecx
        jo      rs2l                        ; does not fit in 32 bits: store the result as long

        ; The 32-bit write zeroed the upper half of rax: store as short
        mov     [rdi], rax
        ret

rs2l:                                       ; product is in edx:eax, convert it to long
        shl     rdx, 32
        mov     eax, eax                    ; zero-extend the low half
        or      rdx, rax                    ; rdx = 64-bit signed product
                                            ; (`mov edx, eax` wiped the shifted high half)

        xor     rax, rax                    ; type word: long, normal
        bts     rax, 63
        mov     [rdi], rax

        cmp     rdx, 0
        jl      rs2ln

        ; Positive product: first limb is the value, the rest are zero
        mov     [rdi + 8], rdx
        xor     rax, rax
<% for (let i=1; i<n64; i++) { %>
        mov     [rdi + <%= (i+1)*8 %>], rax
<% } %>
        ret

        ; Negative product: store q + product
rs2ln:
        add     rdx, [q]
        mov     [rdi + 8], rdx

        mov     rdx, -1                     ; sign extension of the product; mov keeps CF
<% for (let i=1; i<n64; i++) { %>
        mov     rax, rdx
        adc     rax, [q + <%= i*8 %> ]
        mov     [rdi + <%= (i+1)*8 %>], rax
<% } %>
        ret

l1:
        mov     rcx, [rdx]
        bt      rcx, 63
        jc      ll

l1s2:
        xor     rdx, rdx
        mov     edx, ecx                    ; NOTE(review): zero-extends the short; confirm once mulSM exists
        bt      rax, 62
        jc      lsM
        jmp     lsN

s1l2:
        mov     rsi, rdx
        xor     rdx, rdx
        mov     edx, eax
        bt      rcx, 62
        jc      lsM
        jmp     lsN

lsN:
        mov     byte [rdi + 7], 0xC0        ; long montgomery
        add     rsi, 8
        add     rdi, 8
        call    mulSM
        mov     rsi, rdi                    ; multiply the partial result by R3 in place
        lea     rdx, [R3]
        call    mulM
        ret

lsM:
        mov     byte [rdi + 7], 0x80        ; long normal
        add     rsi, 8
        add     rdi, 8
        call    mulSM
        ret

ll:
        bt      rax, 62
        jc      lml
        bt      rcx, 62
        jc      lnlm

lnln:
        mov     byte [rdi + 7], 0xC0        ; long montgomery
        add     rsi, 8
        add     rdi, 8
        add     rdx, 8
        call    mulM
        mov     rsi, rdi                    ; multiply the partial result by R3 in place
        lea     rdx, [R3]
        call    mulM
        ret

lml:
        bt      rcx, 62
        jc      lmlm

lnlm:
        mov     byte [rdi + 7], 0x80        ; long normal
        add     rsi, 8
        add     rdi, 8
        add     rdx, 8
        call    mulM
        ret

lmlm:
        mov     byte [rdi + 7], 0xC0        ; long montgomery
        add     rsi, 8
        add     rdi, 8
        add     rdx, 8
        call    mulM
        ret
@ -0,0 +1,219 @@ |
|||
# Path: main |
|||
# Arch: x86_64 |
|||
# Object files: |
|||
[ 0] linker synthesized |
|||
[ 1] /var/folders/g_/74y0ll3503d4sm0c64jw432r0000gn/T//cczqYl2H.o |
|||
[ 2] fr.o |
|||
[ 3] /var/folders/g_/74y0ll3503d4sm0c64jw432r0000gn/T//cc5nHggh.o |
|||
[ 4] /usr/local/lib/libgmp.dylib |
|||
[ 5] /usr/local/Cellar/gcc/9.2.0_2/lib/gcc/9/libstdc++.dylib |
|||
[ 6] /Library/Developer/CommandLineTools/SDKs/MacOSX10.15.sdk/usr/lib/libSystem.tbd |
|||
# Sections: |
|||
# Address Size Segment Section |
|||
0x1000011BD 0x00001C99 __TEXT __text |
|||
0x100002E56 0x00000042 __TEXT __stubs |
|||
0x100002E98 0x00000074 __TEXT __stub_helper |
|||
0x100002F0C 0x0000001B __TEXT __cstring |
|||
0x100002F28 0x000000D8 __TEXT __eh_frame |
|||
0x100003000 0x00000008 __DATA_CONST __got |
|||
0x100004000 0x00000058 __DATA __la_symbol_ptr |
|||
0x100004058 0x00000078 __DATA __data |
|||
# Symbols: |
|||
# Address Size File Name |
|||
0x1000011BD 0x000000E2 [ 1] _main |
|||
0x10000129F 0x00000023 [ 2] rawCopyS2L |
|||
0x1000012C2 0x0000003D [ 2] u64toLong_adjust_neg |
|||
0x1000012FF 0x00000301 [ 2] rawMontgomeryMul |
|||
0x100001600 0x0000002B [ 2] rawMontgomeryMul_mulM_sq |
|||
0x10000162B 0x00000005 [ 2] rawMontgomeryMul_mulM_done |
|||
0x100001630 0x0000022C [ 2] rawMontgomeryMul1 |
|||
0x10000185C 0x0000002B [ 2] rawMontgomeryMul1_mulM_sq |
|||
0x100001887 0x00000005 [ 2] rawMontgomeryMul1_mulM_done |
|||
0x10000188C 0x00000218 [ 2] rawFromMontgomery |
|||
0x100001AA4 0x0000002B [ 2] rawFromMontgomery_mulM_sq |
|||
0x100001ACF 0x00000005 [ 2] rawFromMontgomery_mulM_done |
|||
0x100001AD4 0x00000011 [ 2] _Fr_toMontgomery |
|||
0x100001AE5 0x00000018 [ 2] toMontgomeryShort |
|||
0x100001AFD 0x0000000B [ 2] posMontgomeryShort |
|||
0x100001B08 0x00000016 [ 2] negMontgomeryShort |
|||
0x100001B1E 0x0000001C [ 2] toMontgomeryLong |
|||
0x100001B3A 0x00000001 [ 2] toMontgomery_doNothing |
|||
0x100001B3B 0x00000011 [ 2] _Fr_toNormal |
|||
0x100001B4C 0x00000010 [ 2] fromMontgomeryLong |
|||
0x100001B5C 0x00000001 [ 2] fromMontgomery_doNothing |
|||
0x100001B5D 0x00000018 [ 2] _Fr_add |
|||
0x100001B75 0x0000000D [ 2] add_s1s2 |
|||
0x100001B82 0x00000011 [ 2] add_manageOverflow |
|||
0x100001B93 0x0000000B [ 2] add_l1 |
|||
0x100001B9E 0x00000007 [ 2] add_l1s2 |
|||
0x100001BA5 0x0000002C [ 2] add_l1ns2 |
|||
0x100001BD1 0x0000000E [ 2] tmp1 |
|||
0x100001BDF 0x00000007 [ 2] add_l1ms2 |
|||
0x100001BE6 0x00000031 [ 2] add_l1ms2n |
|||
0x100001C17 0x00000024 [ 2] add_l1ms2m |
|||
0x100001C3B 0x00000007 [ 2] add_s1l2 |
|||
0x100001C42 0x0000002C [ 2] add_s1l2n |
|||
0x100001C6E 0x0000000E [ 2] tmp2 |
|||
0x100001C7C 0x00000007 [ 2] add_s1l2m |
|||
0x100001C83 0x00000037 [ 2] add_s1nl2m |
|||
0x100001CBA 0x00000024 [ 2] add_s1ml2m |
|||
0x100001CDE 0x00000007 [ 2] add_l1l2 |
|||
0x100001CE5 0x00000007 [ 2] add_l1nl2 |
|||
0x100001CEC 0x00000024 [ 2] add_l1nl2n |
|||
0x100001D10 0x00000037 [ 2] add_l1nl2m |
|||
0x100001D47 0x00000007 [ 2] add_l1ml2 |
|||
0x100001D4E 0x00000031 [ 2] add_l1ml2n |
|||
0x100001D7F 0x00000024 [ 2] add_l1ml2m |
|||
0x100001DA3 0x00000066 [ 2] rawAddLL |
|||
0x100001E09 0x0000002B [ 2] rawAddLL_sq |
|||
0x100001E34 0x00000001 [ 2] rawAddLL_done |
|||
0x100001E35 0x0000006A [ 2] rawAddLS |
|||
0x100001E9F 0x0000002B [ 2] rawAddLS_sq |
|||
0x100001ECA 0x00000001 [ 2] rawAddLS_done |
|||
0x100001ECB 0x00000018 [ 2] _Fr_sub |
|||
0x100001EE3 0x0000000D [ 2] sub_s1s2 |
|||
0x100001EF0 0x00000011 [ 2] sub_manageOverflow |
|||
0x100001F01 0x0000000B [ 2] sub_l1 |
|||
0x100001F0C 0x00000007 [ 2] sub_l1s2 |
|||
0x100001F13 0x0000002C [ 2] sub_l1ns2 |
|||
0x100001F3F 0x0000000E [ 2] tmp3 |
|||
0x100001F4D 0x00000007 [ 2] sub_l1ms2 |
|||
0x100001F54 0x00000031 [ 2] sub_l1ms2n |
|||
0x100001F85 0x00000024 [ 2] sub_l1ms2m |
|||
0x100001FA9 0x00000007 [ 2] sub_s1l2 |
|||
0x100001FB0 0x00000026 [ 2] sub_s1l2n |
|||
0x100001FD6 0x0000001A [ 2] tmp4 |
|||
0x100001FF0 0x00000007 [ 2] sub_s1l2m |
|||
0x100001FF7 0x00000037 [ 2] sub_s1nl2m |
|||
0x10000202E 0x00000024 [ 2] sub_s1ml2m |
|||
0x100002052 0x00000007 [ 2] sub_l1l2 |
|||
0x100002059 0x00000007 [ 2] sub_l1nl2 |
|||
0x100002060 0x00000024 [ 2] sub_l1nl2n |
|||
0x100002084 0x00000037 [ 2] sub_l1nl2m |
|||
0x1000020BB 0x00000007 [ 2] sub_l1ml2 |
|||
0x1000020C2 0x00000031 [ 2] sub_l1ml2n |
|||
0x1000020F3 0x00000024 [ 2] sub_l1ml2m |
|||
0x100002117 0x00000031 [ 2] rawSubLS |
|||
0x100002148 0x0000002B [ 2] rawSubLS_aq |
|||
0x100002173 0x00000001 [ 2] rawSubLS_done |
|||
0x100002174 0x0000002F [ 2] rawSubSL |
|||
0x1000021A3 0x0000002B [ 2] rawSubSL_aq |
|||
0x1000021CE 0x00000001 [ 2] rawSubSL_done |
|||
0x1000021CF 0x0000002F [ 2] rawSubLL |
|||
0x1000021FE 0x0000002B [ 2] rawSubLL_aq |
|||
0x100002229 0x00000001 [ 2] rawSubLL_done |
|||
0x10000222A 0x0000009C [ 2] rawNegLS |
|||
0x1000022C6 0x00000001 [ 2] rawNegSL_done |
|||
0x1000022C7 0x0000000A [ 2] _Fr_neg |
|||
0x1000022D1 0x00000008 [ 2] neg_s |
|||
0x1000022D9 0x0000000E [ 2] neg_manageOverflow |
|||
0x1000022E7 0x00000019 [ 2] neg_l |
|||
0x100002300 0x0000002A [ 2] rawNegL |
|||
0x10000232A 0x0000003B [ 2] doNegate |
|||
0x100002365 0x00000018 [ 2] _Fr_mul |
|||
0x10000237D 0x0000000E [ 2] mul_s1s2 |
|||
0x10000238B 0x00000014 [ 2] mul_manageOverflow |
|||
0x10000239F 0x0000000B [ 2] mul_l1 |
|||
0x1000023AA 0x0000000B [ 2] mul_l1s2 |
|||
0x1000023B5 0x00000007 [ 2] mul_l1ns2 |
|||
0x1000023BC 0x00000033 [ 2] mul_l1ns2n |
|||
0x1000023EF 0x0000000A [ 2] tmp5 |
|||
0x1000023F9 0x0000001A [ 2] tmp6 |
|||
0x100002413 0x00000024 [ 2] mul_l1ns2m |
|||
0x100002437 0x00000007 [ 2] mul_l1ms2 |
|||
0x10000243E 0x00000033 [ 2] mul_l1ms2n |
|||
0x100002471 0x0000000A [ 2] tmp7 |
|||
0x10000247B 0x00000001 [ 2] tmp8 |
|||
0x10000247C 0x00000024 [ 2] mul_l1ms2m |
|||
0x1000024A0 0x0000000B [ 2] mul_s1l2 |
|||
0x1000024AB 0x00000007 [ 2] mul_s1nl2 |
|||
0x1000024B2 0x00000033 [ 2] mul_s1nl2n |
|||
0x1000024E5 0x0000000A [ 2] tmp9 |
|||
0x1000024EF 0x0000001A [ 2] tmp10 |
|||
0x100002509 0x00000033 [ 2] mul_s1nl2m |
|||
0x10000253C 0x0000000A [ 2] tmp11 |
|||
0x100002546 0x00000001 [ 2] tmp12 |
|||
0x100002547 0x00000007 [ 2] mul_s1ml2 |
|||
0x10000254E 0x00000024 [ 2] mul_s1ml2n |
|||
0x100002572 0x00000024 [ 2] mul_s1ml2m |
|||
0x100002596 0x00000007 [ 2] mul_l1l2 |
|||
0x10000259D 0x00000007 [ 2] mul_l1nl2 |
|||
0x1000025A4 0x0000003D [ 2] mul_l1nl2n |
|||
0x1000025E1 0x00000024 [ 2] mul_l1nl2m |
|||
0x100002605 0x00000007 [ 2] mul_l1ml2 |
|||
0x10000260C 0x00000024 [ 2] mul_l1ml2n |
|||
0x100002630 0x00000024 [ 2] mul_l1ml2m |
|||
0x100002654 0x0000001C [ 2] _Fr_band |
|||
0x100002670 0x00000019 [ 2] and_s1s2 |
|||
0x100002689 0x00000012 [ 2] tmp13 |
|||
0x10000269B 0x00000054 [ 2] tmp14 |
|||
0x1000026EF 0x0000000B [ 2] and_l1 |
|||
0x1000026FA 0x0000000B [ 2] and_l1s2 |
|||
0x100002705 0x00000044 [ 2] and_l1ns2 |
|||
0x100002749 0x00000054 [ 2] tmp15 |
|||
0x10000279D 0x00000059 [ 2] and_l1ms2 |
|||
0x1000027F6 0x00000054 [ 2] tmp16 |
|||
0x10000284A 0x0000000B [ 2] and_s1l2 |
|||
0x100002855 0x00000044 [ 2] and_s1l2n |
|||
0x100002899 0x00000054 [ 2] tmp17 |
|||
0x1000028ED 0x00000053 [ 2] and_s1l2m |
|||
0x100002940 0x00000054 [ 2] tmp18 |
|||
0x100002994 0x00000016 [ 2] and_l1l2 |
|||
0x1000029AA 0x00000044 [ 2] and_l1nl2n |
|||
0x1000029EE 0x00000054 [ 2] tmp19 |
|||
0x100002A42 0x00000053 [ 2] and_l1nl2m |
|||
0x100002A95 0x00000054 [ 2] tmp20 |
|||
0x100002AE9 0x0000000B [ 2] and_l1ml2 |
|||
0x100002AF4 0x00000059 [ 2] and_l1ml2n |
|||
0x100002B4D 0x00000054 [ 2] tmp21 |
|||
0x100002BA1 0x00000068 [ 2] and_l1ml2m |
|||
0x100002C09 0x00000054 [ 2] tmp22 |
|||
0x100002C5D 0x0000009F [ 3] __Z14Fr_str2elementP9FrElementPc |
|||
0x100002CFC 0x0000015A [ 3] __Z14Fr_element2strP9FrElement |
|||
0x100002E56 0x00000006 [ 5] __Znam |
|||
0x100002E5C 0x00000006 [ 4] ___gmpz_add |
|||
0x100002E62 0x00000006 [ 4] ___gmpz_clear |
|||
0x100002E68 0x00000006 [ 4] ___gmpz_export |
|||
0x100002E6E 0x00000006 [ 4] ___gmpz_get_str |
|||
0x100002E74 0x00000006 [ 4] ___gmpz_import |
|||
0x100002E7A 0x00000006 [ 4] ___gmpz_init |
|||
0x100002E80 0x00000006 [ 4] ___gmpz_init_set_si |
|||
0x100002E86 0x00000006 [ 4] ___gmpz_set_str |
|||
0x100002E8C 0x00000006 [ 6] _printf |
|||
0x100002E92 0x00000006 [ 6] _sprintf |
|||
0x100002E98 0x00000010 [ 0] helper helper |
|||
0x100002EA8 0x0000000A [ 4] ___gmpz_add |
|||
0x100002EB2 0x0000000A [ 4] ___gmpz_clear |
|||
0x100002EBC 0x0000000A [ 4] ___gmpz_export |
|||
0x100002EC6 0x0000000A [ 4] ___gmpz_get_str |
|||
0x100002ED0 0x0000000A [ 4] ___gmpz_import |
|||
0x100002EDA 0x0000000A [ 4] ___gmpz_init |
|||
0x100002EE4 0x0000000A [ 4] ___gmpz_init_set_si |
|||
0x100002EEE 0x0000000A [ 4] ___gmpz_set_str |
|||
0x100002EF8 0x0000000A [ 6] _printf |
|||
0x100002F02 0x0000000A [ 6] _sprintf |
|||
0x100002F0C 0x00000018 [ 1] literal string: %llu, %llu, %llu, %llu\n |
|||
0x100002F24 0x00000003 [ 3] literal string: %d |
|||
0x100002F28 0x00000018 [ 1] CIE |
|||
0x100002F40 0x00000038 [ 1] FDE for: _main |
|||
0x100002F78 0x00000018 [ 3] CIE |
|||
0x100002F90 0x00000038 [ 3] FDE for: __Z14Fr_str2elementP9FrElementPc |
|||
0x100002FC8 0x00000038 [ 3] FDE for: __Z14Fr_element2strP9FrElement |
|||
0x100003000 0x00000008 [ 0] non-lazy-pointer-to-local: dyld_stub_binder |
|||
0x100004000 0x00000008 [ 5] __Znam |
|||
0x100004008 0x00000008 [ 4] ___gmpz_add |
|||
0x100004010 0x00000008 [ 4] ___gmpz_clear |
|||
0x100004018 0x00000008 [ 4] ___gmpz_export |
|||
0x100004020 0x00000008 [ 4] ___gmpz_get_str |
|||
0x100004028 0x00000008 [ 4] ___gmpz_import |
|||
0x100004030 0x00000008 [ 4] ___gmpz_init |
|||
0x100004038 0x00000008 [ 4] ___gmpz_init_set_si |
|||
0x100004040 0x00000008 [ 4] ___gmpz_set_str |
|||
0x100004048 0x00000008 [ 6] _printf |
|||
0x100004050 0x00000008 [ 6] _sprintf |
|||
0x100004058 0x00000008 [ 0] __dyld_private |
|||
0x100004060 0x00000008 [ 2] _Fr_q |
|||
0x100004068 0x00000020 [ 2] q |
|||
0x100004088 0x00000020 [ 2] R2 |
|||
0x1000040A8 0x00000020 [ 2] R3 |
|||
0x1000040C8 0x00000008 [ 2] lboMask |
@ -0,0 +1,317 @@ |
|||
; subS1S2 (template helper): emits the code for the "short - short" case. |
; Reads eax / ecx (low halves of the first qword of operand 1 / operand 2) and stores through rdi. |
<% function subS1S2() { %> |
|||
xor rdx, rdx |
|||
mov edx, eax |
|||
sub edx, ecx |
|||
jo sub_manageOverflow ; signed 32-bit overflow: redo the subtraction with 64-bit operands |
|||
|
|||
mov [rdi], rdx ; upper 32 bits of rdx are zero here, so bit 63 of the stored qword is clear; just save and return |
|||
ret |
|||
|
|||
sub_manageOverflow: ; Do the operation in 64 bits |
|||
; rsi is borrowed to carry the 64-bit difference into rawCopyS2L and restored afterwards |
push rsi |
|||
movsx rsi, eax |
|||
movsx rdx, ecx |
|||
sub rsi, rdx |
|||
; NOTE(review): presumably rawCopyS2L stores the value in rsi as a long element at rdi - confirm |
call rawCopyS2L |
|||
pop rsi |
|||
ret |
|||
<% } %> |
|||
|
|||
; subL1S2 (template helper): emits the code for the "long - short" case. |
; Reads rsi (operand 1), ecx (short value of operand 2) and rdi (result). The parameter t is not used. |
<% function subL1S2(t) { %> |
|||
; step over the first qword of operand 1 and of the result so both point at the long data |
add rsi, 8 |
|||
movsx rdx, ecx |
|||
add rdi, 8 |
|||
cmp rdx, 0 |
|||
<% const rawSubLabel = global.tmpLabel() %> |
|||
; non-negative subtrahend: take the plain subtraction path below |
jns <%= rawSubLabel %> |
|||
; negative subtrahend: a - b == a + (-b), so negate it and add |
neg rdx |
|||
call rawAddLS |
|||
; undo the +8 adjustments before returning |
sub rdi, 8 |
|||
sub rsi, 8 |
|||
ret |
|||
<%= rawSubLabel %>: |
|||
call rawSubLS |
|||
; undo the +8 adjustments before returning |
sub rdi, 8 |
|||
sub rsi, 8 |
|||
ret |
|||
<% } %> |
|||
|
|||
|
|||
; subS1L2 (template helper): emits the code for the "short - long" case. |
; Reads eax (short value of operand 1), rdx (operand 2) and rdi (result). The parameter t is not used. |
<% function subS1L2(t) { %> |
|||
cmp eax, 0 |
|||
<% const s1NegLabel = global.tmpLabel() %> |
|||
js <%= s1NegLabel %> |
|||
|
|||
; First operand is positive (or zero): result = value - [operand 2] via rawSubSL |
|||
push rsi |
|||
add rdi, 8 |
|||
movsx rsi, eax |
|||
; rdx is advanced to the long data of operand 2 and is not moved back on this path |
add rdx, 8 |
|||
call rawSubSL |
|||
sub rdi, 8 |
|||
pop rsi |
|||
ret |
|||
|
|||
<%= s1NegLabel %>: ; First operand is negative: result = -[operand 2] - |value| via rawNegLS |
|||
push rsi |
|||
lea rsi, [rdx + 8] |
|||
movsx rdx, eax |
|||
add rdi, 8 |
|||
; rdx becomes the magnitude of the negative short value |
neg rdx |
|||
call rawNegLS |
|||
sub rdi, 8 |
|||
pop rsi |
|||
ret |
|||
<% } %> |
|||
|
|||
|
|||
; subL1L2 (template helper): emits the code for the "long - long" case. |
; Advances rdi, rsi and rdx past the first qword, calls rawSubLL, then moves rdi and rsi back. |
; rdx is left advanced by 8. The parameter t is not used. |
<% function subL1L2(t) { %> |
|||
add rdi, 8 |
|||
add rsi, 8 |
|||
add rdx, 8 |
|||
call rawSubLL |
|||
sub rdi, 8 |
|||
sub rsi, 8 |
|||
ret |
|||
<% } %> |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; sub |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Subtracts two elements of any kind (element 1 - element 2) |
; The first qword of each element is inspected: bit 63 selects the long paths, bit 62 the Montgomery paths. |
; The result's first qword is written with top byte 0x80 (bit 63 only) or 0xC0 (bits 63 and 62) before the raw subtraction runs. |
; NOTE(review): that store to [rdi] happens before the operands' data is read, and toMont_a / toMont_b may modify an operand in place - confirm callers never alias rdi with rsi or rdx. |
|||
; Params: |
|||
; rsi <= Pointer to element 1 |
|||
; rdx <= Pointer to element 2 |
|||
; rdi <= Pointer to result |
|||
; Modified Registers: |
|||
; r8, r9, r10, r11, rax, rcx, rdx |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
<%=name%>_sub: |
|||
mov rax, [rsi] |
|||
mov rcx, [rdx] |
|||
bt rax, 63 ; Check if first operand is long |
|||
jc sub_l1 |
|||
bt rcx, 63 ; Check if second operand is long |
|||
jc sub_s1l2 |
|||
|
|||
sub_s1s2: ; Both operands are short |
|||
<%= subS1S2() %> |
|||
sub_l1: |
|||
bt rcx, 63 ; Check if second operand is long |
|||
jc sub_l1l2 |
|||
|
|||
;;;;;;;; |
|||
sub_l1s2: |
|||
bt rax, 62 ; check if first operand is montgomery |
|||
jc sub_l1ms2 |
|||
sub_l1ns2: |
|||
<%= global.setTypeDest("0x80"); %> |
|||
<%= subL1S2(); %> |
|||
|
|||
sub_l1ms2: |
|||
bt rcx, 62 ; check if second operand is montgomery |
|||
jc sub_l1ms2m |
|||
sub_l1ms2n: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= global.toMont_b() %> |
|||
<%= subL1L2() %> |
|||
|
|||
sub_l1ms2m: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= subL1L2() %> |
|||
|
|||
|
|||
;;;;;;;; |
|||
sub_s1l2: |
|||
bt rcx, 62 ; check if second operand is montgomery |
|||
jc sub_s1l2m |
|||
sub_s1l2n: |
|||
<%= global.setTypeDest("0x80"); %> |
|||
<%= subS1L2(); %> |
|||
|
|||
sub_s1l2m: |
|||
bt rax, 62 ; check if first operand is montgomery |
|||
jc sub_s1ml2m |
|||
sub_s1nl2m: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= global.toMont_a() %> |
|||
<%= subL1L2() %> |
|||
|
|||
sub_s1ml2m: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= subL1L2() %> |
|||
|
|||
;;;; |
|||
sub_l1l2: |
|||
bt rax, 62 ; check if first operand is montgomery |
|||
jc sub_l1ml2 |
|||
sub_l1nl2: |
|||
bt rcx, 62 ; check if second operand is montgomery |
|||
jc sub_l1nl2m |
|||
sub_l1nl2n: |
|||
<%= global.setTypeDest("0x80"); %> |
|||
<%= subL1L2() %> |
|||
|
|||
sub_l1nl2m: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= global.toMont_a(); %> |
|||
<%= subL1L2() %> |
|||
|
|||
sub_l1ml2: |
|||
bt rcx, 62 ; check if second operand is montgomery |
|||
jc sub_l1ml2m |
|||
sub_l1ml2n: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= global.toMont_b(); %> |
|||
<%= subL1L2() %> |
|||
|
|||
sub_l1ml2m: |
|||
<%= global.setTypeDest("0xC0"); %> |
|||
<%= subL1L2() %> |
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;;
; rawSubLS
;;;;;;;;;;;;;;;;;;;;;;
; Subtracts a short (single 64-bit) value from a long element.
; If the subtraction borrows out of the top limb, q is added back once.
; Params:
;   rdi <= Pointer to the long data of result
;   rsi <= Pointer to the long data of element 1 (the value subtracted from)
;   rdx <= Value to be subtracted
;   [rdi] = [rsi] - rdx
; Modified Registers:
;    rax
;;;;;;;;;;;;;;;;;;;;;;
rawSubLS:
        ; Subtract the short value from the lowest limb
        mov     rax, [rsi]
        sub     rax, rdx
        mov     [rdi], rax

        ; Propagate the borrow through the remaining limbs. mov leaves the
        ; flags alone, so CF travels from one sbb to the next. Subtracting an
        ; immediate 0 keeps rdx intact, as the header above promises.
<% for (let i=1; i<n64; i++) { %>
        mov     rax, [rsi + <%=i*8%>]
        sbb     rax, 0
        mov     [rdi + <%=i*8%>], rax
<% } %>
        jnc     rawSubLS_done           ; no borrow out of the top limb: done

        ; The result went below zero: add q
rawSubLS_aq:
<% for (let i=0; i<n64; i++) { %>
        mov     rax, [q + <%=i*8%>]
        <%= i==0 ? "add" : "adc" %>     [rdi + <%=i*8%>], rax
<% } %>
rawSubLS_done:
        ret
|||
|
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; rawSubSL |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Subtracts a long element from a short (single 64-bit) value; q is added back if the subtraction borrows |
|||
; Params: |
|||
; rdi <= Pointer to the long data of result |
|||
; rsi <= Value from which the long element will be subtracted |
|||
; rdx <= Pointer to the long data of the value to be subtracted |
|||
; |
|||
; [rdi] = rsi - [rdx] |
|||
; Modified Registers: |
|||
; rax, rsi |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
rawSubSL: |
|||
; Subtract first digit |
|||
sub rsi, [rdx] |
|||
mov [rdi] ,rsi |
|||
|
|||
; Upper limbs of the short value are zero: compute 0 - [rdx + i*8] - borrow. |
; rax is cleared with mov (not xor) because mov leaves CF untouched for the sbb that follows. |
<% for (let i=1; i<n64; i++) { %> |
|||
mov rax, 0 |
|||
sbb rax, [rdx + <%=i*8%>] |
|||
mov [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
jnc rawSubSL_done ; no borrow out of the top limb: done; otherwise add q |
|||
|
|||
; Add q |
|||
rawSubSL_aq: |
|||
<% for (let i=0; i<n64; i++) { %> |
|||
mov rax, [q + <%=i*8%>] |
|||
<%= i==0 ? "add" : "adc" %> [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
rawSubSL_done: |
|||
ret |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; rawSubLL |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Subtracts a long element from a long element; q is added back if the subtraction borrows |
|||
; Params: |
|||
; rdi <= Pointer to the long data of result |
|||
; rsi <= Pointer to the long data of the value subtracted from |
|||
; rdx <= Pointer to the long data of the value to be subtracted |
|||
; |
|||
; [rdi] = [rsi] - [rdx] |
|||
; Modified Registers: |
|||
; rax |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
rawSubLL: |
|||
; Subtract limb by limb: sub for the lowest limb, sbb (with borrow) for the rest |
|||
<% for (let i=0; i<n64; i++) { %> |
|||
mov rax, [rsi + <%=i*8%>] |
|||
<%= i==0 ? "sub" : "sbb" %> rax, [rdx + <%=i*8%>] |
|||
mov [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
jnc rawSubLL_done ; no borrow out of the top limb: done; otherwise add q |
|||
|
|||
; Add q |
|||
rawSubLL_aq: |
|||
<% for (let i=0; i<n64; i++) { %> |
|||
mov rax, [q + <%=i*8%>] |
|||
<%= i==0 ? "add" : "adc" %> [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
rawSubLL_done: |
|||
ret |
|||
|
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; rawNegLS |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
; Subtracts a long element and a short value from 0 |
; Implemented as (q - rdx) - [rsi]; if either subtraction borrows, q is added once more. |
; NOTE(review): the first pass stores q - rdx into [rdi] before the second pass reads [rsi], so rdi must not alias rsi - confirm callers. |
; NOTE(review): the exit label is spelled rawNegSL_done (SL, not LS). |
|||
; Params: |
|||
; rdi <= Pointer to the long data of result |
|||
; rsi <= Pointer to the long data of the element to be subtracted |
|||
; rdx <= short value to be subtracted too |
|||
; |
|||
; [rdi] = -[rsi] - rdx |
|||
; Modified Registers: |
|||
; rax, rdx |
|||
;;;;;;;;;;;;;;;;;;;;;; |
|||
rawNegLS: |
|||
; Pass 1: [rdi] = q - rdx |
mov rax, [q] |
|||
sub rax, rdx |
|||
mov [rdi], rax |
|||
<% for (let i=1; i<n64; i++) { %> |
|||
mov rax, [q + <%=i*8%> ] |
|||
sbb rax, 0 |
|||
mov [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
; dl = borrow out of pass 1 (rdx is no longer needed as an input from here on) |
setc dl |
|||
|
|||
; Pass 2: [rdi] = [rdi] - [rsi] |
<% for (let i=0; i<n64; i++) { %> |
|||
mov rax, [rdi + <%=i*8%> ] |
|||
<%= i==0 ? "sub" : "sbb" %> rax, [rsi + <%=i*8%>] |
|||
mov [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
|
|||
; dh = borrow out of pass 2; skip the correction only when neither pass borrowed |
setc dh |
|||
or dl, dh |
|||
jz rawNegSL_done |
|||
|
|||
; it is a negative value, so add q |
|||
<% for (let i=0; i<n64; i++) { %> |
|||
mov rax, [q + <%=i*8%>] |
|||
<%= i==0 ? "add" : "adc" %> [rdi + <%=i*8%>], rax |
|||
<% } %> |
|||
|
|||
rawNegSL_done: |
|||
ret |
|||
|
|||
|
@ -0,0 +1,209 @@ |
|||
#include <string>
|
|||
#include <iostream>
|
|||
|
|||
#include <regex>
|
|||
#include <string>
|
|||
#include <iostream>
|
|||
#include <stdexcept>
|
|||
#include <sstream>
|
|||
|
|||
#include <stdio.h> /* printf, NULL */
|
|||
#include <stdlib.h>
|
|||
#include <cassert>
|
|||
|
|||
|
|||
#include "fr.h"
|
|||
|
|||
|
|||
// Unary field operation; callFunction invokes it as fn(&result, &a). |
typedef void (*Func1)(PFrElement, PFrElement); |
|||
// Binary field operation; callFunction invokes it as fn(&result, &a, &b). |
typedef void (*Func2)(PFrElement, PFrElement, PFrElement); |
|||
// Type-erased function pointer; cast back to Func1 or Func2 according to nOps before the call. |
typedef void *FuncAny; |
|||
|
|||
// One registered operation: the function to call and how many stack operands it consumes. |
typedef struct { |
|||
FuncAny fn; |
|||
int nOps; |
|||
} FunctionSpec; |
|||
|
|||
// Operation name -> spec, filled by fillMap(). |
std::map<std::string, FunctionSpec> functions; |
|||
// Operand stack: numbers are pushed, operations pop their operands and push the result. |
std::vector<FrElement> stack; |
|||
|
|||
void addFunction(std::string name, FuncAny f, int nOps) { |
|||
FunctionSpec fs; |
|||
fs.fn = f; |
|||
fs.nOps = nOps; |
|||
functions[name] = fs; |
|||
} |
|||
|
|||
void fillMap() { |
|||
addFunction("add", (FuncAny)Fr_add, 2); |
|||
addFunction("sub", (FuncAny)Fr_sub, 2); |
|||
addFunction("neg", (FuncAny)Fr_neg, 1); |
|||
addFunction("mul", (FuncAny)Fr_mul, 2); |
|||
addFunction("band", (FuncAny)Fr_band, 2); |
|||
addFunction("bor", (FuncAny)Fr_bor, 2); |
|||
addFunction("bxor", (FuncAny)Fr_bxor, 2); |
|||
addFunction("eq", (FuncAny)Fr_eq, 2); |
|||
addFunction("neq", (FuncAny)Fr_neq, 2); |
|||
addFunction("lt", (FuncAny)Fr_lt, 2); |
|||
addFunction("gt", (FuncAny)Fr_gt, 2); |
|||
addFunction("leq", (FuncAny)Fr_leq, 2); |
|||
addFunction("geq", (FuncAny)Fr_geq, 2); |
|||
} |
|||
|
|||
// Parses an unsigned 64-bit integer token: hexadecimal when it starts with
// a lowercase "0x", decimal otherwise. Parse errors propagate as the
// exceptions thrown by std::stoull.
u_int64_t readInt(std::string &s) {
    const bool hasHexPrefix = (s.compare(0, 2, "0x") == 0);
    if (!hasHexPrefix) {
        return std::stoull(s, nullptr, 10);
    }
    const std::string digits = s.substr(2);
    return std::stoull(digits, nullptr, 16);
}
|||
|
|||
// Builds an FrElement from the tokens of one input line and pushes it on
// the stack.
//   v[0]      -> written verbatim over the first 8 bytes of the element
//   v[1..]    -> longVal[0..]; limbs that are not supplied are set to 0
// Throws std::runtime_error when the token count is outside 1..Fr_N64+1;
// number-parsing errors from readInt propagate unchanged.
void pushNumber(std::vector<std::string> &v) {
    const size_t limbCount = static_cast<size_t>(Fr_N64);

    if (v.empty() || v.size() > limbCount + 1) {
        // %zu is the matching conversion for size_t; the previous %d was
        // undefined behaviour on platforms where size_t is wider than int.
        printf("Invalid Size: %zu - %d \n", v.size(), static_cast<int>(Fr_N64));
        throw std::runtime_error("Invalid number of parameters for number");
    }

    FrElement e;

    // The first token overwrites the leading 8 bytes of the element as a
    // raw 64-bit word, exactly as the original code did.
    u_int64_t a = readInt(v[0]);
    *(u_int64_t *)(&e) = a;

    for (size_t i = 0; i < limbCount; i++) {
        if (i + 1 < v.size()) {
            a = readInt(v[i + 1]);
        } else {
            a = 0;
        }
        e.longVal[i] = a;
    }

    stack.push_back(e);
}
|||
|
|||
// Pops fs.nOps operands from the stack, applies the operation and pushes
// the result. For binary operations the element pushed first is the left
// operand.
// Throws std::runtime_error when the stack holds fewer than fs.nOps
// elements; operand counts other than 1 or 2 are a programming error.
void callFunction(FunctionSpec fs) {
    // Throw by value (not `throw new ...`): a thrown pointer leaks and is
    // not caught by handlers written for std::exception, unlike every other
    // throw in this file.
    if (stack.size() < static_cast<size_t>(fs.nOps)) {
        throw std::runtime_error("Not enough elements in stack");
    }

    if (fs.nOps == 1) {
        FrElement a = stack.back();
        stack.pop_back();

        FrElement c;
        (*(Func1)fs.fn)(&c, &a);
        stack.push_back(c);
    } else if (fs.nOps == 2) {
        // Operands come off in reverse order: b was pushed last.
        FrElement b = stack.back();
        stack.pop_back();
        FrElement a = stack.back();
        stack.pop_back();

        FrElement c;
        (*(Func2)fs.fn)(&c, &a, &b);
        stack.push_back(c);
    } else {
        assert(false);
    }
}
|||
|
|||
void processLine(std::string &line) { |
|||
std::regex re("(\\s*[,;]\\s*)|\\s+"); // whitespace
|
|||
|
|||
std::sregex_token_iterator begin( line.begin(), line.end(), re ,-1); |
|||
std::sregex_token_iterator end; |
|||
std::vector<std::string> tokens; |
|||
|
|||
std::copy(begin, end, std::back_inserter(tokens)); |
|||
|
|||
// Remove initial empty tokens
|
|||
while ((tokens.size() > 0)&&(tokens[0] == "")) { |
|||
tokens.erase(tokens.begin()); |
|||
} |
|||
|
|||
// Empty lines are valid but are not processed
|
|||
if (tokens.size() == 0) return; |
|||
|
|||
auto search = functions.find(tokens[0]); |
|||
if (search == functions.end()) { |
|||
pushNumber(tokens); |
|||
} else { |
|||
if (tokens.size() != 1) { |
|||
throw std::runtime_error("Functions does not accept parameters"); |
|||
} |
|||
callFunction(search->second); |
|||
} |
|||
} |
|||
|
|||
int main(void) |
|||
{ |
|||
fillMap(); |
|||
std::string line; |
|||
int i=0; |
|||
while (std::getline(std::cin, line)) { |
|||
processLine(line); |
|||
// if (i%1000 == 0) printf("%d\n", i);
|
|||
// printf("%d\n", i);
|
|||
i++; |
|||
} |
|||
// Print the elements in the stack
|
|||
//
|
|||
for (int i=0; i<stack.size(); i++) { |
|||
char *s; |
|||
s = Fr_element2str(&stack[i]); |
|||
printf("%s\n", s); |
|||
free(s); |
|||
} |
|||
return EXIT_SUCCESS; |
|||
} |
|||
|
|||
|
|||
|
|||
/*
|
|||
|
|||
|
|||
#include <stdlib.h>
|
|||
#include <string.h>
|
|||
#include "fr.h"
|
|||
|
|||
typedef void (*Func2)(PFrElement, PFrElement, PFrElement); |
|||
|
|||
typedef struct { |
|||
const char *fnName; |
|||
Func2 fn; |
|||
} FN; |
|||
|
|||
|
|||
#define NFN 2
|
|||
FN fns[NFN] = { |
|||
{"add", Fr_add}, |
|||
{"mul", Fr_mul}, |
|||
}; |
|||
|
|||
int main(int argc, char **argv) { |
|||
|
|||
if (argc <= 1) { |
|||
fprintf( stderr, "invalid number of parameters"); |
|||
return 1; |
|||
} |
|||
|
|||
for (int i=0; i< NFN;i++) { |
|||
if (strcmp(argv[1], fns[i].fnName) == 0) { |
|||
if (argc != 4) { |
|||
fprintf( stderr, "invalid number of parameters"); |
|||
return 1; |
|||
} |
|||
FrElement a; |
|||
FrElement b; |
|||
|
|||
Fr_str2element(&a, argv[2]); |
|||
Fr_str2element(&b, argv[3]); |
|||
FrElement c; |
|||
fns[i].fn(&c, &a, &b); |
|||
|
|||
char *s; |
|||
s = Fr_element2str(&c); |
|||
printf("%s", s); |
|||
free(s); |
|||
return 0; |
|||
} |
|||
} |
|||
fprintf( stderr, "invalid operation %s", argv[1]); |
|||
return 1; |
|||
} |
|||
|
|||
*/ |
@ -0,0 +1,20 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<!DOCTYPE plist PUBLIC "-//Apple Computer//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> |
|||
<plist version="1.0"> |
|||
<dict> |
|||
<key>CFBundleDevelopmentRegion</key> |
|||
<string>English</string> |
|||
<key>CFBundleIdentifier</key> |
|||
<string>com.apple.xcode.dsym.tester</string> |
|||
<key>CFBundleInfoDictionaryVersion</key> |
|||
<string>6.0</string> |
|||
<key>CFBundlePackageType</key> |
|||
<string>dSYM</string> |
|||
<key>CFBundleSignature</key> |
|||
<string>????</string> |
|||
<key>CFBundleShortVersionString</key> |
|||
<string>1.0</string> |
|||
<key>CFBundleVersion</key> |
|||
<string>1</string> |
|||
</dict> |
|||
</plist> |
@ -0,0 +1,73 @@ |
|||
<% global.setTypeDest = function (t) { |
// Returns asm text that overwrites the qword at [rdi] with the byte t in bits 56-63 and zeros below it. |
// The shift by 56 discards whatever r11 held above its low byte. The emitted code clobbers r11. |
|||
return ( |
|||
` mov r11b, ${t} |
|||
shl r11, 56 |
|||
mov [rdi], r11`); |
|||
} %> |
|||
|
|||
|
|||
<% global.toMont_a = function () { |
// Returns asm text that calls ${name}_toMontgomery with rdi = the pointer currently in rsi (operand a). |
// rdi is saved on the stack; rdx is parked in rsi across the call, and rsi is rebuilt from rdi afterwards. |
// NOTE(review): this relies on ${name}_toMontgomery leaving rsi and rdi unchanged - confirm. |
|||
return ( |
|||
` push rdi |
|||
mov rdi, rsi |
|||
mov rsi, rdx |
|||
call ${name}_toMontgomery |
|||
mov rdx, rsi |
|||
mov rsi, rdi |
|||
pop rdi`); |
|||
} %> |
|||
|
|||
<% global.toMont_b = function() { |
// Returns asm text that calls ${name}_toMontgomery with rdi = the pointer currently in rdx (operand b). |
// rdi is saved on the stack and rdx is rebuilt from rdi after the call. |
// NOTE(review): this relies on ${name}_toMontgomery leaving rdi unchanged; rsi is not saved here - confirm. |
|||
return ( |
|||
` push rdi |
|||
mov rdi, rdx |
|||
call ${name}_toMontgomery |
|||
mov rdx, rdi |
|||
pop rdi`); |
|||
} %> |
|||
|
|||
<% global.fromMont_a = function () { |
// Returns asm text that calls ${name}_toNormal with rdi = the pointer currently in rsi (operand a). |
// rdi is saved on the stack; rdx is parked in rsi across the call, and rsi is rebuilt from rdi afterwards. |
// NOTE(review): this relies on ${name}_toNormal leaving rsi and rdi unchanged - confirm. |
|||
return ( |
|||
` push rdi |
|||
mov rdi, rsi |
|||
mov rsi, rdx |
|||
call ${name}_toNormal |
|||
mov rdx, rsi |
|||
mov rsi, rdi |
|||
pop rdi`); |
|||
} %> |
|||
|
|||
<% global.fromMont_b = function() { |
// Returns asm text that calls ${name}_toNormal with rdi = the pointer currently in rdx (operand b). |
// rdi is saved on the stack and rdx is rebuilt from rdi after the call. |
// NOTE(review): this relies on ${name}_toNormal leaving rdi unchanged; rsi is not saved here - confirm. |
|||
return ( |
|||
` push rdi |
|||
mov rdi, rdx |
|||
call ${name}_toNormal |
|||
mov rdx, rdi |
|||
pop rdi`); |
|||
} %> |
|||
|
|||
<% global.toLong_a = function () { |
// Returns asm text that calls rawCopyS2L with rdi = the pointer currently in rsi (operand a) |
// and rsi = r8d sign-extended to 64 bits. rdi and rdx are saved on the stack; rsi is rebuilt from rdi. |
// NOTE(review): assumes the caller has loaded operand a's short value into r8d and that rawCopyS2L leaves rdi unchanged - confirm. |
|||
return ( |
|||
` push rdi |
|||
push rdx |
|||
mov rdi, rsi |
|||
movsx rsi, r8d |
|||
call rawCopyS2L |
|||
mov rsi, rdi |
|||
pop rdx |
|||
pop rdi`); |
|||
} %> |
|||
|
|||
<% global.toLong_b = function() { |
// Returns asm text that calls rawCopyS2L with rdi = the pointer currently in rdx (operand b) |
// and rsi = r9d sign-extended to 64 bits. rdi and rsi are saved on the stack; rdx is rebuilt from rdi. |
// NOTE(review): assumes the caller has loaded operand b's short value into r9d and that rawCopyS2L leaves rdi unchanged - confirm. |
|||
return ( |
|||
` push rdi |
|||
push rsi |
|||
mov rdi, rdx |
|||
movsx rsi, r9d |
|||
call rawCopyS2L |
|||
mov rdx, rdi |
|||
pop rsi |
|||
pop rdi`); |
|||
} %> |
|||
|
|||
|
@ -0,0 +1,322 @@ |
|||
const tester = require("../c/buildasm/buildzqfieldtester.js"); |
|||
|
|||
const ZqField = require("fflib").ZqField; |
|||
|
|||
const bigInt = require("big-integer"); |
|||
|
|||
const bn128q = new bigInt("21888242871839275222246405745257275088696311157297823662689037894645226208583"); |
|||
const bn128r = new bigInt("21888242871839275222246405745257275088548364400416034343698204186575808495617"); |
|||
const secp256k1q = new bigInt("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", 16); |
|||
const secp256k1r = new bigInt("FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", 16); |
|||
const mnt6753q = new bigInt("41898490967918953402344214791240637128170709919953949071783502921025352812571106773058893763790338921418070971888458477323173057491593855069696241854796396165721416325350064441470418137846398469611935719059908164220784476160001"); |
|||
const mnt6753r = new bigInt("41898490967918953402344214791240637128170709919953949071783502921025352812571106773058893763790338921418070971888253786114353726529584385201591605722013126468931404347949840543007986327743462853720628051692141265303114721689601"); |
|||
|
|||
describe("field asm test", function () {
    this.timeout(1000000000);

    // Moduli the suite knows about, in the order their cases are declared.
    const fields = [
        { label: "bn128r", prime: bn128r },
        { label: "secp256k1q", prime: secp256k1q },
        { label: "mnt6753q", prime: mnt6753q },
    ];

    // op:    operation name passed to the vector builder
    // title: how the operation appears in the test case name
    // build: buildTestVector1 for unary operations, buildTestVector2 for binary ones
    const operations = [
        { op: "add", title: "add", build: buildTestVector2 },
        { op: "sub", title: "sub", build: buildTestVector2 },
        { op: "neg", title: "neg", build: buildTestVector1 },
        { op: "mul", title: "mul", build: buildTestVector2 },
        { op: "band", title: "binary and", build: buildTestVector2 },
        { op: "bor", title: "binary or", build: buildTestVector2 },
        { op: "bxor", title: "binary xor", build: buildTestVector2 },
        { op: "eq", title: "eq", build: buildTestVector2 },
        { op: "neq", title: "neq", build: buildTestVector2 },
        { op: "lt", title: "lt", build: buildTestVector2 },
        { op: "gt", title: "gt", build: buildTestVector2 },
        { op: "leq", title: "leq", build: buildTestVector2 },
        { op: "geq", title: "geq", build: buildTestVector2 },
    ];

    // Combinations that are currently switched on. Every other field/operation
    // pair above is disabled; add it here to run it.
    const enabled = {
        mnt6753q: ["eq", "neq", "lt", "gt", "leq", "geq"],
    };

    for (const { op, title, build } of operations) {
        for (const { label, prime } of fields) {
            const activeOps = enabled[label] || [];
            if (!activeOps.includes(op)) continue;

            it(`${label} ${title}`, async () => {
                const tv = build(prime, op);
                await tester(prime, tv);
            });
        }
    }
});
|||
|
|||
/**
 * Builds the test vector for a binary operation over the field of order p.
 *
 * Every ordered pair of critical numbers becomes one entry of the form
 * [[leftRepr, rightRepr, op], expected], where the expected value is
 * computed with ZqField. For "div" and "mod", pairs whose right operand is
 * zero are left out.
 */
function buildTestVector2(p, op) {
    const field = new ZqField(p);
    const vectors = [];
    const candidates = getCriticalNumbers(p, 2);

    const skipZeroRight = ["div", "mod"].includes(op);

    for (const left of candidates) {
        for (const right of candidates) {
            if (skipZeroRight && right[0].isZero()) continue;

            const input = [left[1], right[1], op];
            const expected = field[op](left[0], right[0]);
            vectors.push([input, expected]);
        }
    }

    return vectors;
}
|||
|
|||
/**
 * Builds the test vector for a single-operand operation `op`.
 *
 * Each entry returned by getCriticalNumbers(p, 2) produces one item of the
 * form
 *     [[encodedValue, op], F[op](value)]
 * where F is a ZqField constructed over p.  For "inv", entries whose value is
 * zero are left out.
 *
 * @param p   modulus handed to ZqField and getCriticalNumbers
 * @param op  name of the ZqField method to exercise
 * @returns   array of [input, expected] items
 */
function buildTestVector1(p, op) {
    const field = new ZqField(p);
    const vectors = [];
    const operands = getCriticalNumbers(p, 2);

    const skipZero = (op === "inv");

    for (const operand of operands) {
        if (skipZero && operand[0].isZero()) continue;

        const input = [operand[1], op];
        const expected = field[op](operand[0]);
        vectors.push([input, expected]);
    }

    return vectors;
}
|||
|
|||
/**
 * Collects boundary values modulo p together with their textual encodings.
 *
 * For each "frontier" value f (0, 2^31, 2^32, 2^63, 2^64 and their
 * counterparts p - 2^k, plus 2^(bitLength(p)-1) and p >> 1) the values
 * f-lim .. f+lim are reduced into [0, p) and emitted in every applicable
 * encoding, always in this order:
 *   1. short positive             (only when n < 2^31)
 *   2. short Montgomery positive  (only when n < 2^31)
 *   3. short negative             (only when n >= p - 2^31)
 *   4. short Montgomery negative  (only when n >= p - 2^31)
 *   5. long normal
 *   6. long Montgomery
 *
 * @param p    modulus (bigInt)
 * @param lim  how many neighbours on each side of a frontier are included
 * @returns    array of [value, encodedString] pairs
 */
function getCriticalNumbers(p, lim) {
    const shortBound = bigInt("80000000", 16);          // 2^31
    const limbMask = bigInt("FFFFFFFFFFFFFFFF", 16);    // low 64 bits
    const numbers = [];

    const hex = (v) => "0x" + v.toString(16);
    const pow2 = (bits) => bigInt.one.shiftLeft(bits);
    // Header word: the given hex byte shifted into bits 56..63.
    const header = (typeByteHex) => bigInt(typeByteHex, 16).shiftLeft(56);

    // a * R mod p, with R = 2^(64 * number of 64-bit limbs needed for p).
    function toMontgomery(a) {
        const n64 = Math.floor((p.bitLength() - 1) / 64) + 1;
        return a.times(pow2(n64 * 64)).mod(p);
    }

    // Comma-separated 64-bit limbs in hex, least significant limb first.
    function getLongString(a) {
        if (a.isZero()) return "0x0";

        const limbs = [];
        for (let rest = a; !rest.isZero(); rest = rest.shiftRight(64)) {
            limbs.push(hex(rest.and(limbMask)));
        }
        return limbs.join(",");
    }

    // 2^31 + (a - (p - 2^31)), i.e. a - p + 2^32.
    function shortNegativeWord(a) {
        return shortBound.add(a.minus(p.minus(shortBound)));
    }

    function addNumber(n) {
        if (n.lt(shortBound)) {
            // short positive
            numbers.push([n, hex(n)]);
            // short Montgomery positive
            numbers.push([
                n,
                hex(header("40").add(n)) + "," + getLongString(toMontgomery(n))
            ]);
        }

        if (n.geq(p.minus(shortBound))) {
            // short negative
            numbers.push([n, hex(shortNegativeWord(n))]);
            // short Montgomery negative
            numbers.push([
                n,
                hex(header("40").add(shortNegativeWord(n))) + "," + getLongString(toMontgomery(n))
            ]);
        }

        // long normal
        numbers.push([n, hex(header("80")) + "," + getLongString(n)]);
        // long Montgomery
        numbers.push([n, hex(header("C0")) + "," + getLongString(toMontgomery(n))]);
    }

    function addFrontier(f) {
        for (let offset = -lim; offset <= lim; offset++) {
            let n = bigInt(f).add(bigInt(offset)).mod(p);
            if (n.isNegative()) n = p.add(n);   // bring the remainder into [0, p)
            addNumber(n);
        }
    }

    // Frontier values, in the order their neighbourhoods are emitted.
    const frontiers = [0];
    for (const bits of [31, 32, 63, 64]) {
        frontiers.push(pow2(bits));
        frontiers.push(p.minus(pow2(bits)));
    }
    frontiers.push(pow2(p.bitLength() - 1));
    frontiers.push(p.shiftRight(1));

    for (const frontier of frontiers) {
        addFrontier(frontier);
    }

    return numbers;
}
|||
|