diff --git a/cli.js b/cli.js index bf56050..17651fe 100755 --- a/cli.js +++ b/cli.js @@ -30,7 +30,7 @@ const version = require("./package").version; const argv = require("yargs") .version(version) - .usage("circom [input source circuit file] -o [output definition circuit file] -c [output c file]") + .usage("circom [input source circuit file] -r [output r1cs file] -c [output c file] -w [output wasm file] -t [output wat file] -s [output sym file]") .alias("o", "output") .alias("c", "csource") .alias("w", "wasm") @@ -50,6 +50,10 @@ const argv = require("yargs") type: "boolean", description: "Do not optimize constraints" }) + .option("sanityCheck", { + type: "boolean", + description: "Add sanity check code" + }) .epilogue(`Copyright (C) 2018 0kims association This program comes with ABSOLUTELY NO WARRANTY; This is free software, and you are welcome to redistribute it @@ -79,6 +83,7 @@ const symName = typeof(argv.sym) === "string" ? argv.sym : fileName + ".sym"; const options = {}; options.reduceConstraints = !argv.fast; options.verbose = argv.verbose || false; +options.sanityCheck = argv.sanityCheck || argv.sanitycheck || false; if (argv.csource) { options.cSourceWriteStream = fs.createWriteStream(cSourceName); } diff --git a/index.js b/index.js index a75c4a3..add3ff5 100644 --- a/index.js +++ b/index.js @@ -1,3 +1,4 @@ module.exports.compiler = require("./src/compiler.js"); module.exports.c_tester = require("./ports/c/tester.js"); module.exports.wasm_tester = require("./ports/wasm/tester.js"); +module.exports.tester = module.exports.wasm_tester; diff --git a/ports/c/buildasm/add.asm.ejs b/ports/c/buildasm/add.asm.ejs index 292bcb0..70cfd73 100644 --- a/ports/c/buildasm/add.asm.ejs +++ b/ports/c/buildasm/add.asm.ejs @@ -116,14 +116,14 @@ add_l1ms2m: ;;;;;;;; add_s1l2: - bt rcx, 62 ; check if montgomery first + bt rcx, 62 ; check if montgomery second jc add_s1l2m add_s1l2n: <%= global.setTypeDest("0x80"); %> <%= addS1L2(); %> add_s1l2m: - bt rax, 62 ; check if montgomery second + bt rax, 62 ; check 
if montgomery first jc add_s1ml2m add_s1nl2m: <%= global.setTypeDest("0xC0"); %> diff --git a/ports/c/buildasm/binops.asm.ejs b/ports/c/buildasm/binops.asm.ejs index 43a06fe..4001893 100644 --- a/ports/c/buildasm/binops.asm.ejs +++ b/ports/c/buildasm/binops.asm.ejs @@ -1,3 +1,24 @@ +<% function binOpSubQIfBigger() { %> + <% const subQ = global.tmpLabel() %> + <% const done = global.tmpLabel() %> + + ; Compare with q +<% for (let i=0; i<n64; i++) { %> + mov rax, [rdi + <%= (n64-i)*8 %>] + cmp rax, [q + <%= (n64-i-1)*8 %>] + jc <%=done%> ; q is bigget so done. + jnz <%=subQ%> ; q is lower +<% } %> + ; If equal substract q +<%=subQ%>: +<% for (let i=0; i<n64; i++) { %> + mov rax, [q + <%=i*8%>] + <%= i==0 ? "sub" : "sbb" %> [rdi + <%=i*8 + 8 %>], rax +<% } %> +<%=done%>: +<% } %> + + <% function binOpS1S2(op) { %> cmp r8d, 0 <% const s1s2_solveNeg = global.tmpLabel() %> @@ -35,6 +56,7 @@ <% } %> mov [rdi + <%= (i*8)+8 %> ], rax <% } %> +<% binOpSubQIfBigger() %> ret <%=s1l2_solveNeg%>: @@ -59,6 +81,7 @@ <% } %> mov [rdi + <%= (i*8)+8 %> ], rax; <% } %> +<% binOpSubQIfBigger() %> ret <%=l1s2_solveNeg%>: @@ -77,12 +100,11 @@ <% } %> mov [rdi + <%= (i*8)+8 %> ], rax <% } %> +<% binOpSubQIfBigger() %> ret <% } %> - - <% function binOp(op) { %> ;;;;;;;;;;;;;;;;;;;;;; ; b<%= op %> @@ -212,6 +234,7 @@ bnot_l1n: <% } %> mov [rdi + <%= i*8 + 8 %>], rax <% } %> +<% binOpSubQIfBigger() %> ret diff --git a/ports/c/buildasm/fr.asm b/ports/c/buildasm/fr.asm index ceb7538..180cd8d 100644 --- a/ports/c/buildasm/fr.asm +++ b/ports/c/buildasm/fr.asm @@ -1415,23 +1415,27 @@ rawFromMontgomery_mulM_done: ;;;;;;;;;;;;;;;;;;;; Fr_toMontgomery: mov rax, [rdi] - bts rax, 62 ; check if montgomery + bt rax, 62 ; check if montgomery jc toMontgomery_doNothing - bts rax, 63 + bt rax, 63 jc toMontgomeryLong toMontgomeryShort: - mov [rdi], rax add rdi, 8 push rsi + push rdx lea rsi, [R2] movsx rdx, eax cmp rdx, 0 js negMontgomeryShort posMontgomeryShort: call rawMontgomeryMul1 + pop rdx pop rsi sub rdi, 8 + mov r11b, 0x40 + shl 
r11d, 24 + mov [rdi+4], r11d ret negMontgomeryShort: @@ -1439,8 +1443,12 @@ negMontgomeryShort: call rawMontgomeryMul1 mov rsi, rdi call rawNegL + pop rdx pop rsi sub rdi, 8 + mov r11b, 0x40 + shl r11d, 24 + mov [rdi+4], r11d ret @@ -1453,6 +1461,10 @@ toMontgomeryLong: call rawMontgomeryMul pop rsi sub rdi, 8 + mov r11b, 0xC0 + shl r11d, 24 + mov [rdi+4], r11d + toMontgomery_doNothing: ret @@ -1467,16 +1479,18 @@ toMontgomery_doNothing: ;;;;;;;;;;;;;;;;;;;; Fr_toNormal: mov rax, [rdi] - btc rax, 62 ; check if montgomery + bt rax, 62 ; check if montgomery jnc toNormal_doNothing bt rax, 63 ; if short, it means it's converted jnc toNormal_doNothing toNormalLong: - mov [rdi], rax add rdi, 8 call rawFromMontgomery sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d toNormal_doNothing: ret @@ -1501,6 +1515,9 @@ toLongNormal_fromMontgomery: add rdi, 8 call rawFromMontgomery sub rdi, 8 + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d ret toLongNormal_fromShort: @@ -1508,6 +1525,9 @@ toLongNormal_fromShort: movsx rsi, eax call rawCopyS2L mov rsi, r8 ; recover rsi + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d ret @@ -1569,8 +1589,8 @@ add_l1s2: jc add_l1ms2 add_l1ns2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rsi, 8 movsx rdx, ecx @@ -1596,8 +1616,8 @@ add_l1ms2: jc add_l1ms2m add_l1ms2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toMontgomery @@ -1615,8 +1635,8 @@ add_l1ms2n: add_l1ms2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -1630,12 +1650,12 @@ add_l1ms2m: ;;;;;;;; add_s1l2: - bt rcx, 62 ; check if montgomery first + bt rcx, 62 ; check if montgomery second jc add_s1l2m add_s1l2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d lea rsi, [rdx + 8] movsx rdx, eax @@ -1656,12 +1676,12 @@ tmp_2: add_s1l2m: - bt rax, 62 ; check if montgomery second + 
bt rax, 62 ; check if montgomery first jc add_s1ml2m add_s1nl2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -1681,8 +1701,8 @@ add_s1nl2m: add_s1ml2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -1702,8 +1722,8 @@ add_l1nl2: jc add_l1nl2m add_l1nl2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -1716,8 +1736,8 @@ add_l1nl2n: add_l1nl2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -1740,8 +1760,8 @@ add_l1ml2: jc add_l1ml2m add_l1ml2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toMontgomery @@ -1759,8 +1779,8 @@ add_l1ml2n: add_l1ml2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -1983,8 +2003,8 @@ sub_l1s2: jc sub_l1ms2 sub_l1ns2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rsi, 8 movsx rdx, ecx @@ -2009,8 +2029,8 @@ sub_l1ms2: jc sub_l1ms2m sub_l1ms2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toMontgomery @@ -2028,8 +2048,8 @@ sub_l1ms2n: sub_l1ms2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2047,8 +2067,8 @@ sub_s1l2: jc sub_s1l2m sub_s1l2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp eax, 0 @@ -2081,8 +2101,8 @@ sub_s1l2m: jc sub_s1ml2m sub_s1nl2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -2102,8 +2122,8 @@ sub_s1nl2m: sub_s1ml2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2123,8 +2143,8 @@ sub_l1nl2: jc sub_l1nl2m 
sub_l1nl2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2137,8 +2157,8 @@ sub_l1nl2n: sub_l1nl2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -2161,8 +2181,8 @@ sub_l1ml2: jc sub_l1ml2m sub_l1ml2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toMontgomery @@ -2180,8 +2200,8 @@ sub_l1ml2n: sub_l1ml2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2587,8 +2607,8 @@ square_l1: jc square_l1m square_l1n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2609,8 +2629,8 @@ square_l1n: square_l1m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2675,8 +2695,8 @@ mul_l1ns2: jc mul_l1ns2m mul_l1ns2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rsi add rsi, 8 @@ -2714,8 +2734,8 @@ tmp_6: mul_l1ns2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2732,8 +2752,8 @@ mul_l1ms2: jc mul_l1ms2m mul_l1ms2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rsi add rsi, 8 @@ -2761,8 +2781,8 @@ tmp_8: mul_l1ms2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2783,8 +2803,8 @@ mul_s1nl2: jc mul_s1nl2m mul_s1nl2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rsi lea rsi, [rdx + 8] @@ -2821,8 +2841,8 @@ tmp_10: mul_s1nl2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rsi lea rsi, [rdx + 8] @@ -2853,8 +2873,8 @@ mul_s1ml2: jc mul_s1ml2m mul_s1ml2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 
add rsi, 8 @@ -2867,8 +2887,8 @@ mul_s1ml2n: mul_s1ml2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2888,8 +2908,8 @@ mul_l1nl2: jc mul_l1nl2m mul_l1nl2n: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2911,8 +2931,8 @@ mul_l1nl2n: mul_l1nl2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2928,8 +2948,8 @@ mul_l1ml2: jc mul_l1ml2m mul_l1ml2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2942,8 +2962,8 @@ mul_l1ml2n: mul_l1ml2m: mov r11b, 0xC0 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d add rdi, 8 add rsi, 8 @@ -2970,6 +2990,7 @@ mul_l1ml2m: + ;;;;;;;;;;;;;;;;;;;;;; ; band ;;;;;;;;;;;;;;;;;;;;;; @@ -3005,8 +3026,8 @@ and_s1s2: tmp_13: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi push rsi mov rdi, rdx @@ -3047,6 +3068,49 @@ tmp_13: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_15 ; q is bigget so done. + jnz tmp_14 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_15 ; q is bigget so done. 
+ jnz tmp_14 ; q is lower + + ; If equal substract q +tmp_14: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_15: + ret @@ -3064,12 +3128,12 @@ and_l1s2: jc and_l1ms2 and_l1ns2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp r9d, 0 - js tmp_14 + js tmp_16 movsx rax, r9d and rax, [rsi +8] mov [rdi+8], rax @@ -3091,9 +3155,52 @@ and_l1ns2: mov [rdi + 32 ], rax; + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_18 ; q is bigget so done. + jnz tmp_17 ; q is lower + + ; If equal substract q +tmp_17: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_18: + ret -tmp_14: +tmp_16: push rdi push rsi mov rdi, rdx @@ -3103,8 +3210,8 @@ tmp_14: pop rsi pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3129,6 +3236,49 @@ tmp_14: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_20 ; q is bigget so done. + jnz tmp_19 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_20 ; q is bigget so done. 
+ jnz tmp_19 ; q is lower + + ; If equal substract q +tmp_19: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_20: + ret @@ -3136,8 +3286,8 @@ tmp_14: and_l1ms2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push r9 ; r9 is used in montgomery so we need to save it push rdi mov rdi, rsi @@ -3150,7 +3300,7 @@ and_l1ms2: cmp r9d, 0 - js tmp_15 + js tmp_21 movsx rax, r9d and rax, [rsi +8] mov [rdi+8], rax @@ -3172,9 +3322,52 @@ and_l1ms2: mov [rdi + 32 ], rax; + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_23 ; q is bigget so done. + jnz tmp_22 ; q is lower + + ; If equal substract q +tmp_22: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_23: + ret -tmp_15: +tmp_21: push rdi push rsi mov rdi, rdx @@ -3184,8 +3377,8 @@ tmp_15: pop rsi pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3210,6 +3403,49 @@ tmp_15: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_25 ; q is bigget so done. + jnz tmp_24 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_25 ; q is bigget so done. 
+ jnz tmp_24 ; q is lower + + ; If equal substract q +tmp_24: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_25: + ret @@ -3221,12 +3457,12 @@ and_s1l2: jc and_s1l2m and_s1l2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp r8d, 0 - js tmp_16 + js tmp_26 movsx rax, r8d and rax, [rdx +8] mov [rdi+8], rax @@ -3248,9 +3484,52 @@ and_s1l2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_28 ; q is bigget so done. + jnz tmp_27 ; q is lower + + ; If equal substract q +tmp_27: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_28: + ret -tmp_16: +tmp_26: push rdi push rdx mov rdi, rsi @@ -3260,8 +3539,8 @@ tmp_16: pop rdx pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3286,6 +3565,49 @@ tmp_16: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_30 ; q is bigget so done. + jnz tmp_29 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_30 ; q is bigget so done. 
+ jnz tmp_29 ; q is lower + + ; If equal substract q +tmp_29: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_30: + ret @@ -3293,8 +3615,8 @@ tmp_16: and_s1l2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push r8 ; r8 is used in montgomery so we need to save it push rdi mov rdi, rdx @@ -3305,7 +3627,7 @@ and_s1l2m: cmp r8d, 0 - js tmp_17 + js tmp_31 movsx rax, r8d and rax, [rdx +8] mov [rdi+8], rax @@ -3327,20 +3649,63 @@ and_s1l2m: mov [rdi + 32 ], rax - ret -tmp_17: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_33 ; q is bigget so done. + jnz tmp_32 ; q is lower + + ; If equal substract q +tmp_32: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_33: + + ret + +tmp_31: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3365,6 +3730,49 @@ tmp_17: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_35 ; q is bigget so done. 
+ jnz tmp_34 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_35 ; q is bigget so done. + jnz tmp_34 ; q is lower + + ; If equal substract q +tmp_34: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_35: + ret @@ -3378,8 +3786,8 @@ and_l1l2: jc and_l1nl2m and_l1nl2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3404,13 +3812,56 @@ and_l1nl2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_37 ; q is bigget so done. + jnz tmp_36 ; q is lower + + ; If equal substract q +tmp_36: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_37: + ret and_l1nl2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toNormal @@ -3440,6 +3891,49 @@ and_l1nl2m: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_39 ; q is bigget so done. + jnz tmp_38 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_39 ; q is bigget so done. 
+ jnz tmp_38 ; q is lower + + ; If equal substract q +tmp_38: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_39: + ret @@ -3448,8 +3942,8 @@ and_l1ml2: jc and_l1ml2m and_l1ml2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -3481,13 +3975,56 @@ and_l1ml2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_41 ; q is bigget so done. + jnz tmp_40 ; q is lower + + ; If equal substract q +tmp_40: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_41: + ret and_l1ml2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -3524,6 +4061,49 @@ and_l1ml2m: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_43 ; q is bigget so done. + jnz tmp_42 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_43 ; q is bigget so done. 
+ jnz tmp_42 ; q is lower + + ; If equal substract q +tmp_42: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_43: + ret @@ -3551,20 +4131,20 @@ or_s1s2: cmp r8d, 0 - js tmp_18 + js tmp_44 cmp r9d, 0 - js tmp_18 + js tmp_44 xor rdx, rdx ; both ops are positive so do the op and return mov edx, r8d or edx, r9d mov [rdi], rdx ; not necessary to adjust so just save and return ret -tmp_18: +tmp_44: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi push rsi mov rdi, rdx @@ -3605,6 +4185,49 @@ tmp_18: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_46 ; q is bigget so done. + jnz tmp_45 ; q is lower + + ; If equal substract q +tmp_45: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_46: + ret @@ -3622,12 +4245,12 @@ or_l1s2: jc or_l1ms2 or_l1ns2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp r9d, 0 - js tmp_19 + js tmp_47 movsx rax, r9d or rax, [rsi +8] mov [rdi+8], rax @@ -3649,9 +4272,52 @@ or_l1ns2: mov [rdi + 32 ], rax; + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_49 ; q is bigget so done. 
+ jnz tmp_48 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_49 ; q is bigget so done. + jnz tmp_48 ; q is lower + + ; If equal substract q +tmp_48: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_49: + ret -tmp_19: +tmp_47: push rdi push rsi mov rdi, rdx @@ -3661,8 +4327,8 @@ tmp_19: pop rsi pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3687,15 +4353,58 @@ tmp_19: mov [rdi + 32 ], rax - ret + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_51 ; q is bigget so done. + jnz tmp_50 ; q is lower + + ; If equal substract q +tmp_50: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_51: + + ret or_l1ms2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push r9 ; r9 is used in montgomery so we need to save it push rdi mov rdi, rsi @@ -3708,7 +4417,7 @@ or_l1ms2: cmp r9d, 0 - js tmp_20 + js tmp_52 movsx rax, r9d or rax, [rsi +8] mov [rdi+8], rax @@ -3730,9 +4439,52 @@ or_l1ms2: mov [rdi + 32 ], rax; + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_54 ; q is bigget so done. 
+ jnz tmp_53 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_54 ; q is bigget so done. + jnz tmp_53 ; q is lower + + ; If equal substract q +tmp_53: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_54: + ret -tmp_20: +tmp_52: push rdi push rsi mov rdi, rdx @@ -3742,8 +4494,8 @@ tmp_20: pop rsi pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3768,6 +4520,49 @@ tmp_20: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_56 ; q is bigget so done. + jnz tmp_55 ; q is lower + + ; If equal substract q +tmp_55: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_56: + ret @@ -3779,12 +4574,12 @@ or_s1l2: jc or_s1l2m or_s1l2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp r8d, 0 - js tmp_21 + js tmp_57 movsx rax, r8d or rax, [rdx +8] mov [rdi+8], rax @@ -3806,9 +4601,52 @@ or_s1l2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_59 ; q is bigget so done. + jnz tmp_58 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_59 ; q is bigget so done. 
+ jnz tmp_58 ; q is lower + + ; If equal substract q +tmp_58: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_59: + ret -tmp_21: +tmp_57: push rdi push rdx mov rdi, rsi @@ -3818,8 +4656,8 @@ tmp_21: pop rdx pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3844,6 +4682,49 @@ tmp_21: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_61 ; q is bigget so done. + jnz tmp_60 ; q is lower + + ; If equal substract q +tmp_60: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_61: + ret @@ -3851,8 +4732,8 @@ tmp_21: or_s1l2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push r8 ; r8 is used in montgomery so we need to save it push rdi mov rdi, rdx @@ -3863,7 +4744,7 @@ or_s1l2m: cmp r8d, 0 - js tmp_22 + js tmp_62 movsx rax, r8d or rax, [rdx +8] mov [rdi+8], rax @@ -3885,9 +4766,52 @@ or_s1l2m: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_64 ; q is bigget so done. + jnz tmp_63 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_64 ; q is bigget so done. 
+ jnz tmp_63 ; q is lower + + ; If equal substract q +tmp_63: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_64: + ret -tmp_22: +tmp_62: push rdi push rdx mov rdi, rsi @@ -3897,8 +4821,8 @@ tmp_22: pop rdx pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3923,6 +4847,49 @@ tmp_22: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_66 ; q is bigget so done. + jnz tmp_65 ; q is lower + + ; If equal substract q +tmp_65: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_66: + ret @@ -3936,8 +4903,8 @@ or_l1l2: jc or_l1nl2m or_l1nl2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -3962,13 +4929,56 @@ or_l1nl2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_68 ; q is bigget so done. + jnz tmp_67 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_68 ; q is bigget so done. 
+ jnz tmp_67 ; q is lower + + ; If equal substract q +tmp_67: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_68: + ret or_l1nl2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toNormal @@ -3998,19 +5008,62 @@ or_l1nl2m: mov [rdi + 32 ], rax - ret + + -or_l1ml2: - bt r9, 62 ; check if montgomery first - jc or_l1ml2m -or_l1ml2n: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_70 ; q is bigget so done. + jnz tmp_69 ; q is lower + + ; If equal substract q +tmp_69: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_70: + + ret + + +or_l1ml2: + bt r9, 62 ; check if montgomery first + jc or_l1ml2m +or_l1ml2n: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + push rdi + mov rdi, rsi + mov rsi, rdx call Fr_toNormal mov rdx, rsi mov rsi, rdi @@ -4039,13 +5092,56 @@ or_l1ml2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_72 ; q is bigget so done. + jnz tmp_71 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_72 ; q is bigget so done. 
+ jnz tmp_71 ; q is lower + + ; If equal substract q +tmp_71: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_72: + ret or_l1ml2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -4082,6 +5178,49 @@ or_l1ml2m: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_74 ; q is bigget so done. + jnz tmp_73 ; q is lower + + ; If equal substract q +tmp_73: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_74: + ret @@ -4109,20 +5248,20 @@ xor_s1s2: cmp r8d, 0 - js tmp_23 + js tmp_75 cmp r9d, 0 - js tmp_23 + js tmp_75 xor rdx, rdx ; both ops are positive so do the op and return mov edx, r8d xor edx, r9d mov [rdi], rdx ; not necessary to adjust so just save and return ret -tmp_23: +tmp_75: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi push rsi mov rdi, rdx @@ -4163,6 +5302,49 @@ tmp_23: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_77 ; q is bigget so done. + jnz tmp_76 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_77 ; q is bigget so done. 
+ jnz tmp_76 ; q is lower + + ; If equal substract q +tmp_76: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_77: + ret @@ -4180,12 +5362,12 @@ xor_l1s2: jc xor_l1ms2 xor_l1ns2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp r9d, 0 - js tmp_24 + js tmp_78 movsx rax, r9d xor rax, [rsi +8] mov [rdi+8], rax @@ -4207,9 +5389,52 @@ xor_l1ns2: mov [rdi + 32 ], rax; + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_80 ; q is bigget so done. + jnz tmp_79 ; q is lower + + ; If equal substract q +tmp_79: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_80: + ret -tmp_24: +tmp_78: push rdi push rsi mov rdi, rdx @@ -4219,8 +5444,8 @@ tmp_24: pop rsi pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -4245,6 +5470,49 @@ tmp_24: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_82 ; q is bigget so done. + jnz tmp_81 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_82 ; q is bigget so done. 
+ jnz tmp_81 ; q is lower + + ; If equal substract q +tmp_81: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_82: + ret @@ -4252,8 +5520,8 @@ tmp_24: xor_l1ms2: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push r9 ; r9 is used in montgomery so we need to save it push rdi mov rdi, rsi @@ -4266,7 +5534,7 @@ xor_l1ms2: cmp r9d, 0 - js tmp_25 + js tmp_83 movsx rax, r9d xor rax, [rsi +8] mov [rdi+8], rax @@ -4288,9 +5556,52 @@ xor_l1ms2: mov [rdi + 32 ], rax; + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_85 ; q is bigget so done. + jnz tmp_84 ; q is lower + + ; If equal substract q +tmp_84: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_85: + ret -tmp_25: +tmp_83: push rdi push rsi mov rdi, rdx @@ -4300,8 +5611,8 @@ tmp_25: pop rsi pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -4326,6 +5637,49 @@ tmp_25: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_87 ; q is bigget so done. + jnz tmp_86 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_87 ; q is bigget so done. 
+ jnz tmp_86 ; q is lower + + ; If equal substract q +tmp_86: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_87: + ret @@ -4337,12 +5691,12 @@ xor_s1l2: jc xor_s1l2m xor_s1l2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d cmp r8d, 0 - js tmp_26 + js tmp_88 movsx rax, r8d xor rax, [rdx +8] mov [rdi+8], rax @@ -4364,20 +5718,63 @@ xor_s1l2n: mov [rdi + 32 ], rax - ret -tmp_26: - push rdi - push rdx - mov rdi, rsi + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_90 ; q is bigget so done. + jnz tmp_89 ; q is lower + + ; If equal substract q +tmp_89: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_90: + + ret + +tmp_88: + push rdi + push rdx + mov rdi, rsi movsx rsi, r8d call rawCopyS2L mov rsi, rdi pop rdx pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -4402,6 +5799,49 @@ tmp_26: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_92 ; q is bigget so done. + jnz tmp_91 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_92 ; q is bigget so done. 
+ jnz tmp_91 ; q is lower + + ; If equal substract q +tmp_91: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_92: + ret @@ -4409,8 +5849,8 @@ tmp_26: xor_s1l2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push r8 ; r8 is used in montgomery so we need to save it push rdi mov rdi, rdx @@ -4421,7 +5861,7 @@ xor_s1l2m: cmp r8d, 0 - js tmp_27 + js tmp_93 movsx rax, r8d xor rax, [rdx +8] mov [rdi+8], rax @@ -4443,9 +5883,52 @@ xor_s1l2m: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_95 ; q is bigget so done. + jnz tmp_94 ; q is lower + + ; If equal substract q +tmp_94: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_95: + ret -tmp_27: +tmp_93: push rdi push rdx mov rdi, rsi @@ -4455,8 +5938,8 @@ tmp_27: pop rdx pop rdi mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -4481,6 +5964,49 @@ tmp_27: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_97 ; q is bigget so done. + jnz tmp_96 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_97 ; q is bigget so done. 
+ jnz tmp_96 ; q is lower + + ; If equal substract q +tmp_96: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_97: + ret @@ -4494,8 +6020,8 @@ xor_l1l2: jc xor_l1nl2m xor_l1nl2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d mov rax, [rsi + 8] @@ -4520,13 +6046,56 @@ xor_l1nl2n: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_99 ; q is bigget so done. + jnz tmp_98 ; q is lower + + ; If equal substract q +tmp_98: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_99: + ret xor_l1nl2m: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rdx call Fr_toNormal @@ -4556,6 +6125,49 @@ xor_l1nl2m: mov [rdi + 32 ], rax + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_101 ; q is bigget so done. + jnz tmp_100 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_101 ; q is bigget so done. 
+ jnz tmp_100 ; q is lower + + ; If equal substract q +tmp_100: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_101: + ret @@ -4564,8 +6176,89 @@ xor_l1ml2: jc xor_l1ml2m xor_l1ml2n: mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 + shl r11d, 24 + mov [rdi+4], r11d + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + + + mov rax, [rsi + 8] + xor rax, [rdx + 8] + + mov [rdi + 8 ], rax + + mov rax, [rsi + 16] + xor rax, [rdx + 16] + + mov [rdi + 16 ], rax + + mov rax, [rsi + 24] + xor rax, [rdx + 24] + + mov [rdi + 24 ], rax + + mov rax, [rsi + 32] + xor rax, [rdx + 32] + + and rax, [lboMask] + + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_103 ; q is bigget so done. + jnz tmp_102 ; q is lower + + ; If equal substract q +tmp_102: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_103: + + ret + + +xor_l1ml2m: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d push rdi mov rdi, rsi mov rsi, rdx @@ -4573,6 +6266,11 @@ xor_l1ml2n: mov rdx, rsi mov rsi, rdi pop rdi + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi mov rax, [rsi + 8] @@ -4595,131 +6293,456 @@ xor_l1ml2n: and rax, [lboMask] - mov [rdi + 32 ], rax + mov [rdi + 32 ], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_105 ; q is bigget so done. 
+ jnz tmp_104 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_105 ; q is bigget so done. + jnz tmp_104 ; q is lower + + ; If equal substract q +tmp_104: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_105: + + ret + + + + +;;;;;;;;;;;;;;;;;;;;;; +; bnot +;;;;;;;;;;;;;;;;;;;;;; +; Adds two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdi <= Pointer to result +; Modified Registers: +; r8, r9, 10, r11, rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_bnot: + mov r11b, 0x80 + shl r11d, 24 + mov [rdi+4], r11d + + mov r8, [rsi] + bt r8, 63 ; Check if is long operand + jc bnot_l1 +bnot_s: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + jmp bnot_l1n + +bnot_l1: + bt r8, 62 ; check if montgomery first + jnc bnot_l1n + +bnot_l1m: + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + +bnot_l1n: + + mov rax, [rsi + 8] + not rax + + mov [rdi + 8], rax + + mov rax, [rsi + 16] + not rax + + mov [rdi + 16], rax + + mov rax, [rsi + 24] + not rax + + mov [rdi + 24], rax + + mov rax, [rsi + 32] + not rax + + and rax, [lboMask] + + mov [rdi + 32], rax + + + + + + ; Compare with q + + mov rax, [rdi + 32] + cmp rax, [q + 24] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 24] + cmp rax, [q + 16] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 16] + cmp rax, [q + 8] + jc tmp_107 ; q is bigget so done. + jnz tmp_106 ; q is lower + + mov rax, [rdi + 8] + cmp rax, [q + 0] + jc tmp_107 ; q is bigget so done. 
+ jnz tmp_106 ; q is lower + + ; If equal substract q +tmp_106: + + mov rax, [q + 0] + sub [rdi + 8], rax + + mov rax, [q + 8] + sbb [rdi + 16], rax + + mov rax, [q + 16] + sbb [rdi + 24], rax + + mov rax, [q + 24] + sbb [rdi + 32], rax + +tmp_107: + + ret + + + + + + +;;;;;;;;;;;;;;;;;;;;;; +; rgt - Raw Greater Than +;;;;;;;;;;;;;;;;;;;;;; +; returns in ax 1 id *rsi > *rdx +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rax <= Return 1 or 0 +; Modified Registers: +; r8, r9, rax +;;;;;;;;;;;;;;;;;;;;;; +Fr_rgt: + mov r8, [rsi] + mov r9, [rdx] + bt r8, 63 ; Check if is short first operand + jc rgt_l1 + bt r9, 63 ; Check if is short second operand + jc rgt_s1l2 + +rgt_s1s2: ; Both operands are short + cmp r8d, r9d + jg rgt_ret1 + jmp rgt_ret0 + + +rgt_l1: + bt r9, 63 ; Check if is short second operand + jc rgt_l1l2 + +;;;;;;;; +rgt_l1s2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ms2 +rgt_l1ns2: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + jmp rgtL1L2 + +rgt_l1ms2: + push rdi + push rsi + mov rdi, rdx + movsx rsi, r9d + call rawCopyS2L + mov rdx, rdi + pop rsi + pop rdi + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + jmp rgtL1L2 + + +;;;;;;;; +rgt_s1l2: + bt r9, 62 ; check if montgomery second + jc rgt_s1l2m +rgt_s1l2n: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + jmp rgtL1L2 + +rgt_s1l2m: + push rdi + push rdx + mov rdi, rsi + movsx rsi, r8d + call rawCopyS2L + mov rsi, rdi + pop rdx + pop rdi + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + jmp rgtL1L2 + +;;;; +rgt_l1l2: + bt r8, 62 ; check if montgomery first + jc rgt_l1ml2 +rgt_l1nl2: + bt r9, 62 ; check if montgomery second + jc rgt_l1nl2m +rgt_l1nl2n: + jmp rgtL1L2 + +rgt_l1nl2m: + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + jmp rgtL1L2 + 
+rgt_l1ml2: + bt r9, 62 ; check if montgomery second + jc rgt_l1ml2m +rgt_l1ml2n: + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + jmp rgtL1L2 + +rgt_l1ml2m: + push rdi + mov rdi, rsi + mov rsi, rdx + call Fr_toNormal + mov rdx, rsi + mov rsi, rdi + pop rdi + push rdi + mov rdi, rdx + call Fr_toNormal + mov rdx, rdi + pop rdi + jmp rgtL1L2 + + +;;;;;; +; rgtL1L2 +;;;;;; + +rgtL1L2: + + + mov rax, [rsi + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rsi + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + + jmp rgtl1l2_p1 + + + +rgtl1l2_p1: + + + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + + jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + + + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 - ret + jmp rgtRawL1L2 -xor_l1ml2m: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi - push rdi - mov rdi, rdx - call Fr_toNormal - mov rdx, rdi - 
pop rdi - mov rax, [rsi + 8] - xor rax, [rdx + 8] +rgtl1l2_n1: - mov [rdi + 8 ], rax - mov rax, [rsi + 16] - xor rax, [rdx + 16] + mov rax, [rdx + 32] + cmp [half + 24], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 - mov [rdi + 16 ], rax + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 - mov rax, [rsi + 24] - xor rax, [rdx + 24] - mov [rdi + 24 ], rax + mov rax, [rdx + 24] + cmp [half + 16], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 - mov rax, [rsi + 32] - xor rax, [rdx + 32] + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 - and rax, [lboMask] - mov [rdi + 32 ], rax + mov rax, [rdx + 16] + cmp [half + 8], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 - ret + jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + mov rax, [rdx + 8] + cmp [half + 0], rax ; comare with (q-1)/2 + jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + jmp rgt_ret0 -;;;;;;;;;;;;;;;;;;;;;; -; bnot -;;;;;;;;;;;;;;;;;;;;;; -; Adds two elements of any kind -; Params: -; rsi <= Pointer to element 1 -; rdi <= Pointer to result -; Modified Registers: -; r8, r9, 10, r11, rax, rcx -;;;;;;;;;;;;;;;;;;;;;; -Fr_bnot: - mov r11b, 0x80 - shl r11, 56 - mov [rdi], r11 - mov r8, [rsi] - bt r8, 63 ; Check if is long operand - jc bnot_l1 -bnot_s: - push rdi - push rdx - mov rdi, rsi - movsx rsi, r8d - call rawCopyS2L - mov rsi, rdi - pop rdx - pop rdi - jmp bnot_l1n -bnot_l1: - bt r8, 62 ; check if montgomery first - jnc bnot_l1n -bnot_l1m: - push rdi - mov rdi, rsi - mov rsi, rdx - call Fr_toNormal - mov rdx, rsi - mov rsi, rdi - pop rdi -bnot_l1n: +rgtRawL1L2: - mov rax, [rsi + 8] - not rax + mov rax, [rsi + 32] + cmp [rdx + 32], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd - mov [rdi + 8], rax + jnz rgt_ret0 - mov rax, [rsi + 16] - not rax - mov [rdi + 16], rax + mov rax, [rsi + 24] + cmp [rdx + 24], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd - mov rax, [rsi + 24] - not rax + jnz rgt_ret0 - 
mov [rdi + 24], rax - mov rax, [rsi + 32] - not rax + mov rax, [rsi + 16] + cmp [rdx + 16], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd - and rax, [lboMask] + jnz rgt_ret0 - mov [rdi + 32], rax - ret + mov rax, [rsi + 8] + cmp [rdx + 8], rax ; comare with (q-1)/2 + jc rgt_ret1 ; rsi 1st > 2nd +rgt_ret0: + xor rax, rax + ret +rgt_ret1: + mov rax, 1 + ret ;;;;;;;;;;;;;;;;;;;;;; -; rgt - Raw Greater Than +; rlt - Raw Less Than ;;;;;;;;;;;;;;;;;;;;;; ; returns in ax 1 id *rsi > *rdx ; Params: @@ -4729,29 +6752,29 @@ bnot_l1n: ; Modified Registers: ; r8, r9, rax ;;;;;;;;;;;;;;;;;;;;;; -Fr_rgt: +Fr_rlt: mov r8, [rsi] mov r9, [rdx] bt r8, 63 ; Check if is short first operand - jc rgt_l1 + jc rlt_l1 bt r9, 63 ; Check if is short second operand - jc rgt_s1l2 + jc rlt_s1l2 -rgt_s1s2: ; Both operands are short +rlt_s1s2: ; Both operands are short cmp r8d, r9d - jg rgt_ret1 - jmp rgt_ret0 + jl rlt_ret1 + jmp rlt_ret0 -rgt_l1: +rlt_l1: bt r9, 63 ; Check if is short second operand - jc rgt_l1l2 + jc rlt_l1l2 ;;;;;;;; -rgt_l1s2: +rlt_l1s2: bt r8, 62 ; check if montgomery first - jc rgt_l1ms2 -rgt_l1ns2: + jc rlt_l1ms2 +rlt_l1ns2: push rdi push rsi mov rdi, rdx @@ -4760,9 +6783,9 @@ rgt_l1ns2: mov rdx, rdi pop rsi pop rdi - jmp rgtL1L2 + jmp rltL1L2 -rgt_l1ms2: +rlt_l1ms2: push rdi push rsi mov rdi, rdx @@ -4778,14 +6801,14 @@ rgt_l1ms2: mov rdx, rsi mov rsi, rdi pop rdi - jmp rgtL1L2 + jmp rltL1L2 ;;;;;;;; -rgt_s1l2: +rlt_s1l2: bt r9, 62 ; check if montgomery second - jc rgt_s1l2m -rgt_s1l2n: + jc rlt_s1l2m +rlt_s1l2n: push rdi push rdx mov rdi, rsi @@ -4794,9 +6817,9 @@ rgt_s1l2n: mov rsi, rdi pop rdx pop rdi - jmp rgtL1L2 + jmp rltL1L2 -rgt_s1l2m: +rlt_s1l2m: push rdi push rdx mov rdi, rsi @@ -4810,30 +6833,30 @@ rgt_s1l2m: call Fr_toNormal mov rdx, rdi pop rdi - jmp rgtL1L2 + jmp rltL1L2 ;;;; -rgt_l1l2: +rlt_l1l2: bt r8, 62 ; check if montgomery first - jc rgt_l1ml2 -rgt_l1nl2: + jc rlt_l1ml2 +rlt_l1nl2: bt r9, 62 ; check if montgomery second - jc rgt_l1nl2m 
-rgt_l1nl2n: - jmp rgtL1L2 + jc rlt_l1nl2m +rlt_l1nl2n: + jmp rltL1L2 -rgt_l1nl2m: +rlt_l1nl2m: push rdi mov rdi, rdx call Fr_toNormal mov rdx, rdi pop rdi - jmp rgtL1L2 + jmp rltL1L2 -rgt_l1ml2: +rlt_l1ml2: bt r9, 62 ; check if montgomery second - jc rgt_l1ml2m -rgt_l1ml2n: + jc rlt_l1ml2m +rlt_l1ml2n: push rdi mov rdi, rsi mov rsi, rdx @@ -4841,9 +6864,9 @@ rgt_l1ml2n: mov rdx, rsi mov rsi, rdi pop rdi - jmp rgtL1L2 + jmp rltL1L2 -rgt_l1ml2m: +rlt_l1ml2m: push rdi mov rdi, rsi mov rsi, rdx @@ -4856,145 +6879,139 @@ rgt_l1ml2m: call Fr_toNormal mov rdx, rdi pop rdi - jmp rgtL1L2 + jmp rltL1L2 ;;;;;; -; rgtL1L2 +; rltL1L2 ;;;;;; -rgtL1L2: +rltL1L2: mov rax, [rsi + 32] cmp [half + 24], rax ; comare with (q-1)/2 - jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 - jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rsi + 24] cmp [half + 16], rax ; comare with (q-1)/2 - jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 - jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rsi + 16] cmp [half + 8], rax ; comare with (q-1)/2 - jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 - jnz rgtl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rltl1l2_p1 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rsi + 8] cmp [half + 0], rax ; comare with (q-1)/2 - jc rgtl1l2_n1 ; half e1-e2 is neg => e1 < e2 + jc rltl1l2_n1 ; half e1-e2 is neg => e1 < e2 - jmp rgtl1l2_p1 + jmp rltl1l2_p1 -rgtl1l2_p1: +rltl1l2_p1: mov rax, [rdx + 32] cmp [half + 24], rax ; comare with (q-1)/2 - jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 - jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rdx + 24] cmp [half + 16], rax ; comare with 
(q-1)/2 - jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 - jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rdx + 16] cmp [half + 8], rax ; comare with (q-1)/2 - jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 - jnz rgtRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rltRawL1L2 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rdx + 8] cmp [half + 0], rax ; comare with (q-1)/2 - jc rgt_ret1 ; half e1-e2 is neg => e1 < e2 + jc rlt_ret0 ; half e1-e2 is neg => e1 < e2 - jmp rgtRawL1L2 + jmp rltRawL1L2 -rgtl1l2_n1: +rltl1l2_n1: mov rax, [rdx + 32] cmp [half + 24], rax ; comare with (q-1)/2 - jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 - jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rdx + 24] cmp [half + 16], rax ; comare with (q-1)/2 - jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 - jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rdx + 16] cmp [half + 8], rax ; comare with (q-1)/2 - jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 - jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 + jnz rlt_ret1 ; half>rax => e1 -e2 is pos => e1 > e2 mov rax, [rdx + 8] cmp [half + 0], rax ; comare with (q-1)/2 - jc rgtRawL1L2 ; half e1-e2 is neg => e1 < e2 + jc rltRawL1L2 ; half e1-e2 is neg => e1 < e2 - jmp rgt_ret0 + jmp rlt_ret1 -rgtRawL1L2: +rltRawL1L2: mov rax, [rsi + 32] cmp [rdx + 32], rax ; comare with (q-1)/2 - jc rgt_ret1 ; rsi 1st > 2nd - - jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 - + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 mov rax, [rsi + 24] cmp [rdx + 24], rax ; comare with (q-1)/2 - jc rgt_ret1 ; rsi 1st > 2nd - - jnz rgt_ret0 ; 
half>rax => e1 -e2 is pos => e1 > e2 - + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 mov rax, [rsi + 16] cmp [rdx + 16], rax ; comare with (q-1)/2 - jc rgt_ret1 ; rsi 1st > 2nd - - jnz rgt_ret0 ; half>rax => e1 -e2 is pos => e1 > e2 - + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 mov rax, [rsi + 8] cmp [rdx + 8], rax ; comare with (q-1)/2 - jc rgt_ret1 ; rsi 1st > 2nd + jc rlt_ret0 ; rsi 1st > 2nd + jnz rlt_ret1 - -rgt_ret0: +rlt_ret0: xor rax, rax ret -rgt_ret1: +rlt_ret1: mov rax, 1 ret @@ -5161,6 +7178,22 @@ Fr_gt: mov [rdi], rax ret +;;;;;;;;;;;;;;;;;;;;;; +; lt +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_lt: + call Fr_rlt + mov [rdi], rax + ret + ;;;;;;;;;;;;;;;;;;;;;; ; eq ;;;;;;;;;;;;;;;;;;;;;; @@ -5177,10 +7210,56 @@ Fr_eq: mov [rdi], rax ret +;;;;;;;;;;;;;;;;;;;;;; +; neq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; +Fr_neq: + call Fr_req + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; geq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. +; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; Fr_geq: + call Fr_rlt + xor rax, 1 + mov [rdi], rax + ret + +;;;;;;;;;;;;;;;;;;;;;; +; leq +;;;;;;;;;;;;;;;;;;;;;; +; Compares two elements of any kind +; Params: +; rsi <= Pointer to element 1 +; rdx <= Pointer to element 2 +; rdi <= Pointer to result can be zero or one. 
+; Modified Registers: +; rax, rcx +;;;;;;;;;;;;;;;;;;;;;; Fr_leq: -Fr_lt: -Fr_neq: + call Fr_rgt + xor rax, 1 + mov [rdi], rax + ret @@ -5212,39 +7291,39 @@ Fr_land: mov rax, [rsi] bt rax, 63 - jc tmp_28 + jc tmp_108 test eax, eax - jz retZero_30 - jmp retOne_29 + jz retZero_110 + jmp retOne_109 -tmp_28: +tmp_108: mov rax, [rsi + 8] test rax, rax - jnz retOne_29 + jnz retOne_109 mov rax, [rsi + 16] test rax, rax - jnz retOne_29 + jnz retOne_109 mov rax, [rsi + 24] test rax, rax - jnz retOne_29 + jnz retOne_109 mov rax, [rsi + 32] test rax, rax - jnz retOne_29 + jnz retOne_109 -retZero_30: +retZero_110: mov qword r8, 0 - jmp done_31 + jmp done_111 -retOne_29: +retOne_109: mov qword r8, 1 -done_31: +done_111: @@ -5254,39 +7333,39 @@ done_31: mov rax, [rdx] bt rax, 63 - jc tmp_32 + jc tmp_112 test eax, eax - jz retZero_34 - jmp retOne_33 + jz retZero_114 + jmp retOne_113 -tmp_32: +tmp_112: mov rax, [rdx + 8] test rax, rax - jnz retOne_33 + jnz retOne_113 mov rax, [rdx + 16] test rax, rax - jnz retOne_33 + jnz retOne_113 mov rax, [rdx + 24] test rax, rax - jnz retOne_33 + jnz retOne_113 mov rax, [rdx + 32] test rax, rax - jnz retOne_33 + jnz retOne_113 -retZero_34: +retZero_114: mov qword rcx, 0 - jmp done_35 + jmp done_115 -retOne_33: +retOne_113: mov qword rcx, 1 -done_35: +done_115: and rcx, r8 mov [rdi], rcx @@ -5313,39 +7392,39 @@ Fr_lor: mov rax, [rsi] bt rax, 63 - jc tmp_36 + jc tmp_116 test eax, eax - jz retZero_38 - jmp retOne_37 + jz retZero_118 + jmp retOne_117 -tmp_36: +tmp_116: mov rax, [rsi + 8] test rax, rax - jnz retOne_37 + jnz retOne_117 mov rax, [rsi + 16] test rax, rax - jnz retOne_37 + jnz retOne_117 mov rax, [rsi + 24] test rax, rax - jnz retOne_37 + jnz retOne_117 mov rax, [rsi + 32] test rax, rax - jnz retOne_37 + jnz retOne_117 -retZero_38: +retZero_118: mov qword r8, 0 - jmp done_39 + jmp done_119 -retOne_37: +retOne_117: mov qword r8, 1 -done_39: +done_119: @@ -5355,39 +7434,39 @@ done_39: mov rax, [rdx] bt rax, 63 - jc tmp_40 + jc tmp_120 
test eax, eax - jz retZero_42 - jmp retOne_41 + jz retZero_122 + jmp retOne_121 -tmp_40: +tmp_120: mov rax, [rdx + 8] test rax, rax - jnz retOne_41 + jnz retOne_121 mov rax, [rdx + 16] test rax, rax - jnz retOne_41 + jnz retOne_121 mov rax, [rdx + 24] test rax, rax - jnz retOne_41 + jnz retOne_121 mov rax, [rdx + 32] test rax, rax - jnz retOne_41 + jnz retOne_121 -retZero_42: +retZero_122: mov qword rcx, 0 - jmp done_43 + jmp done_123 -retOne_41: +retOne_121: mov qword rcx, 1 -done_43: +done_123: or rcx, r8 mov [rdi], rcx @@ -5413,39 +7492,39 @@ Fr_lnot: mov rax, [rsi] bt rax, 63 - jc tmp_44 + jc tmp_124 test eax, eax - jz retZero_46 - jmp retOne_45 + jz retZero_126 + jmp retOne_125 -tmp_44: +tmp_124: mov rax, [rsi + 8] test rax, rax - jnz retOne_45 + jnz retOne_125 mov rax, [rsi + 16] test rax, rax - jnz retOne_45 + jnz retOne_125 mov rax, [rsi + 24] test rax, rax - jnz retOne_45 + jnz retOne_125 mov rax, [rsi + 32] test rax, rax - jnz retOne_45 + jnz retOne_125 -retZero_46: +retZero_126: mov qword rcx, 0 - jmp done_47 + jmp done_127 -retOne_45: +retOne_125: mov qword rcx, 1 -done_47: +done_127: test rcx, rcx @@ -5476,39 +7555,39 @@ Fr_isTrue: mov rax, [rdi] bt rax, 63 - jc tmp_48 + jc tmp_128 test eax, eax - jz retZero_50 - jmp retOne_49 + jz retZero_130 + jmp retOne_129 -tmp_48: +tmp_128: mov rax, [rdi + 8] test rax, rax - jnz retOne_49 + jnz retOne_129 mov rax, [rdi + 16] test rax, rax - jnz retOne_49 + jnz retOne_129 mov rax, [rdi + 24] test rax, rax - jnz retOne_49 + jnz retOne_129 mov rax, [rdi + 32] test rax, rax - jnz retOne_49 + jnz retOne_129 -retZero_50: +retZero_130: mov qword rax, 0 - jmp done_51 + jmp done_131 -retOne_49: +retOne_129: mov qword rax, 1 -done_51: +done_131: ret @@ -5524,5 +7603,5 @@ q dq 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644 half dq 0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014 R2 dq 0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5 R3 dq 
0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c -lboMask dq 0x1fffffffffffffff +lboMask dq 0x3fffffffffffffff diff --git a/ports/c/buildasm/fr.asm.ejs b/ports/c/buildasm/fr.asm.ejs index 6819ec7..fa5f339 100644 --- a/ports/c/buildasm/fr.asm.ejs +++ b/ports/c/buildasm/fr.asm.ejs @@ -49,5 +49,5 @@ q dq <%= constantElement(q) %> half dq <%= constantElement(q.shiftRight(1)) %> R2 dq <%= constantElement(bigInt.one.shiftLeft(n64*64*2).mod(q)) %> R3 dq <%= constantElement(bigInt.one.shiftLeft(n64*64*3).mod(q)) %> -lboMask dq 0x<%= bigInt("8000000000000000",16).shiftRight(n64*64 - q.bitLength()).minus(bigInt.one).toString(16) %> +lboMask dq 0x<%= bigInt("10000000000000000",16).shiftRight(n64*64 - q.bitLength()).minus(bigInt.one).toString(16) %> diff --git a/ports/c/buildasm/fr.c b/ports/c/buildasm/fr.c index dca448b..8e4aec0 100644 --- a/ports/c/buildasm/fr.c +++ b/ports/c/buildasm/fr.c @@ -19,6 +19,7 @@ void Fr_toMpz(mpz_t r, PFrElement pE) { mpz_add(r, r, q); } } else { + Fr_toNormal(pE); mpz_import(r, Fr_N64, -1, 8, -1, 0, (const void *)pE->longVal); } } @@ -42,7 +43,7 @@ void Fr_init() { mpz_init_set_ui(one, 1); nBits = mpz_sizeinbase (q, 2); mpz_init(mask); - mpz_mul_2exp(mask, one, nBits-1); + mpz_mul_2exp(mask, one, nBits); mpz_sub(mask, mask, one); } @@ -118,11 +119,19 @@ void Fr_shl(PFrElement r, PFrElement a, PFrElement b) { Fr_toMpz(ma, a); Fr_toMpz(mb, b); - if (mpz_cmp_ui(mb, nBits) >= 0) { - mpz_set(mr, zero); - } else { + if (mpz_cmp_ui(mb, nBits) < 0) { mpz_mul_2exp(mr, ma, mpz_get_ui(mb)); mpz_and(mr, mr, mask); + if (mpz_cmp(mr, q) >= 0) { + mpz_sub(mr, mr, q); + } + } else { + mpz_sub(mb, q, mb); + if (mpz_cmp_ui(mb, nBits) < 0) { + mpz_tdiv_q_2exp(mr, ma, mpz_get_ui(mb)); + } else { + mpz_set(mr, zero); + } } Fr_fromMpz(r, mr); } @@ -137,11 +146,19 @@ void Fr_shr(PFrElement r, PFrElement a, PFrElement b) { Fr_toMpz(ma, a); Fr_toMpz(mb, b); - if (mpz_cmp_ui(mb, nBits) >= 0) { - mpz_set(mr, zero); - } else { + if 
(mpz_cmp_ui(mb, nBits) < 0) { mpz_tdiv_q_2exp(mr, ma, mpz_get_ui(mb)); - mpz_and(mr, mr, mask); + } else { + mpz_sub(mb, q, mb); + if (mpz_cmp_ui(mb, nBits) < 0) { + mpz_mul_2exp(mr, ma, mpz_get_ui(mb)); + mpz_and(mr, mr, mask); + if (mpz_cmp(mr, q) >= 0) { + mpz_sub(mr, mr, q); + } + } else { + mpz_set(mr, zero); + } } Fr_fromMpz(r, mr); } diff --git a/ports/c/buildasm/fr.c.ejs b/ports/c/buildasm/fr.c.ejs index 8ed8ea1..962b6c1 100644 --- a/ports/c/buildasm/fr.c.ejs +++ b/ports/c/buildasm/fr.c.ejs @@ -19,6 +19,7 @@ void <%=name%>_toMpz(mpz_t r, P<%=name%>Element pE) { mpz_add(r, r, q); } } else { + <%=name%>_toNormal(pE); mpz_import(r, <%=name%>_N64, -1, 8, -1, 0, (const void *)pE->longVal); } } @@ -42,7 +43,7 @@ void <%=name%>_init() { mpz_init_set_ui(one, 1); nBits = mpz_sizeinbase (q, 2); mpz_init(mask); - mpz_mul_2exp(mask, one, nBits-1); + mpz_mul_2exp(mask, one, nBits); mpz_sub(mask, mask, one); } @@ -118,11 +119,19 @@ void <%=name%>_shl(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b <%=name%>_toMpz(ma, a); <%=name%>_toMpz(mb, b); - if (mpz_cmp_ui(mb, nBits) >= 0) { - mpz_set(mr, zero); - } else { + if (mpz_cmp_ui(mb, nBits) < 0) { mpz_mul_2exp(mr, ma, mpz_get_ui(mb)); mpz_and(mr, mr, mask); + if (mpz_cmp(mr, q) >= 0) { + mpz_sub(mr, mr, q); + } + } else { + mpz_sub(mb, q, mb); + if (mpz_cmp_ui(mb, nBits) < 0) { + mpz_tdiv_q_2exp(mr, ma, mpz_get_ui(mb)); + } else { + mpz_set(mr, zero); + } } <%=name%>_fromMpz(r, mr); } @@ -137,11 +146,19 @@ void <%=name%>_shr(P<%=name%>Element r, P<%=name%>Element a, P<%=name%>Element b <%=name%>_toMpz(ma, a); <%=name%>_toMpz(mb, b); - if (mpz_cmp_ui(mb, nBits) >= 0) { - mpz_set(mr, zero); - } else { + if (mpz_cmp_ui(mb, nBits) < 0) { mpz_tdiv_q_2exp(mr, ma, mpz_get_ui(mb)); - mpz_and(mr, mr, mask); + } else { + mpz_sub(mb, q, mb); + if (mpz_cmp_ui(mb, nBits) < 0) { + mpz_mul_2exp(mr, ma, mpz_get_ui(mb)); + mpz_and(mr, mr, mask); + if (mpz_cmp(mr, q) >= 0) { + mpz_sub(mr, mr, q); + } + } else { + mpz_set(mr, 
zero); + } } <%=name%>_fromMpz(r, mr); } diff --git a/ports/c/buildasm/fr.o b/ports/c/buildasm/fr.o index bffc8de..7bf2b66 100644 Binary files a/ports/c/buildasm/fr.o and b/ports/c/buildasm/fr.o differ diff --git a/ports/c/buildasm/montgomery.asm.ejs b/ports/c/buildasm/montgomery.asm.ejs index 6cb2f58..1a6632c 100644 --- a/ports/c/buildasm/montgomery.asm.ejs +++ b/ports/c/buildasm/montgomery.asm.ejs @@ -245,23 +245,25 @@ montgomeryTemplate("rawFromMontgomery", function(i, r0, r1, r2) { ;;;;;;;;;;;;;;;;;;;; <%=name%>_toMontgomery: mov rax, [rdi] - bts rax, 62 ; check if montgomery + bt rax, 62 ; check if montgomery jc toMontgomery_doNothing - bts rax, 63 + bt rax, 63 jc toMontgomeryLong toMontgomeryShort: - mov [rdi], rax add rdi, 8 push rsi + push rdx lea rsi, [R2] movsx rdx, eax cmp rdx, 0 js negMontgomeryShort posMontgomeryShort: call rawMontgomeryMul1 + pop rdx pop rsi sub rdi, 8 + <%= global.setTypeDest("0x40"); %> ret negMontgomeryShort: @@ -269,8 +271,10 @@ negMontgomeryShort: call rawMontgomeryMul1 mov rsi, rdi call rawNegL + pop rdx pop rsi sub rdi, 8 + <%= global.setTypeDest("0x40"); %> ret @@ -283,6 +287,8 @@ toMontgomeryLong: call rawMontgomeryMul pop rsi sub rdi, 8 + <%= global.setTypeDest("0xC0"); %> + toMontgomery_doNothing: ret @@ -297,16 +303,16 @@ toMontgomery_doNothing: ;;;;;;;;;;;;;;;;;;;; <%=name%>_toNormal: mov rax, [rdi] - btc rax, 62 ; check if montgomery + bt rax, 62 ; check if montgomery jnc toNormal_doNothing bt rax, 63 ; if short, it means it's converted jnc toNormal_doNothing toNormalLong: - mov [rdi], rax add rdi, 8 call rawFromMontgomery sub rdi, 8 + <%= global.setTypeDest("0x80"); %> toNormal_doNothing: ret @@ -331,6 +337,7 @@ toLongNormal_fromMontgomery: add rdi, 8 call rawFromMontgomery sub rdi, 8 + <%= global.setTypeDest("0x80"); %> ret toLongNormal_fromShort: @@ -338,5 +345,6 @@ toLongNormal_fromShort: movsx rsi, eax call rawCopyS2L mov rsi, r8 ; recover rsi + <%= global.setTypeDest("0x80"); %> ret diff --git 
a/ports/c/buildasm/tester b/ports/c/buildasm/tester index e782f2a..1a99905 100755 Binary files a/ports/c/buildasm/tester and b/ports/c/buildasm/tester differ diff --git a/ports/c/buildasm/tester.cpp b/ports/c/buildasm/tester.cpp index 95b2d2a..b74a983 100644 --- a/ports/c/buildasm/tester.cpp +++ b/ports/c/buildasm/tester.cpp @@ -56,6 +56,8 @@ void fillMap() { addFunction("land", (FuncAny)Fr_land, 2); addFunction("lor", (FuncAny)Fr_lor, 2); addFunction("lnot", (FuncAny)Fr_lnot, 1); + addFunction("shl", (FuncAny)Fr_shl, 2); + addFunction("shr", (FuncAny)Fr_shr, 2); } u_int64_t readInt(std::string &s) { diff --git a/ports/c/buildasm/tester.dSYM/Contents/Info.plist b/ports/c/buildasm/tester.dSYM/Contents/Info.plist deleted file mode 100644 index c78a483..0000000 --- a/ports/c/buildasm/tester.dSYM/Contents/Info.plist +++ /dev/null @@ -1,20 +0,0 @@ - - - - - CFBundleDevelopmentRegion - English - CFBundleIdentifier - com.apple.xcode.dsym.tester - CFBundleInfoDictionaryVersion - 6.0 - CFBundlePackageType - dSYM - CFBundleSignature - ???? 
- CFBundleShortVersionString - 1.0 - CFBundleVersion - 1 - - diff --git a/ports/c/buildasm/tester.dSYM/Contents/Resources/DWARF/tester b/ports/c/buildasm/tester.dSYM/Contents/Resources/DWARF/tester deleted file mode 100644 index 8404f1b..0000000 Binary files a/ports/c/buildasm/tester.dSYM/Contents/Resources/DWARF/tester and /dev/null differ diff --git a/ports/c/buildasm/utils.asm.ejs b/ports/c/buildasm/utils.asm.ejs index 6925c52..a8852bf 100644 --- a/ports/c/buildasm/utils.asm.ejs +++ b/ports/c/buildasm/utils.asm.ejs @@ -1,8 +1,8 @@ <% global.setTypeDest = function (t) { return ( ` mov r11b, ${t} - shl r11, 56 - mov [rdi], r11`); + shl r11d, 24 + mov [rdi+4], r11d`); } %> diff --git a/ports/c/main.cpp b/ports/c/main.cpp index 37f9587..3a8b2ea 100644 --- a/ports/c/main.cpp +++ b/ports/c/main.cpp @@ -184,7 +184,7 @@ int main(int argc, char *argv[]) { ctx->join(); - printf("Finished!\n"); + // printf("Finished!\n"); std::string outfilename = argv[2]; diff --git a/ports/c/tester.js b/ports/c/tester.js index 17e4725..84c1dc0 100644 --- a/ports/c/tester.js +++ b/ports/c/tester.js @@ -47,7 +47,7 @@ async function c_tester(circomFile, _options) { ` ${path.join(dir.path, baseName + ".cpp")} ` + ` -o ${path.join(dir.path, baseName)}` + ` -I ${cdir}` + - " -lgmp -std=c++11 -DSANITY_CHECK" + " -lgmp -std=c++11 -DSANITY_CHECK -g" ); // console.log(dir.path); @@ -70,10 +70,13 @@ class CTester { path.join(this.dir.path, "in.json"), JSON.stringify(stringifyBigInts(input), null, 1) ); - await exec(`${path.join(this.dir.path, this.baseName)}` + + const r = await exec(`${path.join(this.dir.path, this.baseName)}` + ` ${path.join(this.dir.path, "in.json")}` + ` ${path.join(this.dir.path, "out.json")}` ); + if (r.stdout) { + console.log(r.stdout); + } const resStr = await fs.promises.readFile( path.join(this.dir.path, "out.json") ); diff --git a/ports/wasm/build_runtime.js b/ports/wasm/build_runtime.js index 16cb709..f568e7d 100644 --- a/ports/wasm/build_runtime.js +++ 
b/ports/wasm/build_runtime.js @@ -545,15 +545,15 @@ module.exports = function buildRuntime(module, builder) { f.addCode( c.call( "Fr_eq", - c.getLocal(c.i32_const(pTmp)), + c.i32_const(pTmp), c.getLocal("pA"), c.getLocal("pB") ), c.if ( - c.eqz( + c.i32_eqz( c.call( "Fr_isTrue", - c.getLocal(c.i32_const(pTmp)), + c.i32_const(pTmp), ) ), c.call( @@ -658,21 +658,6 @@ module.exports = function buildRuntime(module, builder) { ); } - function buildFrToInt() { - const f = module.addFunction("Fr_toInt"); - f.addParam("p", "i32"); - f.setReturnType("i32"); - - const c = f.getCodeBuilder(); - - f.addCode( - c.i32_load(c.getLocal("p")) - ); - - // TODO Handle long and montgomery. - } - - const fErr = module.addIimportFunction("err", "runtime"); fErr.addParam("code", "i32"); fErr.addParam("pStr", "i32"); @@ -703,6 +688,9 @@ module.exports = function buildRuntime(module, builder) { fErr4.addParam("param3", "i32"); fErr4.addParam("param4", "i32"); + const fLog = module.addIimportFunction("log", "runtime"); + fLog.addParam("code", "i32"); + buildWasmFf(module, "Fr", builder.header.P); builder.pSignals=module.alloc(builder.header.NSignals*builder.sizeFr); @@ -734,7 +722,7 @@ module.exports = function buildRuntime(module, builder) { buildGetPWitness(); buildGetPRawPrime(); - buildFrToInt(); +// buildFrToInt(); module.exportFunction("init"); module.exportFunction("getNVars"); diff --git a/ports/wasm/builder.js b/ports/wasm/builder.js index 5cf32c0..6b5c429 100644 --- a/ports/wasm/builder.js +++ b/ports/wasm/builder.js @@ -104,6 +104,10 @@ class CodeBuilderWasm { this.ops.push(...cb.ops); } + log(val) { + this.ops.push({op: "LOG", val}); + } + hasCode() { for (let i=0; i options.wasmWriteStream.on("finish", fulfill))); diff --git a/ports/wasm/witness_calculator.js b/ports/wasm/witness_calculator.js index b93a44b..8c22668 100644 --- a/ports/wasm/witness_calculator.js +++ b/ports/wasm/witness_calculator.js @@ -17,33 +17,55 @@ module.exports.fromBuffer = async function(code) { const 
memory = new WebAssembly.Memory({initial:20000}); const wasmModule = await WebAssembly.compile(code); + let wc; + const instance = await WebAssembly.instantiate(wasmModule, { env: { "memory": memory }, runtime: { err: function(code, pstr) { - console.log("ERROR", code, p2str(pstr)); + const errStr=p2str(pstr); + console.log("ERROR: ", code, errStr); + throw new Error(errStr); }, err1: function(code, pstr, a) { - console.log("ERROR: ", code, p2str(pstr), a); + const errStr=p2str(pstr)+ " " + a; + console.log("ERROR: ", code, errStr); + throw new Error(errStr); }, err2: function(code, pstr, a, b) { - console.log("ERROR: ", code, p2str(pstr), a, b); + const errStr=p2str(pstr)+ " " + a + " " + b; + console.log("ERROR: ", code, errStr); + throw new Error(errStr); }, err3: function(code, pstr, a, b, c) { - console.log("ERROR: ", code, p2str(pstr), a, b, c); + const errStr=p2str(pstr)+ " " + a + " " + b + " " + c; + console.log("ERROR: ", code, errStr); + throw new Error(errStr); }, err4: function(code, pstr, a,b,c,d) { - console.log("ERROR: ", code, p2str(pstr), a, b, c, d); + const errStr=p2str(pstr) + " " + wc.getFr(b).toString() + " != " + wc.getFr(c).toString() + " " +p2str(d); + console.log("ERROR: ", code, errStr); + throw new Error(errStr); + }, + log: function(a) { + console.log(wc.getFr(a).toString()); }, } }); - return new WitnessCalculator(memory, instance); + wc = new WitnessCalculator(memory, instance); + return wc; function p2str(p) { - return "TODO"+p; + const i8 = new Uint8Array(memory.buffer); + + const bytes = []; + + for (let i=0; i8[p+i]>0; i++) bytes.push(i8[p+i]); + + return String.fromCharCode.apply(null, bytes); } }; @@ -120,30 +142,31 @@ class WitnessCalculator { } getFr(p) { + const self = this; const idx = (p>>2); - if (this.i32[idx + 1] & 0x80000000) { + if (self.i32[idx + 1] & 0x80000000) { let res= bigInt(0); - for (let i=this.n32-1; i>=0; i--) { + for (let i=self.n32-1; i>=0; i--) { res = res.shiftLeft(32); - res = 
res.add(bigInt(this.i32[idx+2+i])); + res = res.add(bigInt(self.i32[idx+2+i])); } - if (this.i32[idx + 1] & 0x40000000) { + if (self.i32[idx + 1] & 0x40000000) { return fromMontgomery(res); } else { return res; } } else { - if (this.i32[idx] & 0x80000000) { - return this.prime.add( bigInt(this.i32[idx]).minus(bigInt(0x100000000)) ); + if (self.i32[idx] & 0x80000000) { + return self.prime.add( bigInt(self.i32[idx]).minus(bigInt(0x100000000)) ); } else { - return bigInt(this.i32[idx]); + return bigInt(self.i32[idx]); } } function fromMontgomery(n) { - return n.times(this.RInv).mod(this.prime); + return n.times(self.RInv).mod(self.prime); } } diff --git a/src/compiler.js b/src/compiler.js index f01cfc5..4016c3d 100644 --- a/src/compiler.js +++ b/src/compiler.js @@ -94,7 +94,7 @@ async function compile(srcFile, options) { } if ((options.wasmWriteStream)||(options.watWriteStream)) { - ctx.builder = new BuilderWasm(); + ctx.builder = new BuilderWasm(options.sanityCheck); build(ctx); if (options.wasmWriteStream) { const rdStream = ctx.builder.build("wasm"); diff --git a/src/construction_phase.js b/src/construction_phase.js index 1d33da6..3f3262c 100644 --- a/src/construction_phase.js +++ b/src/construction_phase.js @@ -558,7 +558,11 @@ function execFunctionCall(ctx, ast) { if (ast.name == "log") { const v = exec(ctx, ast.params[0]); const ev = val(ctx, v, ast); - console.log(ev.v.toString()); + if (ev.v) { + console.log(ev.v.toString()); + } else { + console.log(JSON.stringify(ev)); + } return; } if (ast.name == "assert") { diff --git a/src/gencode.js b/src/gencode.js index 98a070d..c2e629e 100644 --- a/src/gencode.js +++ b/src/gencode.js @@ -423,13 +423,21 @@ function genVariable(ctx, ast) { } else if (v.type == "BIGINT") { const refOffset = genGetOffset(ctx, 0, v.sizes, ast.selectors ); const offset = ctx.refs[refOffset]; + let ot; + if (offset.type == "BIGINT") { + ot = "R"; + } else if (offset.type == "INT") { + ot= "RI"; + } else { + assert(false); + } if (v.used) { 
if (offset.used) { const refRes = newRef(ctx, "BIGINT", "_v", null, v.sizes.slice(l)); const res = ctx.refs[refRes]; res.used = true; ctx.fnBuilder.definePFrElement(res.label); - ctx.codeBuilder.assign(res.label, ["R", v.label], ["R", offset.label]); + ctx.codeBuilder.assign(res.label, ["R", v.label], [ot, offset.label]); return refRes; } else if ((offset.value[0]>0)||(l>0)) { const refRes = newRef(ctx, "BIGINT", "_v", null, v.sizes.slice(l)); @@ -448,7 +456,7 @@ function genVariable(ctx, ast) { const res = ctx.refs[resRef]; res.used = true; ctx.fnBuilder.definePFrElement(res.label); - ctx.codeBuilder.assign(res.label, ["R", v.label], ["R", offset.label]); + ctx.codeBuilder.assign(res.label, ["R", v.label], [ot, offset.label]); return resRef; } else { // return newSubRef(ctx, ast.name, ast.selectors); @@ -499,7 +507,13 @@ function genGetSubComponentOffset(ctx, cIdxRef, label) { if (cIdxRef>=0) { const cIdx = ctx.refs[cIdxRef]; if (cIdx.used) { - c = ["R", cIdx.label]; + if (cIdx.type == "BIGINT") { + c = ["R", cIdx.label]; + } else if (cIdx.type == "INT") { + c = ["RI", cIdx.label]; + } else { + assert(false); + } } else { c = ["V", cIdx.value[0]]; } @@ -520,7 +534,13 @@ function genGetSubComponentSizes(ctx, cIdxRef, label) { if (cIdxRef>=0) { const cIdx = ctx.refs[cIdxRef]; if (cIdx.used) { - c = ["R", cIdx.label]; + if (cIdx.type == "BIGINT") { + c = ["R", cIdx.label]; + } else if (cIdx.type == "INT") { + c = ["RI", cIdx.label]; + } else { + assert(false); + } } else { c = ["V", cIdx.value[0]]; } @@ -553,7 +573,13 @@ function genGetSignalOffset(ctx, cIdxRef, label) { if (cIdxRef>=0) { const cIdx = ctx.refs[cIdxRef]; if (cIdx.used) { - c = ["R", cIdx.label]; + if (cIdx.type == "BIGINT") { + c = ["R", cIdx.label]; + } else if (cIdx.type == "INT") { + c = ["RI", cIdx.label]; + } else { + assert(false); + } } else { c = ["V", cIdx.value[0]]; } @@ -590,7 +616,13 @@ function genGetSignalSizes(ctx, cIdxRef, label) { if (cIdxRef>=0) { const cIdx = ctx.refs[cIdxRef]; if 
(cIdx.used) { - c = ["R", cIdx.label]; + if (cIdx.type == "BIGINT") { + c = ["R", cIdx.label]; + } else if (cIdx.type == "INT") { + c = ["RI", cIdx.label]; + } else { + assert(false); + } } else { c = ["V", cIdx.value[0]]; } @@ -743,7 +775,11 @@ function toRefA_Fr1(ctx, ast, aRef) { const a = ctx.refs[aRef]; if (a.sizes[0] != 1) return ctx.throwError(ast, "Expected only one element"); if (a.used) { - return ["R", a.label]; + if (a.type == "BIGINT") { + return ["R", a.label]; + } else { + assert(false); + } } else { return ["C", ctx.addConstant(a.value[0])]; } diff --git a/test/basiccases.js b/test/basiccases.js index 68d5ee1..7e5ee4b 100644 --- a/test/basiccases.js +++ b/test/basiccases.js @@ -49,18 +49,18 @@ async function doTest(tester, circuit, testVectors) { describe("basic cases", function () { this.timeout(100000); -/* + for (let i=0; i { await doTest(c_tester, basicCases[i].circuit, basicCases[i].tv); }); } -*/ - for (let i=16; i<17; i++) { +/* + for (let i=0; i { await doTest(wasm_tester, basicCases[i].circuit, basicCases[i].tv); }); } - +*/ }); diff --git a/test/basiccases.json b/test/basiccases.json index a18e7a2..65abf1d 100644 --- a/test/basiccases.json +++ b/test/basiccases.json @@ -198,7 +198,7 @@ "and": 1, "or": 7, "xor":6, - "not1": "14474011154664524427946373126085988481658748083205070504932198000989141204986", + "not1": "7059779437489773633646340506914701874769131765994106666166191815402473914361", "shl": 40, "shr":0 } @@ -211,7 +211,7 @@ "and": 0, "or": 0, "xor":0, - "not1":"14474011154664524427946373126085988481658748083205070504932198000989141204991", + "not1":"7059779437489773633646340506914701874769131765994106666166191815402473914366", "shl": 0, "shr":0 } @@ -222,10 +222,10 @@ }, { "and": 0, - "or": "7414231717174750794300032619171286606889616317210963838766006185586667290625", - "xor":"7414231717174750794300032619171286606889616317210963838766006185586667290625", + "or": 0, + "xor": 0, "not1": 
"7059779437489773633646340506914701874769131765994106666166191815402473914367", - "shl": "354452279684977160653692112256584732120484551216857172599814370184193376256", + "shl": "14828463434349501588600065238342573213779232634421927677532012371173334581248", "shr": "10944121435919637611123202872628637544274182200208017171849102093287904247808" } ] diff --git a/test/fieldasm.js b/test/fieldasm.js index 5900abd..205fc8c 100644 --- a/test/fieldasm.js +++ b/test/fieldasm.js @@ -17,6 +17,7 @@ describe("field asm test", function () { const tv = buildTestVector2(bn128r, "add"); await tester(bn128r, tv); }); +/* it("secp256k1q add", async () => { const tv = buildTestVector2(secp256k1q, "add"); await tester(secp256k1q, tv); @@ -37,7 +38,6 @@ describe("field asm test", function () { const tv = buildTestVector2(mnt6753q, "sub"); await tester(mnt6753q, tv); }); - it("bn128r neg", async () => { const tv = buildTestVector1(bn128r, "neg"); await tester(bn128r, tv); @@ -266,6 +266,51 @@ describe("field asm test", function () { const tv = buildTestVector1(mnt6753q, "square"); await tester(mnt6753q, tv); }); +*/ + it("bn128r shl", async () => { + const tv = buildTestVector2(bn128r, "shl"); + await tester(bn128r, tv); + }); +/* + it("secp256k1q shl", async () => { + const tv = buildTestVector2(secp256k1q, "shl"); + await tester(secp256k1q, tv); + }); + it("mnt6753q shl", async () => { + const tv = buildTestVector2(mnt6753q, "shl"); + await tester(mnt6753q, tv); + }); +*/ + it("bn128r shr", async () => { + const tv = buildTestVector2(bn128r, "shr"); + await tester(bn128r, tv); + }); +/* + it("secp256k1q shr", async () => { + const tv = buildTestVector2(secp256k1q, "shr"); + await tester(secp256k1q, tv); + }); + it("mnt6753q shr", async () => { + const tv = buildTestVector2(mnt6753q, "shr"); + await tester(mnt6753q, tv); + }); + it("mnt6753q band", async () => { + const tv = buildTestVector2(mnt6753q, "band"); + await tester(mnt6753q, tv); + }); + it("mnt6753q bor", async () => { + 
const tv = buildTestVector2(mnt6753q, "bor"); + await tester(mnt6753q, tv); + }); + it("mnt6753q bxor", async () => { + const tv = buildTestVector2(mnt6753q, "bxor"); + await tester(mnt6753q, tv); + }); + it("mnt6753q bnot", async () => { + const tv = buildTestVector1(mnt6753q, "bnot"); + await tester(mnt6753q, tv); + }); +*/ }); function buildTestVector2(p, op) { @@ -310,6 +355,9 @@ function getCriticalNumbers(p, lim) { const numbers = []; addFrontier(0); + addFrontier(bigInt(32)); + addFrontier(bigInt(64)); + addFrontier(bigInt(p.bitLength())); addFrontier(bigInt.one.shiftLeft(31)); addFrontier(p.minus(bigInt.one.shiftLeft(31))); addFrontier(bigInt.one.shiftLeft(32));