You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

209 lines
7.8 KiB

  1. const bigInt=require("big-integer");
  2. class ZqBuilder {
  3. constructor(q, name) {
  4. this.q=bigInt(q);
  5. this.h = [];
  6. this.c = [];
  7. this.name = name;
  8. }
  9. build() {
  10. this._buildHeaders();
  11. this._buildAdd();
  12. this._buildMul();
  13. this.c.push(""); this.h.push("");
  14. return [this.h.join("\n"), this.c.join("\n")];
  15. }
  16. _buildHeaders() {
  17. this.n64 = Math.floor((this.q.bitLength() - 1) / 64)+1;
  18. this.h.push("typedef unsigned long long u64;");
  19. this.h.push(`typedef u64 ${this.name}Element[${this.n64}];`);
  20. this.h.push(`typedef u64 *P${this.name}Element;`);
  21. this.h.push(`extern ${this.name}Element ${this.name}_q;`);
  22. this.h.push(`#define ${this.name}_N64 ${this.n64}`);
  23. this.c.push(`#include "${this.name.toLowerCase()}.h"`);
  24. this._defineConstant(`${this.name}_q`, this.q);
  25. this.c.push(""); this.h.push("");
  26. }
  27. _defineConstant(n, v) {
  28. let S = `${this.name}Element ${n}={`;
  29. const mask = bigInt("FFFFFFFFFFFFFFFF", 16);
  30. for (let i=0; i<this.n64; i++) {
  31. if (i>0) S = S+",";
  32. let shex = v.shiftRight(i*64).and(mask).toString(16);
  33. while (shex <16) shex = "0" + shex;
  34. S = S + "0x" + shex + "ULL";
  35. }
  36. S += "};";
  37. this.c.push(S);
  38. }
  39. _buildAdd() {
  40. this.h.push(`void ${this.name}_add(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b);`);
  41. this.c.push(`void ${this.name}_add(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b) {`);
  42. this.c.push(" __asm__ __volatile__ (");
  43. for (let i=0; i<this.n64; i++) {
  44. this.c.push(` "movq ${i*8}(%2), %%rax;"`);
  45. this.c.push(` "${i==0 ? "addq" : "adcq"} ${i*8}(%1), %%rax;"`);
  46. this.c.push(` "movq %%rax, ${i*8}(%0);"`);
  47. }
  48. this.c.push(" \"jc SQ;\"");
  49. for (let i=0; i<this.n64; i++) {
  50. if (i>0) {
  51. this.c.push(` "movq ${(this.n64 - i-1)*8}(%0), %%rax;"`);
  52. }
  53. this.c.push(` "cmp ${(this.n64 - i-1)*8}(%3), %%rax;"`);
  54. this.c.push(" \"jg SQ;\"");
  55. this.c.push(" \"jl DONE;\"");
  56. }
  57. this.c.push(" \"SQ:\"");
  58. for (let i=0; i<this.n64; i++) {
  59. this.c.push(` "movq ${i*8}(%3), %%rax;"`);
  60. this.c.push(` "${i==0 ? "subq" : "sbbq"} %%rax, ${i*8}(%0);"`);
  61. }
  62. this.c.push(" \"DONE:\"");
  63. this.c.push(` :: "r" (r), "r" (a), "r" (b), "r" (${this.name}_q) : "%rax", "memory");`);
  64. this.c.push("}\n");
  65. }
  66. _buildMul() {
  67. let r0, r1, r2;
  68. function setR(step) {
  69. if ((step % 3) == 0) {
  70. r0 = "%%r8";
  71. r1 = "%%r9";
  72. r2 = "%%r10";
  73. } else if ((step % 3) == 1) {
  74. r0 = "%%r9";
  75. r1 = "%%r10";
  76. r2 = "%%r8";
  77. } else {
  78. r0 = "%%r10";
  79. r1 = "%%r8";
  80. r2 = "%%r9";
  81. }
  82. }
  83. const base = bigInt.one.shiftLeft(64);
  84. const np64 = base.minus(this.q.modInv(base));
  85. this.h.push(`void ${this.name}_mul(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b);`);
  86. this.c.push(`void ${this.name}_mul(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b) {`);
  87. this.c.push(" __asm__ __volatile__ (");
  88. this.c.push(` "subq $${this.n64*8}, %%rsp;"`);
  89. this.c.push(` "movq $0x${np64.toString(16)}, %%r11;"`);
  90. this.c.push(" \"movq $0x0, %%r8;\"");
  91. this.c.push(" \"movq $0x0, %%r9;\"");
  92. this.c.push(" \"movq $0x0, %%r10;\"");
  93. for (let i=0; i<this.n64*2; i++) {
  94. setR(i);
  95. for (let o1=Math.max(0, i-this.n64+1); (o1<=i)&&(o1<this.n64); o1++) {
  96. const o2= i-o1;
  97. this.c.push(` "movq ${o1*8}(%1), %%rax;"`);
  98. this.c.push(` "mulq ${o2*8}(%2);"`);
  99. this.c.push(` "addq %%rax, ${r0};"`);
  100. this.c.push(` "adcq %%rdx, ${r1};"`);
  101. this.c.push(` "adcq $0x0, ${r2};"`);
  102. }
  103. for (let j=i-1; j>=0; j--) {
  104. if (((i-j)<this.n64)&&(j<this.n64)) {
  105. this.c.push(` "movq ${j*8}(%%rsp), %%rax;"`);
  106. this.c.push(` "mulq ${(i-j)*8}(%3);"`);
  107. this.c.push(` "addq %%rax, ${r0};"`);
  108. this.c.push(` "adcq %%rdx, ${r1};"`);
  109. this.c.push(` "adcq $0x0, ${r2};"`);
  110. }
  111. }
  112. if (i<this.n64) {
  113. this.c.push(` "movq ${r0}, %%rax;"`);
  114. this.c.push(" \"mulq %%r11;\"");
  115. this.c.push(` "movq %%rax, ${i*8}(%%rsp);"`);
  116. this.c.push(" \"mulq (%3);\"");
  117. this.c.push(` "addq %%rax, ${r0};"`);
  118. this.c.push(` "adcq %%rdx, ${r1};"`);
  119. this.c.push(` "adcq $0x0, ${r2};"`);
  120. } else {
  121. this.c.push(` "movq ${r0}, ${(i-this.n64)*8}(%0);"`);
  122. this.c.push(` "movq $0, ${r0};"`);
  123. }
  124. }
  125. this.c.push(` "cmp $0, ${r1};"`);
  126. this.c.push(" \"jne SQ2;\"");
  127. for (let i=0; i<this.n64; i++) {
  128. this.c.push(` "movq ${(this.n64 - i-1)*8}(%0), %%rax;"`);
  129. this.c.push(` "cmp ${(this.n64 - i-1)*8}(%3), %%rax;"`);
  130. this.c.push(" \"jg SQ2;\"");
  131. this.c.push(" \"jl DONE2;\"");
  132. }
  133. this.c.push(" \"SQ2:\"");
  134. for (let i=0; i<this.n64; i++) {
  135. this.c.push(` "movq ${i*8}(%3), %%rax;"`);
  136. this.c.push(` "${i==0 ? "subq" : "sbbq"} %%rax, ${i*8}(%0);"`);
  137. }
  138. this.c.push(" \"DONE2:\"");
  139. this.c.push(` "addq $${this.n64*8}, %%rsp;"`);
  140. this.c.push(` :: "r" (r), "r" (a), "r" (b), "r" (${this.name}_q) : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "memory");`);
  141. this.c.push("}\n");
  142. }
  143. _buildIDiv() {
  144. this.h.push(`void ${this.name}_idiv(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b);`);
  145. this.c.push(`void ${this.name}_idiv(P${this.name}Element r, P${this.name}Element a, P${this.name}Element b) {`);
  146. this.c.push(" __asm__ __volatile__ (");
  147. this.c.push(" \"pxor %%xmm0, %%xmm0;\""); // Comparison Register
  148. if (this.n64 == 1) {
  149. this.c.push(` "mov %%rax, $${this.n64 - 8};"`);
  150. } else {
  151. this.c.push(` "mov %%rax, $${this.n64 -16};"`);
  152. }
  153. this.c.push(` :: "r" (r), "r" (a), "r" (b), "r" (${this.name}_q) : "%rax", "%rdx", "%r8", "%r9", "%r10", "%r11", "memory");`);
  154. this.c.push("}\n");
  155. }
  156. }
  157. var runningAsScript = !module.parent;
  158. if (runningAsScript) {
  159. const fs = require("fs");
  160. var argv = require("yargs")
  161. .usage("Usage: $0 -q [primeNum] -n [name] -oc [out .c file] -oh [out .h file]")
  162. .demandOption(["q","n"])
  163. .alias("q", "prime")
  164. .alias("n", "name")
  165. .argv;
  166. const q = bigInt(argv.q);
  167. const cFileName = (argv.oc) ? argv.oc : argv.name.toLowerCase() + ".c";
  168. const hFileName = (argv.oh) ? argv.oh : argv.name.toLowerCase() + ".h";
  169. const builder = new ZqBuilder(q, argv.name);
  170. const res = builder.build();
  171. fs.writeFileSync(hFileName, res[0], "utf8");
  172. fs.writeFileSync(cFileName, res[1], "utf8");
  173. } else {
  174. module.exports = function(q, name) {
  175. const builder = new ZqBuilder(q, name);
  176. return builder.build();
  177. };
  178. }