You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

342 lines
7.7 KiB

  1. <%
  2. //////////////////////
  3. // montgomeryTemplate
  4. //////////////////////
  5. // Emits an assembly routine labelled fnName that accumulates 64-bit limb
  6. // products column by column and applies a montgomery reduction by q.
  7. // The round(i, r0, r1, r2) hook emits the operand-specific code of column i.
  8. // The n64 result limbs are stored at [rdi]; n64*8 bytes of stack hold the m values.
  9. // All the generated montgomery functions modify:
  10. // r8, r9, r10, r11, rax, rcx (rdx is saved in rcx on entry and restored before ret)
  11. //////////////////////
  12. function montgomeryTemplate(fnName, round) {
  13. let r0, r1, r2; // register names of the low, middle and high accumulator words of the current column
  14. function setR(step) { // rotate the accumulator roles among r8/r9/r10 according to step mod 3
  15. if ((step % 3) == 0) {
  16. r0 = "r8";
  17. r1 = "r9";
  18. r2 = "r10";
  19. } else if ((step % 3) == 1) {
  20. r0 = "r9";
  21. r1 = "r10";
  22. r2 = "r8";
  23. } else {
  24. r0 = "r10";
  25. r1 = "r8";
  26. r2 = "r9";
  27. }
  28. }
  29. const base = bigInt.one.shiftLeft(64); // 2^64, the limb radix
  30. const np64 = base.minus(q.modInv(base)); // 2^64 - (q^-1 mod 2^64), i.e. -q^-1 modulo the radix
  31. %>
  32. <%=fnName%>:
  33. sub rsp, <%= n64*8 %> ; Reserve space for ms
  34. mov rcx, rdx ; rdx is needed for multiplications so keep it in rcx
  35. mov r11, 0x<%= np64.toString(16) %> ; np
  36. xor r8,r8 ; clear the three accumulator words
  37. xor r9,r9
  38. xor r10,r10
  39. <%
  40. // Main loop: 2*n64 columns; the first n64 produce the m values, the last n64 produce result limbs
  41. for (let i=0; i<n64*2; i++) {
  42. setR(i); // pick which of r8/r9/r10 is low/middle/high for this column
  43. round(i, r0, r1, r2); // let the caller emit its own additions for column i
  44. %>
  45. <%
  46. for (let j=i-1; j>=0; j--) { // All ms
  47. if (((i-j)<n64)&&(j<n64)) { // only pairs whose m index j and q index i-j are both in range
  48. %>
  49. mov rax, [rsp + <%= j*8 %>] ; previously stored m value
  50. mul qword [q + <%= (i-j)*8 %>]
  51. add <%= r0 %>, rax
  52. adc <%= r1 %>, rdx
  53. adc <%= r2 %>, 0x0
  54. <%
  55. }
  56. } // ms
  57. %>
  58. <%
  59. if (i<n64) { // first half: derive the m value of this column and add m*q[0]
  60. %>
  61. mov rax, <%= r0 %>
  62. mul r11 ; rax = low 64 bits of (low accumulator word * np)
  63. mov [rsp + <%= i*8 %>], rax ; keep it on the stack as the m value of column i
  64. mul qword [q]
  65. add <%= r0 %>, rax ; with np = -q^-1 mod 2^64 this sum is 0 modulo 2^64, so the low word needs no explicit clearing
  66. adc <%= r1 %>, rdx
  67. adc <%= r2 %>, 0x0
  68. <%
  69. } else { // second half: the low accumulator word is a finished result limb
  70. %>
  71. mov [rdi + <%= (i-n64)*8 %> ], <%= r0 %>
  72. xor <%= r0 %>,<%= r0 %> ; clear it; setR makes this register the high word of the next column
  73. <%
  74. }
  75. %>
  76. <%
  77. } // Main Loop
  78. %>
  79. test <%= r1 %>, <%= r1 %> ; anything left above the last stored limb?
  80. jnz <%=fnName%>_mulM_sq ; yes: go subtract q
  81. ; Compare with q
  82. <%
  83. for (let i=0; i<n64; i++) { // compare limbs from the most significant one down
  84. %>
  85. mov rax, [rdi + <%= (n64-i-1)*8 %>]
  86. cmp rax, [q + <%= (n64-i-1)*8 %>]
  87. jc <%=fnName%>_mulM_done ; q is bigger so done.
  88. jnz <%=fnName%>_mulM_sq ; q is lower
  89. <%
  90. }
  91. %>
  92. ; If equal subtract q
  93. <%=fnName%>_mulM_sq:
  94. <%
  95. for (let i=0; i<n64; i++) { // sub on the lowest limb, sbb on the rest to propagate the borrow
  96. %>
  97. mov rax, [q + <%= i*8 %>]
  98. <%= i==0 ? "sub" : "sbb" %> [rdi + <%= i*8 %>], rax
  99. <%
  100. }
  101. %>
  102. <%=fnName%>_mulM_done:
  103. mov rdx, rcx ; recover rdx to its original place.
  104. add rsp, <%= n64*8 %> ; recover rsp
  105. ret
  106. <%
  107. } // Template
  108. %>
  109. ;;;;;;;;;;;;;;;;;;;;;;
  110. ; rawMontgomeryMul
  111. ;;;;;;;;;;;;;;;;;;;;;;
  112. ; Multiply two elements in montgomery form
  113. ; Params:
  114. ; rsi <= Pointer to the long data of element 1
  115. ; rdx <= Pointer to the long data of element 2
  116. ; rdi <= Pointer to the long data of result
  117. ; Modified registers:
  118. ; r8, r9, r10, r11, rax, rcx
  119. ;;;;;;;;;;;;;;;;;;;;;;
  120. <%
  121. montgomeryTemplate("rawMontgomeryMul", function(col, lo, mid, hi) {
  122. // Emit every limb product [rsi + 8*ia] * [rcx + 8*ib] with ia + ib == col and both indexes below n64
  123. for (let ia=Math.max(0, col-n64+1), last=Math.min(col, n64-1); ia<=last; ia++) {
  124. const ib = col - ia;
  125. %>
  126. mov rax, [rsi + <%= ia*8 %>]
  127. mul qword [rcx + <%= ib*8 %>]
  128. add <%= lo %>, rax
  129. adc <%= mid %>, rdx
  130. adc <%= hi %>, 0x0
  131. <%
  132. } // products of this column
  133. })
  134. %>
  135. ;;;;;;;;;;;;;;;;;;;;;;
  136. ; rawMontgomerySquare
  137. ;;;;;;;;;;;;;;;;;;;;;;
  138. ; Square an element
  139. ; Params:
  140. ; rsi <= Pointer to the long data of element 1
  141. ; rdi <= Pointer to the long data of result
  142. ; Modified registers:
  143. ; r8, r9, r10, r11, rax, rcx
  144. ;;;;;;;;;;;;;;;;;;;;;;
  145. <%
  146. montgomeryTemplate("rawMontgomerySquare", function(i, r0, r1, r2) {
  147. // Cross products of limbs o1 < o2 with o1 + o2 == i; each one is accumulated twice
  148. for (let o1=Math.max(0, i-n64+1); (o1<((i+1)>>1) )&&(o1<n64); o1++) {
  149. const o2= i-o1; // index of the partner limb
  150. %>
  151. mov rax, [rsi + <%= 8*o1 %>]
  152. mul qword [rsi + <%= 8*o2 %>]
  153. add <%= r0 %>, rax
  154. adc <%= r1 %>, rdx
  155. adc <%= r2 %>, 0x0
  156. add <%= r0 %>, rax ; same product added a second time
  157. adc <%= r1 %>, rdx
  158. adc <%= r2 %>, 0x0
  159. <%
  160. } // Same digit
  161. %>
  162. <% if (i%2 == 0) { %>
  163. mov rax, [rsi + <%= 8*(i/2) %>] ; even column: the limb at index i/2 multiplied by itself, added once
  164. mul rax
  165. add <%= r0 %>, rax
  166. adc <%= r1 %>, rdx
  167. adc <%= r2 %>, 0x0
  168. <% } %>
  169. <%
  170. })
  171. %>
  172. ;;;;;;;;;;;;;;;;;;;;;;
  173. ; rawMontgomeryMul1
  174. ;;;;;;;;;;;;;;;;;;;;;;
  175. ; Multiply an element in montgomery form by a 64-bit value
  176. ; Params:
  177. ; rsi <= Pointer to the long data of element 1
  178. ; rdx <= second operand
  179. ; rdi <= Pointer to the long data of result
  180. ; Modified registers:
  181. ; r8, r9, r10, r11, rax, rcx
  182. ;;;;;;;;;;;;;;;;;;;;;;
  183. <%
  184. montgomeryTemplate("rawMontgomeryMul1", function(col, lo, mid, hi) {
  185. // The 64-bit operand (moved from rdx to rcx by the template) only meets limbs 0..n64-1
  186. if (col >= n64) return;
  187. %>
  188. mov rax, [rsi + <%= col*8 %>]
  189. mul rcx
  190. add <%= lo %>, rax
  191. adc <%= mid %>, rdx
  192. adc <%= hi %>, 0x0
  193. <%
  194. // nothing else to emit for this column
  195. })
  196. %>
  197. ;;;;;;;;;;;;;;;;;;;;;;
  198. ; rawFromMontgomery
  199. ;;;;;;;;;;;;;;;;;;;;;;
  200. ; Convert an element out of montgomery form
  201. ; Params:
  202. ; rdi <= Pointer to the long data of the element to convert (the generated code reads its input from rdi, not rsi)
  203. ; rdi <= Pointer to the long data of result (same buffer, converted in place)
  204. ; Modified registers:
  205. ; r8, r9, r10, r11, rax, rcx
  206. ;;;;;;;;;;;;;;;;;;;;;;
  207. <%
  208. montgomeryTemplate("rawFromMontgomery", function(col, lo, mid, hi) {
  209. // Only the first n64 columns receive an input limb, read from [rdi]
  210. if (col >= n64) return;
  211. %>
  212. add <%= lo %>, [rdi + <%= col*8 %>]
  213. adc <%= mid %>, 0x0
  214. adc <%= hi %>, 0x0
  215. <%
  216. // later columns only carry the reduction terms emitted by the template
  217. })
  218. %>
  219. ;;;;;;;;;;;;;;;;;;;;;;
  220. ; toMontgomery
  221. ;;;;;;;;;;;;;;;;;;;;;;
  222. ; Convert a number to Montgomery
  223. ; rdi <= Pointer element to convert
  224. ; Modified registers:
  225. ; r8, r9, r10, r11, rax, rcx
  226. ;;;;;;;;;;;;;;;;;;;;
  227. <%=name%>_toMontgomery:
  228. mov rax, [rdi] ; load the element header word
  229. bts rax, 62 ; check if montgomery (CF = previous bit 62; the bit is now set in rax)
  230. jc toMontgomery_doNothing
  231. bts rax, 63 ; CF = previous bit 63 (the long flag); the bit is now set in rax
  232. jc toMontgomeryLong
  233. toMontgomeryShort:
  234. mov [rdi], rax ; store the header with bits 62 and 63 set
  235. add rdi, 8 ; point rdi at the long data, where the result is written
  236. push rsi ; rsi is reused as an operand pointer, so save the caller's value
  237. lea rsi, [R2] ; R2 is defined elsewhere; presumably R^2 mod q - confirm
  238. movsx rdx, eax ; sign-extend the low 32 bits of the header as the short value (caller's rdx is overwritten)
  239. cmp rdx, 0
  240. js negMontgomeryShort
  241. posMontgomeryShort:
  242. call rawMontgomeryMul1
  243. pop rsi
  244. sub rdi, 8 ; restore rdi to the element header
  245. ret
  246. negMontgomeryShort:
  247. neg rdx ; Do the multiplication positive and then negate the result.
  248. call rawMontgomeryMul1
  249. mov rsi, rdi
  250. call rawNegL ; defined elsewhere; called with rsi == rdi, presumably negating in place - confirm
  251. pop rsi
  252. sub rdi, 8 ; restore rdi to the element header
  253. ret
  254. toMontgomeryLong:
  255. mov [rdi], rax ; store the header with the montgomery bit set
  256. add rdi, 8 ; point rdi at the long data
  257. push rsi
  258. mov rdx, rdi ; second operand is the element's own long data (caller's rdx is overwritten)
  259. lea rsi, [R2]
  260. call rawMontgomeryMul
  261. pop rsi
  262. sub rdi, 8 ; restore rdi to the element header
  263. toMontgomery_doNothing:
  264. ret
  265. ;;;;;;;;;;;;;;;;;;;;;;
  266. ; toNormal
  267. ;;;;;;;;;;;;;;;;;;;;;;
  268. ; Convert a number from Montgomery
  269. ; rdi <= Pointer element to convert
  270. ; Modified registers:
  271. ; r8, r9, r10, r11, rax, rcx
  272. ;;;;;;;;;;;;;;;;;;;;
  273. <%=name%>_toNormal:
  274. mov rax, [rdi] ; load the element header word
  275. btc rax, 62 ; check if montgomery (CF = previous bit 62; the bit is flipped in rax)
  276. jnc toNormal_doNothing
  277. bt rax, 63 ; if short, it means it's converted
  278. jnc toNormal_doNothing
  279. toNormalLong:
  280. mov [rdi], rax ; write back the header with the montgomery bit cleared
  281. add rdi, 8 ; point rdi at the long data
  282. call rawFromMontgomery
  283. sub rdi, 8 ; restore rdi to the element header
  284. toNormal_doNothing:
  285. ret
  286. ;;;;;;;;;;;;;;;;;;;;;;
  287. ; toLongNormal
  288. ;;;;;;;;;;;;;;;;;;;;;;
  289. ; Convert a number to long normal
  290. ; rdi <= Pointer element to convert
  291. ; Modified registers:
  292. ; r8, r9, r10, r11, rax, rcx
  293. ;;;;;;;;;;;;;;;;;;;;
  294. <%=name%>_toLongNormal:
  295. mov rax, [rdi] ; load the element header word
  296. btr qword [rdi], 62 ; check if montgomery; clears the flag in the stored header so it matches the converted data (as toNormal does)
  297. jc toLongNormal_fromMontgomery
  298. bt rax, 63 ; check if long
  299. jnc toLongNormal_fromShort
  300. ret ; It is already long
  301. toLongNormal_fromMontgomery:
  302. add rdi, 8 ; point rdi at the long data
  303. call rawFromMontgomery ; converts the long data in place
  304. sub rdi, 8 ; restore rdi to the element header
  305. ret
  306. toLongNormal_fromShort:
  307. mov r8, rsi ; save rsi
  308. movsx rsi, eax ; sign-extend the low 32 bits of the header as the short value
  309. call rawCopyS2L ; defined elsewhere; called with the short value in rsi and the element pointer in rdi
  310. mov rsi, r8 ; recover rsi
  311. ret