You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

275 lines
6.0 KiB

  <% function mulS1S2() { %>
  ; mulS1S2: multiply two short operands.
  ; In:    r8d = operand 1, r9d = operand 2 (treated as signed 32-bit values)
  ;        rdi = pointer to the result
  ; Out:   when the product fits in 32 bits, it is stored zero-extended in [rdi]
  ;        and the routine returns immediately; otherwise the 64-bit product is
  ;        passed in rsi to rawCopyS2L (presumably stored in long form - confirm
  ;        against rawCopyS2L).
  ; Clobb: rax, rdx, flags; rcx on the overflow path; plus whatever rawCopyS2L
  ;        modifies.
  ; The fast path ends with its own `ret`, so this template must be expanded
  ; where the stack top is the caller's return address.
          mov     eax, r8d                ; 32-bit write also clears the upper half of rax
          imul    r9d                     ; edx:eax = eax * r9d; OF set if it does not fit in eax
          jo      mul_manageOverflow
          mov     [rdi], rax              ; product fits: store it as is
          ret                             ; and return instead of falling into the 64-bit path

  mul_manageOverflow:                     ; redo the multiplication in 64 bits
          push    rsi
          movsx   rax, r8d
          movsx   rcx, r9d
          imul    rcx                     ; rdx:rax = rax * rcx
          mov     rsi, rax                ; low 64 bits of the product go to rawCopyS2L in rsi
          call    rawCopyS2L
          pop     rsi
  <% } %>
  <% function squareS1() { %>
  ; squareS1: square a short operand.
  ; In:    r8d = operand (treated as a signed 32-bit value)
  ;        rdi = pointer to the result
  ; Out:   when the square fits in 32 bits, it is stored zero-extended in [rdi]
  ;        and the routine returns immediately; otherwise the 64-bit square is
  ;        passed in rsi to rawCopyS2L (presumably stored in long form - confirm
  ;        against rawCopyS2L).
  ; Clobb: rax, rdx, flags; plus whatever rawCopyS2L modifies.
  ; The fast path ends with its own `ret`, so this template must be expanded
  ; where the stack top is the caller's return address.
          mov     eax, r8d                ; 32-bit write also clears the upper half of rax
          imul    eax                     ; edx:eax = eax * eax; OF set if it does not fit in eax
          jo      square_manageOverflow
          mov     [rdi], rax              ; square fits: store it as is
          ret                             ; and return instead of falling into the 64-bit path

  square_manageOverflow:                  ; redo the squaring in 64 bits
          push    rsi
          movsx   rax, r8d
          imul    rax                     ; rdx:rax = rax * rax
          mov     rsi, rax                ; low 64 bits of the square go to rawCopyS2L in rsi
          call    rawCopyS2L
          pop     rsi
  <% } %>
  <% function mulL1S2(t) { %>
  <% const rawPositiveLabel = global.tmpLabel(); const done = global.tmpLabel() %>
  ; mulL1S2: long operand 1 times short operand 2.
  ; In:    rsi = pointer to element 1, r9d = short value of element 2,
  ;        rdi = pointer to the result
  ; The raw routines are handed pointers 8 bytes past the element start.
  ; rawMontgomeryMul1 is called with the magnitude of the short value in rdx;
  ; when that value is negative the product is passed through rawNegL afterwards.
  ; rsi and rdi are restored before leaving; rdx is not.
          push    rsi
          lea     rsi, [rsi + 8]
          movsx   rdx, r9d
          lea     rdi, [rdi + 8]
          test    rdx, rdx
          jns     <%= rawPositiveLabel %>
          neg     rdx                     ; multiply by |value| ...
          call    rawMontgomeryMul1
          mov     rsi, rdi
          call    rawNegL                 ; ... then negate the product in place
          jmp     <%= done %>
  <%= rawPositiveLabel %>:
          call    rawMontgomeryMul1
  <%= done %>:
          sub     rdi, 8                  ; shared tail: undo the pointer adjustment
          pop     rsi
  <% } %>
  <% function mulS1L2() { %>
  <% const rawPositiveLabel = global.tmpLabel(); const done = global.tmpLabel() %>
  ; mulS1L2: short operand 1 times long operand 2.
  ; In:    r8d = short value of element 1, rdx = pointer to element 2,
  ;        rdi = pointer to the result
  ; The raw routines are handed pointers 8 bytes past the element start.
  ; rawMontgomeryMul1 is called with the magnitude of the short value in rdx;
  ; when that value is negative the product is passed through rawNegL afterwards.
  ; rsi and rdi are restored before leaving; rdx is not.
          push    rsi
          lea     rsi, [rdx + 8]          ; long operand becomes the raw source
          movsx   rdx, r8d
          lea     rdi, [rdi + 8]
          test    rdx, rdx
          jns     <%= rawPositiveLabel %>
          neg     rdx                     ; multiply by |value| ...
          call    rawMontgomeryMul1
          mov     rsi, rdi
          call    rawNegL                 ; ... then negate the product in place
          jmp     <%= done %>
  <%= rawPositiveLabel %>:
          call    rawMontgomeryMul1
  <%= done %>:
          sub     rdi, 8                  ; shared tail: undo the pointer adjustment
          pop     rsi
  <% } %>
  <% function mulL1L2() { %>
  ; mulL1L2: long operand 1 times long operand 2.
  ; In:    rsi = pointer to element 1, rdx = pointer to element 2,
  ;        rdi = pointer to the result
  ; All three pointers are advanced by 8 bytes for rawMontgomeryMul.
  ; Only rdi and rsi are moved back afterwards; rdx is left as the call returns it.
          lea     rdi, [rdi + 8]
          lea     rsi, [rsi + 8]
          lea     rdx, [rdx + 8]
          call    rawMontgomeryMul
          sub     rdi, 8
          sub     rsi, 8
  <% } %>
  <% function squareL1() { %>
  ; squareL1: square a long operand.
  ; In:    rsi = pointer to the element, rdi = pointer to the result
  ; Both pointers are advanced by 8 bytes for rawMontgomerySquare and moved
  ; back afterwards.
          lea     rdi, [rdi + 8]
          lea     rsi, [rsi + 8]
          call    rawMontgomerySquare
          sub     rdi, 8
          sub     rsi, 8
  <% } %>
  <% function mulR3() { %>
  ; mulR3: multiply the result in place by the constant at R3.
  ; In:    rdi = pointer to the result element
  ; Calls rawMontgomeryMul with destination and first source both at rdi + 8
  ; and the second source at R3. rsi is preserved through the stack and rdi is
  ; moved back afterwards; rdx is overwritten.
          push    rsi
          lea     rdi, [rdi + 8]
          lea     rdx, [R3]
          mov     rsi, rdi                ; source 1 = destination
          call    rawMontgomeryMul
          sub     rdi, 8
          pop     rsi
  <% } %>
  ;;;;;;;;;;;;;;;;;;;;;;
  ; square
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Squares a field element.
  ; Params:
  ;   rsi <= Pointer to the operand
  ;   rdi <= Pointer to the result
  ;   [rdi] = [rsi] * [rsi]
  ; Dispatch on the first 64-bit word of the operand:
  ;   bit 63 clear            -> squareS1 (short path)
  ;   bit 63 set, bit 62 clear -> squareL1 followed by mulR3
  ;   bit 63 set, bit 62 set   -> squareL1 only
  ; Both long paths request destination type "0xC0" through global.setTypeDest.
  ; Modified Registers (as listed by the original header):
  ;   r8, r9, r10, r11, rax, rcx
  ;   rdx is also written by the one-operand imul of the short path.
  ;;;;;;;;;;;;;;;;;;;;;;
  <%=name%>_square:
          mov     r8, [rsi]
          test    r8, r8                  ; sign flag mirrors bit 63 (long operand)
          js      square_l1

  square_s1:                              ; operand is short
  <%= squareS1() %>
          ret

  square_l1:
          bt      r8, 62                  ; bit 62 marks a montgomery operand
          jc      square_l1m

  square_l1n:                             ; long, not montgomery
  <%= global.setTypeDest("0xC0") %>
  <%= squareL1() %>
  <%= mulR3() %>
          ret

  square_l1m:                             ; long, montgomery
  <%= global.setTypeDest("0xC0") %>
  <%= squareL1() %>
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; mul
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Multiplies two elements of any kind.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result
  ;   [rdi] = [rsi] * [rdx]
  ; The first 64-bit word of each element is loaded into r8 / r9; bit 63 of
  ; that word selects the long paths, bit 62 is tested there as the
  ; montgomery marker.
  ; NOTE(review): bit 62 is not inspected before taking the short * short path.
  ; Modified Registers (as listed by the original header):
  ;   r8, r9, r10, r11, rax, rcx
  ;   rdx is also overwritten (one-operand imul on the short path, pointer
  ;   adjustment / scalar load on the long paths).
  ;;;;;;;;;;;;;;;;;;;;;;
  <%=name%>_mul:
          mov     r8, [rsi]
          mov     r9, [rdx]
          test    r8, r8                  ; sign flag mirrors bit 63: element 1 is long
          js      mul_l1
          test    r9, r9                  ; sign flag mirrors bit 63: element 2 is long
          js      mul_s1l2

  mul_s1s2:                               ; both operands are short
  <%= mulS1S2() %>
          ret
  ; Element 1 is long. Element 2 long -> mul_l1l2, otherwise fall into the
  ; long * short family below.
  mul_l1:
          test    r9, r9                  ; sign flag mirrors bit 63: element 2 is long
          js      mul_l1l2

  ;;;;;;;;
  ; long * short: choose by the montgomery bit (62) of each operand.
  ;   element 1 | element 2 | type passed to setTypeDest | expansion
  ;   normal    | normal    | "0xC0"                     | mulL1S2 + mulR3
  ;   normal    | montgomery| "0x80"                     | mulL1L2
  ;   montgomery| normal    | "0x80"                     | mulL1S2
  ;   montgomery| montgomery| "0xC0"                     | mulL1L2
  mul_l1s2:
          bt      r8, 62                  ; element 1 montgomery?
          jc      mul_l1ms2

  mul_l1ns2:
          bt      r9, 62                  ; element 2 montgomery?
          jc      mul_l1ns2m

  mul_l1ns2n:
  <%= global.setTypeDest("0xC0") %>
  <%= mulL1S2() %>
  <%= mulR3() %>
          ret

  mul_l1ns2m:
  <%= global.setTypeDest("0x80") %>
  <%= mulL1L2() %>
          ret

  mul_l1ms2:
          bt      r9, 62                  ; element 2 montgomery?
          jc      mul_l1ms2m

  mul_l1ms2n:
  <%= global.setTypeDest("0x80") %>
  <%= mulL1S2() %>
          ret

  mul_l1ms2m:
  <%= global.setTypeDest("0xC0") %>
  <%= mulL1L2() %>
          ret
  ;;;;;;;;
  ; short * long: choose by the montgomery bit (62) of each operand.
  ;   element 1 | element 2 | type passed to setTypeDest | expansion
  ;   normal    | normal    | "0xC0"                     | mulS1L2 + mulR3
  ;   normal    | montgomery| "0x80"                     | mulS1L2
  ;   montgomery| normal    | "0x80"                     | mulL1L2
  ;   montgomery| montgomery| "0xC0"                     | mulL1L2
  mul_s1l2:
          bt      r8, 62                  ; element 1 montgomery?
          jc      mul_s1ml2

  mul_s1nl2:
          bt      r9, 62                  ; element 2 montgomery?
          jc      mul_s1nl2m

  mul_s1nl2n:
  <%= global.setTypeDest("0xC0") %>
  <%= mulS1L2() %>
  <%= mulR3() %>
          ret

  mul_s1nl2m:
  <%= global.setTypeDest("0x80") %>
  <%= mulS1L2() %>
          ret

  mul_s1ml2:
          bt      r9, 62                  ; element 2 montgomery?
          jc      mul_s1ml2m

  mul_s1ml2n:
  <%= global.setTypeDest("0x80") %>
  <%= mulL1L2() %>
          ret

  mul_s1ml2m:
  <%= global.setTypeDest("0xC0") %>
  <%= mulL1L2() %>
          ret
  ;;;;
  ; long * long: choose by the montgomery bit (62) of each operand.
  ; Every case expands mulL1L2; only the destination type differs, and the
  ; normal * normal case is followed by mulR3.
  ;   element 1 | element 2 | type passed to setTypeDest
  ;   normal    | normal    | "0xC0" (+ mulR3)
  ;   normal    | montgomery| "0x80"
  ;   montgomery| normal    | "0x80"
  ;   montgomery| montgomery| "0xC0"
  mul_l1l2:
          bt      r8, 62                  ; element 1 montgomery?
          jc      mul_l1ml2

  mul_l1nl2:
          bt      r9, 62                  ; element 2 montgomery?
          jc      mul_l1nl2m

  mul_l1nl2n:
  <%= global.setTypeDest("0xC0") %>
  <%= mulL1L2() %>
  <%= mulR3() %>
          ret

  mul_l1nl2m:
  <%= global.setTypeDest("0x80") %>
  <%= mulL1L2() %>
          ret

  mul_l1ml2:
          bt      r9, 62                  ; element 2 montgomery?
          jc      mul_l1ml2m

  mul_l1ml2n:
  <%= global.setTypeDest("0x80") %>
  <%= mulL1L2() %>
          ret

  mul_l1ml2m:
  <%= global.setTypeDest("0xC0") %>
  <%= mulL1L2() %>
          ret
  233. ret