You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

340 lines
6.8 KiB

  1. // Copyright 2020 ConsenSys Software Inc.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "textflag.h"
  15. #include "funcdata.h"
  // q<> is the modulus q as four 64-bit limbs, least-significant limb at offset 0
  // (the order in which the SUBQ/SBBQ chains in this file consume it).
  DATA  q<>+0x00(SB)/8, $0x43e1f593f0000001
  DATA  q<>+0x08(SB)/8, $0x2833e84879b97091
  DATA  q<>+0x10(SB)/8, $0xb85045b68181585d
  DATA  q<>+0x18(SB)/8, $0x30644e72e131a029
  GLOBL q<>(SB), RODATA|NOPTR, $32

  // qInv0<> is q'[0].
  // NOTE(review): presumably the low word of -q^-1 mod 2^64 used by Montgomery
  // multiplication; it is not referenced by the routines visible here - confirm.
  DATA  qInv0<>+0x00(SB)/8, $0xc2e1f593efffffff
  GLOBL qInv0<>(SB), RODATA|NOPTR, $8
  // REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) performs one conditional
  // subtraction of the modulus q<> on a 4-limb value:
  //
  //   ra0..ra3  in/out  value, ra0 being the least-significant limb
  //   rb0..rb3  scratch overwritten with a copy of the input value
  //
  // The value is replaced by (value - q) unless that subtraction borrows, in
  // which case the saved copy is moved back and the value is left unchanged.
  // Flags are clobbered. MOVQ does not modify flags, so the saves interleaved
  // with the SUBQ/SBBQ chain do not disturb the borrow.
  //
  // The last line has no trailing backslash: the definition ends there and
  // cannot absorb whatever line happens to follow the macro.
  #define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
      MOVQ    ra0, rb0;        \
      SUBQ    q<>(SB), ra0;    \
      MOVQ    ra1, rb1;        \
      SBBQ    q<>+8(SB), ra1;  \
      MOVQ    ra2, rb2;        \
      SBBQ    q<>+16(SB), ra2; \
      MOVQ    ra3, rb3;        \
      SBBQ    q<>+24(SB), ra3; \
      CMOVQCS rb0, ra0;        \
      CMOVQCS rb1, ra1;        \
      CMOVQCS rb2, ra2;        \
      CMOVQCS rb3, ra3;
  // add(res, x, y *Element)
  //
  // Stores x + y into *res after one conditional subtraction of q.
  // The carry out of the top limb is not examined.
  // NOTE(review): presumably both inputs are already < q so no carry occurs - confirm.
  //
  // Registers: AX = x, DX = y, DI = res, R8..R11 = running value,
  //            CX, BX, SI, R12 = REDUCE scratch.
  TEXT ·add(SB), NOSPLIT, $0-24
      MOVQ x+8(FP), AX
      MOVQ y+16(FP), DX
      MOVQ res+0(FP), DI

      // R8..R11 = x
      MOVQ 0(AX), R8
      MOVQ 8(AX), R9
      MOVQ 16(AX), R10
      MOVQ 24(AX), R11

      // R8..R11 += y
      ADDQ 0(DX), R8
      ADCQ 8(DX), R9
      ADCQ 16(DX), R10
      ADCQ 24(DX), R11

      // subtract q once unless that would borrow
      REDUCE(R8,R9,R10,R11,CX,BX,SI,R12)

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // sub(res, x, y *Element)
  //
  // Stores x - y into *res. When the 256-bit subtraction borrows, q is added
  // back; otherwise zero is added.
  //
  // Registers: AX = x, DX = y, DI = res, R8..R11 = running value,
  //            R12 = constant zero, CX, BX, SI, R13 = value added back (q or 0).
  TEXT ·sub(SB), NOSPLIT, $0-24
      // Zero R12 first: XORQ rewrites the flags, so it has to happen before
      // the subtraction whose borrow is tested below.
      XORQ R12, R12
      MOVQ x+8(FP), AX
      MOVQ y+16(FP), DX
      MOVQ res+0(FP), DI

      // R8..R11 = x
      MOVQ 0(AX), R8
      MOVQ 8(AX), R9
      MOVQ 16(AX), R10
      MOVQ 24(AX), R11

      // R8..R11 -= y; CF = borrow
      SUBQ 0(DX), R8
      SBBQ 8(DX), R9
      SBBQ 16(DX), R10
      SBBQ 24(DX), R11

      // (CX, BX, SI, R13) = q; MOVQ leaves CF intact
      MOVQ $0x43e1f593f0000001, CX
      MOVQ $0x2833e84879b97091, BX
      MOVQ $0xb85045b68181585d, SI
      MOVQ $0x30644e72e131a029, R13

      // no borrow => add 0 instead of q
      CMOVQCC R12, CX
      CMOVQCC R12, BX
      CMOVQCC R12, SI
      CMOVQCC R12, R13

      ADDQ CX, R8
      ADCQ BX, R9
      ADCQ SI, R10
      ADCQ R13, R11

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // double(res, x *Element)
  //
  // Stores x + x into *res after one conditional subtraction of q.
  //
  // Registers: AX = x, DI = res, R8..R11 = running value,
  //            CX, BX, SI, DX = REDUCE scratch.
  TEXT ·double(SB), NOSPLIT, $0-16
      MOVQ x+8(FP), AX
      MOVQ res+0(FP), DI

      // R8..R11 = x
      MOVQ 0(AX), R8
      MOVQ 8(AX), R9
      MOVQ 16(AX), R10
      MOVQ 24(AX), R11

      // R8..R11 += R8..R11
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11

      // subtract q once unless that would borrow
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // neg(res, x *Element)
  //
  // Stores q - x into *res, except that an all-zero x yields an all-zero *res.
  //
  // Registers: AX = x, DI = res, R8..R11 = limbs of x,
  //            CX = OR of the four limbs, BX, SI, DX, R12 = limbs of q - x.
  TEXT ·neg(SB), NOSPLIT, $0-16
      MOVQ x+8(FP), AX
      MOVQ res+0(FP), DI

      MOVQ 0(AX), R8
      MOVQ 8(AX), R9
      MOVQ 16(AX), R10
      MOVQ 24(AX), R11

      // CX = x[0] | x[1] | x[2] | x[3]; the final ORQ sets ZF exactly when
      // every limb is zero.
      MOVQ R8, CX
      ORQ  R9, CX
      ORQ  R10, CX
      ORQ  R11, CX
      JEQ  neg_zero

      // (BX, SI, DX, R12) = q - x
      MOVQ $0x43e1f593f0000001, BX
      MOVQ $0x2833e84879b97091, SI
      MOVQ $0xb85045b68181585d, DX
      MOVQ $0x30644e72e131a029, R12
      SUBQ R8, BX
      SBBQ R9, SI
      SBBQ R10, DX
      SBBQ R11, R12

      MOVQ BX, 0(DI)
      MOVQ SI, 8(DI)
      MOVQ DX, 16(DI)
      MOVQ R12, 24(DI)
      RET

  neg_zero:
      // CX is zero on this path
      MOVQ CX, 0(DI)
      MOVQ CX, 8(DI)
      MOVQ CX, 16(DI)
      MOVQ CX, 24(DI)
      RET
  // reduce(res *Element)
  //
  // Applies one conditional subtraction of q to *res in place: *res becomes
  // *res - q unless that subtraction borrows, in which case it is unchanged.
  //
  // Registers: DI = res, R8..R11 = value, CX, BX, SI, DX = REDUCE scratch.
  TEXT ·reduce(SB), NOSPLIT, $0-8
      MOVQ res+0(FP), DI

      MOVQ 0(DI), R8
      MOVQ 8(DI), R9
      MOVQ 16(DI), R10
      MOVQ 24(DI), R11

      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // MulBy3(x *Element)
  //
  // Replaces *x with 3*x, computed as (x + x) + x with a conditional
  // subtraction of q after each addition.
  //
  // Registers: DI = x, R8..R11 = running value,
  //            CX, BX, SI, DX = REDUCE scratch (reused by both reductions).
  TEXT ·MulBy3(SB), NOSPLIT, $0-8
      MOVQ x+0(FP), DI

      MOVQ 0(DI), R8
      MOVQ 8(DI), R9
      MOVQ 16(DI), R10
      MOVQ 24(DI), R11

      // value = 2x
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // value = 2x + x; *x still holds the original operand
      ADDQ 0(DI), R8
      ADCQ 8(DI), R9
      ADCQ 16(DI), R10
      ADCQ 24(DI), R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // MulBy5(x *Element)
  //
  // Replaces *x with 5*x, computed as 2*(2*x) + x with a conditional
  // subtraction of q after each step.
  //
  // Registers: DI = x, R8..R11 = running value,
  //            CX, BX, SI, DX = REDUCE scratch (reused by all three reductions).
  TEXT ·MulBy5(SB), NOSPLIT, $0-8
      MOVQ x+0(FP), DI

      MOVQ 0(DI), R8
      MOVQ 8(DI), R9
      MOVQ 16(DI), R10
      MOVQ 24(DI), R11

      // value = 2x
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // value = 4x
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // value = 4x + x; *x still holds the original operand
      ADDQ 0(DI), R8
      ADCQ 8(DI), R9
      ADCQ 16(DI), R10
      ADCQ 24(DI), R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // MulBy13(x *Element)
  //
  // Replaces *x with 13*x, computed as 8x + 4x + x: the value is doubled
  // twice (4x, saved), doubled once more (8x), then 4x and x are added.
  // A conditional subtraction of q follows every doubling and addition.
  //
  // Registers: DI = x, R8..R11 = running value,
  //            AX, R12, R13, R14 = saved copy of 4x,
  //            CX, BX, SI, DX = REDUCE scratch (reused by every reduction).
  TEXT ·MulBy13(SB), NOSPLIT, $0-8
      MOVQ x+0(FP), DI

      MOVQ 0(DI), R8
      MOVQ 8(DI), R9
      MOVQ 16(DI), R10
      MOVQ 24(DI), R11

      // value = 2x
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // value = 4x
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // keep 4x for later
      MOVQ R8, AX
      MOVQ R9, R12
      MOVQ R10, R13
      MOVQ R11, R14

      // value = 8x
      ADDQ R8, R8
      ADCQ R9, R9
      ADCQ R10, R10
      ADCQ R11, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // value = 8x + 4x
      ADDQ AX, R8
      ADCQ R12, R9
      ADCQ R13, R10
      ADCQ R14, R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      // value = 12x + x; *x still holds the original operand
      ADDQ 0(DI), R8
      ADCQ 8(DI), R9
      ADCQ 16(DI), R10
      ADCQ 24(DI), R11
      REDUCE(R8,R9,R10,R11,CX,BX,SI,DX)

      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  // Butterfly(a, b *Element)
  //
  // Sets (*a, *b) = (a + b, a - b), both computed from the values a and b had
  // on entry. Every load of *a and *b happens before the first store; *b is
  // written first, then *a.
  //
  // Registers: DI = a (zeroed in between and reloaded before the final store),
  //            SI = b, R8..R11 = sum, R12..R15 = difference,
  //            AX, CX, BX, DX = q-or-0 correction, then REDUCE scratch.
  TEXT ·Butterfly(SB), NOSPLIT, $0-16
      MOVQ a+0(FP), DI
      MOVQ b+8(FP), SI

      // sum = a
      MOVQ 0(DI), R8
      MOVQ 8(DI), R9
      MOVQ 16(DI), R10
      MOVQ 24(DI), R11

      // difference = a
      MOVQ R8, R12
      MOVQ R9, R13
      MOVQ R10, R14
      MOVQ R11, R15

      // DI becomes the zero source for the CMOVs below. It is cleared here
      // because XORQ rewrites the flags and must precede the subtraction.
      XORQ DI, DI

      // difference -= b; CF = borrow
      SUBQ 0(SI), R12
      SBBQ 8(SI), R13
      SBBQ 16(SI), R14
      SBBQ 24(SI), R15

      // correction = q when the subtraction borrowed, else 0
      // (MOVQ leaves CF intact)
      MOVQ $0x43e1f593f0000001, AX
      MOVQ $0x2833e84879b97091, CX
      MOVQ $0xb85045b68181585d, BX
      MOVQ $0x30644e72e131a029, DX
      CMOVQCC DI, AX
      CMOVQCC DI, CX
      CMOVQCC DI, BX
      CMOVQCC DI, DX

      ADDQ AX, R12
      ADCQ CX, R13
      ADCQ BX, R14
      ADCQ DX, R15

      // sum += b, read before *b is overwritten
      ADDQ 0(SI), R8
      ADCQ 8(SI), R9
      ADCQ 16(SI), R10
      ADCQ 24(SI), R11

      // *b = a - b
      MOVQ R12, 0(SI)
      MOVQ R13, 8(SI)
      MOVQ R14, 16(SI)
      MOVQ R15, 24(SI)

      // subtract q from the sum once unless that would borrow
      REDUCE(R8,R9,R10,R11,AX,CX,BX,DX)

      // *a = a + b
      MOVQ a+0(FP), DI
      MOVQ R8, 0(DI)
      MOVQ R9, 8(DI)
      MOVQ R10, 16(DI)
      MOVQ R11, 24(DI)
      RET
  298. RET