You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

4854 lines
90 KiB

  1. global Fr_add
  2. global Fr_sub
  3. global Fr_neg
  4. global Fr_mul
  5. global Fr_band
  6. global Fr_bor
  7. global Fr_bxor
  8. global Fr_eq
  9. global Fr_neq
  10. global Fr_lt
  11. global Fr_gt
  12. global Fr_leq
  13. global Fr_geq
  14. global Fr_toNormal
  15. global Fr_toMontgomery
  16. global Fr_q
  17. DEFAULT REL
  18. section .text
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawCopyS2L
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Write a signed 64 bit integer into an element in long, normal
  ; (non-Montgomery) form. A negative input is stored as q + value.
  ; Params:
  ;   rsi <= the signed 64 bit integer
  ;   rdi <= Pointer to the element that is overwritten
  ;          (type word at [rdi], four 64 bit limbs at [rdi + 8 .. rdi + 32])
  ;
  ; Modified registers:
  ;   rax, flags
  ;   rsi (left as -1) when the input is negative
  ;;;;;;;;;;;;;;;;;;;;;;;
  rawCopyS2L:
          mov     rax, 0x8000000000000000 ; type word with only bit 63 set: LONG normal
          mov     [rdi], rax
          test    rsi, rsi
          js      u64toLong_adjust_neg

          ; Non-negative: limb 0 is the value itself, limbs 1..3 are zero.
          mov     [rdi + 8], rsi
          xor     eax, eax
          mov     [rdi + 16], rax
          mov     [rdi + 24], rax
          mov     [rdi + 32], rax
          ret

  u64toLong_adjust_neg:
          ; Negative: add q to the value sign-extended to four limbs.
          add     rsi, [q]                ; limb 0, produces the first carry
          mov     [rdi + 8], rsi
          mov     rsi, -1                 ; sign-extension limb (mov keeps CF)
          mov     rax, rsi
          adc     rax, [q + 8]
          mov     [rdi + 16], rax
          mov     rax, rsi
          adc     rax, [q + 16]
          mov     [rdi + 24], rax
          mov     rax, rsi
          adc     rax, [q + 24]
          mov     [rdi + 32], rax
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawMontgomeryMul
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Multiply two elements in montgomery form
  ;
  ; The product is built one 64 bit column at a time in a rotating
  ; three-register accumulator (r8, r9, r10). For each of the first four
  ; columns a multiplier m = (low limb * np) is computed, saved in a 32 byte
  ; stack scratch area and used to add multiples of q. A final conditional
  ; subtraction of q follows.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= Pointer to the long data of element 2
  ;   rdi <= Pointer to the long data of result
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx, flags
  ;   (rdx is parked in rcx during the routine and restored before returning)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; RMM_MAC a, b, lo, mid, hi  :  (hi:mid:lo) += a * b
  %macro RMM_MAC 5
          mov     rax, %1
          mul     qword %2
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; RMM_RED slot, lo, mid, hi  :  m = low64(lo * np), saved at [rsp + slot];
  ;                               then (hi:mid:lo) += m * q[0]
  %macro RMM_RED 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  ; RMM_CMPQ off  :  compare the result limb at off with the same limb of q
  %macro RMM_CMPQ 1
          mov     rax, [rdi + %1]
          cmp     rax, [q + %1]
          jc      rawMontgomeryMul_mulM_done  ; result limb is smaller: already reduced
          jnz     rawMontgomeryMul_mulM_sq    ; result limb is larger: subtract q
  %endmacro

  rawMontgomeryMul:
          sub     rsp, 32                     ; scratch for the four m values
          mov     rcx, rdx                    ; mul overwrites rdx, so keep the pointer in rcx
          mov     r11, 0xc2e1f593efffffff     ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; ---- column 0, accumulator lo/mid/hi = r8/r9/r10
          RMM_MAC [rsi + 0],  [rcx + 0],  r8, r9, r10
          RMM_RED 0,                      r8, r9, r10

          ; ---- column 1, accumulator = r9/r10/r8
          RMM_MAC [rsi + 0],  [rcx + 8],  r9, r10, r8
          RMM_MAC [rsi + 8],  [rcx + 0],  r9, r10, r8
          RMM_MAC [rsp + 0],  [q + 8],    r9, r10, r8
          RMM_RED 8,                      r9, r10, r8

          ; ---- column 2, accumulator = r10/r8/r9
          RMM_MAC [rsi + 0],  [rcx + 16], r10, r8, r9
          RMM_MAC [rsi + 8],  [rcx + 8],  r10, r8, r9
          RMM_MAC [rsi + 16], [rcx + 0],  r10, r8, r9
          RMM_MAC [rsp + 8],  [q + 8],    r10, r8, r9
          RMM_MAC [rsp + 0],  [q + 16],   r10, r8, r9
          RMM_RED 16,                     r10, r8, r9

          ; ---- column 3, accumulator = r8/r9/r10
          RMM_MAC [rsi + 0],  [rcx + 24], r8, r9, r10
          RMM_MAC [rsi + 8],  [rcx + 16], r8, r9, r10
          RMM_MAC [rsi + 16], [rcx + 8],  r8, r9, r10
          RMM_MAC [rsi + 24], [rcx + 0],  r8, r9, r10
          RMM_MAC [rsp + 16], [q + 8],    r8, r9, r10
          RMM_MAC [rsp + 8],  [q + 16],   r8, r9, r10
          RMM_MAC [rsp + 0],  [q + 24],   r8, r9, r10
          RMM_RED 24,                     r8, r9, r10

          ; ---- column 4, accumulator = r9/r10/r8, low limb becomes result limb 0
          RMM_MAC [rsi + 8],  [rcx + 24], r9, r10, r8
          RMM_MAC [rsi + 16], [rcx + 16], r9, r10, r8
          RMM_MAC [rsi + 24], [rcx + 8],  r9, r10, r8
          RMM_MAC [rsp + 24], [q + 8],    r9, r10, r8
          RMM_MAC [rsp + 16], [q + 16],   r9, r10, r8
          RMM_MAC [rsp + 8],  [q + 24],   r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; ---- column 5, accumulator = r10/r8/r9, low limb becomes result limb 1
          RMM_MAC [rsi + 16], [rcx + 24], r10, r8, r9
          RMM_MAC [rsi + 24], [rcx + 16], r10, r8, r9
          RMM_MAC [rsp + 24], [q + 16],   r10, r8, r9
          RMM_MAC [rsp + 16], [q + 24],   r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; ---- column 6, accumulator = r8/r9/r10, gives result limbs 2 and 3
          RMM_MAC [rsi + 24], [rcx + 24], r8, r9, r10
          RMM_MAC [rsp + 24], [q + 24],   r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; A non-zero r10 is a carry out of the top limb: q must be subtracted.
          test    r10, r10
          jnz     rawMontgomeryMul_mulM_sq

          ; Otherwise compare the result with q from the most significant limb down.
          RMM_CMPQ 24
          RMM_CMPQ 16
          RMM_CMPQ 8
          RMM_CMPQ 0
          ; All limbs equal: fall through and subtract q as well.

  rawMontgomeryMul_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawMontgomeryMul_mulM_done:
          mov     rdx, rcx                    ; recover rdx to its original place.
          add     rsp, 32                     ; release the scratch area
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawMontgomeryMul1
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Montgomery-multiply a long element by a single 64 bit value.
  ;
  ; Same column-by-column scheme as a full four-limb multiplication, but the
  ; second operand contributes only one partial product per column.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= second operand (64 bit value, not a pointer)
  ;   rdi <= Pointer to the long data of result
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx, flags
  ;   (rdx is parked in rcx during the routine and restored before returning)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; RMM1_MULS off, lo, mid, hi  :  (hi:mid:lo) += [rsi + off] * rcx
  %macro RMM1_MULS 4
          mov     rax, [rsi + %1]
          mul     rcx
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  ; RMM1_MULQ slot, qoff, lo, mid, hi  :  (hi:mid:lo) += [rsp + slot] * [q + qoff]
  %macro RMM1_MULQ 5
          mov     rax, [rsp + %1]
          mul     qword [q + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; RMM1_RED slot, lo, mid, hi  :  m = low64(lo * np), saved at [rsp + slot];
  ;                                then (hi:mid:lo) += m * q[0]
  %macro RMM1_RED 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  ; RMM1_CMPQ off  :  compare the result limb at off with the same limb of q
  %macro RMM1_CMPQ 1
          mov     rax, [rdi + %1]
          cmp     rax, [q + %1]
          jc      rawMontgomeryMul1_mulM_done ; result limb is smaller: already reduced
          jnz     rawMontgomeryMul1_mulM_sq   ; result limb is larger: subtract q
  %endmacro

  rawMontgomeryMul1:
          sub     rsp, 32                     ; scratch for the four m values
          mov     rcx, rdx                    ; mul overwrites rdx, so keep the operand in rcx
          mov     r11, 0xc2e1f593efffffff     ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; ---- column 0, accumulator lo/mid/hi = r8/r9/r10
          RMM1_MULS 0,       r8, r9, r10
          RMM1_RED  0,       r8, r9, r10

          ; ---- column 1, accumulator = r9/r10/r8
          RMM1_MULS 8,       r9, r10, r8
          RMM1_MULQ 0, 8,    r9, r10, r8
          RMM1_RED  8,       r9, r10, r8

          ; ---- column 2, accumulator = r10/r8/r9
          RMM1_MULS 16,      r10, r8, r9
          RMM1_MULQ 8, 8,    r10, r8, r9
          RMM1_MULQ 0, 16,   r10, r8, r9
          RMM1_RED  16,      r10, r8, r9

          ; ---- column 3, accumulator = r8/r9/r10
          RMM1_MULS 24,      r8, r9, r10
          RMM1_MULQ 16, 8,   r8, r9, r10
          RMM1_MULQ 8, 16,   r8, r9, r10
          RMM1_MULQ 0, 24,   r8, r9, r10
          RMM1_RED  24,      r8, r9, r10

          ; ---- column 4, accumulator = r9/r10/r8, low limb becomes result limb 0
          RMM1_MULQ 24, 8,   r9, r10, r8
          RMM1_MULQ 16, 16,  r9, r10, r8
          RMM1_MULQ 8, 24,   r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; ---- column 5, accumulator = r10/r8/r9, low limb becomes result limb 1
          RMM1_MULQ 24, 16,  r10, r8, r9
          RMM1_MULQ 16, 24,  r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; ---- column 6, accumulator = r8/r9/r10, gives result limbs 2 and 3
          RMM1_MULQ 24, 24,  r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; A non-zero r10 is a carry out of the top limb: q must be subtracted.
          test    r10, r10
          jnz     rawMontgomeryMul1_mulM_sq

          ; Otherwise compare the result with q from the most significant limb down.
          RMM1_CMPQ 24
          RMM1_CMPQ 16
          RMM1_CMPQ 8
          RMM1_CMPQ 0
          ; All limbs equal: fall through and subtract q as well.

  rawMontgomeryMul1_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawMontgomeryMul1_mulM_done:
          mov     rdx, rcx                    ; recover rdx to its original place.
          add     rsp, 32                     ; release the scratch area
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawFromMontgomery
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Run the Montgomery reduction on a single long value, in place.
  ;
  ; The four input limbs are read from [rdi] and fed into the same
  ; column-by-column reduction used by the Montgomery multiplications; the
  ; reduced value is written back to [rdi]. All input limbs are read before
  ; the first result limb is stored.
  ; Params:
  ;   rdi <= Pointer to the long data; input and result
  ;   (rsi is not read by this routine)
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx, flags
  ;   (rdx is parked in rcx during the routine and restored before returning)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; RFM_ADDIN off, lo, mid, hi  :  (hi:mid:lo) += [rdi + off]
  %macro RFM_ADDIN 4
          add     %2, [rdi + %1]
          adc     %3, 0x0
          adc     %4, 0x0
  %endmacro

  ; RFM_MULQ slot, qoff, lo, mid, hi  :  (hi:mid:lo) += [rsp + slot] * [q + qoff]
  %macro RFM_MULQ 5
          mov     rax, [rsp + %1]
          mul     qword [q + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; RFM_RED slot, lo, mid, hi  :  m = low64(lo * np), saved at [rsp + slot];
  ;                               then (hi:mid:lo) += m * q[0]
  %macro RFM_RED 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  ; RFM_CMPQ off  :  compare the result limb at off with the same limb of q
  %macro RFM_CMPQ 1
          mov     rax, [rdi + %1]
          cmp     rax, [q + %1]
          jc      rawFromMontgomery_mulM_done ; result limb is smaller: already reduced
          jnz     rawFromMontgomery_mulM_sq   ; result limb is larger: subtract q
  %endmacro

  rawFromMontgomery:
          sub     rsp, 32                     ; scratch for the four m values
          mov     rcx, rdx                    ; mul overwrites rdx, so save it in rcx
          mov     r11, 0xc2e1f593efffffff     ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; ---- column 0, accumulator lo/mid/hi = r8/r9/r10
          RFM_ADDIN 0,       r8, r9, r10
          RFM_RED   0,       r8, r9, r10

          ; ---- column 1, accumulator = r9/r10/r8
          RFM_ADDIN 8,       r9, r10, r8
          RFM_MULQ  0, 8,    r9, r10, r8
          RFM_RED   8,       r9, r10, r8

          ; ---- column 2, accumulator = r10/r8/r9
          RFM_ADDIN 16,      r10, r8, r9
          RFM_MULQ  8, 8,    r10, r8, r9
          RFM_MULQ  0, 16,   r10, r8, r9
          RFM_RED   16,      r10, r8, r9

          ; ---- column 3, accumulator = r8/r9/r10
          RFM_ADDIN 24,      r8, r9, r10
          RFM_MULQ  16, 8,   r8, r9, r10
          RFM_MULQ  8, 16,   r8, r9, r10
          RFM_MULQ  0, 24,   r8, r9, r10
          RFM_RED   24,      r8, r9, r10

          ; ---- column 4, accumulator = r9/r10/r8, low limb becomes result limb 0
          RFM_MULQ  24, 8,   r9, r10, r8
          RFM_MULQ  16, 16,  r9, r10, r8
          RFM_MULQ  8, 24,   r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; ---- column 5, accumulator = r10/r8/r9, low limb becomes result limb 1
          RFM_MULQ  24, 16,  r10, r8, r9
          RFM_MULQ  16, 24,  r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; ---- column 6, accumulator = r8/r9/r10, gives result limbs 2 and 3
          RFM_MULQ  24, 24,  r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; A non-zero r10 is a carry out of the top limb: q must be subtracted.
          test    r10, r10
          jnz     rawFromMontgomery_mulM_sq

          ; Otherwise compare the result with q from the most significant limb down.
          RFM_CMPQ 24
          RFM_CMPQ 16
          RFM_CMPQ 8
          RFM_CMPQ 0
          ; All limbs equal: fall through and subtract q as well.

  rawFromMontgomery_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawFromMontgomery_mulM_done:
          mov     rdx, rcx                    ; recover rdx to its original place.
          add     rsp, 32                     ; release the scratch area
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; toMontgomery
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Convert an element to long Montgomery form, in place.
  ; Bit 62 of the type word at [rdi] is the Montgomery flag and bit 63 the
  ; long flag. An element that already has the Montgomery flag is left alone.
  ; Otherwise both flags are set in the type word and the value is multiplied
  ; by R2 (defined elsewhere in this file; presumably R^2 mod q - confirm).
  ; Params:
  ;   rdi <= Pointer element to convert
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx, rdx, flags
  ;   (rsi is saved on the stack and restored; rdi is restored)
  ;;;;;;;;;;;;;;;;;;;;
  Fr_toMontgomery:
          mov     rax, [rdi]
          bts     rax, 62                 ; CF = old Montgomery flag, flag set in rax
          jc      toMontgomery_doNothing
          bts     rax, 63                 ; CF = old long flag, flag set in rax
          jc      toMontgomeryLong

  toMontgomeryShort:
          mov     [rdi], rax              ; type word now says long + Montgomery
          add     rdi, 8                  ; rdi -> long data (result)
          push    rsi
          lea     rsi, [R2]
          movsx   rdx, eax                ; sign-extend the 32 bit short value
          test    rdx, rdx
          js      negMontgomeryShort

  posMontgomeryShort:
          call    rawMontgomeryMul1       ; [rdi] = R2 * value
          pop     rsi
          sub     rdi, 8
          ret

  negMontgomeryShort:
          neg     rdx                     ; Do the multiplication positive and then negate the result.
          call    rawMontgomeryMul1
          mov     rsi, rdi
          call    rawNegL                 ; defined elsewhere; called with rsi = rdi = long data
          pop     rsi
          sub     rdi, 8
          ret

  toMontgomeryLong:
          mov     [rdi], rax              ; same type word, now with the Montgomery flag
          add     rdi, 8                  ; rdi -> long data (operand and result)
          push    rsi
          mov     rdx, rdi
          lea     rsi, [R2]
          call    rawMontgomeryMul        ; [rdi] = R2 * [rdi]
          pop     rsi
          sub     rdi, 8

  toMontgomery_doNothing:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; toNormal
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Convert an element out of Montgomery form, in place.
  ; Only an element with both the Montgomery flag (bit 62) and the long flag
  ; (bit 63) set in its type word is converted; its Montgomery flag is cleared
  ; and rawFromMontgomery is run on its long data. Anything else is left
  ; exactly as it is.
  ; Params:
  ;   rdi <= Pointer element to convert
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx, flags
  ;;;;;;;;;;;;;;;;;;;;
  Fr_toNormal:
          mov     rax, [rdi]
          btc     rax, 62                 ; CF = old Montgomery flag; the bit is flipped in rax
          jnc     fromMontgomery_doNothing
          bt      rax, 63                 ; if short, it means it's converted
          jnc     fromMontgomery_doNothing

  fromMontgomeryLong:
          mov     [rdi], rax              ; store the type word with bit 62 now clear
          add     rdi, 8                  ; rdi -> long data
          call    rawFromMontgomery
          sub     rdi, 8

  fromMontgomery_doNothing:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; add
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Adds two elements of any kind
  ;
  ; The type word of each operand picks the path: bit 63 set = long,
  ; bit 62 set = Montgomery (label suffixes: s/l = short/long, n/m =
  ; normal/Montgomery, 1/2 = operand number).
  ;   * short + short is done as a 32 bit signed add; on signed overflow the
  ;     sum is recomputed in 64 bits and stored as a long via rawCopyS2L.
  ;   * long normal + short uses rawAddLS / rawSubLS with the short's magnitude.
  ;   * on the paths labelled ...n with a Montgomery partner, the normal
  ;     operand is first converted IN PLACE with Fr_toMontgomery (the operand
  ;     itself is modified), then the long data is added with rawAddLL.
  ;
  ; The result type word is written only AFTER any such conversion. Writing it
  ; first would overwrite the type word (and short value) of an operand that
  ; shares storage with the result before Fr_toMontgomery gets to read it,
  ; and the conversion would then be skipped.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx, rdx, flags; rsi on the short + long paths
  ;;;;;;;;;;;;;;;;;;;;;;

  ; FRADD_SET_TYPE t  :  [rdi] = t << 56  (0x80 = long normal, 0xC0 = long Montgomery)
  %macro FRADD_SET_TYPE 1
          mov     r11b, %1
          shl     r11, 56
          mov     [rdi], r11
  %endmacro

  ; FRADD_MONT_OP1  :  convert element 1 (rsi) in place; rdx is kept in rsi
  ;                    across the call because Fr_toMontgomery overwrites rdx
  %macro FRADD_MONT_OP1 0
          push    rdi
          mov     rdi, rsi
          mov     rsi, rdx
          call    Fr_toMontgomery
          mov     rdx, rsi
          mov     rsi, rdi
          pop     rdi
  %endmacro

  ; FRADD_MONT_OP2  :  convert element 2 (rdx) in place
  %macro FRADD_MONT_OP2 0
          push    rdi
          mov     rdi, rdx
          call    Fr_toMontgomery
          mov     rdx, rdi
          pop     rdi
  %endmacro

  ; FRADD_LL_RET  :  add the long data of rsi and rdx into rdi, then return
  %macro FRADD_LL_RET 0
          add     rdi, 8
          add     rsi, 8
          add     rdx, 8
          call    rawAddLL
          sub     rdi, 8
          sub     rsi, 8
          ret
  %endmacro

  Fr_add:
          mov     rax, [rsi]              ; type word / short value of element 1
          mov     rcx, [rdx]              ; type word / short value of element 2
          bt      rax, 63                 ; first operand long?
          jc      add_l1
          bt      rcx, 63                 ; second operand long?
          jc      add_s1l2

  add_s1s2:                               ; Both operands are short
          mov     edx, eax                ; 32 bit write zero-extends into rdx
          add     edx, ecx
          jo      add_manageOverflow
          mov     [rdi], rdx              ; not necessary to adjust so just save and return
          ret

  add_manageOverflow:                     ; Do the operation in 64 bits
          push    rsi
          movsx   rsi, eax
          movsx   rdx, ecx
          add     rsi, rdx
          call    rawCopyS2L
          pop     rsi
          ret

  add_l1:
          bt      rcx, 63                 ; second operand long?
          jc      add_l1l2

  ;;;;;;;; long + short
  add_l1s2:
          bt      rax, 62                 ; first operand Montgomery?
          jc      add_l1ms2

  add_l1ns2:
          FRADD_SET_TYPE 0x80
          add     rsi, 8
          movsx   rdx, ecx                ; sign-extended short value
          add     rdi, 8
          test    rdx, rdx
          jns     tmp1
          neg     rdx                     ; negative short: subtract its magnitude
          call    rawSubLS
          sub     rdi, 8
          sub     rsi, 8
          ret
  tmp1:
          call    rawAddLS
          sub     rdi, 8
          sub     rsi, 8
          ret

  add_l1ms2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_l1ms2m

  add_l1ms2n:
          FRADD_MONT_OP2
          ; falls through: both operands are now long Montgomery
  add_l1ms2m:
          FRADD_SET_TYPE 0xC0
          FRADD_LL_RET

  ;;;;;;;; short + long
  add_s1l2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_s1l2m

  add_s1l2n:
          FRADD_SET_TYPE 0x80
          lea     rsi, [rdx + 8]          ; rsi -> long data of element 2
          movsx   rdx, eax                ; sign-extended short value of element 1
          add     rdi, 8
          test    rdx, rdx
          jns     tmp2
          neg     rdx                     ; negative short: subtract its magnitude
          call    rawSubLS
          sub     rdi, 8
          sub     rsi, 8
          ret
  tmp2:
          call    rawAddLS
          sub     rdi, 8
          sub     rsi, 8
          ret

  add_s1l2m:
          bt      rax, 62                 ; first operand Montgomery?
          jc      add_s1ml2m

  add_s1nl2m:
          FRADD_MONT_OP1
          ; falls through: both operands are now long Montgomery
  add_s1ml2m:
          FRADD_SET_TYPE 0xC0
          FRADD_LL_RET

  ;;;; long + long
  add_l1l2:
          bt      rax, 62                 ; first operand Montgomery?
          jc      add_l1ml2

  add_l1nl2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_l1nl2m

  add_l1nl2n:
          FRADD_SET_TYPE 0x80
          FRADD_LL_RET

  add_l1nl2m:
          FRADD_MONT_OP1
          jmp     add_l1ml2m              ; both operands are now long Montgomery

  add_l1ml2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_l1ml2m

  add_l1ml2n:
          FRADD_MONT_OP2
          ; falls through: both operands are now long Montgomery
  add_l1ml2m:
          FRADD_SET_TYPE 0xC0
          FRADD_LL_RET
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawAddLL
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Adds two elements of type long
  ; [rdi] = [rsi] + [rdx], with q subtracted once when the sum carries out of
  ; the top limb or is greater than or equal to q.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= Pointer to the long data of element 2
  ;   rdi <= Pointer to the long data of result
  ; Modified Registers:
  ;   rax, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  rawAddLL:
          ; Four-limb addition; each limb is read before its result is stored.
          mov     rax, [rsi + 0]
          add     rax, [rdx + 0]
          mov     [rdi + 0], rax

          mov     rax, [rsi + 8]
          adc     rax, [rdx + 8]
          mov     [rdi + 8], rax

          mov     rax, [rsi + 16]
          adc     rax, [rdx + 16]
          mov     [rdi + 16], rax

          mov     rax, [rsi + 24]
          adc     rax, [rdx + 24]
          mov     [rdi + 24], rax
          jc      rawAddLL_sq             ; carry out of the top limb: subtract q

          ; Compare the sum with q, most significant limb first.
          ; rax still holds the top limb of the sum here.
          cmp     rax, [q + 24]
          jc      rawAddLL_done           ; sum limb is smaller than q's: done
          jnz     rawAddLL_sq             ; sum limb is larger than q's: subtract

          mov     rax, [rdi + 16]
          cmp     rax, [q + 16]
          jc      rawAddLL_done
          jnz     rawAddLL_sq

          mov     rax, [rdi + 8]
          cmp     rax, [q + 8]
          jc      rawAddLL_done
          jnz     rawAddLL_sq

          mov     rax, [rdi + 0]
          cmp     rax, [q + 0]
          jc      rawAddLL_done
          jnz     rawAddLL_sq
          ; All limbs equal: fall through and subtract q as well.

  rawAddLL_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawAddLL_done:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawAddLS
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Adds a 64 bit value to an element of type long
  ; [rdi] = [rsi] + rdx, with q subtracted once when the sum carries out of
  ; the top limb or is greater than or equal to q.
  ; Params:
  ;   rdi <= Pointer to the long data of result
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= Value to be added
  ; Modified Registers:
  ;   rax, rdx, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  rawAddLS:
          ; Limb 0 takes the value; the carry then ripples through limbs 1..3.
          add     rdx, [rsi]
          mov     [rdi], rdx

          mov     rdx, 0                  ; mov, not xor: CF must survive for the adc
          adc     rdx, [rsi + 8]
          mov     [rdi + 8], rdx

          mov     rdx, 0
          adc     rdx, [rsi + 16]
          mov     [rdi + 16], rdx

          mov     rdx, 0
          adc     rdx, [rsi + 24]
          mov     [rdi + 24], rdx
          jc      rawAddLS_sq             ; carry out of the top limb: subtract q

          ; Compare the sum with q, most significant limb first.
          mov     rax, [rdi + 24]
          cmp     rax, [q + 24]
          jc      rawAddLS_done           ; sum limb is smaller than q's: done
          jnz     rawAddLS_sq             ; sum limb is larger than q's: subtract

          mov     rax, [rdi + 16]
          cmp     rax, [q + 16]
          jc      rawAddLS_done
          jnz     rawAddLS_sq

          mov     rax, [rdi + 8]
          cmp     rax, [q + 8]
          jc      rawAddLS_done
          jnz     rawAddLS_sq

          mov     rax, [rdi + 0]
          cmp     rax, [q + 0]
          jc      rawAddLS_done
          jnz     rawAddLS_sq
          ; All limbs equal: fall through and subtract q as well.

  rawAddLS_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawAddLS_done:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; sub
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Subtracts two elements of any kind: result = element 1 - element 2
  ;
  ; The type word of each operand picks the path: bit 63 set = long,
  ; bit 62 set = Montgomery (label suffixes: s/l = short/long, n/m =
  ; normal/Montgomery, 1/2 = operand number).
  ;   * short - short is done as a 32 bit signed subtract; on signed overflow
  ;     the difference is recomputed in 64 bits and stored via rawCopyS2L.
  ;   * long normal - short uses rawSubLS / rawAddLS with the short's magnitude.
  ;   * short - long normal uses rawSubSL (short >= 0) or rawNegLS (short < 0).
  ;   * on the paths labelled ...n with a Montgomery partner, the normal
  ;     operand is first converted IN PLACE with Fr_toMontgomery (the operand
  ;     itself is modified), then the long data is subtracted with rawSubLL.
  ;
  ; The result type word is written only AFTER any such conversion. Writing it
  ; first would overwrite the type word (and short value) of an operand that
  ; shares storage with the result before Fr_toMontgomery gets to read it,
  ; and the conversion would then be skipped.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx, rdx, flags
  ;;;;;;;;;;;;;;;;;;;;;;

  ; FRSUB_SET_TYPE t  :  [rdi] = t << 56  (0x80 = long normal, 0xC0 = long Montgomery)
  %macro FRSUB_SET_TYPE 1
          mov     r11b, %1
          shl     r11, 56
          mov     [rdi], r11
  %endmacro

  ; FRSUB_MONT_OP1  :  convert element 1 (rsi) in place; rdx is kept in rsi
  ;                    across the call because Fr_toMontgomery overwrites rdx
  %macro FRSUB_MONT_OP1 0
          push    rdi
          mov     rdi, rsi
          mov     rsi, rdx
          call    Fr_toMontgomery
          mov     rdx, rsi
          mov     rsi, rdi
          pop     rdi
  %endmacro

  ; FRSUB_MONT_OP2  :  convert element 2 (rdx) in place
  %macro FRSUB_MONT_OP2 0
          push    rdi
          mov     rdi, rdx
          call    Fr_toMontgomery
          mov     rdx, rdi
          pop     rdi
  %endmacro

  ; FRSUB_LL_RET  :  [rdi] = long data of rsi - long data of rdx, then return
  %macro FRSUB_LL_RET 0
          add     rdi, 8
          add     rsi, 8
          add     rdx, 8
          call    rawSubLL
          sub     rdi, 8
          sub     rsi, 8
          ret
  %endmacro

  Fr_sub:
          mov     rax, [rsi]              ; type word / short value of element 1
          mov     rcx, [rdx]              ; type word / short value of element 2
          bt      rax, 63                 ; first operand long?
          jc      sub_l1
          bt      rcx, 63                 ; second operand long?
          jc      sub_s1l2

  sub_s1s2:                               ; Both operands are short
          mov     edx, eax                ; 32 bit write zero-extends into rdx
          sub     edx, ecx
          jo      sub_manageOverflow
          mov     [rdi], rdx              ; not necessary to adjust so just save and return
          ret

  sub_manageOverflow:                     ; Do the operation in 64 bits
          push    rsi
          movsx   rsi, eax
          movsx   rdx, ecx
          sub     rsi, rdx
          call    rawCopyS2L
          pop     rsi
          ret

  sub_l1:
          bt      rcx, 63                 ; second operand long?
          jc      sub_l1l2

  ;;;;;;;; long - short
  sub_l1s2:
          bt      rax, 62                 ; first operand Montgomery?
          jc      sub_l1ms2

  sub_l1ns2:
          FRSUB_SET_TYPE 0x80
          add     rsi, 8
          movsx   rdx, ecx                ; sign-extended short value
          add     rdi, 8
          test    rdx, rdx
          jns     tmp3
          neg     rdx                     ; subtracting a negative: add its magnitude
          call    rawAddLS
          sub     rdi, 8
          sub     rsi, 8
          ret
  tmp3:
          call    rawSubLS
          sub     rdi, 8
          sub     rsi, 8
          ret

  sub_l1ms2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      sub_l1ms2m

  sub_l1ms2n:
          FRSUB_MONT_OP2
          ; falls through: both operands are now long Montgomery
  sub_l1ms2m:
          FRSUB_SET_TYPE 0xC0
          FRSUB_LL_RET

  ;;;;;;;; short - long
  sub_s1l2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      sub_s1l2m

  sub_s1l2n:
          FRSUB_SET_TYPE 0x80
          test    eax, eax
          js      tmp4
          ; First Operand is positive
          push    rsi
          add     rdi, 8
          movsx   rsi, eax
          add     rdx, 8
          call    rawSubSL                ; [rdi] = rsi - [rdx]
          sub     rdi, 8
          pop     rsi
          ret
  tmp4:                                   ; First operand is negative
          ; NOTE(review): rawNegLS stores to [rdi] before it reads [rsi], so this
          ; path looks unsafe when the result shares storage with element 2 - confirm.
          push    rsi
          lea     rsi, [rdx + 8]          ; rsi -> long data of element 2
          movsx   rdx, eax
          add     rdi, 8
          neg     rdx                     ; rdx = magnitude of the short
          call    rawNegLS                ; [rdi] = -[rsi] - rdx
          sub     rdi, 8
          pop     rsi
          ret

  sub_s1l2m:
          bt      rax, 62                 ; first operand Montgomery?
          jc      sub_s1ml2m

  sub_s1nl2m:
          FRSUB_MONT_OP1
          ; falls through: both operands are now long Montgomery
  sub_s1ml2m:
          FRSUB_SET_TYPE 0xC0
          FRSUB_LL_RET

  ;;;; long - long
  sub_l1l2:
          bt      rax, 62                 ; first operand Montgomery?
          jc      sub_l1ml2

  sub_l1nl2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      sub_l1nl2m

  sub_l1nl2n:
          FRSUB_SET_TYPE 0x80
          FRSUB_LL_RET

  sub_l1nl2m:
          FRSUB_MONT_OP1
          jmp     sub_l1ml2m              ; both operands are now long Montgomery

  sub_l1ml2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      sub_l1ml2m

  sub_l1ml2n:
          FRSUB_MONT_OP2
          ; falls through: both operands are now long Montgomery
  sub_l1ml2m:
          FRSUB_SET_TYPE 0xC0
          FRSUB_LL_RET
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawSubLS
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Subtracts a 64 bit value from a long element
  ; Params:
  ;   rdi <= Pointer to the long data of result
  ;   rsi <= Pointer to the long data of element 1 (the minuend)
  ;   rdx <= Value to be subtracted
  ;
  ; [rdi] = [rsi] - rdx, with q added once if the subtraction borrows out of
  ; the top limb
  ; Modified Registers:
  ;   rax, rdx (left as 0), flags
  ;;;;;;;;;;;;;;;;;;;;;;
  rawSubLS:
          ; Limb 0 takes the value; the borrow then ripples through limbs 1..3.
          mov     rax, [rsi]
          sub     rax, rdx
          mov     [rdi], rax

          mov     rdx, 0                  ; mov, not xor: CF must survive for the sbb

          mov     rax, [rsi + 8]
          sbb     rax, rdx
          mov     [rdi + 8], rax

          mov     rax, [rsi + 16]
          sbb     rax, rdx
          mov     [rdi + 16], rax

          mov     rax, [rsi + 24]
          sbb     rax, rdx
          mov     [rdi + 24], rax
          jnc     rawSubLS_done           ; no borrow: result needs no correction

          ; Borrow out of the top limb: add q
  rawSubLS_aq:
          mov     rax, [q + 0]
          add     [rdi + 0], rax
          mov     rax, [q + 8]
          adc     [rdi + 8], rax
          mov     rax, [q + 16]
          adc     [rdi + 16], rax
          mov     rax, [q + 24]
          adc     [rdi + 24], rax

  rawSubLS_done:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawSubSL
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Subtracts a long element from a 64 bit value
  ; Params:
  ;   rdi <= Pointer to the long data of result
  ;   rsi <= Value to subtract from (the minuend)
  ;   rdx <= Pointer to long of the value to be subtracted
  ;
  ; [rdi] = rsi - [rdx], with q added once if the subtraction borrows out of
  ; the top limb
  ; Modified Registers:
  ;   rax, rsi, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  rawSubSL:
          ; Limb 0 is the value minus limb 0 of [rdx]; the upper limbs of the
          ; minuend are zero, so limbs 1..3 are 0 - [rdx + n] - borrow.
          sub     rsi, [rdx]
          mov     [rdi], rsi

          mov     rax, 0                  ; mov, not xor: CF must survive for the sbb
          sbb     rax, [rdx + 8]
          mov     [rdi + 8], rax

          mov     rax, 0
          sbb     rax, [rdx + 16]
          mov     [rdi + 16], rax

          mov     rax, 0
          sbb     rax, [rdx + 24]
          mov     [rdi + 24], rax
          jnc     rawSubSL_done           ; no borrow: result needs no correction

          ; Borrow out of the top limb: add q
  rawSubSL_aq:
          mov     rax, [q + 0]
          add     [rdi + 0], rax
          mov     rax, [q + 8]
          adc     [rdi + 8], rax
          mov     rax, [q + 16]
          adc     [rdi + 16], rax
          mov     rax, [q + 24]
          adc     [rdi + 24], rax

  rawSubSL_done:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawSubLL
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Subtracts a long element from a long element
  ; Params:
  ;   rdi <= Pointer to the long data of result
  ;   rsi <= Pointer to long to subtract from (the minuend)
  ;   rdx <= Pointer to long of the value to be subtracted
  ;
  ; [rdi] = [rsi] - [rdx], with q added once if the subtraction borrows out of
  ; the top limb
  ; Modified Registers:
  ;   rax, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  rawSubLL:
          ; Four-limb subtraction; each limb is read before its result is stored.
          mov     rax, [rsi + 0]
          sub     rax, [rdx + 0]
          mov     [rdi + 0], rax

          mov     rax, [rsi + 8]
          sbb     rax, [rdx + 8]
          mov     [rdi + 8], rax

          mov     rax, [rsi + 16]
          sbb     rax, [rdx + 16]
          mov     [rdi + 16], rax

          mov     rax, [rsi + 24]
          sbb     rax, [rdx + 24]
          mov     [rdi + 24], rax
          jnc     rawSubLL_done           ; no borrow: result needs no correction

          ; Borrow out of the top limb: add q
  rawSubLL_aq:
          mov     rax, [q + 0]
          add     [rdi + 0], rax
          mov     rax, [q + 8]
          adc     [rdi + 8], rax
          mov     rax, [q + 16]
          adc     [rdi + 16], rax
          mov     rax, [q + 24]
          adc     [rdi + 24], rax

  rawSubLL_done:
          ret
  1326. ;;;;;;;;;;;;;;;;;;;;;;
  1327. ; rawNegLS
  1328. ;;;;;;;;;;;;;;;;;;;;;;
  1329. ; Subtracts both a long value and a 64-bit short value from zero
  1330. ; Params:
  1331. ; rdi <= Pointer to the long data of the result
  1332. ; rsi <= Pointer to the long data of the value to subtract
  1333. ; rdx <= 64-bit short value to subtract as well
  1334. ;
  1335. ; [rdi] = -[rsi] - rdx, computed as (q - rdx) - [rsi], plus q if either step borrows
  1336. ; Modified Registers:
  1337. ; rax, rdx (dl and dh are used to record the two borrow flags)
  1338. ;;;;;;;;;;;;;;;;;;;;;;
  1339. rawNegLS:
  1340. mov rax, [q] ; step 1: [rdi] = q - rdx
  1341. sub rax, rdx
  1342. mov [rdi], rax
  1343. mov rax, [q + 8 ]
  1344. sbb rax, 0 ; only the borrow propagates through the upper limbs
  1345. mov [rdi + 8], rax
  1346. mov rax, [q + 16 ]
  1347. sbb rax, 0
  1348. mov [rdi + 16], rax
  1349. mov rax, [q + 24 ]
  1350. sbb rax, 0
  1351. mov [rdi + 24], rax
  1352. setc dl ; dl = borrow out of step 1 (the value in rdx has already been consumed)
  1353. mov rax, [rdi + 0 ] ; step 2: [rdi] = [rdi] - [rsi]
  1354. sub rax, [rsi + 0]
  1355. mov [rdi + 0], rax
  1356. mov rax, [rdi + 8 ]
  1357. sbb rax, [rsi + 8]
  1358. mov [rdi + 8], rax
  1359. mov rax, [rdi + 16 ]
  1360. sbb rax, [rsi + 16]
  1361. mov [rdi + 16], rax
  1362. mov rax, [rdi + 24 ]
  1363. sbb rax, [rsi + 24]
  1364. mov [rdi + 24], rax
  1365. setc dh ; dh = borrow out of step 2
  1366. or dl, dh ; ZF is set only when neither step borrowed
  1367. jz rawNegSL_done ; (label is spelled rawNegSL although this routine is rawNegLS)
  1368. ; a borrow occurred, so the value is negative: add q
  1369. mov rax, [q + 0]
  1370. add [rdi + 0], rax
  1371. mov rax, [q + 8]
  1372. adc [rdi + 8], rax
  1373. mov rax, [q + 16]
  1374. adc [rdi + 16], rax
  1375. mov rax, [q + 24]
  1376. adc [rdi + 24], rax
  1377. rawNegSL_done:
  1378. ret
  1379. ;;;;;;;;;;;;;;;;;;;;;;
  1380. ; neg
  1381. ;;;;;;;;;;;;;;;;;;;;;;
  1382. ; Negates an element of any kind (short or long)
  1383. ; Params:
  1384. ; rsi <= Pointer to element to be negated
  1385. ; rdi <= Pointer to result
  1386. ; [rdi] = -[rsi]
  1387. ;;;;;;;;;;;;;;;;;;;;;;
  1388. Fr_neg:
  1389. mov rax, [rsi] ; rax = first 64-bit word of the element (type bits on top, short value in the low 32 bits)
  1390. bt rax, 63 ; bit 63 set means the operand is long
  1391. jc neg_l
  1392. neg_s: ; Operand is short
  1393. neg eax ; 32-bit negate; writing eax also clears the upper half of rax
  1394. jo neg_manageOverflow ; Check if overflow. (0x80000000 is the only case)
  1395. mov [rdi], rax ; not necessary to adjust so just save and return
  1396. ret
  1397. neg_manageOverflow: ; Do the operation in 64 bits
  1398. push rsi
  1399. movsx rsi, eax ; eax still holds 0x80000000 here; sign-extend it to 64 bits
  1400. neg rsi ; now representable: rsi = 2^31
  1401. call rawCopyS2L ; store rsi at [rdi] as a long element
  1402. pop rsi
  1403. ret
  1404. neg_l:
  1405. mov [rdi], rax ; Copy the type word unchanged from the source
  1406. add rdi, 8 ; step both pointers past the type word to the long data
  1407. add rsi, 8
  1408. call rawNegL
  1409. sub rdi, 8 ; restore the caller's pointers
  1410. sub rsi, 8
  1411. ret
  1412. ;;;;;;;;;;;;;;;;;;;;;;
  1413. ; rawNegL
  1414. ;;;;;;;;;;;;;;;;;;;;;;
  1415. ; Negates a long value (four 64-bit limbs) modulo q
  1416. ; Params:
  1417. ; rdi <= Pointer to the long data of result
  1418. ; rsi <= Pointer to the long data of element 1
  1419. ;
  1420. ; [rdi] = - [rsi]   (0 when [rsi] is 0, otherwise q - [rsi]); modifies rax
  1421. ;;;;;;;;;;;;;;;;;;;;;;
  1422. rawNegL:
  1423. ; Zero is special-cased: q - 0 would store q itself instead of 0
  1424. xor rax, rax
  1425. cmp [rsi + 0], rax
  1426. jnz doNegate
  1427. cmp [rsi + 8], rax
  1428. jnz doNegate
  1429. cmp [rsi + 16], rax
  1430. jnz doNegate
  1431. cmp [rsi + 24], rax
  1432. jnz doNegate
  1433. ; all four limbs are zero, so the result is zero (rax is still 0)
  1434. mov [rdi + 0], rax
  1435. mov [rdi + 8], rax
  1436. mov [rdi + 16], rax
  1437. mov [rdi + 24], rax
  1438. ret
  1439. doNegate: ; [rdi] = q - [rsi]; each limb is read before it is written, so rsi may equal rdi
  1440. mov rax, [q + 0]
  1441. sub rax, [rsi + 0]
  1442. mov [rdi + 0], rax
  1443. mov rax, [q + 8]
  1444. sbb rax, [rsi + 8]
  1445. mov [rdi + 8], rax
  1446. mov rax, [q + 16]
  1447. sbb rax, [rsi + 16]
  1448. mov [rdi + 16], rax
  1449. mov rax, [q + 24]
  1450. sbb rax, [rsi + 24]
  1451. mov [rdi + 24], rax
  1452. ret
  1453. ;;;;;;;;;;;;;;;;;;;;;;
  1454. ; mul
  1455. ;;;;;;;;;;;;;;;;;;;;;;
  1456. ; Multiplies two elements of any kind (short or long, normal or Montgomery)
  1457. ; Params:
  1458. ; rsi <= Pointer to element 1
  1459. ; rdx <= Pointer to element 2
  1460. ; rdi <= Pointer to result
  1461. ; [rdi] = [rsi] * [rdx]
  1462. ; Modified Registers:
  1463. ; r8, r9, r10, r11, rax, rcx, rdx (rdx is clobbered by imul and is not restored)
  1464. ;;;;;;;;;;;;;;;;;;;;;;
  1465. Fr_mul:
  1466. mov r8, [rsi] ; r8 = type/value word of element 1
  1467. mov r9, [rdx] ; r9 = type/value word of element 2
  1468. bt r8, 63 ; bit 63 set means the first operand is long
  1469. jc mul_l1
  1470. bt r9, 63 ; bit 63 set means the second operand is long
  1471. jc mul_s1l2
  1472. mul_s1s2: ; Both operands are short
  1473. mov eax, r8d ; 32-bit write zero-extends into rax, so no separate xor is needed
  1474. imul r9d ; edx:eax = eax * r9d; OF is set when the product does not fit in eax
  1475. jo mul_manageOverflow ; product needs more than 32 bits: redo it in 64 bits
  1476. mov [rdi], rax ; fits in 32 bits: store as a short element (upper half of rax is 0)
  1477. ret ; without this return the short result was always overwritten by the path below
  1478. mul_manageOverflow: ; Do the operation in 64 bits
  1479. push rsi
  1480. movsx rax, r8d ; sign-extend both 32-bit values to 64 bits
  1481. movsx rcx, r9d
  1482. imul rcx ; rdx:rax = rax * rcx; a product of two 32-bit values fits in rax
  1483. mov rsi, rax
  1484. call rawCopyS2L ; store the 64-bit product at [rdi] as a long element
  1485. pop rsi
  1486. ret
  1487. mul_l1: ; First operand is long
  1488. bt r9, 63 ; bit 63 set means the second operand is long too
  1489. jc mul_l1l2
  1490. ;;;;;;;; long first operand, short second operand
  1491. mul_l1s2:
  1492. bt r8, 62 ; check if montgomery first
  1493. jc mul_l1ms2
  1494. mul_l1ns2:
  1495. bt r9, 62 ; check if montgomery second
  1496. jc mul_l1ns2m
  1497. mul_l1ns2n: ; neither operand has the Montgomery bit (62) set
  1498. mov r11b, 0xC0
  1499. shl r11, 56 ; r11 = 0xC0 << 56 (bits 63 and 62); stale upper bits of r11 are shifted out
  1500. mov [rdi], r11 ; result type word: long + Montgomery
  1501. push rsi
  1502. add rsi, 8 ; rsi -> long data of operand 1
  1503. movsx rdx, r9d ; rdx = sign-extended 32-bit value of operand 2
  1504. add rdi, 8 ; rdi -> long data of the result
  1505. cmp rdx, 0
  1506. jns tmp5
  1507. neg rdx ; negative scalar: multiply by its magnitude, then negate the product
  1508. call rawMontgomeryMul1 ; presumably long [rsi] times the 64-bit scalar in rdx - helper not in view, confirm
  1509. mov rsi, rdi
  1510. call rawNegL ; negate the product in place (rsi == rdi)
  1511. sub rdi, 8
  1512. pop rsi
  1513. jmp tmp6
  1514. tmp5:
  1515. call rawMontgomeryMul1
  1516. sub rdi, 8
  1517. pop rsi
  1518. tmp6: ; second pass: Montgomery-multiply the result by the constant stored at R3
  1519. push rsi
  1520. add rdi, 8
  1521. mov rsi, rdi ; multiply the result by R3 in place
  1522. lea rdx, [R3] ; NOTE(review): R3 is presumably R^3 mod q, giving a Montgomery-form result - confirm
  1523. call rawMontgomeryMul
  1524. sub rdi, 8
  1525. pop rsi
  1526. ret
  1527. mul_l1ns2m: ; operand 1 long normal, operand 2 has bit 63 clear but bit 62 (Montgomery) set
  1528. mov r11b, 0x80
  1529. shl r11, 56
  1530. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1531. add rdi, 8
  1532. add rsi, 8
  1533. add rdx, 8 ; NOTE(review): reads long data at [rdx + 8] of an operand typed short - confirm such elements carry long data
  1534. call rawMontgomeryMul
  1535. sub rdi, 8
  1536. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1537. ret
  1538. mul_l1ms2: ; operand 1 is long Montgomery
  1539. bt r9, 62 ; check if montgomery second
  1540. jc mul_l1ms2m
  1541. mul_l1ms2n: ; long Montgomery times short normal
  1542. mov r11b, 0x80
  1543. shl r11, 56
  1544. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1545. push rsi
  1546. add rsi, 8
  1547. movsx rdx, r9d ; rdx = sign-extended 32-bit value of operand 2
  1548. add rdi, 8
  1549. cmp rdx, 0
  1550. jns tmp7
  1551. neg rdx ; negative scalar: multiply by its magnitude, then negate the product
  1552. call rawMontgomeryMul1
  1553. mov rsi, rdi
  1554. call rawNegL ; negate the product in place
  1555. sub rdi, 8
  1556. pop rsi
  1557. jmp tmp8
  1558. tmp7:
  1559. call rawMontgomeryMul1
  1560. sub rdi, 8
  1561. pop rsi
  1562. tmp8:
  1563. ret
  1564. mul_l1ms2m: ; both operands have the Montgomery bit set
  1565. mov r11b, 0xC0
  1566. shl r11, 56
  1567. mov [rdi], r11 ; result type word: long + Montgomery
  1568. add rdi, 8
  1569. add rsi, 8
  1570. add rdx, 8
  1571. call rawMontgomeryMul
  1572. sub rdi, 8
  1573. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1574. ret
  1575. ;;;;;;;; short first operand, long second operand
  1576. mul_s1l2:
  1577. bt r8, 62 ; check if montgomery first
  1578. jc mul_s1ml2
  1579. mul_s1nl2:
  1580. bt r9, 62 ; check if montgomery second
  1581. jc mul_s1nl2m
  1582. mul_s1nl2n: ; neither operand has the Montgomery bit (62) set
  1583. mov r11b, 0xC0
  1584. shl r11, 56 ; r11 = 0xC0 << 56 (bits 63 and 62); stale upper bits of r11 are shifted out
  1585. mov [rdi], r11 ; result type word: long + Montgomery
  1586. push rsi
  1587. lea rsi, [rdx + 8] ; rsi -> long data of operand 2 (the long one)
  1588. movsx rdx, r8d ; rdx = sign-extended 32-bit value of operand 1
  1589. add rdi, 8
  1590. cmp rdx, 0
  1591. jns tmp9
  1592. neg rdx ; negative scalar: multiply by its magnitude, then negate the product
  1593. call rawMontgomeryMul1 ; presumably long [rsi] times the 64-bit scalar in rdx - helper not in view, confirm
  1594. mov rsi, rdi
  1595. call rawNegL ; negate the product in place (rsi == rdi)
  1596. sub rdi, 8
  1597. pop rsi
  1598. jmp tmp10
  1599. tmp9:
  1600. call rawMontgomeryMul1
  1601. sub rdi, 8
  1602. pop rsi
  1603. tmp10: ; second pass: Montgomery-multiply the result by the constant stored at R3
  1604. push rsi
  1605. add rdi, 8
  1606. mov rsi, rdi
  1607. lea rdx, [R3] ; NOTE(review): R3 is presumably R^3 mod q, giving a Montgomery-form result - confirm
  1608. call rawMontgomeryMul
  1609. sub rdi, 8
  1610. pop rsi
  1611. ret
  1612. mul_s1nl2m: ; short normal times long Montgomery
  1613. mov r11b, 0x80
  1614. shl r11, 56
  1615. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1616. push rsi
  1617. lea rsi, [rdx + 8] ; rsi -> long data of operand 2
  1618. movsx rdx, r8d ; rdx = sign-extended 32-bit value of operand 1
  1619. add rdi, 8
  1620. cmp rdx, 0
  1621. jns tmp11
  1622. neg rdx ; negative scalar: multiply by its magnitude, then negate the product
  1623. call rawMontgomeryMul1
  1624. mov rsi, rdi
  1625. call rawNegL ; negate the product in place
  1626. sub rdi, 8
  1627. pop rsi
  1628. jmp tmp12
  1629. tmp11:
  1630. call rawMontgomeryMul1
  1631. sub rdi, 8
  1632. pop rsi
  1633. tmp12:
  1634. ret
  1635. mul_s1ml2: ; operand 1 has bit 63 clear but bit 62 (Montgomery) set
  1636. bt r9, 62 ; check if montgomery second
  1637. jc mul_s1ml2m
  1638. mul_s1ml2n:
  1639. mov r11b, 0x80
  1640. shl r11, 56
  1641. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1642. add rdi, 8
  1643. add rsi, 8 ; NOTE(review): reads long data at [rsi + 8] of an operand typed short - confirm such elements carry long data
  1644. add rdx, 8
  1645. call rawMontgomeryMul
  1646. sub rdi, 8
  1647. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1648. ret
  1649. mul_s1ml2m: ; both operands have the Montgomery bit set
  1650. mov r11b, 0xC0
  1651. shl r11, 56
  1652. mov [rdi], r11 ; result type word: long + Montgomery
  1653. add rdi, 8
  1654. add rsi, 8
  1655. add rdx, 8
  1656. call rawMontgomeryMul
  1657. sub rdi, 8
  1658. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1659. ret
  1660. ;;;; both operands long
  1661. mul_l1l2:
  1662. bt r8, 62 ; check if montgomery first
  1663. jc mul_l1ml2
  1664. mul_l1nl2:
  1665. bt r9, 62 ; check if montgomery second
  1666. jc mul_l1nl2m
  1667. mul_l1nl2n: ; neither operand has the Montgomery bit set
  1668. mov r11b, 0xC0
  1669. shl r11, 56 ; r11 = 0xC0 << 56 (bits 63 and 62); stale upper bits of r11 are shifted out
  1670. mov [rdi], r11 ; result type word: long + Montgomery
  1671. add rdi, 8 ; step all three pointers past the type word to the long data
  1672. add rsi, 8
  1673. add rdx, 8
  1674. call rawMontgomeryMul
  1675. sub rdi, 8
  1676. sub rsi, 8
  1677. push rsi ; second pass: Montgomery-multiply the result by the constant stored at R3
  1678. add rdi, 8
  1679. mov rsi, rdi
  1680. lea rdx, [R3] ; NOTE(review): R3 is presumably R^3 mod q, giving a Montgomery-form result - confirm
  1681. call rawMontgomeryMul
  1682. sub rdi, 8
  1683. pop rsi
  1684. ret
  1685. mul_l1nl2m: ; long normal times long Montgomery
  1686. mov r11b, 0x80
  1687. shl r11, 56
  1688. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1689. add rdi, 8
  1690. add rsi, 8
  1691. add rdx, 8
  1692. call rawMontgomeryMul
  1693. sub rdi, 8
  1694. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1695. ret
  1696. mul_l1ml2:
  1697. bt r9, 62 ; check if montgomery second
  1698. jc mul_l1ml2m
  1699. mul_l1ml2n: ; long Montgomery times long normal
  1700. mov r11b, 0x80
  1701. shl r11, 56
  1702. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1703. add rdi, 8
  1704. add rsi, 8
  1705. add rdx, 8
  1706. call rawMontgomeryMul
  1707. sub rdi, 8
  1708. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1709. ret
  1710. mul_l1ml2m: ; both operands long Montgomery
  1711. mov r11b, 0xC0
  1712. shl r11, 56
  1713. mov [rdi], r11 ; result type word: long + Montgomery
  1714. add rdi, 8
  1715. add rsi, 8
  1716. add rdx, 8
  1717. call rawMontgomeryMul
  1718. sub rdi, 8
  1719. sub rsi, 8 ; rdi and rsi are restored; rdx is not
  1720. ret
  1721. ;;;;;;;;;;;;;;;;;;;;;;
  1722. ; and
  1723. ;;;;;;;;;;;;;;;;;;;;;;
  1724. ; Bitwise AND of two elements of any kind
  1725. ; Params:
  1726. ; rsi <= Pointer to element 1
  1727. ; rdx <= Pointer to element 2
  1728. ; rdi <= Pointer to result
  1729. ; Modified Registers:
  1730. ; r8, r9, r10, r11, rax, rcx, rdx; some paths also rewrite the operands in place
  1731. ;;;;;;;;;;;;;;;;;;;;;;
  1732. Fr_band:
  1733. mov r8, [rsi] ; r8 = type/value word of element 1
  1734. mov r9, [rdx] ; r9 = type/value word of element 2
  1735. bt r8, 63 ; bit 63 set means the first operand is long
  1736. jc and_l1
  1737. bt r9, 63 ; bit 63 set means the second operand is long
  1738. jc and_s1l2
  1739. and_s1s2: ; Both operands are short
  1740. cmp r8d, 0
  1741. js tmp13 ; a negative short operand takes the long path
  1742. cmp r9d, 0
  1743. js tmp13
  1744. xor rdx, rdx ; both ops are positive so do the op and return
  1745. mov edx, r8d
  1746. and edx, r9d
  1747. mov [rdi], rdx ; not necessary to adjust so just save and return
  1748. ret
  1749. tmp13: ; at least one operand is negative: widen both to long form, then AND limb by limb
  1750. mov r11b, 0x80
  1751. shl r11, 56 ; r11 = 0x80 << 56 (bit 63 only); stale upper bits of r11 are shifted out
  1752. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1753. push rdi
  1754. push rsi
  1755. mov rdi, rdx ; rawCopyS2L writes through rdi, so operand 2 is converted in place
  1756. movsx rsi, r9d
  1757. call rawCopyS2L
  1758. mov rdx, rdi
  1759. pop rsi
  1760. pop rdi
  1761. push rdi
  1762. push rdx
  1763. mov rdi, rsi ; likewise convert operand 1 in place
  1764. movsx rsi, r8d
  1765. call rawCopyS2L
  1766. mov rsi, rdi
  1767. pop rdx
  1768. pop rdi
  1769. mov rax, [rsi + 8] ; long data starts at offset 8, after the type word
  1770. and rax, [rdx + 8]
  1771. mov [rdi + 8 ], rax
  1772. mov rax, [rsi + 16]
  1773. and rax, [rdx + 16]
  1774. mov [rdi + 16 ], rax
  1775. mov rax, [rsi + 24]
  1776. and rax, [rdx + 24]
  1777. mov [rdi + 24 ], rax
  1778. mov rax, [rsi + 32]
  1779. and rax, [rdx + 32]
  1780. and rax, [lboMask] ; top limb is additionally masked with lboMask (defined elsewhere)
  1781. mov [rdi + 32 ], rax
  1782. ret
  1783. and_l1: ; First operand is long
  1784. bt r9, 63 ; bit 63 set means the second operand is long too
  1785. jc and_l1l2
  1786. and_l1s2: ; long first operand, short second operand
  1787. bt r8, 62 ; check if montgomery first
  1788. jc and_l1ms2
  1789. and_l1ns2: ; operand 1 long normal, operand 2 short
  1790. mov r11b, 0x80
  1791. shl r11, 56
  1792. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1793. cmp r9d, 0
  1794. js tmp14 ; negative short operand must be widened first
  1795. movsx rax, r9d ; non-negative short: only the lowest limb can be non-zero
  1796. and rax, [rsi +8]
  1797. mov [rdi+8], rax
  1798. xor rax, rax
  1799. and rax, [rsi + 16]; upper limbs of the short operand are 0, so this stores 0
  1800. mov [rdi + 16 ], rax;
  1801. xor rax, rax
  1802. and rax, [rsi + 24];
  1803. mov [rdi + 24 ], rax;
  1804. xor rax, rax
  1805. and rax, [rsi + 32];
  1806. and rax, [lboMask] ;
  1807. mov [rdi + 32 ], rax;
  1808. ret
  1809. tmp14: ; operand 2 is a negative short: convert it to long form in place, then AND limb by limb
  1810. push rdi
  1811. push rsi
  1812. mov rdi, rdx
  1813. movsx rsi, r9d
  1814. call rawCopyS2L
  1815. mov rdx, rdi
  1816. pop rsi
  1817. pop rdi
  1818. mov r11b, 0x80
  1819. shl r11, 56
  1820. mov [rdi], r11 ; result type word is written again after the conversion
  1821. mov rax, [rsi + 8]
  1822. and rax, [rdx + 8]
  1823. mov [rdi + 8 ], rax
  1824. mov rax, [rsi + 16]
  1825. and rax, [rdx + 16]
  1826. mov [rdi + 16 ], rax
  1827. mov rax, [rsi + 24]
  1828. and rax, [rdx + 24]
  1829. mov [rdi + 24 ], rax
  1830. mov rax, [rsi + 32]
  1831. and rax, [rdx + 32]
  1832. and rax, [lboMask] ; top limb is additionally masked with lboMask
  1833. mov [rdi + 32 ], rax
  1834. ret
  1835. and_l1ms2: ; operand 1 long Montgomery, operand 2 short
  1836. mov r11b, 0x80
  1837. shl r11, 56
  1838. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rsi this replaces the operand's type word first - confirm callers
  1839. push r9 ; r9 is used in montgomery so we need to save it
  1840. push rdi
  1841. mov rdi, rsi ; Fr_toNormal takes its operand in rdi
  1842. mov rsi, rdx ; park the operand 2 pointer in rsi across the call
  1843. call Fr_toNormal ; presumably converts [rdi] out of Montgomery form in place - routine not in view, confirm
  1844. mov rdx, rsi
  1845. mov rsi, rdi
  1846. pop rdi
  1847. pop r9
  1848. cmp r9d, 0
  1849. js tmp15 ; negative short operand must be widened first
  1850. movsx rax, r9d ; non-negative short: only the lowest limb can be non-zero
  1851. and rax, [rsi +8]
  1852. mov [rdi+8], rax
  1853. xor rax, rax
  1854. and rax, [rsi + 16]; upper limbs of the short operand are 0, so this stores 0
  1855. mov [rdi + 16 ], rax;
  1856. xor rax, rax
  1857. and rax, [rsi + 24];
  1858. mov [rdi + 24 ], rax;
  1859. xor rax, rax
  1860. and rax, [rsi + 32];
  1861. and rax, [lboMask] ;
  1862. mov [rdi + 32 ], rax;
  1863. ret
  1864. tmp15: ; operand 2 is a negative short: convert it to long form in place, then AND limb by limb
  1865. push rdi
  1866. push rsi
  1867. mov rdi, rdx
  1868. movsx rsi, r9d
  1869. call rawCopyS2L
  1870. mov rdx, rdi
  1871. pop rsi
  1872. pop rdi
  1873. mov r11b, 0x80
  1874. shl r11, 56
  1875. mov [rdi], r11 ; result type word is written again after the conversion
  1876. mov rax, [rsi + 8]
  1877. and rax, [rdx + 8]
  1878. mov [rdi + 8 ], rax
  1879. mov rax, [rsi + 16]
  1880. and rax, [rdx + 16]
  1881. mov [rdi + 16 ], rax
  1882. mov rax, [rsi + 24]
  1883. and rax, [rdx + 24]
  1884. mov [rdi + 24 ], rax
  1885. mov rax, [rsi + 32]
  1886. and rax, [rdx + 32]
  1887. and rax, [lboMask] ; top limb is additionally masked with lboMask
  1888. mov [rdi + 32 ], rax
  1889. ret
  1890. and_s1l2: ; short first operand, long second operand
  1891. bt r9, 62 ; check if montgomery second
  1892. jc and_s1l2m
  1893. and_s1l2n: ; operand 1 short, operand 2 long normal
  1894. mov r11b, 0x80
  1895. shl r11, 56
  1896. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  1897. cmp r8d, 0
  1898. js tmp16 ; negative short operand must be widened first
  1899. movsx rax, r8d ; non-negative short: only the lowest limb can be non-zero
  1900. and rax, [rdx +8]
  1901. mov [rdi+8], rax
  1902. xor rax, rax
  1903. and rax, [rdx + 16] ; upper limbs of the short operand are 0, so this stores 0
  1904. mov [rdi + 16 ], rax
  1905. xor rax, rax
  1906. and rax, [rdx + 24]
  1907. mov [rdi + 24 ], rax
  1908. xor rax, rax
  1909. and rax, [rdx + 32]
  1910. and rax, [lboMask]
  1911. mov [rdi + 32 ], rax
  1912. ret
  1913. tmp16: ; operand 1 is a negative short: convert it to long form in place, then AND limb by limb
  1914. push rdi
  1915. push rdx
  1916. mov rdi, rsi
  1917. movsx rsi, r8d
  1918. call rawCopyS2L
  1919. mov rsi, rdi
  1920. pop rdx
  1921. pop rdi
  1922. mov r11b, 0x80
  1923. shl r11, 56
  1924. mov [rdi], r11 ; result type word is written again after the conversion
  1925. mov rax, [rsi + 8]
  1926. and rax, [rdx + 8]
  1927. mov [rdi + 8 ], rax
  1928. mov rax, [rsi + 16]
  1929. and rax, [rdx + 16]
  1930. mov [rdi + 16 ], rax
  1931. mov rax, [rsi + 24]
  1932. and rax, [rdx + 24]
  1933. mov [rdi + 24 ], rax
  1934. mov rax, [rsi + 32]
  1935. and rax, [rdx + 32]
  1936. and rax, [lboMask] ; top limb is additionally masked with lboMask
  1937. mov [rdi + 32 ], rax
  1938. ret
  1939. and_s1l2m: ; operand 1 short, operand 2 long Montgomery
  1940. mov r11b, 0x80
  1941. shl r11, 56
  1942. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rdx this replaces the operand's type word first - confirm callers
  1943. push r8 ; r8 is used in montgomery so we need to save it
  1944. push rdi
  1945. mov rdi, rdx ; Fr_toNormal takes its operand in rdi
  1946. call Fr_toNormal ; presumably converts [rdi] in place and leaves rsi intact (rsi is used below) - confirm
  1947. mov rdx, rdi
  1948. pop rdi
  1949. pop r8
  1950. cmp r8d, 0
  1951. js tmp17 ; negative short operand must be widened first
  1952. movsx rax, r8d ; non-negative short: only the lowest limb can be non-zero
  1953. and rax, [rdx +8]
  1954. mov [rdi+8], rax
  1955. xor rax, rax
  1956. and rax, [rdx + 16] ; upper limbs of the short operand are 0, so this stores 0
  1957. mov [rdi + 16 ], rax
  1958. xor rax, rax
  1959. and rax, [rdx + 24]
  1960. mov [rdi + 24 ], rax
  1961. xor rax, rax
  1962. and rax, [rdx + 32]
  1963. and rax, [lboMask]
  1964. mov [rdi + 32 ], rax
  1965. ret
  1966. tmp17: ; operand 1 is a negative short: convert it to long form in place, then AND limb by limb
  1967. push rdi
  1968. push rdx
  1969. mov rdi, rsi
  1970. movsx rsi, r8d
  1971. call rawCopyS2L
  1972. mov rsi, rdi
  1973. pop rdx
  1974. pop rdi
  1975. mov r11b, 0x80
  1976. shl r11, 56
  1977. mov [rdi], r11 ; result type word is written again after the conversion
  1978. mov rax, [rsi + 8]
  1979. and rax, [rdx + 8]
  1980. mov [rdi + 8 ], rax
  1981. mov rax, [rsi + 16]
  1982. and rax, [rdx + 16]
  1983. mov [rdi + 16 ], rax
  1984. mov rax, [rsi + 24]
  1985. and rax, [rdx + 24]
  1986. mov [rdi + 24 ], rax
  1987. mov rax, [rsi + 32]
  1988. and rax, [rdx + 32]
  1989. and rax, [lboMask] ; top limb is additionally masked with lboMask
  1990. mov [rdi + 32 ], rax
  1991. ret
  1992. and_l1l2: ; both operands long
  1993. bt r8, 62 ; check if montgomery first
  1994. jc and_l1ml2
  1995. bt r9, 62 ; check if montgomery second
  1996. jc and_l1nl2m
  1997. and_l1nl2n: ; both long normal: AND limb by limb, no conversion needed
  1998. mov r11b, 0x80
  1999. shl r11, 56
  2000. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  2001. mov rax, [rsi + 8]
  2002. and rax, [rdx + 8]
  2003. mov [rdi + 8 ], rax
  2004. mov rax, [rsi + 16]
  2005. and rax, [rdx + 16]
  2006. mov [rdi + 16 ], rax
  2007. mov rax, [rsi + 24]
  2008. and rax, [rdx + 24]
  2009. mov [rdi + 24 ], rax
  2010. mov rax, [rsi + 32]
  2011. and rax, [rdx + 32]
  2012. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2013. mov [rdi + 32 ], rax
  2014. ret
  2015. and_l1nl2m: ; operand 1 long normal, operand 2 long Montgomery
  2016. mov r11b, 0x80
  2017. shl r11, 56
  2018. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rdx this replaces the operand's type word first - confirm callers
  2019. push rdi
  2020. mov rdi, rdx ; Fr_toNormal takes its operand in rdi
  2021. call Fr_toNormal ; presumably converts [rdi] in place and leaves rsi intact (rsi is used below) - confirm
  2022. mov rdx, rdi
  2023. pop rdi
  2024. mov rax, [rsi + 8]
  2025. and rax, [rdx + 8]
  2026. mov [rdi + 8 ], rax
  2027. mov rax, [rsi + 16]
  2028. and rax, [rdx + 16]
  2029. mov [rdi + 16 ], rax
  2030. mov rax, [rsi + 24]
  2031. and rax, [rdx + 24]
  2032. mov [rdi + 24 ], rax
  2033. mov rax, [rsi + 32]
  2034. and rax, [rdx + 32]
  2035. and rax, [lboMask]
  2036. mov [rdi + 32 ], rax
  2037. ret
  2038. and_l1ml2: ; operand 1 long Montgomery
  2039. bt r9, 62 ; check if montgomery second
  2040. jc and_l1ml2m
  2041. and_l1ml2n: ; operand 1 long Montgomery, operand 2 long normal
  2042. mov r11b, 0x80
  2043. shl r11, 56
  2044. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rsi this replaces the operand's type word first - confirm callers
  2045. push rdi
  2046. mov rdi, rsi ; Fr_toNormal takes its operand in rdi
  2047. mov rsi, rdx ; park the operand 2 pointer in rsi across the call
  2048. call Fr_toNormal
  2049. mov rdx, rsi
  2050. mov rsi, rdi
  2051. pop rdi
  2052. mov rax, [rsi + 8]
  2053. and rax, [rdx + 8]
  2054. mov [rdi + 8 ], rax
  2055. mov rax, [rsi + 16]
  2056. and rax, [rdx + 16]
  2057. mov [rdi + 16 ], rax
  2058. mov rax, [rsi + 24]
  2059. and rax, [rdx + 24]
  2060. mov [rdi + 24 ], rax
  2061. mov rax, [rsi + 32]
  2062. and rax, [rdx + 32]
  2063. and rax, [lboMask]
  2064. mov [rdi + 32 ], rax
  2065. ret
  2066. and_l1ml2m: ; both operands long Montgomery: convert each in turn, then AND limb by limb
  2067. mov r11b, 0x80
  2068. shl r11, 56
  2069. mov [rdi], r11 ; NOTE(review): written before either Fr_toNormal call; aliasing rdi with an operand replaces its type word first - confirm callers
  2070. push rdi
  2071. mov rdi, rsi ; first conversion: operand 1
  2072. mov rsi, rdx ; park the operand 2 pointer in rsi across the call
  2073. call Fr_toNormal
  2074. mov rdx, rsi
  2075. mov rsi, rdi
  2076. pop rdi
  2077. push rdi
  2078. mov rdi, rdx ; second conversion: operand 2
  2079. call Fr_toNormal
  2080. mov rdx, rdi
  2081. pop rdi
  2082. mov rax, [rsi + 8]
  2083. and rax, [rdx + 8]
  2084. mov [rdi + 8 ], rax
  2085. mov rax, [rsi + 16]
  2086. and rax, [rdx + 16]
  2087. mov [rdi + 16 ], rax
  2088. mov rax, [rsi + 24]
  2089. and rax, [rdx + 24]
  2090. mov [rdi + 24 ], rax
  2091. mov rax, [rsi + 32]
  2092. and rax, [rdx + 32]
  2093. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2094. mov [rdi + 32 ], rax
  2095. ret
  2096. ;;;;;;;;;;;;;;;;;;;;;;
  2097. ; or
  2098. ;;;;;;;;;;;;;;;;;;;;;;
  2099. ; Bitwise OR of two elements of any kind
  2100. ; Params:
  2101. ; rsi <= Pointer to element 1
  2102. ; rdx <= Pointer to element 2
  2103. ; rdi <= Pointer to result
  2104. ; Modified Registers:
  2105. ; r8, r9, r10, r11, rax, rcx, rdx; some paths also rewrite the operands in place
  2106. ;;;;;;;;;;;;;;;;;;;;;;
  2107. Fr_bor:
  2108. mov r8, [rsi] ; r8 = type/value word of element 1
  2109. mov r9, [rdx] ; r9 = type/value word of element 2
  2110. bt r8, 63 ; bit 63 set means the first operand is long
  2111. jc or_l1
  2112. bt r9, 63 ; bit 63 set means the second operand is long
  2113. jc or_s1l2
  2114. or_s1s2: ; Both operands are short
  2115. cmp r8d, 0
  2116. js tmp18 ; a negative short operand takes the long path
  2117. cmp r9d, 0
  2118. js tmp18
  2119. xor rdx, rdx ; both ops are positive so do the op and return
  2120. mov edx, r8d
  2121. or edx, r9d
  2122. mov [rdi], rdx ; not necessary to adjust so just save and return
  2123. ret
  2124. tmp18: ; at least one operand is negative: widen both to long form, then OR limb by limb
  2125. mov r11b, 0x80
  2126. shl r11, 56 ; r11 = 0x80 << 56 (bit 63 only); stale upper bits of r11 are shifted out
  2127. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  2128. push rdi
  2129. push rsi
  2130. mov rdi, rdx ; rawCopyS2L writes through rdi, so operand 2 is converted in place
  2131. movsx rsi, r9d
  2132. call rawCopyS2L
  2133. mov rdx, rdi
  2134. pop rsi
  2135. pop rdi
  2136. push rdi
  2137. push rdx
  2138. mov rdi, rsi ; likewise convert operand 1 in place
  2139. movsx rsi, r8d
  2140. call rawCopyS2L
  2141. mov rsi, rdi
  2142. pop rdx
  2143. pop rdi
  2144. mov rax, [rsi + 8] ; long data starts at offset 8, after the type word
  2145. or rax, [rdx + 8]
  2146. mov [rdi + 8 ], rax
  2147. mov rax, [rsi + 16]
  2148. or rax, [rdx + 16]
  2149. mov [rdi + 16 ], rax
  2150. mov rax, [rsi + 24]
  2151. or rax, [rdx + 24]
  2152. mov [rdi + 24 ], rax
  2153. mov rax, [rsi + 32]
  2154. or rax, [rdx + 32]
  2155. and rax, [lboMask] ; NOTE(review): the OR result is only masked, never compared with q; it may need reducing - confirm
  2156. mov [rdi + 32 ], rax
  2157. ret
  2158. or_l1: ; First operand is long
  2159. bt r9, 63 ; bit 63 set means the second operand is long too
  2160. jc or_l1l2
  2161. or_l1s2: ; long first operand, short second operand
  2162. bt r8, 62 ; check if montgomery first
  2163. jc or_l1ms2
  2164. or_l1ns2: ; operand 1 long normal, operand 2 short
  2165. mov r11b, 0x80
  2166. shl r11, 56
  2167. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  2168. cmp r9d, 0
  2169. js tmp19 ; negative short operand must be widened first
  2170. movsx rax, r9d ; non-negative short: only the lowest limb is affected
  2171. or rax, [rsi +8]
  2172. mov [rdi+8], rax
  2173. xor rax, rax
  2174. or rax, [rsi + 16]; OR with 0, so the long operand's limb is copied unchanged
  2175. mov [rdi + 16 ], rax;
  2176. xor rax, rax
  2177. or rax, [rsi + 24];
  2178. mov [rdi + 24 ], rax;
  2179. xor rax, rax
  2180. or rax, [rsi + 32];
  2181. and rax, [lboMask] ;
  2182. mov [rdi + 32 ], rax;
  2183. ret
  2184. tmp19: ; operand 2 is a negative short: convert it to long form in place, then OR limb by limb
  2185. push rdi
  2186. push rsi
  2187. mov rdi, rdx
  2188. movsx rsi, r9d
  2189. call rawCopyS2L
  2190. mov rdx, rdi
  2191. pop rsi
  2192. pop rdi
  2193. mov r11b, 0x80
  2194. shl r11, 56
  2195. mov [rdi], r11 ; result type word is written again after the conversion
  2196. mov rax, [rsi + 8]
  2197. or rax, [rdx + 8]
  2198. mov [rdi + 8 ], rax
  2199. mov rax, [rsi + 16]
  2200. or rax, [rdx + 16]
  2201. mov [rdi + 16 ], rax
  2202. mov rax, [rsi + 24]
  2203. or rax, [rdx + 24]
  2204. mov [rdi + 24 ], rax
  2205. mov rax, [rsi + 32]
  2206. or rax, [rdx + 32]
  2207. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2208. mov [rdi + 32 ], rax
  2209. ret
  2210. or_l1ms2: ; operand 1 long Montgomery, operand 2 short
  2211. mov r11b, 0x80
  2212. shl r11, 56
  2213. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rsi this replaces the operand's type word first - confirm callers
  2214. push r9 ; r9 is used in montgomery so we need to save it
  2215. push rdi
  2216. mov rdi, rsi ; Fr_toNormal takes its operand in rdi
  2217. mov rsi, rdx ; park the operand 2 pointer in rsi across the call
  2218. call Fr_toNormal ; presumably converts [rdi] out of Montgomery form in place - routine not in view, confirm
  2219. mov rdx, rsi
  2220. mov rsi, rdi
  2221. pop rdi
  2222. pop r9
  2223. cmp r9d, 0
  2224. js tmp20 ; negative short operand must be widened first
  2225. movsx rax, r9d ; non-negative short: only the lowest limb is affected
  2226. or rax, [rsi +8]
  2227. mov [rdi+8], rax
  2228. xor rax, rax
  2229. or rax, [rsi + 16]; OR with 0, so the long operand's limb is copied unchanged
  2230. mov [rdi + 16 ], rax;
  2231. xor rax, rax
  2232. or rax, [rsi + 24];
  2233. mov [rdi + 24 ], rax;
  2234. xor rax, rax
  2235. or rax, [rsi + 32];
  2236. and rax, [lboMask] ;
  2237. mov [rdi + 32 ], rax;
  2238. ret
  2239. tmp20: ; operand 2 is a negative short: convert it to long form in place, then OR limb by limb
  2240. push rdi
  2241. push rsi
  2242. mov rdi, rdx
  2243. movsx rsi, r9d
  2244. call rawCopyS2L
  2245. mov rdx, rdi
  2246. pop rsi
  2247. pop rdi
  2248. mov r11b, 0x80
  2249. shl r11, 56
  2250. mov [rdi], r11 ; result type word is written again after the conversion
  2251. mov rax, [rsi + 8]
  2252. or rax, [rdx + 8]
  2253. mov [rdi + 8 ], rax
  2254. mov rax, [rsi + 16]
  2255. or rax, [rdx + 16]
  2256. mov [rdi + 16 ], rax
  2257. mov rax, [rsi + 24]
  2258. or rax, [rdx + 24]
  2259. mov [rdi + 24 ], rax
  2260. mov rax, [rsi + 32]
  2261. or rax, [rdx + 32]
  2262. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2263. mov [rdi + 32 ], rax
  2264. ret
  2265. or_s1l2: ; short first operand, long second operand
  2266. bt r9, 62 ; check if montgomery second
  2267. jc or_s1l2m
  2268. or_s1l2n: ; operand 1 short, operand 2 long normal
  2269. mov r11b, 0x80
  2270. shl r11, 56
  2271. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  2272. cmp r8d, 0
  2273. js tmp21 ; negative short operand must be widened first
  2274. movsx rax, r8d ; non-negative short: only the lowest limb is affected
  2275. or rax, [rdx +8]
  2276. mov [rdi+8], rax
  2277. xor rax, rax
  2278. or rax, [rdx + 16] ; OR with 0, so the long operand's limb is copied unchanged
  2279. mov [rdi + 16 ], rax
  2280. xor rax, rax
  2281. or rax, [rdx + 24]
  2282. mov [rdi + 24 ], rax
  2283. xor rax, rax
  2284. or rax, [rdx + 32]
  2285. and rax, [lboMask]
  2286. mov [rdi + 32 ], rax
  2287. ret
  2288. tmp21: ; operand 1 is a negative short: convert it to long form in place, then OR limb by limb
  2289. push rdi
  2290. push rdx
  2291. mov rdi, rsi
  2292. movsx rsi, r8d
  2293. call rawCopyS2L
  2294. mov rsi, rdi
  2295. pop rdx
  2296. pop rdi
  2297. mov r11b, 0x80
  2298. shl r11, 56
  2299. mov [rdi], r11 ; result type word is written again after the conversion
  2300. mov rax, [rsi + 8]
  2301. or rax, [rdx + 8]
  2302. mov [rdi + 8 ], rax
  2303. mov rax, [rsi + 16]
  2304. or rax, [rdx + 16]
  2305. mov [rdi + 16 ], rax
  2306. mov rax, [rsi + 24]
  2307. or rax, [rdx + 24]
  2308. mov [rdi + 24 ], rax
  2309. mov rax, [rsi + 32]
  2310. or rax, [rdx + 32]
  2311. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2312. mov [rdi + 32 ], rax
  2313. ret
  2314. or_s1l2m: ; operand 1 short, operand 2 long Montgomery
  2315. mov r11b, 0x80
  2316. shl r11, 56
  2317. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rdx this replaces the operand's type word first - confirm callers
  2318. push r8 ; r8 is used in montgomery so we need to save it
  2319. push rdi
  2320. mov rdi, rdx ; Fr_toNormal takes its operand in rdi
  2321. call Fr_toNormal ; presumably converts [rdi] in place and leaves rsi intact (rsi is used below) - confirm
  2322. mov rdx, rdi
  2323. pop rdi
  2324. pop r8
  2325. cmp r8d, 0
  2326. js tmp22 ; negative short operand must be widened first
  2327. movsx rax, r8d ; non-negative short: only the lowest limb is affected
  2328. or rax, [rdx +8]
  2329. mov [rdi+8], rax
  2330. xor rax, rax
  2331. or rax, [rdx + 16] ; OR with 0, so the long operand's limb is copied unchanged
  2332. mov [rdi + 16 ], rax
  2333. xor rax, rax
  2334. or rax, [rdx + 24]
  2335. mov [rdi + 24 ], rax
  2336. xor rax, rax
  2337. or rax, [rdx + 32]
  2338. and rax, [lboMask]
  2339. mov [rdi + 32 ], rax
  2340. ret
  2341. tmp22: ; operand 1 is a negative short: convert it to long form in place, then OR limb by limb
  2342. push rdi
  2343. push rdx
  2344. mov rdi, rsi
  2345. movsx rsi, r8d
  2346. call rawCopyS2L
  2347. mov rsi, rdi
  2348. pop rdx
  2349. pop rdi
  2350. mov r11b, 0x80
  2351. shl r11, 56
  2352. mov [rdi], r11 ; result type word is written again after the conversion
  2353. mov rax, [rsi + 8]
  2354. or rax, [rdx + 8]
  2355. mov [rdi + 8 ], rax
  2356. mov rax, [rsi + 16]
  2357. or rax, [rdx + 16]
  2358. mov [rdi + 16 ], rax
  2359. mov rax, [rsi + 24]
  2360. or rax, [rdx + 24]
  2361. mov [rdi + 24 ], rax
  2362. mov rax, [rsi + 32]
  2363. or rax, [rdx + 32]
  2364. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2365. mov [rdi + 32 ], rax
  2366. ret
  2367. or_l1l2: ; both operands long
  2368. bt r8, 62 ; check if montgomery first
  2369. jc or_l1ml2
  2370. bt r9, 62 ; check if montgomery second
  2371. jc or_l1nl2m
  2372. or_l1nl2n: ; both long normal: OR limb by limb, no conversion needed
  2373. mov r11b, 0x80
  2374. shl r11, 56
  2375. mov [rdi], r11 ; result type word: long, Montgomery bit clear
  2376. mov rax, [rsi + 8]
  2377. or rax, [rdx + 8]
  2378. mov [rdi + 8 ], rax
  2379. mov rax, [rsi + 16]
  2380. or rax, [rdx + 16]
  2381. mov [rdi + 16 ], rax
  2382. mov rax, [rsi + 24]
  2383. or rax, [rdx + 24]
  2384. mov [rdi + 24 ], rax
  2385. mov rax, [rsi + 32]
  2386. or rax, [rdx + 32]
  2387. and rax, [lboMask] ; NOTE(review): the OR result is only masked, never compared with q; it may need reducing - confirm
  2388. mov [rdi + 32 ], rax
  2389. ret
  2390. or_l1nl2m: ; operand 1 long normal, operand 2 long Montgomery
  2391. mov r11b, 0x80
  2392. shl r11, 56
  2393. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rdx this replaces the operand's type word first - confirm callers
  2394. push rdi
  2395. mov rdi, rdx ; Fr_toNormal takes its operand in rdi
  2396. call Fr_toNormal ; presumably converts [rdi] in place and leaves rsi intact (rsi is used below) - confirm
  2397. mov rdx, rdi
  2398. pop rdi
  2399. mov rax, [rsi + 8]
  2400. or rax, [rdx + 8]
  2401. mov [rdi + 8 ], rax
  2402. mov rax, [rsi + 16]
  2403. or rax, [rdx + 16]
  2404. mov [rdi + 16 ], rax
  2405. mov rax, [rsi + 24]
  2406. or rax, [rdx + 24]
  2407. mov [rdi + 24 ], rax
  2408. mov rax, [rsi + 32]
  2409. or rax, [rdx + 32]
  2410. and rax, [lboMask]
  2411. mov [rdi + 32 ], rax
  2412. ret
  2413. or_l1ml2: ; operand 1 long Montgomery
  2414. bt r9, 62 ; check if montgomery second
  2415. jc or_l1ml2m
  2416. or_l1ml2n: ; operand 1 long Montgomery, operand 2 long normal
  2417. mov r11b, 0x80
  2418. shl r11, 56
  2419. mov [rdi], r11 ; NOTE(review): written before Fr_toNormal runs; if rdi aliases rsi this replaces the operand's type word first - confirm callers
  2420. push rdi
  2421. mov rdi, rsi ; Fr_toNormal takes its operand in rdi
  2422. mov rsi, rdx ; park the operand 2 pointer in rsi across the call
  2423. call Fr_toNormal
  2424. mov rdx, rsi
  2425. mov rsi, rdi
  2426. pop rdi
  2427. mov rax, [rsi + 8]
  2428. or rax, [rdx + 8]
  2429. mov [rdi + 8 ], rax
  2430. mov rax, [rsi + 16]
  2431. or rax, [rdx + 16]
  2432. mov [rdi + 16 ], rax
  2433. mov rax, [rsi + 24]
  2434. or rax, [rdx + 24]
  2435. mov [rdi + 24 ], rax
  2436. mov rax, [rsi + 32]
  2437. or rax, [rdx + 32]
  2438. and rax, [lboMask]
  2439. mov [rdi + 32 ], rax
  2440. ret
  2441. or_l1ml2m: ; both operands long Montgomery: convert each in turn, then OR limb by limb
  2442. mov r11b, 0x80
  2443. shl r11, 56
  2444. mov [rdi], r11 ; NOTE(review): written before either Fr_toNormal call; aliasing rdi with an operand replaces its type word first - confirm callers
  2445. push rdi
  2446. mov rdi, rsi ; first conversion: operand 1
  2447. mov rsi, rdx ; park the operand 2 pointer in rsi across the call
  2448. call Fr_toNormal
  2449. mov rdx, rsi
  2450. mov rsi, rdi
  2451. pop rdi
  2452. push rdi
  2453. mov rdi, rdx ; second conversion: operand 2
  2454. call Fr_toNormal
  2455. mov rdx, rdi
  2456. pop rdi
  2457. mov rax, [rsi + 8]
  2458. or rax, [rdx + 8]
  2459. mov [rdi + 8 ], rax
  2460. mov rax, [rsi + 16]
  2461. or rax, [rdx + 16]
  2462. mov [rdi + 16 ], rax
  2463. mov rax, [rsi + 24]
  2464. or rax, [rdx + 24]
  2465. mov [rdi + 24 ], rax
  2466. mov rax, [rsi + 32]
  2467. or rax, [rdx + 32]
  2468. and rax, [lboMask] ; top limb is additionally masked with lboMask
  2469. mov [rdi + 32 ], rax
  2470. ret
  2471. ;;;;;;;;;;;;;;;;;;;;;;
  2472. ; xor
  2473. ;;;;;;;;;;;;;;;;;;;;;;
  2474. ; Adds two elements of any kind
  2475. ; Params:
  2476. ; rsi <= Pointer to element 1
  2477. ; rdx <= Pointer to element 2
  2478. ; rdi <= Pointer to result
  2479. ; Modified Registers:
  2480. ; r8, r9, 10, r11, rax, rcx
  2481. ;;;;;;;;;;;;;;;;;;;;;;
  2482. Fr_bxor:
  2483. mov r8, [rsi]
  2484. mov r9, [rdx]
  2485. bt r8, 63 ; Check if is short first operand
  2486. jc xor_l1
  2487. bt r9, 63 ; Check if is short second operand
  2488. jc xor_s1l2
  2489. xor_s1s2:
  2490. cmp r8d, 0
  2491. js tmp23
  2492. cmp r9d, 0
  2493. js tmp23
  2494. xor rdx, rdx ; both ops are positive so do the op and return
  2495. mov edx, r8d
  2496. xor edx, r9d
  2497. mov [rdi], rdx ; not necessary to adjust so just save and return
  2498. ret
  2499. tmp23:
  2500. mov r11b, 0x80
  2501. shl r11, 56
  2502. mov [rdi], r11
  2503. push rdi
  2504. push rsi
  2505. mov rdi, rdx
  2506. movsx rsi, r9d
  2507. call rawCopyS2L
  2508. mov rdx, rdi
  2509. pop rsi
  2510. pop rdi
  2511. push rdi
  2512. push rdx
  2513. mov rdi, rsi
  2514. movsx rsi, r8d
  2515. call rawCopyS2L
  2516. mov rsi, rdi
  2517. pop rdx
  2518. pop rdi
  2519. mov rax, [rsi + 8]
  2520. xor rax, [rdx + 8]
  2521. mov [rdi + 8 ], rax
  2522. mov rax, [rsi + 16]
  2523. xor rax, [rdx + 16]
  2524. mov [rdi + 16 ], rax
  2525. mov rax, [rsi + 24]
  2526. xor rax, [rdx + 24]
  2527. mov [rdi + 24 ], rax
  2528. mov rax, [rsi + 32]
  2529. xor rax, [rdx + 32]
  2530. and rax, [lboMask]
  2531. mov [rdi + 32 ], rax
  2532. ret
  2533. xor_l1:
  2534. bt r9, 63 ; Check if is short second operand
  2535. jc xor_l1l2
  2536. xor_l1s2:
  2537. bt r8, 62 ; check if montgomery first
  2538. jc xor_l1ms2
  2539. xor_l1ns2:
  2540. mov r11b, 0x80
  2541. shl r11, 56
  2542. mov [rdi], r11
  2543. cmp r9d, 0
  2544. js tmp24
  2545. movsx rax, r9d
  2546. xor rax, [rsi +8]
  2547. mov [rdi+8], rax
  2548. xor rax, rax
  2549. xor rax, [rsi + 16];
  2550. mov [rdi + 16 ], rax;
  2551. xor rax, rax
  2552. xor rax, [rsi + 24];
  2553. mov [rdi + 24 ], rax;
  2554. xor rax, rax
  2555. xor rax, [rsi + 32];
  2556. and rax, [lboMask] ;
  2557. mov [rdi + 32 ], rax;
  2558. ret
  2559. tmp24:
  2560. push rdi
  2561. push rsi
  2562. mov rdi, rdx
  2563. movsx rsi, r9d
  2564. call rawCopyS2L
  2565. mov rdx, rdi
  2566. pop rsi
  2567. pop rdi
  2568. mov r11b, 0x80
  2569. shl r11, 56
  2570. mov [rdi], r11
  2571. mov rax, [rsi + 8]
  2572. xor rax, [rdx + 8]
  2573. mov [rdi + 8 ], rax
  2574. mov rax, [rsi + 16]
  2575. xor rax, [rdx + 16]
  2576. mov [rdi + 16 ], rax
  2577. mov rax, [rsi + 24]
  2578. xor rax, [rdx + 24]
  2579. mov [rdi + 24 ], rax
  2580. mov rax, [rsi + 32]
  2581. xor rax, [rdx + 32]
  2582. and rax, [lboMask]
  2583. mov [rdi + 32 ], rax
  2584. ret
  2585. xor_l1ms2:
  2586. mov r11b, 0x80
  2587. shl r11, 56
  2588. mov [rdi], r11
  2589. push r9 ; r9 is used in montgomery so we need to save it
  2590. push rdi
  2591. mov rdi, rsi
  2592. mov rsi, rdx
  2593. call Fr_toNormal
  2594. mov rdx, rsi
  2595. mov rsi, rdi
  2596. pop rdi
  2597. pop r9
  2598. cmp r9d, 0
  2599. js tmp25
  2600. movsx rax, r9d
  2601. xor rax, [rsi +8]
  2602. mov [rdi+8], rax
  2603. xor rax, rax
  2604. xor rax, [rsi + 16];
  2605. mov [rdi + 16 ], rax;
  2606. xor rax, rax
  2607. xor rax, [rsi + 24];
  2608. mov [rdi + 24 ], rax;
  2609. xor rax, rax
  2610. xor rax, [rsi + 32];
  2611. and rax, [lboMask] ;
  2612. mov [rdi + 32 ], rax;
  2613. ret
  2614. tmp25:
  2615. push rdi
  2616. push rsi
  2617. mov rdi, rdx
  2618. movsx rsi, r9d
  2619. call rawCopyS2L
  2620. mov rdx, rdi
  2621. pop rsi
  2622. pop rdi
  2623. mov r11b, 0x80
  2624. shl r11, 56
  2625. mov [rdi], r11
  2626. mov rax, [rsi + 8]
  2627. xor rax, [rdx + 8]
  2628. mov [rdi + 8 ], rax
  2629. mov rax, [rsi + 16]
  2630. xor rax, [rdx + 16]
  2631. mov [rdi + 16 ], rax
  2632. mov rax, [rsi + 24]
  2633. xor rax, [rdx + 24]
  2634. mov [rdi + 24 ], rax
  2635. mov rax, [rsi + 32]
  2636. xor rax, [rdx + 32]
  2637. and rax, [lboMask]
  2638. mov [rdi + 32 ], rax
  2639. ret
  2640. xor_s1l2:
  2641. bt r9, 62 ; check if montgomery first
  2642. jc xor_s1l2m
  2643. xor_s1l2n:
  2644. mov r11b, 0x80
  2645. shl r11, 56
  2646. mov [rdi], r11
  2647. cmp r8d, 0
  2648. js tmp26
  2649. movsx rax, r8d
  2650. xor rax, [rdx +8]
  2651. mov [rdi+8], rax
  2652. xor rax, rax
  2653. xor rax, [rdx + 16]
  2654. mov [rdi + 16 ], rax
  2655. xor rax, rax
  2656. xor rax, [rdx + 24]
  2657. mov [rdi + 24 ], rax
  2658. xor rax, rax
  2659. xor rax, [rdx + 32]
  2660. and rax, [lboMask]
  2661. mov [rdi + 32 ], rax
  2662. ret
  2663. tmp26:
  2664. push rdi
  2665. push rdx
  2666. mov rdi, rsi
  2667. movsx rsi, r8d
  2668. call rawCopyS2L
  2669. mov rsi, rdi
  2670. pop rdx
  2671. pop rdi
  2672. mov r11b, 0x80
  2673. shl r11, 56
  2674. mov [rdi], r11
  2675. mov rax, [rsi + 8]
  2676. xor rax, [rdx + 8]
  2677. mov [rdi + 8 ], rax
  2678. mov rax, [rsi + 16]
  2679. xor rax, [rdx + 16]
  2680. mov [rdi + 16 ], rax
  2681. mov rax, [rsi + 24]
  2682. xor rax, [rdx + 24]
  2683. mov [rdi + 24 ], rax
  2684. mov rax, [rsi + 32]
  2685. xor rax, [rdx + 32]
  2686. and rax, [lboMask]
  2687. mov [rdi + 32 ], rax
  2688. ret
  2689. xor_s1l2m:
  2690. mov r11b, 0x80
  2691. shl r11, 56
  2692. mov [rdi], r11
  2693. push r8 ; r8 is used in montgomery so we need to save it
  2694. push rdi
  2695. mov rdi, rdx
  2696. call Fr_toNormal
  2697. mov rdx, rdi
  2698. pop rdi
  2699. pop r8
  2700. cmp r8d, 0
  2701. js tmp27
  2702. movsx rax, r8d
  2703. xor rax, [rdx +8]
  2704. mov [rdi+8], rax
  2705. xor rax, rax
  2706. xor rax, [rdx + 16]
  2707. mov [rdi + 16 ], rax
  2708. xor rax, rax
  2709. xor rax, [rdx + 24]
  2710. mov [rdi + 24 ], rax
  2711. xor rax, rax
  2712. xor rax, [rdx + 32]
  2713. and rax, [lboMask]
  2714. mov [rdi + 32 ], rax
  2715. ret
  2716. tmp27:
  2717. push rdi
  2718. push rdx
  2719. mov rdi, rsi
  2720. movsx rsi, r8d
  2721. call rawCopyS2L
  2722. mov rsi, rdi
  2723. pop rdx
  2724. pop rdi
  2725. mov r11b, 0x80
  2726. shl r11, 56
  2727. mov [rdi], r11
  2728. mov rax, [rsi + 8]
  2729. xor rax, [rdx + 8]
  2730. mov [rdi + 8 ], rax
  2731. mov rax, [rsi + 16]
  2732. xor rax, [rdx + 16]
  2733. mov [rdi + 16 ], rax
  2734. mov rax, [rsi + 24]
  2735. xor rax, [rdx + 24]
  2736. mov [rdi + 24 ], rax
  2737. mov rax, [rsi + 32]
  2738. xor rax, [rdx + 32]
  2739. and rax, [lboMask]
  2740. mov [rdi + 32 ], rax
  2741. ret
  2742. xor_l1l2:
  2743. bt r8, 62 ; check if montgomery first
  2744. jc xor_l1ml2
  2745. bt r9, 62 ; check if montgomery first
  2746. jc xor_l1nl2m
  2747. xor_l1nl2n:
  2748. mov r11b, 0x80
  2749. shl r11, 56
  2750. mov [rdi], r11
  2751. mov rax, [rsi + 8]
  2752. xor rax, [rdx + 8]
  2753. mov [rdi + 8 ], rax
  2754. mov rax, [rsi + 16]
  2755. xor rax, [rdx + 16]
  2756. mov [rdi + 16 ], rax
  2757. mov rax, [rsi + 24]
  2758. xor rax, [rdx + 24]
  2759. mov [rdi + 24 ], rax
  2760. mov rax, [rsi + 32]
  2761. xor rax, [rdx + 32]
  2762. and rax, [lboMask]
  2763. mov [rdi + 32 ], rax
  2764. ret
  2765. xor_l1nl2m:
  2766. mov r11b, 0x80
  2767. shl r11, 56
  2768. mov [rdi], r11
  2769. push rdi
  2770. mov rdi, rdx
  2771. call Fr_toNormal
  2772. mov rdx, rdi
  2773. pop rdi
  2774. mov rax, [rsi + 8]
  2775. xor rax, [rdx + 8]
  2776. mov [rdi + 8 ], rax
  2777. mov rax, [rsi + 16]
  2778. xor rax, [rdx + 16]
  2779. mov [rdi + 16 ], rax
  2780. mov rax, [rsi + 24]
  2781. xor rax, [rdx + 24]
  2782. mov [rdi + 24 ], rax
  2783. mov rax, [rsi + 32]
  2784. xor rax, [rdx + 32]
  2785. and rax, [lboMask]
  2786. mov [rdi + 32 ], rax
  2787. ret
  2788. xor_l1ml2:
  2789. bt r9, 62 ; check if montgomery first
  2790. jc xor_l1ml2m
  2791. xor_l1ml2n:
  2792. mov r11b, 0x80
  2793. shl r11, 56
  2794. mov [rdi], r11
  2795. push rdi
  2796. mov rdi, rsi
  2797. mov rsi, rdx
  2798. call Fr_toNormal
  2799. mov rdx, rsi
  2800. mov rsi, rdi
  2801. pop rdi
  2802. mov rax, [rsi + 8]
  2803. xor rax, [rdx + 8]
  2804. mov [rdi + 8 ], rax
  2805. mov rax, [rsi + 16]
  2806. xor rax, [rdx + 16]
  2807. mov [rdi + 16 ], rax
  2808. mov rax, [rsi + 24]
  2809. xor rax, [rdx + 24]
  2810. mov [rdi + 24 ], rax
  2811. mov rax, [rsi + 32]
  2812. xor rax, [rdx + 32]
  2813. and rax, [lboMask]
  2814. mov [rdi + 32 ], rax
  2815. ret
  2816. xor_l1ml2m:
  2817. mov r11b, 0x80
  2818. shl r11, 56
  2819. mov [rdi], r11
  2820. push rdi
  2821. mov rdi, rsi
  2822. mov rsi, rdx
  2823. call Fr_toNormal
  2824. mov rdx, rsi
  2825. mov rsi, rdi
  2826. pop rdi
  2827. push rdi
  2828. mov rdi, rdx
  2829. call Fr_toNormal
  2830. mov rdx, rdi
  2831. pop rdi
  2832. mov rax, [rsi + 8]
  2833. xor rax, [rdx + 8]
  2834. mov [rdi + 8 ], rax
  2835. mov rax, [rsi + 16]
  2836. xor rax, [rdx + 16]
  2837. mov [rdi + 16 ], rax
  2838. mov rax, [rsi + 24]
  2839. xor rax, [rdx + 24]
  2840. mov [rdi + 24 ], rax
  2841. mov rax, [rsi + 32]
  2842. xor rax, [rdx + 32]
  2843. and rax, [lboMask]
  2844. mov [rdi + 32 ], rax
  2845. ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; eq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Equality test for two elements of any kind.
  ; The difference e1 - e2 is produced by Fr_sub in a 40-byte stack
  ; temporary and then handed to Fr_toNormal; the elements are reported
  ; equal exactly when that difference is zero.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; the qword 1 (equal) or 0 (different)
  ;          is stored there
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_eq:
          sub     rsp, 40                 ; temporary: 8-byte header + four 8-byte limbs
          push    rdi                     ; park the caller's result pointer
          lea     rdi, [rsp + 8]          ; the temporary sits just above the pushed rdi
          call    Fr_sub                  ; temporary = e1 - e2
          call    Fr_toNormal             ; uses rdi as left by Fr_sub (expected: still the temporary)
          pop     rdi                     ; from here on the temporary is at [rsp]
          mov     rax, [rsp]              ; header qword of the difference
          bt      rax, 63                 ; bit 63 set => long representation
          jc      .long_diff

          test    eax, eax                ; short: the value is the low dword
          jz      .equal
  .different:
          mov     qword [rdi], 0
          add     rsp, 40
          ret

  .long_diff:
          ; Long difference: equal only when every limb is zero.
          cmp     qword [rsp + 8], 0
          jne     .different
          cmp     qword [rsp + 16], 0
          jne     .different
          cmp     qword [rsp + 24], 0
          jne     .different
          cmp     qword [rsp + 32], 0
          jne     .different
  .equal:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; neq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Inequality test for two elements of any kind.
  ; The difference e1 - e2 is produced by Fr_sub in a 40-byte stack
  ; temporary and then handed to Fr_toNormal; the elements are reported
  ; different exactly when that difference is non-zero.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; the qword 1 (different) or 0 (equal)
  ;          is stored there
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_neq:
          sub     rsp, 40                 ; temporary: 8-byte header + four 8-byte limbs
          push    rdi                     ; park the caller's result pointer
          lea     rdi, [rsp + 8]          ; the temporary sits just above the pushed rdi
          call    Fr_sub                  ; temporary = e1 - e2
          call    Fr_toNormal             ; uses rdi as left by Fr_sub (expected: still the temporary)
          pop     rdi                     ; from here on the temporary is at [rsp]
          mov     rax, [rsp]              ; header qword of the difference
          bt      rax, 63                 ; bit 63 set => long representation
          jc      .long_diff

          test    eax, eax                ; short: the value is the low dword
          jz      .equal
  .different:
          mov     qword [rdi], 1
          add     rsp, 40
          ret

  .long_diff:
          ; Long difference: any non-zero limb means the operands differ.
          cmp     qword [rsp + 8], 0
          jne     .different
          cmp     qword [rsp + 16], 0
          jne     .different
          cmp     qword [rsp + 24], 0
          jne     .different
          cmp     qword [rsp + 32], 0
          jne     .different
  .equal:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; lt
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Less-than test (e1 < e2) for two elements of any kind.
  ; The difference e1 - e2 is produced by Fr_sub in a 40-byte stack
  ; temporary and then handed to Fr_toNormal. The sign of the difference
  ; decides the result:
  ;   * short difference: sign of its low dword;
  ;   * long difference:  it counts as negative when it is greater than
  ;                       `half` ((q-1)/2), compared as an unsigned
  ;                       four-limb number.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; the qword 1 (e1 < e2) or 0 is stored there
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_lt:
          sub     rsp, 40                 ; temporary: 8-byte header + four 8-byte limbs
          push    rdi                     ; park the caller's result pointer
          lea     rdi, [rsp + 8]          ; the temporary sits just above the pushed rdi
          call    Fr_sub                  ; temporary = e1 - e2
          call    Fr_toNormal             ; uses rdi as left by Fr_sub (expected: still the temporary)
          pop     rdi                     ; from here on the temporary is at [rsp]
          mov     rax, [rsp]              ; header qword of the difference
          bt      rax, 63                 ; bit 63 set => long representation
          jc      .long_diff

          test    eax, eax                ; short: negative low dword => e1 < e2
          js      .less
  .not_less:
          mov     qword [rdi], 0
          add     rsp, 40
          ret

  .long_diff:
          ; A zero difference means e1 == e2, hence not less.
          cmp     qword [rsp + 32], 0
          jnz     .against_half
          cmp     qword [rsp + 24], 0
          jnz     .against_half
          cmp     qword [rsp + 16], 0
          jnz     .against_half
          cmp     qword [rsp + 8], 0
          jz      .not_less
  .against_half:
          ; Unsigned compare of half against the difference, most
          ; significant limb first; the first unequal limb decides.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jb      .less                   ; half < diff => diff is negative
          ja      .not_less               ; half > diff => diff is positive
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jb      .less
          ja      .not_less
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jb      .less
          ja      .not_less
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jae     .not_less               ; diff <= half counts as positive
  .less:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; gt
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Greater-than test (e1 > e2) for two elements of any kind.
  ; The difference e1 - e2 is produced by Fr_sub in a 40-byte stack
  ; temporary and then handed to Fr_toNormal. The sign of the difference
  ; decides the result:
  ;   * short difference: e1 > e2 when its low dword is strictly positive;
  ;   * long difference:  e1 > e2 when it is non-zero and not greater
  ;                       than `half` ((q-1)/2), compared as an unsigned
  ;                       four-limb number.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; the qword 1 (e1 > e2) or 0 is stored there
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_gt:
          sub     rsp, 40                 ; temporary: 8-byte header + four 8-byte limbs
          push    rdi                     ; park the caller's result pointer
          lea     rdi, [rsp + 8]          ; the temporary sits just above the pushed rdi
          call    Fr_sub                  ; temporary = e1 - e2
          call    Fr_toNormal             ; uses rdi as left by Fr_sub (expected: still the temporary)
          pop     rdi                     ; from here on the temporary is at [rsp]
          mov     rax, [rsp]              ; header qword of the difference
          bt      rax, 63                 ; bit 63 set => long representation
          jc      .long_diff

          test    eax, eax                ; short: strictly positive low dword => e1 > e2
          jg      .greater
  .not_greater:
          mov     qword [rdi], 0
          add     rsp, 40
          ret

  .long_diff:
          ; A zero difference means e1 == e2, hence not greater.
          cmp     qword [rsp + 32], 0
          jnz     .against_half
          cmp     qword [rsp + 24], 0
          jnz     .against_half
          cmp     qword [rsp + 16], 0
          jnz     .against_half
          cmp     qword [rsp + 8], 0
          jz      .not_greater
  .against_half:
          ; Unsigned compare of half against the difference, most
          ; significant limb first; the first unequal limb decides.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jb      .not_greater            ; half < diff => diff is negative
          ja      .greater                ; half > diff => diff is positive
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jb      .not_greater
          ja      .greater
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jb      .not_greater
          ja      .greater
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jb      .not_greater            ; diff == half still counts as positive
  .greater:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; leq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Less-or-equal test (e1 <= e2) for two elements of any kind.
  ; The difference e1 - e2 is produced by Fr_sub in a 40-byte stack
  ; temporary and then handed to Fr_toNormal. The sign of the difference
  ; decides the result:
  ;   * short difference: e1 <= e2 when its low dword is zero or negative;
  ;   * long difference:  e1 <= e2 when it is zero, or when it is greater
  ;                       than `half` ((q-1)/2) as an unsigned four-limb
  ;                       number (i.e. it counts as negative).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; the qword 1 (e1 <= e2) or 0 is stored there
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_leq:
          sub     rsp, 40                 ; temporary: 8-byte header + four 8-byte limbs
          push    rdi                     ; park the caller's result pointer
          lea     rdi, [rsp + 8]          ; the temporary sits just above the pushed rdi
          call    Fr_sub                  ; temporary = e1 - e2
          call    Fr_toNormal             ; uses rdi as left by Fr_sub (expected: still the temporary)
          pop     rdi                     ; from here on the temporary is at [rsp]
          mov     rax, [rsp]              ; header qword of the difference
          bt      rax, 63                 ; bit 63 set => long representation
          jc      .long_diff

          test    eax, eax                ; short: zero or negative low dword => e1 <= e2
          jle     .holds
  .fails:
          mov     qword [rdi], 0
          add     rsp, 40
          ret

  .long_diff:
          ; A zero difference means e1 == e2, which satisfies <=.
          cmp     qword [rsp + 32], 0
          jnz     .against_half
          cmp     qword [rsp + 24], 0
          jnz     .against_half
          cmp     qword [rsp + 16], 0
          jnz     .against_half
          cmp     qword [rsp + 8], 0
          jz      .holds
  .against_half:
          ; Unsigned compare of half against the difference, most
          ; significant limb first; the first unequal limb decides.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jb      .holds                  ; half < diff => diff is negative
          ja      .fails                  ; half > diff => diff is positive
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jb      .holds
          ja      .fails
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jb      .holds
          ja      .fails
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jae     .fails                  ; non-zero diff <= half counts as positive
  .holds:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; geq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Greater-or-equal test (e1 >= e2) for two elements of any kind.
  ; The difference e1 - e2 is produced by Fr_sub in a 40-byte stack
  ; temporary and then handed to Fr_toNormal. The sign of the difference
  ; decides the result:
  ;   * short difference: e1 >= e2 when its low dword is not negative;
  ;   * long difference:  e1 >= e2 when it is not greater than `half`
  ;                       ((q-1)/2) as an unsigned four-limb number
  ;                       (zero included).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; the qword 1 (e1 >= e2) or 0 is stored there
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_geq:
          sub     rsp, 40                 ; temporary: 8-byte header + four 8-byte limbs
          push    rdi                     ; park the caller's result pointer
          lea     rdi, [rsp + 8]          ; the temporary sits just above the pushed rdi
          call    Fr_sub                  ; temporary = e1 - e2
          call    Fr_toNormal             ; uses rdi as left by Fr_sub (expected: still the temporary)
          pop     rdi                     ; from here on the temporary is at [rsp]
          mov     rax, [rsp]              ; header qword of the difference
          bt      rax, 63                 ; bit 63 set => long representation
          jc      .long_diff

          test    eax, eax                ; short: negative low dword => e1 < e2
          js      .fails
  .holds:
          mov     qword [rdi], 1
          add     rsp, 40
          ret

  .long_diff:
          ; A zero difference means e1 == e2, which satisfies >=.
          cmp     qword [rsp + 32], 0
          jnz     .against_half
          cmp     qword [rsp + 24], 0
          jnz     .against_half
          cmp     qword [rsp + 16], 0
          jnz     .against_half
          cmp     qword [rsp + 8], 0
          jz      .holds
  .against_half:
          ; Unsigned compare of half against the difference, most
          ; significant limb first; the first unequal limb decides.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jb      .fails                  ; half < diff => diff is negative
          ja      .holds                  ; half > diff => diff is positive
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jb      .fails
          ja      .holds
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jb      .fails
          ja      .holds
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jae     .holds                  ; diff <= half counts as positive
  .fails:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  section .data

  ; Fr_q (exported): an 8-byte header immediately followed by the limbs
  ; of q. The two dwords form the qword 0x8000000000000000, i.e. only
  ; bit 63 is set -- the bit the routines test with `bt ..., 63` to
  ; recognise a long element.
  Fr_q:
          dd      0
          dd      0x80000000

  ; q: four 64-bit limbs, least significant limb first.
  q:
          dq      0x43e1f593f0000001
          dq      0x2833e84879b97091
          dq      0xb85045b68181585d
          dq      0x30644e72e131a029

  ; half: (q-1)/2, the threshold the comparison routines use to decide
  ; whether a long difference counts as negative.
  half:
          dq      0xa1f0fac9f8000000
          dq      0x9419f4243cdcb848
          dq      0xdc2822db40c0ac2e
          dq      0x183227397098d014

  ; R2, R3: NOTE(review): presumably the Montgomery conversion constants
  ; (R^2 and R^3 mod q); they are not referenced in this part of the
  ; file, so confirm against the Montgomery routines.
  R2:
          dq      0x1bb8e645ae216da7
          dq      0x53fe3ab1e35c59e3
          dq      0x8c49833d53bb8085
          dq      0x0216d0b17f4e44a5

  R3:
          dq      0x5e94d8e1b4bf0040
          dq      0x2a489cbe1cfbb6b8
          dq      0x893cc664a19fcfed
          dq      0x0cf8594b7fcc657c

  ; lboMask: ANDed onto the most significant limb by the bitwise
  ; routines; keeps the low 61 bits and clears the top three.
  lboMask:
          dq      0x1fffffffffffffff