You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

5841 lines
107 KiB

  1. global Fr_copy
  2. global Fr_copyn
  3. global Fr_add
  4. global Fr_sub
  5. global Fr_neg
  6. global Fr_mul
  7. global Fr_square
  8. global Fr_band
  9. global Fr_bor
  10. global Fr_bxor
  11. global Fr_bnot
  12. global Fr_eq
  13. global Fr_neq
  14. global Fr_lt
  15. global Fr_gt
  16. global Fr_leq
  17. global Fr_geq
  18. global Fr_land
  19. global Fr_lor
  20. global Fr_lnot
  21. global Fr_toNormal
  22. global Fr_toLongNormal
  23. global Fr_toMontgomery
  24. global Fr_toInt
  25. global Fr_isTrue
  26. global Fr_q
  27. extern Fr_fail
  28. DEFAULT REL
  29. section .text
  ;;;;;;;;;;;;;;;;;;;;;;
  ; copy
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Copies one element, i.e. five consecutive 64-bit words (40 bytes),
  ; from the source to the destination, lowest address first.
  ; Params:
  ;   rsi <= pointer to the source element
  ;   rdi <= pointer to the destination element
  ;
  ; Modified registers:
  ;   rax
  ;;;;;;;;;;;;;;;;;;;;;;;
  Fr_copy:
  ; The five word copies are generated at assembly time; the emitted code
  ; is a straight-line sequence of load/store pairs through rax.
  %assign FR_COPY_OFF 0
  %rep 5
          mov     rax, [rsi + FR_COPY_OFF]
          mov     [rdi + FR_COPY_OFF], rax
  %assign FR_COPY_OFF FR_COPY_OFF + 8
  %endrep
  %undef FR_COPY_OFF
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; copy an array of elements
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Copies rdx consecutive elements (5 quadwords = 40 bytes each) from the
  ; source array to the destination array with a forward `rep movsq`.
  ; Params:
  ;   rsi <= the src
  ;   rdi <= the dest
  ;   rdx <= number of elements to copy (0 copies nothing)
  ;
  ; Modified registers:
  ;   rcx, r8, r9 (rsi and rdi are restored before returning; rdx and rax
  ;   are left untouched). The direction flag is cleared.
  ;;;;;;;;;;;;;;;;;;;;;;;
  Fr_copyn:
  Fr_copyn_loop:                          ; historical alias of the entry point
          mov     r8, rsi                 ; movsq advances rsi/rdi: remember them
          mov     r9, rdi
          lea     rcx, [rdx + rdx*4]      ; rcx = 5 * count quadwords, without
                                          ; destroying rdx/rax the way `mul` did
          cld                             ; copy upwards
          rep movsq
          mov     rsi, r8
          mov     rdi, r9
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawCopyS2L
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Writes a signed 64-bit integer as a long, non-Montgomery element:
  ; the header word gets only bit 63 set and the four data limbs at
  ; [rdi + 8 .. rdi + 39] receive the value. A negative value is stored
  ; as value + q.
  ; Params:
  ;   rsi <= the integer
  ;   rdi <= pointer to the element that is overwritten
  ;
  ; Modified registers:
  ;   rax; rsi as well when the integer is negative (left at -1)
  ;;;;;;;;;;;;;;;;;;;;;;;
  rawCopyS2L:
          mov     rax, 0x8000000000000000 ; header: long, normal form
          mov     [rdi], rax
          test    rsi, rsi
          js      u64toLong_adjust_neg

          ; Non-negative: limb 0 is the value, the upper limbs are zero.
          xor     eax, eax
          mov     [rdi + 8], rsi
          mov     [rdi + 16], rax
          mov     [rdi + 24], rax
          mov     [rdi + 32], rax
          ret

  u64toLong_adjust_neg:
          ; Negative: add q to the 256-bit sign extension of the value.
          add     rsi, [q]                ; limb 0, starts the carry chain
          mov     [rdi + 8], rsi
          mov     rsi, -1                 ; sign-extension word (mov keeps CF)
  %assign RAWCOPYS2L_LIMB 1
  %rep 3
          mov     rax, rsi
          adc     rax, [q + 8*RAWCOPYS2L_LIMB]
          mov     [rdi + 8 + 8*RAWCOPYS2L_LIMB], rax
  %assign RAWCOPYS2L_LIMB RAWCOPYS2L_LIMB + 1
  %endrep
  %undef RAWCOPYS2L_LIMB
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; toInt
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Converts an element to a signed integer that fits in 32 bits.
  ;   * short element: the low 32 bits of the header word, sign-extended.
  ;   * long element:  accepted when the limbs hold either a value below
  ;     2^31, or q - k for a k that fits in the negative 32-bit range
  ;     (returned as -k). Anything else is reported through Fr_fail(0).
  ; The Montgomery flag (bit 62) is not examined here: the limbs of a
  ; long element are read as they are.
  ; Params:
  ;   rdi <= pointer to the element
  ; Returns:
  ;   rax <= the value (unspecified when Fr_fail was called and returned)
  ; Modified registers:
  ;   rax, rcx, plus whatever Fr_fail clobbers on the error path
  ;;;;;;;;;;;;;;;;;;;;;;;
  Fr_toInt:
          mov     rax, [rdi]
          bt      rax, 63                 ; long element?
          jc      Fr_long
          movsxd  rax, eax                ; short: sign-extend the 32-bit value
          ret

  Fr_long:
          ; Non-negative candidate: limb 0 below 2^31 and upper limbs zero.
          mov     rax, [rdi + 8]
          mov     rcx, rax
          shr     rcx, 31
          jnz     Fr_longNeg
          mov     rcx, [rdi + 16]
          test    rcx, rcx
          jnz     Fr_longNeg
          mov     rcx, [rdi + 24]
          test    rcx, rcx
          jnz     Fr_longNeg
          mov     rcx, [rdi + 32]
          test    rcx, rcx
          jnz     Fr_longNeg
          ret

  Fr_longNeg:
          ; Negative candidate: compute value - q limb by limb. Every step
          ; must borrow, otherwise the value is not a small negative number.
          mov     rax, [rdi + 8]
          sub     rax, [q]
          jnc     Fr_longErr
          mov     rcx, [rdi + 16]         ; mov keeps the borrow in CF
          sbb     rcx, [q + 8]
          jnc     Fr_longErr
          mov     rcx, [rdi + 24]
          sbb     rcx, [q + 16]
          jnc     Fr_longErr
          mov     rcx, [rdi + 32]
          sbb     rcx, [q + 24]
          jnc     Fr_longErr
          ; rax = low limb of value - q; bits 63..31 must all be ones.
          mov     rcx, rax
          sar     rcx, 31
          add     rcx, 1
          jnz     Fr_longErr
          ret

  Fr_longErr:
          push    rdi
          xor     edi, edi                ; Fr_fail(0)
          call    Fr_fail
          pop     rdi
          ret                             ; previously missing: execution fell
                                          ; through into the next routine when
                                          ; Fr_fail returned
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawMontgomeryMul
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Multiplies two elements in Montgomery form (4 limbs each), interleaving
  ; the partial products with the Montgomery reduction column by column.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= Pointer to the long data of element 2
  ;   rdi <= Pointer to the long data of result
  ; Every input limb is read before the result limb that could overlap it
  ; is written; Fr_toMontgomery calls this routine with rdx == rdi.
  ; Stack: 32 bytes hold the four reduction factors m[0..3].
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx (rdx is restored before returning)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; acc += a[%1] * b[%2]; the accumulator is the triple %3 (low), %4, %5
  %macro rmm_prod 5
          mov     rax, [rsi + %1]
          mul     qword [rcx + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; acc += m[%1] * q[%2]; the accumulator is the triple %3 (low), %4, %5
  %macro rmm_mq 5
          mov     rax, [rsp + %1]
          mul     qword [q + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; m[%1] = low(%2 * np); acc += m[%1] * q[0]. The low word %2 is not
  ; cleared afterwards and serves as the top word of the next column.
  %macro rmm_reduce 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  rawMontgomeryMul:
          sub     rsp, 32                 ; m[0..3]
          mov     rcx, rdx                ; mul overwrites rdx: keep b in rcx
          mov     r11, 0xc2e1f593efffffff ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; column 0
          rmm_prod     0,  0, r8, r9, r10
          rmm_reduce   0,     r8, r9, r10

          ; column 1
          rmm_prod     0,  8, r9, r10, r8
          rmm_prod     8,  0, r9, r10, r8
          rmm_mq       0,  8, r9, r10, r8
          rmm_reduce   8,     r9, r10, r8

          ; column 2
          rmm_prod     0, 16, r10, r8, r9
          rmm_prod     8,  8, r10, r8, r9
          rmm_prod    16,  0, r10, r8, r9
          rmm_mq       8,  8, r10, r8, r9
          rmm_mq       0, 16, r10, r8, r9
          rmm_reduce  16,     r10, r8, r9

          ; column 3
          rmm_prod     0, 24, r8, r9, r10
          rmm_prod     8, 16, r8, r9, r10
          rmm_prod    16,  8, r8, r9, r10
          rmm_prod    24,  0, r8, r9, r10
          rmm_mq      16,  8, r8, r9, r10
          rmm_mq       8, 16, r8, r9, r10
          rmm_mq       0, 24, r8, r9, r10
          rmm_reduce  24,     r8, r9, r10

          ; column 4 -> result limb 0
          rmm_prod     8, 24, r9, r10, r8
          rmm_prod    16, 16, r9, r10, r8
          rmm_prod    24,  8, r9, r10, r8
          rmm_mq      24,  8, r9, r10, r8
          rmm_mq      16, 16, r9, r10, r8
          rmm_mq       8, 24, r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; column 5 -> result limb 1
          rmm_prod    16, 24, r10, r8, r9
          rmm_prod    24, 16, r10, r8, r9
          rmm_mq      24, 16, r10, r8, r9
          rmm_mq      16, 24, r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; column 6 -> result limbs 2 and 3, r10 = overflow word
          rmm_prod    24, 24, r8, r9, r10
          rmm_mq      24, 24, r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; Final correction: subtract q once when the result overflowed
          ; 256 bits or is >= q.
          test    r10, r10
          jnz     rawMontgomeryMul_mulM_sq
          ; Trial subtraction result - q (nothing stored): a borrow means
          ; the result is already below q.
          mov     rax, [rdi + 0]
          sub     rax, [q + 0]
          mov     rax, [rdi + 8]
          sbb     rax, [q + 8]
          mov     rax, [rdi + 16]
          sbb     rax, [q + 16]
          mov     rax, [rdi + 24]
          sbb     rax, [q + 24]
          jb      rawMontgomeryMul_mulM_done

  rawMontgomeryMul_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawMontgomeryMul_mulM_done:
          mov     rdx, rcx                ; give rdx back to the caller
          add     rsp, 32
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawMontgomerySquare
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Squares a 4-limb element with interleaved Montgomery reduction. Each
  ; cross product a[i]*a[j] (i < j) is computed once and added twice.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdi <= Pointer to the long data of result
  ; Stack: 32 bytes hold the four reduction factors m[0..3].
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx (rdx is saved in rcx and restored)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; acc += a[%1]^2; the accumulator is the triple %2 (low), %3, %4
  %macro rms_sqr 4
          mov     rax, [rsi + %1]
          mul     rax
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  ; acc += 2 * a[%1] * a[%2]; the accumulator is the triple %3 (low), %4, %5
  %macro rms_dbl 5
          mov     rax, [rsi + %1]
          mul     qword [rsi + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; acc += m[%1] * q[%2]; the accumulator is the triple %3 (low), %4, %5
  %macro rms_mq 5
          mov     rax, [rsp + %1]
          mul     qword [q + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; m[%1] = low(%2 * np); acc += m[%1] * q[0]. The low word %2 is not
  ; cleared afterwards and serves as the top word of the next column.
  %macro rms_reduce 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  rawMontgomerySquare:
          sub     rsp, 32                 ; m[0..3]
          mov     rcx, rdx                ; mul overwrites rdx: park it in rcx
          mov     r11, 0xc2e1f593efffffff ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; column 0
          rms_sqr      0,     r8, r9, r10
          rms_reduce   0,     r8, r9, r10

          ; column 1
          rms_dbl      0,  8, r9, r10, r8
          rms_mq       0,  8, r9, r10, r8
          rms_reduce   8,     r9, r10, r8

          ; column 2
          rms_dbl      0, 16, r10, r8, r9
          rms_sqr      8,     r10, r8, r9
          rms_mq       8,  8, r10, r8, r9
          rms_mq       0, 16, r10, r8, r9
          rms_reduce  16,     r10, r8, r9

          ; column 3
          rms_dbl      0, 24, r8, r9, r10
          rms_dbl      8, 16, r8, r9, r10
          rms_mq      16,  8, r8, r9, r10
          rms_mq       8, 16, r8, r9, r10
          rms_mq       0, 24, r8, r9, r10
          rms_reduce  24,     r8, r9, r10

          ; column 4 -> result limb 0
          rms_dbl      8, 24, r9, r10, r8
          rms_sqr     16,     r9, r10, r8
          rms_mq      24,  8, r9, r10, r8
          rms_mq      16, 16, r9, r10, r8
          rms_mq       8, 24, r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; column 5 -> result limb 1
          rms_dbl     16, 24, r10, r8, r9
          rms_mq      24, 16, r10, r8, r9
          rms_mq      16, 24, r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; column 6 -> result limbs 2 and 3, r10 = overflow word
          rms_sqr     24,     r8, r9, r10
          rms_mq      24, 24, r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; Final correction: subtract q once when the result overflowed
          ; 256 bits or is >= q.
          test    r10, r10
          jnz     rawMontgomerySquare_mulM_sq
          ; Trial subtraction result - q (nothing stored): a borrow means
          ; the result is already below q.
          mov     rax, [rdi + 0]
          sub     rax, [q + 0]
          mov     rax, [rdi + 8]
          sbb     rax, [q + 8]
          mov     rax, [rdi + 16]
          sbb     rax, [q + 16]
          mov     rax, [rdi + 24]
          sbb     rax, [q + 24]
          jb      rawMontgomerySquare_mulM_done

  rawMontgomerySquare_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawMontgomerySquare_mulM_done:
          mov     rdx, rcx                ; give rdx back to the caller
          add     rsp, 32
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawMontgomeryMul1
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Multiplies a 4-limb element by a single 64-bit word (unsigned `mul`),
  ; with the same interleaved Montgomery reduction as rawMontgomeryMul.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= second operand (the 64-bit value itself, not a pointer)
  ;   rdi <= Pointer to the long data of result
  ; Stack: 32 bytes hold the four reduction factors m[0..3].
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx (rdx is restored before returning)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; acc += a[%1] * rcx; the accumulator is the triple %2 (low), %3, %4
  %macro rm1_prod 4
          mov     rax, [rsi + %1]
          mul     rcx
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  ; acc += m[%1] * q[%2]; the accumulator is the triple %3 (low), %4, %5
  %macro rm1_mq 5
          mov     rax, [rsp + %1]
          mul     qword [q + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; m[%1] = low(%2 * np); acc += m[%1] * q[0]. The low word %2 is not
  ; cleared afterwards and serves as the top word of the next column.
  %macro rm1_reduce 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  rawMontgomeryMul1:
          sub     rsp, 32                 ; m[0..3]
          mov     rcx, rdx                ; multiplier lives in rcx (mul uses rdx)
          mov     r11, 0xc2e1f593efffffff ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; column 0
          rm1_prod     0,     r8, r9, r10
          rm1_reduce   0,     r8, r9, r10

          ; column 1
          rm1_prod     8,     r9, r10, r8
          rm1_mq       0,  8, r9, r10, r8
          rm1_reduce   8,     r9, r10, r8

          ; column 2
          rm1_prod    16,     r10, r8, r9
          rm1_mq       8,  8, r10, r8, r9
          rm1_mq       0, 16, r10, r8, r9
          rm1_reduce  16,     r10, r8, r9

          ; column 3
          rm1_prod    24,     r8, r9, r10
          rm1_mq      16,  8, r8, r9, r10
          rm1_mq       8, 16, r8, r9, r10
          rm1_mq       0, 24, r8, r9, r10
          rm1_reduce  24,     r8, r9, r10

          ; column 4 -> result limb 0
          rm1_mq      24,  8, r9, r10, r8
          rm1_mq      16, 16, r9, r10, r8
          rm1_mq       8, 24, r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; column 5 -> result limb 1
          rm1_mq      24, 16, r10, r8, r9
          rm1_mq      16, 24, r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; column 6 -> result limbs 2 and 3, r10 = overflow word
          rm1_mq      24, 24, r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; Final correction: subtract q once when the result overflowed
          ; 256 bits or is >= q.
          test    r10, r10
          jnz     rawMontgomeryMul1_mulM_sq
          ; Trial subtraction result - q (nothing stored): a borrow means
          ; the result is already below q.
          mov     rax, [rdi + 0]
          sub     rax, [q + 0]
          mov     rax, [rdi + 8]
          sbb     rax, [q + 8]
          mov     rax, [rdi + 16]
          sbb     rax, [q + 16]
          mov     rax, [rdi + 24]
          sbb     rax, [q + 24]
          jb      rawMontgomeryMul1_mulM_done

  rawMontgomeryMul1_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawMontgomeryMul1_mulM_done:
          mov     rdx, rcx                ; give rdx back to the caller
          add     rsp, 32
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawFromMontgomery
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Takes a 4-limb value out of Montgomery form by running the reduction
  ; steps of rawMontgomeryMul on the value alone (no second operand).
  ; The conversion is done IN PLACE: the limbs are read from [rdi] and the
  ; result is written back to [rdi]. rsi is never accessed.
  ; Params:
  ;   rdi <= Pointer to the long data to convert (input and output)
  ; Stack: 32 bytes hold the four reduction factors m[0..3].
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx (rdx is saved in rcx and restored)
  ;;;;;;;;;;;;;;;;;;;;;;

  ; acc += x[%1]; the accumulator is the triple %2 (low), %3, %4
  %macro rfm_load 4
          add     %2, [rdi + %1]
          adc     %3, 0x0
          adc     %4, 0x0
  %endmacro

  ; acc += m[%1] * q[%2]; the accumulator is the triple %3 (low), %4, %5
  %macro rfm_mq 5
          mov     rax, [rsp + %1]
          mul     qword [q + %2]
          add     %3, rax
          adc     %4, rdx
          adc     %5, 0x0
  %endmacro

  ; m[%1] = low(%2 * np); acc += m[%1] * q[0]. The low word %2 is not
  ; cleared afterwards and serves as the top word of the next column.
  %macro rfm_reduce 4
          mov     rax, %2
          mul     r11
          mov     [rsp + %1], rax
          mul     qword [q]
          add     %2, rax
          adc     %3, rdx
          adc     %4, 0x0
  %endmacro

  rawFromMontgomery:
          sub     rsp, 32                 ; m[0..3]
          mov     rcx, rdx                ; mul overwrites rdx: park it in rcx
          mov     r11, 0xc2e1f593efffffff ; np
          xor     r8, r8
          xor     r9, r9
          xor     r10, r10

          ; column 0
          rfm_load     0,     r8, r9, r10
          rfm_reduce   0,     r8, r9, r10

          ; column 1
          rfm_load     8,     r9, r10, r8
          rfm_mq       0,  8, r9, r10, r8
          rfm_reduce   8,     r9, r10, r8

          ; column 2
          rfm_load    16,     r10, r8, r9
          rfm_mq       8,  8, r10, r8, r9
          rfm_mq       0, 16, r10, r8, r9
          rfm_reduce  16,     r10, r8, r9

          ; column 3 (last read of the input; stores start in column 4)
          rfm_load    24,     r8, r9, r10
          rfm_mq      16,  8, r8, r9, r10
          rfm_mq       8, 16, r8, r9, r10
          rfm_mq       0, 24, r8, r9, r10
          rfm_reduce  24,     r8, r9, r10

          ; column 4 -> result limb 0
          rfm_mq      24,  8, r9, r10, r8
          rfm_mq      16, 16, r9, r10, r8
          rfm_mq       8, 24, r9, r10, r8
          mov     [rdi + 0], r9
          xor     r9, r9

          ; column 5 -> result limb 1
          rfm_mq      24, 16, r10, r8, r9
          rfm_mq      16, 24, r10, r8, r9
          mov     [rdi + 8], r10
          xor     r10, r10

          ; column 6 -> result limbs 2 and 3, r10 = overflow word
          rfm_mq      24, 24, r8, r9, r10
          mov     [rdi + 16], r8
          xor     r8, r8
          mov     [rdi + 24], r9
          xor     r9, r9

          ; Final correction: subtract q once when the result overflowed
          ; 256 bits or is >= q.
          test    r10, r10
          jnz     rawFromMontgomery_mulM_sq
          ; Trial subtraction result - q (nothing stored): a borrow means
          ; the result is already below q.
          mov     rax, [rdi + 0]
          sub     rax, [q + 0]
          mov     rax, [rdi + 8]
          sbb     rax, [q + 8]
          mov     rax, [rdi + 16]
          sbb     rax, [q + 16]
          mov     rax, [rdi + 24]
          sbb     rax, [q + 24]
          jb      rawFromMontgomery_mulM_done

  rawFromMontgomery_mulM_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawFromMontgomery_mulM_done:
          mov     rdx, rcx                ; give rdx back to the caller
          add     rsp, 32
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; toMontgomery
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Converts an element to long Montgomery form, in place.
  ;   * bit 62 of the header already set: nothing is done.
  ;   * short element: its signed 32-bit value is multiplied by R2
  ;     (absolute value first, result negated for negative values).
  ;   * long element: its limbs are multiplied by R2.
  ; In the last two cases bits 62 and 63 of the header are set.
  ;   rdi <= Pointer element to convert
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx, rdx (rsi and rdi are preserved)
  ;;;;;;;;;;;;;;;;;;;;
  Fr_toMontgomery:
          mov     rax, [rdi]
          bts     rax, 62                 ; already Montgomery?
          jc      toMontgomery_doNothing
          bts     rax, 63                 ; CF = element was long
          ; mov, push and lea leave the flags alone, so CF from the bts
          ; above is still valid at the jc below.
          mov     [rdi], rax              ; header now: long + Montgomery
          push    rsi
          lea     rdi, [rdi + 8]          ; rdi -> data limbs
          lea     rsi, [R2]
          jc      toMontgomeryLong

  toMontgomeryShort:
          movsxd  rdx, eax                ; the short value
          test    rdx, rdx
          js      negMontgomeryShort

  posMontgomeryShort:
          call    rawMontgomeryMul1
          jmp     toMontgomery_restore

  negMontgomeryShort:
          neg     rdx                     ; multiply |value|, then negate the result
          call    rawMontgomeryMul1
          mov     rsi, rdi
          call    rawNegL
          jmp     toMontgomery_restore

  toMontgomeryLong:
          mov     rdx, rdi                ; second factor is the element itself
          call    rawMontgomeryMul

  toMontgomery_restore:
          pop     rsi
          sub     rdi, 8                  ; back to the element header

  toMontgomery_doNothing:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; toNormal
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Takes a long Montgomery element out of Montgomery form, in place:
  ; bit 62 of the header is cleared and the limbs go through
  ; rawFromMontgomery. Elements without bit 62, and short elements
  ; (bit 63 clear), are left exactly as they are.
  ;   rdi <= Pointer element to convert
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;
  Fr_toNormal:
          mov     rax, [rdi]
          btc     rax, 62                 ; CF = old Montgomery flag; the copy in
                                          ; rax now has the flag flipped
          jnc     toNormal_doNothing      ; was not Montgomery
          test    rax, rax                ; sign bit = bit 63 = long flag
          jns     toNormal_doNothing      ; short: nothing to convert

  toNormalLong:
          mov     [rdi], rax              ; header with bit 62 cleared
          lea     rdi, [rdi + 8]
          call    rawFromMontgomery
          lea     rdi, [rdi - 8]

  toNormal_doNothing:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; toLongNormal
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Converts an element to long normal (non-Montgomery) form, in place.
  ;   * short element (bit 63 clear): its signed 32-bit value is expanded
  ;     with rawCopyS2L, whatever the state of bit 62. Fr_toNormal also
  ;     treats such an element as short.
  ;   * long Montgomery element: bit 62 of the header is cleared and the
  ;     limbs are converted with rawFromMontgomery.
  ;   * long normal element: left as it is.
  ;   rdi <= Pointer element to convert
  ; Modified registers:
  ;   r8, r9, r10, r11, rax, rcx
  ;;;;;;;;;;;;;;;;;;;;
  Fr_toLongNormal:
          mov     rax, [rdi]
          bt      rax, 63                 ; check if long
          jnc     toLongNormal_fromShort
          bt      rax, 62                 ; check if montgomery
          jc      toLongNormal_fromMontgomery
          ret                             ; It is already long normal

  toLongNormal_fromMontgomery:
          btr     rax, 62                 ; the header must stop claiming
          mov     [rdi], rax              ; Montgomery form (was left set before)
          add     rdi, 8
          call    rawFromMontgomery
          sub     rdi, 8
          ret

  toLongNormal_fromShort:
          mov     r8, rsi                 ; rawCopyS2L takes the value in rsi
          movsxd  rsi, eax
          call    rawCopyS2L              ; also writes the long-normal header
          mov     rsi, r8                 ; recover rsi
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; add
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Adds two elements of any kind.
  ; The header word of an element selects its representation:
  ;   bit 63 set   -> long: the value is in the four limbs after the header
  ;   bit 63 clear -> short: the value is the signed low 32 bits of the header
  ;   bit 62 set   -> the limbs are in Montgomery form
  ; Result kind:
  ;   short + short        -> short, or long normal when the 32-bit sum overflows
  ;   no operand has bit 62 -> long normal
  ;   otherwise            -> long Montgomery
  ; NOTE: when exactly one operand is in Montgomery form, the other operand
  ; is converted to Montgomery form IN PLACE (the input element is modified).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result (may alias either operand)
  ; Modified Registers:
  ;   r8, r9, r10, r11, rax, rcx, rdx (rsi and rdi are preserved)
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_add:
          mov     rax, [rsi]              ; rax = header of element 1
          mov     rcx, [rdx]              ; rcx = header of element 2
          bt      rax, 63                 ; first operand long?
          jc      add_l1
          bt      rcx, 63                 ; second operand long?
          jc      add_s1l2

  add_s1s2:                               ; both operands are short
          mov     edx, eax                ; 32-bit write zero-extends into rdx
          add     edx, ecx
          jo      add_manageOverflow
          mov     [rdi], rdx              ; fits in 32 bits: store as short
          ret

  add_manageOverflow:                     ; redo the sum in 64 bits
          push    rsi
          movsxd  rsi, eax
          movsxd  rdx, ecx
          add     rsi, rdx
          call    rawCopyS2L              ; writes header and limbs of the result
          pop     rsi
          ret

  ;;;;;;;; first operand long
  add_l1:
          bt      rcx, 63                 ; second operand long?
          jc      add_l1l2

  add_l1s2:
          bt      rax, 62                 ; first operand Montgomery?
          jc      add_l1ms2

  add_l1ns2:                              ; long normal + short
          mov     r11, 0x8000000000000000 ; result header: long normal
          mov     [rdi], r11
          add     rsi, 8
          movsxd  rdx, ecx                ; short value, taken from the cached header
          add     rdi, 8
          test    rdx, rdx
          jns     tmp_1
          neg     rdx                     ; negative: subtract |value| instead
          call    rawSubLS
          jmp     add_l1ns2_done
  tmp_1:
          call    rawAddLS
  add_l1ns2_done:
          sub     rdi, 8
          sub     rsi, 8
          ret

  add_l1ms2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_l1ms2m

  ; Element 1 is long Montgomery and element 2 is not in Montgomery form:
  ; convert element 2 in place first.
  add_l1ms2n:
  add_l1ml2n:
          push    rdi
          mov     rdi, rdx
          call    Fr_toMontgomery         ; preserves rsi and rdi
          mov     rdx, rdi
          pop     rdi
          ; fall through

  ; Both operands now have Montgomery limbs. The result header is written
  ; only here, after any conversion: writing it earlier overwrote the
  ; header of the operand being converted whenever the result aliased it.
  add_l1ms2m:
  add_s1ml2m:
  add_l1ml2m:
          mov     r11, 0xC000000000000000 ; result header: long Montgomery
          mov     [rdi], r11
  add_rawAddLL:
          add     rdi, 8
          add     rsi, 8
          add     rdx, 8
          call    rawAddLL
          sub     rdi, 8
          sub     rsi, 8
          ret

  ;;;;;;;; first operand short, second long
  add_s1l2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_s1l2m

  add_s1l2n:                              ; short + long normal
          mov     r11, 0x8000000000000000 ; result header: long normal
          mov     [rdi], r11
          push    rsi                     ; hand rsi back unchanged, as the
                                          ; other paths do
          lea     rsi, [rdx + 8]          ; the long operand is element 2
          movsxd  rdx, eax                ; short value, taken from the cached header
          add     rdi, 8
          test    rdx, rdx
          jns     tmp_2
          neg     rdx                     ; negative: subtract |value| instead
          call    rawSubLS
          jmp     add_s1l2n_done
  tmp_2:
          call    rawAddLS
  add_s1l2n_done:
          sub     rdi, 8
          pop     rsi
          ret

  add_s1l2m:
          bt      rax, 62                 ; first operand Montgomery?
          jc      add_s1ml2m

  ; Element 2 is long Montgomery and element 1 is not in Montgomery form:
  ; convert element 1 in place, then add as two Montgomery values.
  add_s1nl2m:
  add_l1nl2m:
          push    rdi
          mov     rdi, rsi
          mov     rsi, rdx                ; park element 2 in rsi (preserved)
          call    Fr_toMontgomery
          mov     rdx, rsi
          mov     rsi, rdi
          pop     rdi
          jmp     add_l1ml2m

  ;;;;;;;; both operands long
  add_l1l2:
          bt      rax, 62                 ; first operand Montgomery?
          jc      add_l1ml2

  add_l1nl2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_l1nl2m

  add_l1nl2n:                             ; long normal + long normal
          mov     r11, 0x8000000000000000 ; result header: long normal
          mov     [rdi], r11
          jmp     add_rawAddLL

  add_l1ml2:
          bt      rcx, 62                 ; second operand Montgomery?
          jc      add_l1ml2m
          jmp     add_l1ml2n
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawAddLL
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Adds two 4-limb values and subtracts q once when the sum carried out
  ; of 256 bits or is >= q.
  ; Params:
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= Pointer to the long data of element 2
  ;   rdi <= Pointer to the long data of result
  ; Each limb of the inputs is read before the matching result limb is
  ; written.
  ; Modified Registers:
  ;   rax
  ;;;;;;;;;;;;;;;;;;;;;;
  rawAddLL:
          ; Limb 0 starts the carry chain; mov does not disturb CF.
          mov     rax, [rsi + 0]
          add     rax, [rdx + 0]
          mov     [rdi + 0], rax
  %assign RAWADDLL_OFF 8
  %rep 3
          mov     rax, [rsi + RAWADDLL_OFF]
          adc     rax, [rdx + RAWADDLL_OFF]
          mov     [rdi + RAWADDLL_OFF], rax
  %assign RAWADDLL_OFF RAWADDLL_OFF + 8
  %endrep
  %undef RAWADDLL_OFF
          jc      rawAddLL_sq             ; carry out of 256 bits: subtract q

          ; Trial subtraction sum - q (nothing stored): a borrow means the
          ; sum is already below q.
          mov     rax, [rdi + 0]
          sub     rax, [q + 0]
          mov     rax, [rdi + 8]
          sbb     rax, [q + 8]
          mov     rax, [rdi + 16]
          sbb     rax, [q + 16]
          mov     rax, [rdi + 24]
          sbb     rax, [q + 24]
          jb      rawAddLL_done

  rawAddLL_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawAddLL_done:
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; rawAddLS
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Adds a single 64-bit value to a 4-limb value and subtracts q once when
  ; the sum carried out of 256 bits or is >= q.
  ; Params:
  ;   rdi <= Pointer to the long data of result
  ;   rsi <= Pointer to the long data of element 1
  ;   rdx <= Value to be added (added to limb 0 as an unsigned word)
  ; Modified Registers:
  ;   rax, rdx
  ;;;;;;;;;;;;;;;;;;;;;;
  rawAddLS:
          add     rdx, [rsi]              ; limb 0 + value, starts the carry chain
          mov     [rdi], rdx
  %assign RAWADDLS_OFF 8
  %rep 3
          mov     rdx, 0                  ; must be mov: xor would clear CF
          adc     rdx, [rsi + RAWADDLS_OFF]
          mov     [rdi + RAWADDLS_OFF], rdx
  %assign RAWADDLS_OFF RAWADDLS_OFF + 8
  %endrep
  %undef RAWADDLS_OFF
          jc      rawAddLS_sq             ; carry out of 256 bits: subtract q

          ; Trial subtraction sum - q (nothing stored): a borrow means the
          ; sum is already below q.
          mov     rax, [rdi + 0]
          sub     rax, [q + 0]
          mov     rax, [rdi + 8]
          sbb     rax, [q + 8]
          mov     rax, [rdi + 16]
          sbb     rax, [q + 16]
          mov     rax, [rdi + 24]
          sbb     rax, [q + 24]
          jb      rawAddLS_done

  rawAddLS_sq:
          mov     rax, [q + 0]
          sub     [rdi + 0], rax
          mov     rax, [q + 8]
          sbb     [rdi + 8], rax
          mov     rax, [q + 16]
          sbb     [rdi + 16], rax
          mov     rax, [q + 24]
          sbb     [rdi + 24], rax

  rawAddLS_done:
          ret
  1342. ;;;;;;;;;;;;;;;;;;;;;;
  1343. ; sub
  1344. ;;;;;;;;;;;;;;;;;;;;;;
  1345. ; Substracts two elements of any kind
  1346. ; Params:
  1347. ; rsi <= Pointer to element 1
  1348. ; rdx <= Pointer to element 2
  1349. ; rdi <= Pointer to result
  1350. ; Modified Registers:
  1351. ; r8, r9, 10, r11, rax, rcx
  1352. ;;;;;;;;;;;;;;;;;;;;;;
  1353. Fr_sub:
  1354. mov rax, [rsi]
  1355. mov rcx, [rdx]
  1356. bt rax, 63 ; Check if is long first operand
  1357. jc sub_l1
  1358. bt rcx, 63 ; Check if is long second operand
  1359. jc sub_s1l2
  1360. sub_s1s2: ; Both operands are short
  1361. xor rdx, rdx
  1362. mov edx, eax
  1363. sub edx, ecx
  1364. jo sub_manageOverflow ; rsi already is the 64bits result
  1365. mov [rdi], rdx ; not necessary to adjust so just save and return
  1366. ret
  1367. sub_manageOverflow: ; Do the operation in 64 bits
  1368. push rsi
  1369. movsx rsi, eax
  1370. movsx rdx, ecx
  1371. sub rsi, rdx
  1372. call rawCopyS2L
  1373. pop rsi
  1374. ret
  1375. sub_l1:
  1376. bt rcx, 63 ; Check if is short second operand
  1377. jc sub_l1l2
  1378. ;;;;;;;;
  1379. sub_l1s2:
  1380. bt rax, 62 ; check if montgomery first
  1381. jc sub_l1ms2
  1382. sub_l1ns2:
  1383. mov r11b, 0x80
  1384. shl r11, 56
  1385. mov [rdi], r11
  1386. add rsi, 8
  1387. movsx rdx, ecx
  1388. add rdi, 8
  1389. cmp rdx, 0
  1390. jns tmp_3
  1391. neg rdx
  1392. call rawAddLS
  1393. sub rdi, 8
  1394. sub rsi, 8
  1395. ret
  1396. tmp_3:
  1397. call rawSubLS
  1398. sub rdi, 8
  1399. sub rsi, 8
  1400. ret
  1401. sub_l1ms2:
  1402. bt rcx, 62 ; check if montgomery second
  1403. jc sub_l1ms2m
  1404. sub_l1ms2n:
  1405. mov r11b, 0xC0
  1406. shl r11, 56
  1407. mov [rdi], r11
  1408. push rdi
  1409. mov rdi, rdx
  1410. call Fr_toMontgomery
  1411. mov rdx, rdi
  1412. pop rdi
  1413. add rdi, 8
  1414. add rsi, 8
  1415. add rdx, 8
  1416. call rawSubLL
  1417. sub rdi, 8
  1418. sub rsi, 8
  1419. ret
  1420. sub_l1ms2m:
  1421. mov r11b, 0xC0
  1422. shl r11, 56
  1423. mov [rdi], r11
  1424. add rdi, 8
  1425. add rsi, 8
  1426. add rdx, 8
  1427. call rawSubLL
  1428. sub rdi, 8
  1429. sub rsi, 8
  1430. ret
  1431. ;;;;;;;;
  1432. sub_s1l2:
  1433. bt rcx, 62 ; check if montgomery first
  1434. jc sub_s1l2m
  1435. sub_s1l2n:
  1436. mov r11b, 0x80
  1437. shl r11, 56
  1438. mov [rdi], r11
  1439. cmp eax, 0
  1440. js tmp_4
  1441. ; First Operand is positive
  1442. push rsi
  1443. add rdi, 8
  1444. movsx rsi, eax
  1445. add rdx, 8
  1446. call rawSubSL
  1447. sub rdi, 8
  1448. pop rsi
  1449. ret
  1450. tmp_4: ; First operand is negative
  1451. push rsi
  1452. lea rsi, [rdx + 8]
  1453. movsx rdx, eax
  1454. add rdi, 8
  1455. neg rdx
  1456. call rawNegLS
  1457. sub rdi, 8
  1458. pop rsi
  1459. ret
  1460. sub_s1l2m:
  1461. bt rax, 62 ; check if montgomery second
  1462. jc sub_s1ml2m
  1463. sub_s1nl2m:
  1464. mov r11b, 0xC0
  1465. shl r11, 56
  1466. mov [rdi], r11
  1467. push rdi
  1468. mov rdi, rsi
  1469. mov rsi, rdx
  1470. call Fr_toMontgomery
  1471. mov rdx, rsi
  1472. mov rsi, rdi
  1473. pop rdi
  1474. add rdi, 8
  1475. add rsi, 8
  1476. add rdx, 8
  1477. call rawSubLL
  1478. sub rdi, 8
  1479. sub rsi, 8
  1480. ret
  1481. sub_s1ml2m:
  1482. mov r11b, 0xC0
  1483. shl r11, 56
  1484. mov [rdi], r11
  1485. add rdi, 8
  1486. add rsi, 8
  1487. add rdx, 8
  1488. call rawSubLL
  1489. sub rdi, 8
  1490. sub rsi, 8
  1491. ret
  1492. ;;;;
  1493. sub_l1l2:
  1494. bt rax, 62 ; check if montgomery first
  1495. jc sub_l1ml2
  1496. sub_l1nl2:
  1497. bt rcx, 62 ; check if montgomery second
  1498. jc sub_l1nl2m
  1499. sub_l1nl2n:
  1500. mov r11b, 0x80
  1501. shl r11, 56
  1502. mov [rdi], r11
  1503. add rdi, 8
  1504. add rsi, 8
  1505. add rdx, 8
  1506. call rawSubLL
  1507. sub rdi, 8
  1508. sub rsi, 8
  1509. ret
  1510. sub_l1nl2m:
  1511. mov r11b, 0xC0
  1512. shl r11, 56
  1513. mov [rdi], r11
  1514. push rdi
  1515. mov rdi, rsi
  1516. mov rsi, rdx
  1517. call Fr_toMontgomery
  1518. mov rdx, rsi
  1519. mov rsi, rdi
  1520. pop rdi
  1521. add rdi, 8
  1522. add rsi, 8
  1523. add rdx, 8
  1524. call rawSubLL
  1525. sub rdi, 8
  1526. sub rsi, 8
  1527. ret
  1528. sub_l1ml2:
  1529. bt rcx, 62 ; check if montgomery seconf
  1530. jc sub_l1ml2m
  1531. sub_l1ml2n:
  1532. mov r11b, 0xC0
  1533. shl r11, 56
  1534. mov [rdi], r11
  1535. push rdi
  1536. mov rdi, rdx
  1537. call Fr_toMontgomery
  1538. mov rdx, rdi
  1539. pop rdi
  1540. add rdi, 8
  1541. add rsi, 8
  1542. add rdx, 8
  1543. call rawSubLL
  1544. sub rdi, 8
  1545. sub rsi, 8
  1546. ret
  1547. sub_l1ml2m:
  1548. mov r11b, 0xC0
  1549. shl r11, 56
  1550. mov [rdi], r11
  1551. add rdi, 8
  1552. add rsi, 8
  1553. add rdx, 8
  1554. call rawSubLL
  1555. sub rdi, 8
  1556. sub rsi, 8
  1557. ret
  1558. ;;;;;;;;;;;;;;;;;;;;;;
  1559. ; rawSubLS
  1560. ;;;;;;;;;;;;;;;;;;;;;;
  1561. ; Substracts a short element from the long element
  1562. ; Params:
  1563. ; rdi <= Pointer to the long data of result
  1564. ; rsi <= Pointer to the long data of element 1 where will be substracted
  1565. ; rdx <= Value to be substracted
  1566. ; [rdi] = [rsi] - rdx
  1567. ; Modified Registers:
  1568. ; rax
  1569. ;;;;;;;;;;;;;;;;;;;;;;
  1570. rawSubLS:
  1571. ; Substract first digit
  1572. mov rax, [rsi]
  1573. sub rax, rdx
  1574. mov [rdi] ,rax
  1575. mov rdx, 0
  1576. mov rax, [rsi + 8]
  1577. sbb rax, rdx
  1578. mov [rdi + 8], rax
  1579. mov rax, [rsi + 16]
  1580. sbb rax, rdx
  1581. mov [rdi + 16], rax
  1582. mov rax, [rsi + 24]
  1583. sbb rax, rdx
  1584. mov [rdi + 24], rax
  1585. jnc rawSubLS_done ; if overflow, add q
  1586. ; Add q
  1587. rawSubLS_aq:
  1588. mov rax, [q + 0]
  1589. add [rdi + 0], rax
  1590. mov rax, [q + 8]
  1591. adc [rdi + 8], rax
  1592. mov rax, [q + 16]
  1593. adc [rdi + 16], rax
  1594. mov rax, [q + 24]
  1595. adc [rdi + 24], rax
  1596. rawSubLS_done:
  1597. ret
  1598. ;;;;;;;;;;;;;;;;;;;;;;
  1599. ; rawSubSL
  1600. ;;;;;;;;;;;;;;;;;;;;;;
  1601. ; Substracts a long element from a short element
  1602. ; Params:
  1603. ; rdi <= Pointer to the long data of result
  1604. ; rsi <= Value from where will bo substracted
  1605. ; rdx <= Pointer to long of the value to be substracted
  1606. ;
  1607. ; [rdi] = rsi - [rdx]
  1608. ; Modified Registers:
  1609. ; rax
  1610. ;;;;;;;;;;;;;;;;;;;;;;
  1611. rawSubSL:
  1612. ; Substract first digit
  1613. sub rsi, [rdx]
  1614. mov [rdi] ,rsi
  1615. mov rax, 0
  1616. sbb rax, [rdx + 8]
  1617. mov [rdi + 8], rax
  1618. mov rax, 0
  1619. sbb rax, [rdx + 16]
  1620. mov [rdi + 16], rax
  1621. mov rax, 0
  1622. sbb rax, [rdx + 24]
  1623. mov [rdi + 24], rax
  1624. jnc rawSubSL_done ; if overflow, add q
  1625. ; Add q
  1626. rawSubSL_aq:
  1627. mov rax, [q + 0]
  1628. add [rdi + 0], rax
  1629. mov rax, [q + 8]
  1630. adc [rdi + 8], rax
  1631. mov rax, [q + 16]
  1632. adc [rdi + 16], rax
  1633. mov rax, [q + 24]
  1634. adc [rdi + 24], rax
  1635. rawSubSL_done:
  1636. ret
  1637. ;;;;;;;;;;;;;;;;;;;;;;
  1638. ; rawSubLL
  1639. ;;;;;;;;;;;;;;;;;;;;;;
  1640. ; Substracts a long element from a short element
  1641. ; Params:
  1642. ; rdi <= Pointer to the long data of result
  1643. ; rsi <= Pointer to long from where substracted
  1644. ; rdx <= Pointer to long of the value to be substracted
  1645. ;
  1646. ; [rdi] = [rsi] - [rdx]
  1647. ; Modified Registers:
  1648. ; rax
  1649. ;;;;;;;;;;;;;;;;;;;;;;
  1650. rawSubLL:
  1651. ; Substract first digit
  1652. mov rax, [rsi + 0]
  1653. sub rax, [rdx + 0]
  1654. mov [rdi + 0], rax
  1655. mov rax, [rsi + 8]
  1656. sbb rax, [rdx + 8]
  1657. mov [rdi + 8], rax
  1658. mov rax, [rsi + 16]
  1659. sbb rax, [rdx + 16]
  1660. mov [rdi + 16], rax
  1661. mov rax, [rsi + 24]
  1662. sbb rax, [rdx + 24]
  1663. mov [rdi + 24], rax
  1664. jnc rawSubLL_done ; if overflow, add q
  1665. ; Add q
  1666. rawSubLL_aq:
  1667. mov rax, [q + 0]
  1668. add [rdi + 0], rax
  1669. mov rax, [q + 8]
  1670. adc [rdi + 8], rax
  1671. mov rax, [q + 16]
  1672. adc [rdi + 16], rax
  1673. mov rax, [q + 24]
  1674. adc [rdi + 24], rax
  1675. rawSubLL_done:
  1676. ret
  1677. ;;;;;;;;;;;;;;;;;;;;;;
  1678. ; rawNegLS
  1679. ;;;;;;;;;;;;;;;;;;;;;;
  1680. ; Substracts a long element and a short element form 0
  1681. ; Params:
  1682. ; rdi <= Pointer to the long data of result
  1683. ; rsi <= Pointer to long from where substracted
  1684. ; rdx <= short value to be substracted too
  1685. ;
  1686. ; [rdi] = -[rsi] - rdx
  1687. ; Modified Registers:
  1688. ; rax
  1689. ;;;;;;;;;;;;;;;;;;;;;;
  1690. rawNegLS:
  1691. mov rax, [q]
  1692. sub rax, rdx
  1693. mov [rdi], rax
  1694. mov rax, [q + 8 ]
  1695. sbb rax, 0
  1696. mov [rdi + 8], rax
  1697. mov rax, [q + 16 ]
  1698. sbb rax, 0
  1699. mov [rdi + 16], rax
  1700. mov rax, [q + 24 ]
  1701. sbb rax, 0
  1702. mov [rdi + 24], rax
  1703. setc dl
  1704. mov rax, [rdi + 0 ]
  1705. sub rax, [rsi + 0]
  1706. mov [rdi + 0], rax
  1707. mov rax, [rdi + 8 ]
  1708. sbb rax, [rsi + 8]
  1709. mov [rdi + 8], rax
  1710. mov rax, [rdi + 16 ]
  1711. sbb rax, [rsi + 16]
  1712. mov [rdi + 16], rax
  1713. mov rax, [rdi + 24 ]
  1714. sbb rax, [rsi + 24]
  1715. mov [rdi + 24], rax
  1716. setc dh
  1717. or dl, dh
  1718. jz rawNegSL_done
  1719. ; it is a negative value, so add q
  1720. mov rax, [q + 0]
  1721. add [rdi + 0], rax
  1722. mov rax, [q + 8]
  1723. adc [rdi + 8], rax
  1724. mov rax, [q + 16]
  1725. adc [rdi + 16], rax
  1726. mov rax, [q + 24]
  1727. adc [rdi + 24], rax
  1728. rawNegSL_done:
  1729. ret
  1730. ;;;;;;;;;;;;;;;;;;;;;;
  1731. ; neg
  1732. ;;;;;;;;;;;;;;;;;;;;;;
  1733. ; Adds two elements of any kind
  1734. ; Params:
  1735. ; rsi <= Pointer to element to be negated
  1736. ; rdi <= Pointer to result
  1737. ; [rdi] = -[rsi]
  1738. ;;;;;;;;;;;;;;;;;;;;;;
  1739. Fr_neg:
  1740. mov rax, [rsi]
  1741. bt rax, 63 ; Check if is short first operand
  1742. jc neg_l
  1743. neg_s: ; Operand is short
  1744. neg eax
  1745. jo neg_manageOverflow ; Check if overflow. (0x80000000 is the only case)
  1746. mov [rdi], rax ; not necessary to adjust so just save and return
  1747. ret
  1748. neg_manageOverflow: ; Do the operation in 64 bits
  1749. push rsi
  1750. movsx rsi, eax
  1751. neg rsi
  1752. call rawCopyS2L
  1753. pop rsi
  1754. ret
  1755. neg_l:
  1756. mov [rdi], rax ; Copy the type
  1757. add rdi, 8
  1758. add rsi, 8
  1759. call rawNegL
  1760. sub rdi, 8
  1761. sub rsi, 8
  1762. ret
  1763. ;;;;;;;;;;;;;;;;;;;;;;
  1764. ; rawNeg
  1765. ;;;;;;;;;;;;;;;;;;;;;;
  1766. ; Negates a value
  1767. ; Params:
  1768. ; rdi <= Pointer to the long data of result
  1769. ; rsi <= Pointer to the long data of element 1
  1770. ;
  1771. ; [rdi] = - [rsi]
  1772. ;;;;;;;;;;;;;;;;;;;;;;
  1773. rawNegL:
  1774. ; Compare is zero
  1775. xor rax, rax
  1776. cmp [rsi + 0], rax
  1777. jnz doNegate
  1778. cmp [rsi + 8], rax
  1779. jnz doNegate
  1780. cmp [rsi + 16], rax
  1781. jnz doNegate
  1782. cmp [rsi + 24], rax
  1783. jnz doNegate
  1784. ; it's zero so just set to zero
  1785. mov [rdi + 0], rax
  1786. mov [rdi + 8], rax
  1787. mov [rdi + 16], rax
  1788. mov [rdi + 24], rax
  1789. ret
  1790. doNegate:
  1791. mov rax, [q + 0]
  1792. sub rax, [rsi + 0]
  1793. mov [rdi + 0], rax
  1794. mov rax, [q + 8]
  1795. sbb rax, [rsi + 8]
  1796. mov [rdi + 8], rax
  1797. mov rax, [q + 16]
  1798. sbb rax, [rsi + 16]
  1799. mov [rdi + 16], rax
  1800. mov rax, [q + 24]
  1801. sbb rax, [rsi + 24]
  1802. mov [rdi + 24], rax
  1803. ret
  1804. ;;;;;;;;;;;;;;;;;;;;;;
  1805. ; square
  1806. ;;;;;;;;;;;;;;;;;;;;;;
  1807. ; Squares a field element
  1808. ; Params:
  1809. ; rsi <= Pointer to element 1
  1810. ; rdi <= Pointer to result
  1811. ; [rdi] = [rsi] * [rsi]
  1812. ; Modified Registers:
  1813. ; r8, r9, 10, r11, rax, rcx
  1814. ;;;;;;;;;;;;;;;;;;;;;;
  1815. Fr_square:
  1816. mov r8, [rsi]
  1817. bt r8, 63 ; Check if is short first operand
  1818. jc square_l1
  1819. square_s1: ; Both operands are short
  1820. xor rax, rax
  1821. mov eax, r8d
  1822. imul eax
  1823. jo square_manageOverflow ; rsi already is the 64bits result
  1824. mov [rdi], rax ; not necessary to adjust so just save and return
  1825. square_manageOverflow: ; Do the operation in 64 bits
  1826. push rsi
  1827. movsx rax, r8d
  1828. imul rax
  1829. mov rsi, rax
  1830. call rawCopyS2L
  1831. pop rsi
  1832. ret
  1833. square_l1:
  1834. bt r8, 62 ; check if montgomery first
  1835. jc square_l1m
  1836. square_l1n:
  1837. mov r11b, 0xC0
  1838. shl r11, 56
  1839. mov [rdi], r11
  1840. add rdi, 8
  1841. add rsi, 8
  1842. call rawMontgomerySquare
  1843. sub rdi, 8
  1844. sub rsi, 8
  1845. push rsi
  1846. add rdi, 8
  1847. mov rsi, rdi
  1848. lea rdx, [R3]
  1849. call rawMontgomeryMul
  1850. sub rdi, 8
  1851. pop rsi
  1852. ret
  1853. square_l1m:
  1854. mov r11b, 0xC0
  1855. shl r11, 56
  1856. mov [rdi], r11
  1857. add rdi, 8
  1858. add rsi, 8
  1859. call rawMontgomerySquare
  1860. sub rdi, 8
  1861. sub rsi, 8
  1862. ret
  1863. ;;;;;;;;;;;;;;;;;;;;;;
  1864. ; mul
  1865. ;;;;;;;;;;;;;;;;;;;;;;
  1866. ; Multiplies two elements of any kind
  1867. ; Params:
  1868. ; rsi <= Pointer to element 1
  1869. ; rdx <= Pointer to element 2
  1870. ; rdi <= Pointer to result
  1871. ; [rdi] = [rsi] * [rdi]
  1872. ; Modified Registers:
  1873. ; r8, r9, 10, r11, rax, rcx
  1874. ;;;;;;;;;;;;;;;;;;;;;;
  1875. Fr_mul:
  1876. mov r8, [rsi]
  1877. mov r9, [rdx]
  1878. bt r8, 63 ; Check if is short first operand
  1879. jc mul_l1
  1880. bt r9, 63 ; Check if is short second operand
  1881. jc mul_s1l2
  1882. mul_s1s2: ; Both operands are short
  1883. xor rax, rax
  1884. mov eax, r8d
  1885. imul r9d
  1886. jo mul_manageOverflow ; rsi already is the 64bits result
  1887. mov [rdi], rax ; not necessary to adjust so just save and return
  1888. mul_manageOverflow: ; Do the operation in 64 bits
  1889. push rsi
  1890. movsx rax, r8d
  1891. movsx rcx, r9d
  1892. imul rcx
  1893. mov rsi, rax
  1894. call rawCopyS2L
  1895. pop rsi
  1896. ret
  1897. mul_l1:
  1898. bt r9, 63 ; Check if is short second operand
  1899. jc mul_l1l2
  1900. ;;;;;;;;
  1901. mul_l1s2:
  1902. bt r8, 62 ; check if montgomery first
  1903. jc mul_l1ms2
  1904. mul_l1ns2:
  1905. bt r9, 62 ; check if montgomery first
  1906. jc mul_l1ns2m
  1907. mul_l1ns2n:
  1908. mov r11b, 0xC0
  1909. shl r11, 56
  1910. mov [rdi], r11
  1911. push rsi
  1912. add rsi, 8
  1913. movsx rdx, r9d
  1914. add rdi, 8
  1915. cmp rdx, 0
  1916. jns tmp_5
  1917. neg rdx
  1918. call rawMontgomeryMul1
  1919. mov rsi, rdi
  1920. call rawNegL
  1921. sub rdi, 8
  1922. pop rsi
  1923. jmp tmp_6
  1924. tmp_5:
  1925. call rawMontgomeryMul1
  1926. sub rdi, 8
  1927. pop rsi
  1928. tmp_6:
  1929. push rsi
  1930. add rdi, 8
  1931. mov rsi, rdi
  1932. lea rdx, [R3]
  1933. call rawMontgomeryMul
  1934. sub rdi, 8
  1935. pop rsi
  1936. ret
  1937. mul_l1ns2m:
  1938. mov r11b, 0x80
  1939. shl r11, 56
  1940. mov [rdi], r11
  1941. add rdi, 8
  1942. add rsi, 8
  1943. add rdx, 8
  1944. call rawMontgomeryMul
  1945. sub rdi, 8
  1946. sub rsi, 8
  1947. ret
  1948. mul_l1ms2:
  1949. bt r9, 62 ; check if montgomery second
  1950. jc mul_l1ms2m
  1951. mul_l1ms2n:
  1952. mov r11b, 0x80
  1953. shl r11, 56
  1954. mov [rdi], r11
  1955. push rsi
  1956. add rsi, 8
  1957. movsx rdx, r9d
  1958. add rdi, 8
  1959. cmp rdx, 0
  1960. jns tmp_7
  1961. neg rdx
  1962. call rawMontgomeryMul1
  1963. mov rsi, rdi
  1964. call rawNegL
  1965. sub rdi, 8
  1966. pop rsi
  1967. jmp tmp_8
  1968. tmp_7:
  1969. call rawMontgomeryMul1
  1970. sub rdi, 8
  1971. pop rsi
  1972. tmp_8:
  1973. ret
  1974. mul_l1ms2m:
  1975. mov r11b, 0xC0
  1976. shl r11, 56
  1977. mov [rdi], r11
  1978. add rdi, 8
  1979. add rsi, 8
  1980. add rdx, 8
  1981. call rawMontgomeryMul
  1982. sub rdi, 8
  1983. sub rsi, 8
  1984. ret
  1985. ;;;;;;;;
  1986. mul_s1l2:
  1987. bt r8, 62 ; check if montgomery first
  1988. jc mul_s1ml2
  1989. mul_s1nl2:
  1990. bt r9, 62 ; check if montgomery first
  1991. jc mul_s1nl2m
  1992. mul_s1nl2n:
  1993. mov r11b, 0xC0
  1994. shl r11, 56
  1995. mov [rdi], r11
  1996. push rsi
  1997. lea rsi, [rdx + 8]
  1998. movsx rdx, r8d
  1999. add rdi, 8
  2000. cmp rdx, 0
  2001. jns tmp_9
  2002. neg rdx
  2003. call rawMontgomeryMul1
  2004. mov rsi, rdi
  2005. call rawNegL
  2006. sub rdi, 8
  2007. pop rsi
  2008. jmp tmp_10
  2009. tmp_9:
  2010. call rawMontgomeryMul1
  2011. sub rdi, 8
  2012. pop rsi
  2013. tmp_10:
  2014. push rsi
  2015. add rdi, 8
  2016. mov rsi, rdi
  2017. lea rdx, [R3]
  2018. call rawMontgomeryMul
  2019. sub rdi, 8
  2020. pop rsi
  2021. ret
  2022. mul_s1nl2m:
  2023. mov r11b, 0x80
  2024. shl r11, 56
  2025. mov [rdi], r11
  2026. push rsi
  2027. lea rsi, [rdx + 8]
  2028. movsx rdx, r8d
  2029. add rdi, 8
  2030. cmp rdx, 0
  2031. jns tmp_11
  2032. neg rdx
  2033. call rawMontgomeryMul1
  2034. mov rsi, rdi
  2035. call rawNegL
  2036. sub rdi, 8
  2037. pop rsi
  2038. jmp tmp_12
  2039. tmp_11:
  2040. call rawMontgomeryMul1
  2041. sub rdi, 8
  2042. pop rsi
  2043. tmp_12:
  2044. ret
  2045. mul_s1ml2:
  2046. bt r9, 62 ; check if montgomery first
  2047. jc mul_s1ml2m
  2048. mul_s1ml2n:
  2049. mov r11b, 0x80
  2050. shl r11, 56
  2051. mov [rdi], r11
  2052. add rdi, 8
  2053. add rsi, 8
  2054. add rdx, 8
  2055. call rawMontgomeryMul
  2056. sub rdi, 8
  2057. sub rsi, 8
  2058. ret
  2059. mul_s1ml2m:
  2060. mov r11b, 0xC0
  2061. shl r11, 56
  2062. mov [rdi], r11
  2063. add rdi, 8
  2064. add rsi, 8
  2065. add rdx, 8
  2066. call rawMontgomeryMul
  2067. sub rdi, 8
  2068. sub rsi, 8
  2069. ret
  2070. ;;;;
  2071. mul_l1l2:
  2072. bt r8, 62 ; check if montgomery first
  2073. jc mul_l1ml2
  2074. mul_l1nl2:
  2075. bt r9, 62 ; check if montgomery second
  2076. jc mul_l1nl2m
  2077. mul_l1nl2n:
  2078. mov r11b, 0xC0
  2079. shl r11, 56
  2080. mov [rdi], r11
  2081. add rdi, 8
  2082. add rsi, 8
  2083. add rdx, 8
  2084. call rawMontgomeryMul
  2085. sub rdi, 8
  2086. sub rsi, 8
  2087. push rsi
  2088. add rdi, 8
  2089. mov rsi, rdi
  2090. lea rdx, [R3]
  2091. call rawMontgomeryMul
  2092. sub rdi, 8
  2093. pop rsi
  2094. ret
  2095. mul_l1nl2m:
  2096. mov r11b, 0x80
  2097. shl r11, 56
  2098. mov [rdi], r11
  2099. add rdi, 8
  2100. add rsi, 8
  2101. add rdx, 8
  2102. call rawMontgomeryMul
  2103. sub rdi, 8
  2104. sub rsi, 8
  2105. ret
  2106. mul_l1ml2:
  2107. bt r9, 62 ; check if montgomery seconf
  2108. jc mul_l1ml2m
  2109. mul_l1ml2n:
  2110. mov r11b, 0x80
  2111. shl r11, 56
  2112. mov [rdi], r11
  2113. add rdi, 8
  2114. add rsi, 8
  2115. add rdx, 8
  2116. call rawMontgomeryMul
  2117. sub rdi, 8
  2118. sub rsi, 8
  2119. ret
  2120. mul_l1ml2m:
  2121. mov r11b, 0xC0
  2122. shl r11, 56
  2123. mov [rdi], r11
  2124. add rdi, 8
  2125. add rsi, 8
  2126. add rdx, 8
  2127. call rawMontgomeryMul
  2128. sub rdi, 8
  2129. sub rsi, 8
  2130. ret
  2131. ;;;;;;;;;;;;;;;;;;;;;;
  2132. ; band
  2133. ;;;;;;;;;;;;;;;;;;;;;;
  2134. ; Adds two elements of any kind
  2135. ; Params:
  2136. ; rsi <= Pointer to element 1
  2137. ; rdx <= Pointer to element 2
  2138. ; rdi <= Pointer to result
  2139. ; Modified Registers:
  2140. ; r8, r9, 10, r11, rax, rcx
  2141. ;;;;;;;;;;;;;;;;;;;;;;
  2142. Fr_band:
  2143. mov r8, [rsi]
  2144. mov r9, [rdx]
  2145. bt r8, 63 ; Check if is short first operand
  2146. jc and_l1
  2147. bt r9, 63 ; Check if is short second operand
  2148. jc and_s1l2
  2149. and_s1s2:
  2150. cmp r8d, 0
  2151. js tmp_13
  2152. cmp r9d, 0
  2153. js tmp_13
  2154. xor rdx, rdx ; both ops are positive so do the op and return
  2155. mov edx, r8d
  2156. and edx, r9d
  2157. mov [rdi], rdx ; not necessary to adjust so just save and return
  2158. ret
  2159. tmp_13:
  2160. mov r11b, 0x80
  2161. shl r11, 56
  2162. mov [rdi], r11
  2163. push rdi
  2164. push rsi
  2165. mov rdi, rdx
  2166. movsx rsi, r9d
  2167. call rawCopyS2L
  2168. mov rdx, rdi
  2169. pop rsi
  2170. pop rdi
  2171. push rdi
  2172. push rdx
  2173. mov rdi, rsi
  2174. movsx rsi, r8d
  2175. call rawCopyS2L
  2176. mov rsi, rdi
  2177. pop rdx
  2178. pop rdi
  2179. mov rax, [rsi + 8]
  2180. and rax, [rdx + 8]
  2181. mov [rdi + 8 ], rax
  2182. mov rax, [rsi + 16]
  2183. and rax, [rdx + 16]
  2184. mov [rdi + 16 ], rax
  2185. mov rax, [rsi + 24]
  2186. and rax, [rdx + 24]
  2187. mov [rdi + 24 ], rax
  2188. mov rax, [rsi + 32]
  2189. and rax, [rdx + 32]
  2190. and rax, [lboMask]
  2191. mov [rdi + 32 ], rax
  2192. ret
  2193. and_l1:
  2194. bt r9, 63 ; Check if is short second operand
  2195. jc and_l1l2
  2196. and_l1s2:
  2197. bt r8, 62 ; check if montgomery first
  2198. jc and_l1ms2
  2199. and_l1ns2:
  2200. mov r11b, 0x80
  2201. shl r11, 56
  2202. mov [rdi], r11
  2203. cmp r9d, 0
  2204. js tmp_14
  2205. movsx rax, r9d
  2206. and rax, [rsi +8]
  2207. mov [rdi+8], rax
  2208. xor rax, rax
  2209. and rax, [rsi + 16];
  2210. mov [rdi + 16 ], rax;
  2211. xor rax, rax
  2212. and rax, [rsi + 24];
  2213. mov [rdi + 24 ], rax;
  2214. xor rax, rax
  2215. and rax, [rsi + 32];
  2216. and rax, [lboMask] ;
  2217. mov [rdi + 32 ], rax;
  2218. ret
  2219. tmp_14:
  2220. push rdi
  2221. push rsi
  2222. mov rdi, rdx
  2223. movsx rsi, r9d
  2224. call rawCopyS2L
  2225. mov rdx, rdi
  2226. pop rsi
  2227. pop rdi
  2228. mov r11b, 0x80
  2229. shl r11, 56
  2230. mov [rdi], r11
  2231. mov rax, [rsi + 8]
  2232. and rax, [rdx + 8]
  2233. mov [rdi + 8 ], rax
  2234. mov rax, [rsi + 16]
  2235. and rax, [rdx + 16]
  2236. mov [rdi + 16 ], rax
  2237. mov rax, [rsi + 24]
  2238. and rax, [rdx + 24]
  2239. mov [rdi + 24 ], rax
  2240. mov rax, [rsi + 32]
  2241. and rax, [rdx + 32]
  2242. and rax, [lboMask]
  2243. mov [rdi + 32 ], rax
  2244. ret
  2245. and_l1ms2:
  2246. mov r11b, 0x80
  2247. shl r11, 56
  2248. mov [rdi], r11
  2249. push r9 ; r9 is used in montgomery so we need to save it
  2250. push rdi
  2251. mov rdi, rsi
  2252. mov rsi, rdx
  2253. call Fr_toNormal
  2254. mov rdx, rsi
  2255. mov rsi, rdi
  2256. pop rdi
  2257. pop r9
  2258. cmp r9d, 0
  2259. js tmp_15
  2260. movsx rax, r9d
  2261. and rax, [rsi +8]
  2262. mov [rdi+8], rax
  2263. xor rax, rax
  2264. and rax, [rsi + 16];
  2265. mov [rdi + 16 ], rax;
  2266. xor rax, rax
  2267. and rax, [rsi + 24];
  2268. mov [rdi + 24 ], rax;
  2269. xor rax, rax
  2270. and rax, [rsi + 32];
  2271. and rax, [lboMask] ;
  2272. mov [rdi + 32 ], rax;
  2273. ret
  2274. tmp_15:
  2275. push rdi
  2276. push rsi
  2277. mov rdi, rdx
  2278. movsx rsi, r9d
  2279. call rawCopyS2L
  2280. mov rdx, rdi
  2281. pop rsi
  2282. pop rdi
  2283. mov r11b, 0x80
  2284. shl r11, 56
  2285. mov [rdi], r11
  2286. mov rax, [rsi + 8]
  2287. and rax, [rdx + 8]
  2288. mov [rdi + 8 ], rax
  2289. mov rax, [rsi + 16]
  2290. and rax, [rdx + 16]
  2291. mov [rdi + 16 ], rax
  2292. mov rax, [rsi + 24]
  2293. and rax, [rdx + 24]
  2294. mov [rdi + 24 ], rax
  2295. mov rax, [rsi + 32]
  2296. and rax, [rdx + 32]
  2297. and rax, [lboMask]
  2298. mov [rdi + 32 ], rax
  2299. ret
  2300. and_s1l2:
  2301. bt r9, 62 ; check if montgomery first
  2302. jc and_s1l2m
  2303. and_s1l2n:
  2304. mov r11b, 0x80
  2305. shl r11, 56
  2306. mov [rdi], r11
  2307. cmp r8d, 0
  2308. js tmp_16
  2309. movsx rax, r8d
  2310. and rax, [rdx +8]
  2311. mov [rdi+8], rax
  2312. xor rax, rax
  2313. and rax, [rdx + 16]
  2314. mov [rdi + 16 ], rax
  2315. xor rax, rax
  2316. and rax, [rdx + 24]
  2317. mov [rdi + 24 ], rax
  2318. xor rax, rax
  2319. and rax, [rdx + 32]
  2320. and rax, [lboMask]
  2321. mov [rdi + 32 ], rax
  2322. ret
  2323. tmp_16:
  2324. push rdi
  2325. push rdx
  2326. mov rdi, rsi
  2327. movsx rsi, r8d
  2328. call rawCopyS2L
  2329. mov rsi, rdi
  2330. pop rdx
  2331. pop rdi
  2332. mov r11b, 0x80
  2333. shl r11, 56
  2334. mov [rdi], r11
  2335. mov rax, [rsi + 8]
  2336. and rax, [rdx + 8]
  2337. mov [rdi + 8 ], rax
  2338. mov rax, [rsi + 16]
  2339. and rax, [rdx + 16]
  2340. mov [rdi + 16 ], rax
  2341. mov rax, [rsi + 24]
  2342. and rax, [rdx + 24]
  2343. mov [rdi + 24 ], rax
  2344. mov rax, [rsi + 32]
  2345. and rax, [rdx + 32]
  2346. and rax, [lboMask]
  2347. mov [rdi + 32 ], rax
  2348. ret
  2349. and_s1l2m:
  2350. mov r11b, 0x80
  2351. shl r11, 56
  2352. mov [rdi], r11
  2353. push r8 ; r8 is used in montgomery so we need to save it
  2354. push rdi
  2355. mov rdi, rdx
  2356. call Fr_toNormal
  2357. mov rdx, rdi
  2358. pop rdi
  2359. pop r8
  2360. cmp r8d, 0
  2361. js tmp_17
  2362. movsx rax, r8d
  2363. and rax, [rdx +8]
  2364. mov [rdi+8], rax
  2365. xor rax, rax
  2366. and rax, [rdx + 16]
  2367. mov [rdi + 16 ], rax
  2368. xor rax, rax
  2369. and rax, [rdx + 24]
  2370. mov [rdi + 24 ], rax
  2371. xor rax, rax
  2372. and rax, [rdx + 32]
  2373. and rax, [lboMask]
  2374. mov [rdi + 32 ], rax
  2375. ret
  2376. tmp_17:
  2377. push rdi
  2378. push rdx
  2379. mov rdi, rsi
  2380. movsx rsi, r8d
  2381. call rawCopyS2L
  2382. mov rsi, rdi
  2383. pop rdx
  2384. pop rdi
  2385. mov r11b, 0x80
  2386. shl r11, 56
  2387. mov [rdi], r11
  2388. mov rax, [rsi + 8]
  2389. and rax, [rdx + 8]
  2390. mov [rdi + 8 ], rax
  2391. mov rax, [rsi + 16]
  2392. and rax, [rdx + 16]
  2393. mov [rdi + 16 ], rax
  2394. mov rax, [rsi + 24]
  2395. and rax, [rdx + 24]
  2396. mov [rdi + 24 ], rax
  2397. mov rax, [rsi + 32]
  2398. and rax, [rdx + 32]
  2399. and rax, [lboMask]
  2400. mov [rdi + 32 ], rax
  2401. ret
  2402. and_l1l2:
  2403. bt r8, 62 ; check if montgomery first
  2404. jc and_l1ml2
  2405. bt r9, 62 ; check if montgomery first
  2406. jc and_l1nl2m
  2407. and_l1nl2n:
  2408. mov r11b, 0x80
  2409. shl r11, 56
  2410. mov [rdi], r11
  2411. mov rax, [rsi + 8]
  2412. and rax, [rdx + 8]
  2413. mov [rdi + 8 ], rax
  2414. mov rax, [rsi + 16]
  2415. and rax, [rdx + 16]
  2416. mov [rdi + 16 ], rax
  2417. mov rax, [rsi + 24]
  2418. and rax, [rdx + 24]
  2419. mov [rdi + 24 ], rax
  2420. mov rax, [rsi + 32]
  2421. and rax, [rdx + 32]
  2422. and rax, [lboMask]
  2423. mov [rdi + 32 ], rax
  2424. ret
  2425. and_l1nl2m:
  2426. mov r11b, 0x80
  2427. shl r11, 56
  2428. mov [rdi], r11
  2429. push rdi
  2430. mov rdi, rdx
  2431. call Fr_toNormal
  2432. mov rdx, rdi
  2433. pop rdi
  2434. mov rax, [rsi + 8]
  2435. and rax, [rdx + 8]
  2436. mov [rdi + 8 ], rax
  2437. mov rax, [rsi + 16]
  2438. and rax, [rdx + 16]
  2439. mov [rdi + 16 ], rax
  2440. mov rax, [rsi + 24]
  2441. and rax, [rdx + 24]
  2442. mov [rdi + 24 ], rax
  2443. mov rax, [rsi + 32]
  2444. and rax, [rdx + 32]
  2445. and rax, [lboMask]
  2446. mov [rdi + 32 ], rax
  2447. ret
  2448. and_l1ml2:
  2449. bt r9, 62 ; check if montgomery first
  2450. jc and_l1ml2m
  2451. and_l1ml2n:
  2452. mov r11b, 0x80
  2453. shl r11, 56
  2454. mov [rdi], r11
  2455. push rdi
  2456. mov rdi, rsi
  2457. mov rsi, rdx
  2458. call Fr_toNormal
  2459. mov rdx, rsi
  2460. mov rsi, rdi
  2461. pop rdi
  2462. mov rax, [rsi + 8]
  2463. and rax, [rdx + 8]
  2464. mov [rdi + 8 ], rax
  2465. mov rax, [rsi + 16]
  2466. and rax, [rdx + 16]
  2467. mov [rdi + 16 ], rax
  2468. mov rax, [rsi + 24]
  2469. and rax, [rdx + 24]
  2470. mov [rdi + 24 ], rax
  2471. mov rax, [rsi + 32]
  2472. and rax, [rdx + 32]
  2473. and rax, [lboMask]
  2474. mov [rdi + 32 ], rax
  2475. ret
  2476. and_l1ml2m:
  2477. mov r11b, 0x80
  2478. shl r11, 56
  2479. mov [rdi], r11
  2480. push rdi
  2481. mov rdi, rsi
  2482. mov rsi, rdx
  2483. call Fr_toNormal
  2484. mov rdx, rsi
  2485. mov rsi, rdi
  2486. pop rdi
  2487. push rdi
  2488. mov rdi, rdx
  2489. call Fr_toNormal
  2490. mov rdx, rdi
  2491. pop rdi
  2492. mov rax, [rsi + 8]
  2493. and rax, [rdx + 8]
  2494. mov [rdi + 8 ], rax
  2495. mov rax, [rsi + 16]
  2496. and rax, [rdx + 16]
  2497. mov [rdi + 16 ], rax
  2498. mov rax, [rsi + 24]
  2499. and rax, [rdx + 24]
  2500. mov [rdi + 24 ], rax
  2501. mov rax, [rsi + 32]
  2502. and rax, [rdx + 32]
  2503. and rax, [lboMask]
  2504. mov [rdi + 32 ], rax
  2505. ret
  2506. ;;;;;;;;;;;;;;;;;;;;;;
  2507. ; bor
  2508. ;;;;;;;;;;;;;;;;;;;;;;
; Computes the bitwise OR of two elements of any kind
  2510. ; Params:
  2511. ; rsi <= Pointer to element 1
  2512. ; rdx <= Pointer to element 2
  2513. ; rdi <= Pointer to result
  2514. ; Modified Registers:
  2515. ; r8, r9, 10, r11, rax, rcx
  2516. ;;;;;;;;;;;;;;;;;;;;;;
  Fr_bor:
          ; Bitwise OR of two elements of any kind (short, long, long Montgomery).
          ;   rsi <= Pointer to element 1
          ;   rdx <= Pointer to element 2
          ;   rdi <= Pointer to result
          ; Header qword: bit 63 set => long element (limbs at +8..+32),
          ; bit 62 set => long element in Montgomery form; otherwise the low
          ; 32 bits hold a signed short value.
          ; Side effect: a Montgomery operand, or a short operand that has to be
          ; widened, is rewritten in place (Fr_toNormal / rawCopyS2L).
          ; The result header is stored only after the operands have been
          ; normalised, so the conversions still see the real operand header
          ; when the result pointer aliases one of the operands.
          mov     r8, [rsi]               ; r8 = header / short value of element 1
          mov     r9, [rdx]               ; r9 = header / short value of element 2
          bt      r8, 63
          jc      .e1_long
          bt      r9, 63
          jc      .e1_short_e2_long

          ; ---- short | short ----
          test    r8d, r8d
          js      .widen_both             ; a negative short forces the long path
          test    r9d, r9d
          js      .widen_both
          mov     edx, r8d                ; 32-bit write zero-extends into rdx
          or      edx, r9d
          mov     [rdi], rdx              ; both non-negative: result stays short
          ret

  .widen_both:
          ; Widen element 2 in place, then fall through to widen element 1.
          push    rdi
          push    rsi
          mov     rdi, rdx
          movsx   rsi, r9d
          call    rawCopyS2L
          mov     rdx, rdi
          pop     rsi
          pop     rdi
  .widen_e1:
          ; Widen short element 1 (value in r8d) in place.
          push    rdi
          push    rdx
          mov     rdi, rsi
          movsx   rsi, r8d
          call    rawCopyS2L
          mov     rsi, rdi
          pop     rdx
          pop     rdi
          jmp     .combine

          ; ---- short | long ----
  .e1_short_e2_long:
          bt      r9, 62
          jnc     .e2_ready
          push    r8                      ; r8 is still needed after the conversion
          push    rdi
          mov     rdi, rdx
          call    Fr_toNormal             ; element 2 out of Montgomery form, in place
          mov     rdx, rdi
          pop     rdi
          pop     r8
  .e2_ready:
          test    r8d, r8d
          js      .widen_e1
          ; Non-negative short only touches the lowest limb of element 2.
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          movsx   rax, r8d
          or      rax, [rdx + 8]
          mov     [rdi + 8], rax
          mov     rax, [rdx + 16]
          mov     [rdi + 16], rax
          mov     rax, [rdx + 24]
          mov     [rdi + 24], rax
          mov     rax, [rdx + 32]
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret

          ; ---- long | anything ----
  .e1_long:
          bt      r8, 62
          jnc     .e1_ready
          push    r9                      ; r9 is still needed after the conversion
          push    rdi
          mov     rdi, rsi
          mov     rsi, rdx                ; park the element-2 pointer across the call
          call    Fr_toNormal             ; element 1 out of Montgomery form, in place
          mov     rdx, rsi
          mov     rsi, rdi
          pop     rdi
          pop     r9
  .e1_ready:
          bt      r9, 63
          jc      .e2_long
          test    r9d, r9d
          js      .widen_e2
          ; Non-negative short only touches the lowest limb of element 1.
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          movsx   rax, r9d
          or      rax, [rsi + 8]
          mov     [rdi + 8], rax
          mov     rax, [rsi + 16]
          mov     [rdi + 16], rax
          mov     rax, [rsi + 24]
          mov     [rdi + 24], rax
          mov     rax, [rsi + 32]
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret

  .e2_long:
          bt      r9, 62
          jnc     .combine
          push    rdi
          mov     rdi, rdx
          call    Fr_toNormal             ; element 2 out of Montgomery form, in place
          mov     rdx, rdi
          pop     rdi
          jmp     .combine

  .widen_e2:
          ; Widen short element 2 (value in r9d) in place.
          push    rdi
          push    rsi
          mov     rdi, rdx
          movsx   rsi, r9d
          call    rawCopyS2L
          mov     rdx, rdi
          pop     rsi
          pop     rdi

  .combine:
          ; Both operands are long and non-Montgomery here: OR limb by limb.
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          mov     rax, [rsi + 8]
          or      rax, [rdx + 8]
          mov     [rdi + 8], rax
          mov     rax, [rsi + 16]
          or      rax, [rdx + 16]
          mov     [rdi + 16], rax
          mov     rax, [rsi + 24]
          or      rax, [rdx + 24]
          mov     [rdi + 24], rax
          mov     rax, [rsi + 32]
          or      rax, [rdx + 32]
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; bxor
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Bitwise XOR of two elements of any kind (short, long, long Montgomery)
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result
  ; Modified Registers:
  ;   r8, r9, r11, rax, rdx (short result path), flags, plus anything
  ;   rawCopyS2L / Fr_toNormal modify
  ; Side effects:
  ;   A Montgomery operand, or a short operand that has to be widened, is
  ;   rewritten in place (Fr_toNormal / rawCopyS2L).
  ; Header qword: bit 63 set => long element (limbs at +8..+32), bit 62 set =>
  ; Montgomery form; otherwise the low 32 bits hold a signed short value.
  ; The result header is stored only after the operands have been normalised,
  ; so the conversions still see the real operand header when the result
  ; pointer aliases one of the operands.
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_bxor:
          mov     r8, [rsi]               ; r8 = header / short value of element 1
          mov     r9, [rdx]               ; r9 = header / short value of element 2
          bt      r8, 63
          jc      .e1_long
          bt      r9, 63
          jc      .e1_short_e2_long

          ; ---- short ^ short ----
          test    r8d, r8d
          js      .widen_both             ; a negative short forces the long path
          test    r9d, r9d
          js      .widen_both
          mov     edx, r8d                ; 32-bit write zero-extends into rdx
          xor     edx, r9d
          mov     [rdi], rdx              ; both non-negative: result stays short
          ret

  .widen_both:
          ; Widen element 2 in place, then fall through to widen element 1.
          push    rdi
          push    rsi
          mov     rdi, rdx
          movsx   rsi, r9d
          call    rawCopyS2L
          mov     rdx, rdi
          pop     rsi
          pop     rdi
  .widen_e1:
          ; Widen short element 1 (value in r8d) in place.
          push    rdi
          push    rdx
          mov     rdi, rsi
          movsx   rsi, r8d
          call    rawCopyS2L
          mov     rsi, rdi
          pop     rdx
          pop     rdi
          jmp     .combine

          ; ---- short ^ long ----
  .e1_short_e2_long:
          bt      r9, 62
          jnc     .e2_ready
          push    r8                      ; r8 is still needed after the conversion
          push    rdi
          mov     rdi, rdx
          call    Fr_toNormal             ; element 2 out of Montgomery form, in place
          mov     rdx, rdi
          pop     rdi
          pop     r8
  .e2_ready:
          test    r8d, r8d
          js      .widen_e1
          ; Non-negative short only touches the lowest limb of element 2.
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          movsx   rax, r8d
          xor     rax, [rdx + 8]
          mov     [rdi + 8], rax
          mov     rax, [rdx + 16]
          mov     [rdi + 16], rax
          mov     rax, [rdx + 24]
          mov     [rdi + 24], rax
          mov     rax, [rdx + 32]
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret

          ; ---- long ^ anything ----
  .e1_long:
          bt      r8, 62
          jnc     .e1_ready
          push    r9                      ; r9 is still needed after the conversion
          push    rdi
          mov     rdi, rsi
          mov     rsi, rdx                ; park the element-2 pointer across the call
          call    Fr_toNormal             ; element 1 out of Montgomery form, in place
          mov     rdx, rsi
          mov     rsi, rdi
          pop     rdi
          pop     r9
  .e1_ready:
          bt      r9, 63
          jc      .e2_long
          test    r9d, r9d
          js      .widen_e2
          ; Non-negative short only touches the lowest limb of element 1.
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          movsx   rax, r9d
          xor     rax, [rsi + 8]
          mov     [rdi + 8], rax
          mov     rax, [rsi + 16]
          mov     [rdi + 16], rax
          mov     rax, [rsi + 24]
          mov     [rdi + 24], rax
          mov     rax, [rsi + 32]
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret

  .e2_long:
          bt      r9, 62
          jnc     .combine
          push    rdi
          mov     rdi, rdx
          call    Fr_toNormal             ; element 2 out of Montgomery form, in place
          mov     rdx, rdi
          pop     rdi
          jmp     .combine

  .widen_e2:
          ; Widen short element 2 (value in r9d) in place.
          push    rdi
          push    rsi
          mov     rdi, rdx
          movsx   rsi, r9d
          call    rawCopyS2L
          mov     rdx, rdi
          pop     rsi
          pop     rdi

  .combine:
          ; Both operands are long and non-Montgomery here: XOR limb by limb.
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          mov     rax, [rsi + 8]
          xor     rax, [rdx + 8]
          mov     [rdi + 8], rax
          mov     rax, [rsi + 16]
          xor     rax, [rdx + 16]
          mov     [rdi + 16], rax
          mov     rax, [rsi + 24]
          xor     rax, [rdx + 24]
          mov     [rdi + 24], rax
          mov     rax, [rsi + 32]
          xor     rax, [rdx + 32]
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; bnot
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Bitwise NOT of one element of any kind (short, long, long Montgomery)
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdi <= Pointer to result
  ; Modified Registers:
  ;   r8, r11, rax, flags, plus anything rawCopyS2L / Fr_toNormal modify
  ; Side effects:
  ;   A short or Montgomery operand is rewritten in place as a long,
  ;   non-Montgomery element before it is complemented.
  ; The operand header is read before anything is stored to the result, and
  ; the result header is written only after the operand has been converted,
  ; so the routine also works when rdi == rsi.
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_bnot:
          mov     r8, [rsi]               ; operand header / short value
          bt      r8, 63                  ; bit 63 set => long operand
          jc      .long

          ; Short operand: widen it in place first.
          push    rdi
          push    rdx
          mov     rdi, rsi
          movsx   rsi, r8d
          call    rawCopyS2L
          mov     rsi, rdi
          pop     rdx
          pop     rdi
          jmp     .complement

  .long:
          bt      r8, 62                  ; bit 62 set => Montgomery form
          jnc     .complement
          push    rdi
          mov     rdi, rsi
          mov     rsi, rdx                ; keep rdx intact across the call
          call    Fr_toNormal             ; operand out of Montgomery form, in place
          mov     rdx, rsi
          mov     rsi, rdi
          pop     rdi

  .complement:
          mov     r11, 0x8000000000000000 ; long, non-Montgomery header
          mov     [rdi], r11
          mov     rax, [rsi + 8]
          not     rax
          mov     [rdi + 8], rax
          mov     rax, [rsi + 16]
          not     rax
          mov     [rdi + 16], rax
          mov     rax, [rsi + 24]
          not     rax
          mov     [rdi + 24], rax
          mov     rax, [rsi + 32]
          not     rax
          and     rax, [lboMask]          ; mask the top limb
          mov     [rdi + 32], rax
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; eq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Equality test: stores 1 in the result when element 1 == element 2,
  ; otherwise 0. Works on the normalised difference element 1 - element 2.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result can be zero or one.
  ; Modified Registers:
  ;   rax, flags, plus anything Fr_sub / Fr_toNormal modify
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_eq:
          sub     rsp, 40                 ; scratch element for the difference
          push    rdi                     ; keep the caller's result pointer
          lea     rdi, [rsp + 8]          ; scratch sits above the slot just pushed
          call    Fr_sub                  ; scratch = element 1 - element 2
          call    Fr_toNormal             ; scratch out of Montgomery form
          pop     rdi
          mov     rax, [rsp]              ; header of the difference
          bt      rax, 63                 ; bit 63 set => difference is long
          jc      .long_diff
          test    eax, eax                ; short difference: zero means equal
          jz      .equal
  .differ:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  .long_diff:
          ; Long difference: equal only when every limb is zero.
          cmp     qword [rsp + 32], 0
          jnz     .differ
          cmp     qword [rsp + 24], 0
          jnz     .differ
          cmp     qword [rsp + 16], 0
          jnz     .differ
          cmp     qword [rsp + 8], 0
          jnz     .differ
  .equal:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; neq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Inequality test: stores 1 in the result when element 1 != element 2,
  ; otherwise 0. Works on the normalised difference element 1 - element 2.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result can be zero or one.
  ; Modified Registers:
  ;   rax, flags, plus anything Fr_sub / Fr_toNormal modify
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_neq:
          sub     rsp, 40                 ; scratch element for the difference
          push    rdi                     ; keep the caller's result pointer
          lea     rdi, [rsp + 8]          ; scratch sits above the slot just pushed
          call    Fr_sub                  ; scratch = element 1 - element 2
          call    Fr_toNormal             ; scratch out of Montgomery form
          pop     rdi
          mov     rax, [rsp]              ; header of the difference
          bt      rax, 63                 ; bit 63 set => difference is long
          jc      .long_diff
          test    eax, eax                ; short difference: zero means equal
          jz      .equal
  .differ:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  .long_diff:
          ; Long difference: equal only when every limb is zero.
          cmp     qword [rsp + 32], 0
          jnz     .differ
          cmp     qword [rsp + 24], 0
          jnz     .differ
          cmp     qword [rsp + 16], 0
          jnz     .differ
          cmp     qword [rsp + 8], 0
          jnz     .differ
  .equal:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; lt
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Stores 1 in the result when element 1 < element 2, otherwise 0.
  ; The decision is the sign of the normalised difference element 1 -
  ; element 2: a short difference uses its 32-bit sign, a long difference is
  ; negative when it is above `half` (the (q-1)/2 constant).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result can be zero or one.
  ; Modified Registers:
  ;   rax, flags, plus anything Fr_sub / Fr_toNormal modify
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_lt:
          sub     rsp, 40                 ; scratch element for the difference
          push    rdi                     ; keep the caller's result pointer
          lea     rdi, [rsp + 8]          ; scratch sits above the slot just pushed
          call    Fr_sub                  ; scratch = element 1 - element 2
          call    Fr_toNormal             ; scratch out of Montgomery form
          pop     rdi
          mov     rax, [rsp]              ; header of the difference
          bt      rax, 63                 ; bit 63 set => difference is long
          jc      .long_diff
          test    eax, eax
          js      .yes                    ; negative short difference => e1 < e2
  .no:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  .yes:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  .long_diff:
          mov     rax, [rsp + 8]
          or      rax, [rsp + 16]
          or      rax, [rsp + 24]
          or      rax, [rsp + 32]
          jz      .no                     ; zero difference => e1 == e2
          ; Compare the difference with half, most significant limb first.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jc      .yes                    ; half < diff => diff negative => e1 < e2
          jnz     .no                     ; half > diff => diff positive => e1 > e2
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jc      .yes
          jnz     .no
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jc      .yes
          jnz     .no
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jc      .yes
          jmp     .no                     ; half >= diff => diff positive => e1 > e2
  ;;;;;;;;;;;;;;;;;;;;;;
  ; gt
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Stores 1 in the result when element 1 > element 2, otherwise 0.
  ; The decision is the sign of the normalised difference element 1 -
  ; element 2: a short difference uses its 32-bit sign, a long difference is
  ; negative when it is above `half` (the (q-1)/2 constant).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result can be zero or one.
  ; Modified Registers:
  ;   rax, flags, plus anything Fr_sub / Fr_toNormal modify
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_gt:
          sub     rsp, 40                 ; scratch element for the difference
          push    rdi                     ; keep the caller's result pointer
          lea     rdi, [rsp + 8]          ; scratch sits above the slot just pushed
          call    Fr_sub                  ; scratch = element 1 - element 2
          call    Fr_toNormal             ; scratch out of Montgomery form
          pop     rdi
          mov     rax, [rsp]              ; header of the difference
          bt      rax, 63                 ; bit 63 set => difference is long
          jc      .long_diff
          test    eax, eax
          jg      .yes                    ; strictly positive short difference => e1 > e2
  .no:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  .yes:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  .long_diff:
          mov     rax, [rsp + 8]
          or      rax, [rsp + 16]
          or      rax, [rsp + 24]
          or      rax, [rsp + 32]
          jz      .no                     ; zero difference => e1 == e2
          ; Compare the difference with half, most significant limb first.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jc      .no                     ; half < diff => diff negative => e1 < e2
          jnz     .yes                    ; half > diff => diff positive => e1 > e2
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jc      .no
          jnz     .yes
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jc      .no
          jnz     .yes
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jc      .no
          jmp     .yes                    ; half >= diff => diff positive => e1 > e2
  ;;;;;;;;;;;;;;;;;;;;;;
  ; leq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Stores 1 in the result when element 1 <= element 2, otherwise 0.
  ; The decision is the sign of the normalised difference element 1 -
  ; element 2: a short difference uses its 32-bit sign, a long difference is
  ; negative when it is above `half` (the (q-1)/2 constant).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result can be zero or one.
  ; Modified Registers:
  ;   rax, flags, plus anything Fr_sub / Fr_toNormal modify
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_leq:
          sub     rsp, 40                 ; scratch element for the difference
          push    rdi                     ; keep the caller's result pointer
          lea     rdi, [rsp + 8]          ; scratch sits above the slot just pushed
          call    Fr_sub                  ; scratch = element 1 - element 2
          call    Fr_toNormal             ; scratch out of Montgomery form
          pop     rdi
          mov     rax, [rsp]              ; header of the difference
          bt      rax, 63                 ; bit 63 set => difference is long
          jc      .long_diff
          test    eax, eax
          jle     .yes                    ; zero or negative short difference => e1 <= e2
  .no:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  .yes:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  .long_diff:
          mov     rax, [rsp + 8]
          or      rax, [rsp + 16]
          or      rax, [rsp + 24]
          or      rax, [rsp + 32]
          jz      .yes                    ; zero difference => e1 == e2
          ; Compare the difference with half, most significant limb first.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jc      .yes                    ; half < diff => diff negative => e1 < e2
          jnz     .no                     ; half > diff => diff positive => e1 > e2
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jc      .yes
          jnz     .no
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jc      .yes
          jnz     .no
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jc      .yes
          jmp     .no                     ; half >= diff => diff positive => e1 > e2
  ;;;;;;;;;;;;;;;;;;;;;;
  ; geq
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Stores 1 in the result when element 1 >= element 2, otherwise 0.
  ; The decision is the sign of the normalised difference element 1 -
  ; element 2: a short difference uses its 32-bit sign, a long difference is
  ; negative when it is above `half` (the (q-1)/2 constant).
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result can be zero or one.
  ; Modified Registers:
  ;   rax, flags, plus anything Fr_sub / Fr_toNormal modify
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_geq:
          sub     rsp, 40                 ; scratch element for the difference
          push    rdi                     ; keep the caller's result pointer
          lea     rdi, [rsp + 8]          ; scratch sits above the slot just pushed
          call    Fr_sub                  ; scratch = element 1 - element 2
          call    Fr_toNormal             ; scratch out of Montgomery form
          pop     rdi
          mov     rax, [rsp]              ; header of the difference
          bt      rax, 63                 ; bit 63 set => difference is long
          jc      .long_diff
          test    eax, eax
          jns     .yes                    ; zero or positive short difference => e1 >= e2
  .no:
          mov     qword [rdi], 0
          add     rsp, 40
          ret
  .yes:
          mov     qword [rdi], 1
          add     rsp, 40
          ret
  .long_diff:
          mov     rax, [rsp + 8]
          or      rax, [rsp + 16]
          or      rax, [rsp + 24]
          or      rax, [rsp + 32]
          jz      .yes                    ; zero difference => e1 == e2
          ; Compare the difference with half, most significant limb first.
          mov     rax, [rsp + 32]
          cmp     [half + 24], rax
          jc      .no                     ; half < diff => diff negative => e1 < e2
          jnz     .yes                    ; half > diff => diff positive => e1 > e2
          mov     rax, [rsp + 24]
          cmp     [half + 16], rax
          jc      .no
          jnz     .yes
          mov     rax, [rsp + 16]
          cmp     [half + 8], rax
          jc      .no
          jnz     .yes
          mov     rax, [rsp + 8]
          cmp     [half + 0], rax
          jc      .no
          jmp     .yes                    ; half >= diff => diff positive => e1 > e2
  ;;;;;;;;;;;;;;;;;;;;;;
  ; land
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Logical AND of two elements.
  ; Each element is first reduced to a truth value (1 = non-zero, 0 = zero):
  ;   - short element (bit 63 of the header clear): low 32 bits of the header;
  ;   - long element  (bit 63 of the header set)  : the four limbs at +8..+32.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; receives the qword 0 or 1
  ; Modified Registers:
  ;   rax, rcx, r8, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_land:
          ; r8 = truth value of element 1
          mov     rax, [rsi]
          test    rax, rax                ; SF = bit 63 = long-format flag
          js      .e1_long
          test    eax, eax                ; short: ZF from the 32-bit value
          jmp     .e1_flag
  .e1_long:
          mov     rax, [rsi + 8]
          or      rax, [rsi + 16]
          or      rax, [rsi + 24]
          or      rax, [rsi + 32]         ; ZF set iff every limb is zero
  .e1_flag:
          setnz   r8b
          movzx   r8d, r8b

          ; rcx = truth value of element 2
          mov     rax, [rdx]
          test    rax, rax
          js      .e2_long
          test    eax, eax
          jmp     .e2_flag
  .e2_long:
          mov     rax, [rdx + 8]
          or      rax, [rdx + 16]
          or      rax, [rdx + 24]
          or      rax, [rdx + 32]
  .e2_flag:
          setnz   cl
          movzx   ecx, cl

          and     rcx, r8                 ; 1 only when both are non-zero
          mov     [rdi], rcx
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; lor
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Logical OR of two elements.
  ; Each element is first reduced to a truth value (1 = non-zero, 0 = zero):
  ;   - short element (bit 63 of the header clear): low 32 bits of the header;
  ;   - long element  (bit 63 of the header set)  : the four limbs at +8..+32.
  ; Params:
  ;   rsi <= Pointer to element 1
  ;   rdx <= Pointer to element 2
  ;   rdi <= Pointer to result; receives the qword 0 or 1
  ; Modified Registers:
  ;   rax, rcx, r8, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_lor:
          ; r8 = truth value of element 1
          mov     rax, [rsi]
          test    rax, rax                ; SF = bit 63 = long-format flag
          js      .e1_long
          test    eax, eax                ; short: ZF from the 32-bit value
          jmp     .e1_flag
  .e1_long:
          mov     rax, [rsi + 8]
          or      rax, [rsi + 16]
          or      rax, [rsi + 24]
          or      rax, [rsi + 32]         ; ZF set iff every limb is zero
  .e1_flag:
          setnz   r8b
          movzx   r8d, r8b

          ; rcx = truth value of element 2
          mov     rax, [rdx]
          test    rax, rax
          js      .e2_long
          test    eax, eax
          jmp     .e2_flag
  .e2_long:
          mov     rax, [rdx + 8]
          or      rax, [rdx + 16]
          or      rax, [rdx + 24]
          or      rax, [rdx + 32]
  .e2_flag:
          setnz   cl
          movzx   ecx, cl

          or      rcx, r8                 ; 1 when at least one is non-zero
          mov     [rdi], rcx
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; lnot
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Logical NOT of an element.
  ; The element counts as zero when:
  ;   - it is short (bit 63 of the header clear) and its low 32 bits are zero;
  ;   - it is long  (bit 63 of the header set) and the limbs at +8..+32 are
  ;     all zero.
  ; Params:
  ;   rsi <= Pointer to element to be tested
  ;   rdi <= Pointer to result; receives the qword 1 if the element is zero
  ;          and 0 otherwise
  ; Modified Registers:
  ;   rax, rcx, flags
  ;;;;;;;;;;;;;;;;;;;;;;
  Fr_lnot:
          mov     rax, [rsi]
          test    rax, rax                ; SF = bit 63 = long-format flag
          js      .long
          test    eax, eax                ; short: ZF from the 32-bit value
          jmp     .decide
  .long:
          mov     rax, [rsi + 8]
          or      rax, [rsi + 16]
          or      rax, [rsi + 24]
          or      rax, [rsi + 32]         ; ZF set iff every limb is zero
  .decide:
          setnz   cl
          movzx   ecx, cl                 ; rcx = 1 if the element is non-zero
          mov     eax, ecx
          xor     eax, 1                  ; invert the truth value
          mov     [rdi], rax
          ret
  ;;;;;;;;;;;;;;;;;;;;;;
  ; isTrue
  ;;;;;;;;;;;;;;;;;;;;;;
  ; Reports whether an element is non-zero.
  ;   - short element (bit 63 of the header clear): low 32 bits of the header;
  ;   - long element  (bit 63 of the header set)  : the four limbs at +8..+32.
  ; Params:
  ;   rdi <= Pointer to the element
  ; Returns:
  ;   rax <= 1 if the element is non-zero, 0 if it is zero
  ; Modified Registers:
  ;   rax, flags
  ;;;;;;;;;;;;;;;;;;;;;;;
  Fr_isTrue:
          mov     rax, [rdi]
          test    rax, rax                ; SF = bit 63 = long-format flag
          js      .long
          test    eax, eax                ; short: ZF from the 32-bit value
          jmp     .flag
  .long:
          mov     rax, [rdi + 8]
          or      rax, [rdi + 16]
          or      rax, [rdi + 24]
          or      rax, [rdi + 32]         ; ZF set iff every limb is zero
  .flag:
          setnz   al
          movzx   eax, al                 ; rax = 0 or 1
          ret
  4031. section .data ; constants used by the field routines
  4032. Fr_q: ; exported label (see `global Fr_q`): header qword below, immediately followed by the limbs of q
  4033. dd 0 ; low dword of the header qword: the 32-bit field the routines test with `test eax, eax`
  4034. dd 0x80000000 ; high dword of the header qword: sets bit 63, the bit the routines test with `bt rax, 63` to detect long format
  4035. q dq 0x43e1f593f0000001,0x2833e84879b97091,0xb85045b68181585d,0x30644e72e131a029 ; q as four 64-bit limbs, least-significant limb first
  4036. half dq 0xa1f0fac9f8000000,0x9419f4243cdcb848,0xdc2822db40c0ac2e,0x183227397098d014 ; (q-1)/2, same limb order; the comparison routines treat values above it as negative
  4037. R2 dq 0x1bb8e645ae216da7,0x53fe3ab1e35c59e3,0x8c49833d53bb8085,0x0216d0b17f4e44a5 ; NOTE(review): presumably R^2 mod q for Montgomery conversion -- use not visible here, confirm
  4038. R3 dq 0x5e94d8e1b4bf0040,0x2a489cbe1cfbb6b8,0x893cc664a19fcfed,0x0cf8594b7fcc657c ; NOTE(review): presumably R^3 mod q -- use not visible here, confirm
  4039. lboMask dq 0x1fffffffffffffff ; low 61 bits set; NOTE(review): use not visible in this chunk