// Code generated by goff (v0.2.0) DO NOT EDIT
|
|
|
|
#include "textflag.h"
|
|
|
|
// func MulAssignElement(res, y *Element)
//
// MulAssignElement computes the Montgomery product of res and y and stores
// the reduced result back into res. Both operands are read as four 64-bit
// words (offsets 0, 8, 16 and 24); res is only written at the very end, so
// the routine keeps reading the original res words throughout.
//
// The algorithm is described here:
//   https://hackmd.io/@zkteam/modular_multiplication
//
// Two code paths are provided:
//   * an ADX/BMI2 path (MULX + ADCX/ADOX), taken when ·supportAdx == 1
//   * a fallback path (label no_adx) using MULQ/ADDQ/ADCQ
// Both paths leave t in CX, BX, R13, SI and finish in the shared `reduce`
// tail.
//
// Register roles (both paths):
//   DI               = res (also the x operand)
//   R8               = y   (reused as scratch inside `reduce`)
//   R12              = y[i], the word of y used by the current outer loop
//   CX, BX, R13, SI  = t[0], t[1], t[2], t[3]
//   R9               = A, high word / carry of the x[j]*y[i] chain
//   R10              = C, high word / carry of the m*q[j] chain
//   R11              = m
//   AX, DX           = scratch (DX is the implicit MULX operand, DX:AX the
//                      MULQ result)
//
// BP is deliberately NOT used: this function declares a zero-size frame, so
// the assembler emits no save/restore of the frame pointer and overwriting
// BP would corrupt the caller's frame-pointer chain.
TEXT ·MulAssignElement(SB), NOSPLIT, $0-16

    // dereference our parameters
    MOVQ res+0(FP), DI
    MOVQ y+8(FP), R8

    // check if we support adx and mulx
    CMPB ·supportAdx(SB), $1
    JNE no_adx

    // To benefit from the independent ADCX (CF) and ADOX (OF) carry chains,
    // the inner loops are split in 2:
    //
    // for i=0 to N-1
    //     for j=0 to N-1
    //         (A,t[j]) := t[j] + a[j]*b[i] + A
    //     m := t[0]*q'[0] mod W
    //     C,_ := t[0] + m*q[0]
    //     for j=1 to N-1
    //         (C,t[j-1]) := t[j] + m*q[j] + C
    //     t[N-1] = C + A

    // ---------------------------------------------------------------------------------------------
    // outer loop 0

    // clear the carry flags (XOR resets both CF and OF)
    XORQ R9, R9

    // R12 = y[0]
    MOVQ 0(R8), R12

    // for j=0 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A
    // t is not initialised yet on this first pass, so the high word of each
    // product is moved (not added) into the next limb.

    // DX = res[0]
    MOVQ 0(DI), DX
    MULXQ R12, CX, R9           // (R9, CX) = res[0] * y[0]

    // DX = res[1]
    MOVQ 8(DI), DX
    MOVQ R9, BX
    MULXQ R12, AX, R9
    ADOXQ AX, BX

    // DX = res[2]
    MOVQ 16(DI), DX
    MOVQ R9, R13
    MULXQ R12, AX, R9
    ADOXQ AX, R13

    // DX = res[3]
    MOVQ 24(DI), DX
    MOVQ R9, SI
    MULXQ R12, AX, R9
    ADOXQ AX, SI

    // add the last carries to R9
    // (MOVQ $0 rather than XORQ: the pending CF/OF must be preserved)
    MOVQ $0, DX
    ADCXQ DX, R9
    ADOXQ DX, R9

    // m := t[0]*q'[0] mod W   (only the low word R11 is used)
    MOVQ $0xc2e1f593efffffff, DX
    MULXQ CX, R11, DX

    // clear the carry flags
    XORQ DX, DX

    // C,_ := t[0] + m*q[0]
    // the low word in AX is discarded; only the carry in CF and the high
    // word in R10 are consumed below
    MOVQ $0x43e1f593f0000001, DX
    MULXQ R11, AX, R10
    ADCXQ CX, AX

    // for j=1 to N-1
    //     (C,t[j-1]) := t[j] + m*q[j] + C

    MOVQ $0x2833e84879b97091, DX    // q[1]
    MULXQ R11, AX, DX
    ADCXQ BX, R10
    ADOXQ AX, R10
    MOVQ R10, CX
    MOVQ DX, R10

    MOVQ $0xb85045b68181585d, DX    // q[2]
    MULXQ R11, AX, DX
    ADCXQ R13, R10
    ADOXQ AX, R10
    MOVQ R10, BX
    MOVQ DX, R10

    MOVQ $0x30644e72e131a029, DX    // q[3]
    MULXQ R11, AX, DX
    ADCXQ SI, R10
    ADOXQ AX, R10
    MOVQ R10, R13

    // t[N-1] = C + A (folding in both pending carries)
    MOVQ $0, AX
    ADCXQ AX, DX
    ADOXQ DX, R9
    MOVQ R9, SI

    // ---------------------------------------------------------------------------------------------
    // outer loop 1

    // clear the carry flags
    XORQ R9, R9

    // R12 = y[1]
    MOVQ 8(R8), R12

    // for j=0 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A

    // DX = res[0]
    MOVQ 0(DI), DX
    MULXQ R12, AX, R9
    ADOXQ AX, CX

    // DX = res[1]
    MOVQ 8(DI), DX
    ADCXQ R9, BX
    MULXQ R12, AX, R9
    ADOXQ AX, BX

    // DX = res[2]
    MOVQ 16(DI), DX
    ADCXQ R9, R13
    MULXQ R12, AX, R9
    ADOXQ AX, R13

    // DX = res[3]
    MOVQ 24(DI), DX
    ADCXQ R9, SI
    MULXQ R12, AX, R9
    ADOXQ AX, SI

    // add the last carries to R9 (MOVQ keeps CF/OF intact)
    MOVQ $0, DX
    ADCXQ DX, R9
    ADOXQ DX, R9

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, DX
    MULXQ CX, R11, DX

    // clear the carry flags
    XORQ DX, DX

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, DX
    MULXQ R11, AX, R10
    ADCXQ CX, AX

    // for j=1 to N-1
    //     (C,t[j-1]) := t[j] + m*q[j] + C

    MOVQ $0x2833e84879b97091, DX    // q[1]
    MULXQ R11, AX, DX
    ADCXQ BX, R10
    ADOXQ AX, R10
    MOVQ R10, CX
    MOVQ DX, R10

    MOVQ $0xb85045b68181585d, DX    // q[2]
    MULXQ R11, AX, DX
    ADCXQ R13, R10
    ADOXQ AX, R10
    MOVQ R10, BX
    MOVQ DX, R10

    MOVQ $0x30644e72e131a029, DX    // q[3]
    MULXQ R11, AX, DX
    ADCXQ SI, R10
    ADOXQ AX, R10
    MOVQ R10, R13

    // t[N-1] = C + A
    MOVQ $0, AX
    ADCXQ AX, DX
    ADOXQ DX, R9
    MOVQ R9, SI

    // ---------------------------------------------------------------------------------------------
    // outer loop 2

    // clear the carry flags
    XORQ R9, R9

    // R12 = y[2]
    MOVQ 16(R8), R12

    // for j=0 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A

    // DX = res[0]
    MOVQ 0(DI), DX
    MULXQ R12, AX, R9
    ADOXQ AX, CX

    // DX = res[1]
    MOVQ 8(DI), DX
    ADCXQ R9, BX
    MULXQ R12, AX, R9
    ADOXQ AX, BX

    // DX = res[2]
    MOVQ 16(DI), DX
    ADCXQ R9, R13
    MULXQ R12, AX, R9
    ADOXQ AX, R13

    // DX = res[3]
    MOVQ 24(DI), DX
    ADCXQ R9, SI
    MULXQ R12, AX, R9
    ADOXQ AX, SI

    // add the last carries to R9 (MOVQ keeps CF/OF intact)
    MOVQ $0, DX
    ADCXQ DX, R9
    ADOXQ DX, R9

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, DX
    MULXQ CX, R11, DX

    // clear the carry flags
    XORQ DX, DX

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, DX
    MULXQ R11, AX, R10
    ADCXQ CX, AX

    // for j=1 to N-1
    //     (C,t[j-1]) := t[j] + m*q[j] + C

    MOVQ $0x2833e84879b97091, DX    // q[1]
    MULXQ R11, AX, DX
    ADCXQ BX, R10
    ADOXQ AX, R10
    MOVQ R10, CX
    MOVQ DX, R10

    MOVQ $0xb85045b68181585d, DX    // q[2]
    MULXQ R11, AX, DX
    ADCXQ R13, R10
    ADOXQ AX, R10
    MOVQ R10, BX
    MOVQ DX, R10

    MOVQ $0x30644e72e131a029, DX    // q[3]
    MULXQ R11, AX, DX
    ADCXQ SI, R10
    ADOXQ AX, R10
    MOVQ R10, R13

    // t[N-1] = C + A
    MOVQ $0, AX
    ADCXQ AX, DX
    ADOXQ DX, R9
    MOVQ R9, SI

    // ---------------------------------------------------------------------------------------------
    // outer loop 3

    // clear the carry flags
    XORQ R9, R9

    // R12 = y[3]
    MOVQ 24(R8), R12

    // for j=0 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A

    // DX = res[0]
    MOVQ 0(DI), DX
    MULXQ R12, AX, R9
    ADOXQ AX, CX

    // DX = res[1]
    MOVQ 8(DI), DX
    ADCXQ R9, BX
    MULXQ R12, AX, R9
    ADOXQ AX, BX

    // DX = res[2]
    MOVQ 16(DI), DX
    ADCXQ R9, R13
    MULXQ R12, AX, R9
    ADOXQ AX, R13

    // DX = res[3]
    MOVQ 24(DI), DX
    ADCXQ R9, SI
    MULXQ R12, AX, R9
    ADOXQ AX, SI

    // add the last carries to R9 (MOVQ keeps CF/OF intact)
    MOVQ $0, DX
    ADCXQ DX, R9
    ADOXQ DX, R9

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, DX
    MULXQ CX, R11, DX

    // clear the carry flags
    XORQ DX, DX

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, DX
    MULXQ R11, AX, R10
    ADCXQ CX, AX

    // for j=1 to N-1
    //     (C,t[j-1]) := t[j] + m*q[j] + C

    MOVQ $0x2833e84879b97091, DX    // q[1]
    MULXQ R11, AX, DX
    ADCXQ BX, R10
    ADOXQ AX, R10
    MOVQ R10, CX
    MOVQ DX, R10

    MOVQ $0xb85045b68181585d, DX    // q[2]
    MULXQ R11, AX, DX
    ADCXQ R13, R10
    ADOXQ AX, R10
    MOVQ R10, BX
    MOVQ DX, R10

    MOVQ $0x30644e72e131a029, DX    // q[3]
    MULXQ R11, AX, DX
    ADCXQ SI, R10
    ADOXQ AX, R10
    MOVQ R10, R13

    // t[N-1] = C + A
    MOVQ $0, AX
    ADCXQ AX, DX
    ADOXQ DX, R9
    MOVQ R9, SI

reduce:
    // Final reduction, branch-free.
    // On entry t = (CX, BX, R13, SI). Compute u = t - q in a separate set of
    // registers (SUBQ/SBBQ overwrite their second operand), then pick t or u
    // based on the final borrow.
    MOVQ CX, DX
    MOVQ BX, R8
    MOVQ R13, R9
    MOVQ SI, R10
    MOVQ $0x43e1f593f0000001, R11   // q[0]
    SUBQ R11, DX
    MOVQ $0x2833e84879b97091, R11   // q[1]
    SBBQ R11, R8
    MOVQ $0xb85045b68181585d, R11   // q[2]
    SBBQ R11, R9
    MOVQ $0x30644e72e131a029, R11   // q[3]
    SBBQ R11, R10

    // CF set   => the subtraction borrowed, i.e. t < q: keep t.
    // CF clear => t >= q: keep u = t - q.
    // CMOV does not modify flags, so all four selects see the same CF.
    CMOVQCS CX, DX
    CMOVQCS BX, R8
    CMOVQCS R13, R9
    CMOVQCS SI, R10

    MOVQ DX, 0(DI)
    MOVQ R8, 8(DI)
    MOVQ R9, 16(DI)
    MOVQ R10, 24(DI)
    RET

no_adx:
    // Fallback path without MULX/ADCX/ADOX. Here the two inner loops are
    // interleaved: for each j the x[j]*y[i] step is followed immediately by
    // the m*q[j] step.

    // ---------------------------------------------------------------------------------------------
    // outer loop 0

    // (A,t[0]) := t[0] + x[0]*y[0]
    MOVQ (DI), AX                   // x[0]
    MOVQ 0(R8), R12                 // y[0]
    MULQ R12                        // x[0] * y[0]
    MOVQ DX, R9
    MOVQ AX, CX

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, R11
    IMULQ CX, R11

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, AX
    MULQ R11
    ADDQ CX, AX
    ADCQ $0, DX
    MOVQ DX, R10

    // for j=1 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A
    //     (C,t[j-1]) := t[j] + m*q[j] + C
    MOVQ 8(DI), AX
    MULQ R12                        // x[1] * y[0]
    MOVQ R9, BX
    ADDQ AX, BX
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x2833e84879b97091, AX    // q[1]
    MULQ R11
    ADDQ BX, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, CX
    MOVQ DX, R10
    MOVQ 16(DI), AX
    MULQ R12                        // x[2] * y[0]
    MOVQ R9, R13
    ADDQ AX, R13
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0xb85045b68181585d, AX    // q[2]
    MULQ R11
    ADDQ R13, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, BX
    MOVQ DX, R10
    MOVQ 24(DI), AX
    MULQ R12                        // x[3] * y[0]
    MOVQ R9, SI
    ADDQ AX, SI
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x30644e72e131a029, AX    // q[3]
    MULQ R11
    ADDQ SI, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, R13
    MOVQ DX, R10

    // t[N-1] = C + A
    ADDQ R10, R9
    MOVQ R9, SI

    // ---------------------------------------------------------------------------------------------
    // outer loop 1

    // (A,t[0]) := t[0] + x[0]*y[1]
    MOVQ (DI), AX                   // x[0]
    MOVQ 8(R8), R12                 // y[1]
    MULQ R12                        // x[0] * y[1]
    ADDQ AX, CX
    ADCQ $0, DX
    MOVQ DX, R9

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, R11
    IMULQ CX, R11

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, AX
    MULQ R11
    ADDQ CX, AX
    ADCQ $0, DX
    MOVQ DX, R10

    // for j=1 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A
    //     (C,t[j-1]) := t[j] + m*q[j] + C
    MOVQ 8(DI), AX
    MULQ R12                        // x[1] * y[1]
    ADDQ R9, BX
    ADCQ $0, DX
    ADDQ AX, BX
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x2833e84879b97091, AX    // q[1]
    MULQ R11
    ADDQ BX, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, CX
    MOVQ DX, R10
    MOVQ 16(DI), AX
    MULQ R12                        // x[2] * y[1]
    ADDQ R9, R13
    ADCQ $0, DX
    ADDQ AX, R13
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0xb85045b68181585d, AX    // q[2]
    MULQ R11
    ADDQ R13, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, BX
    MOVQ DX, R10
    MOVQ 24(DI), AX
    MULQ R12                        // x[3] * y[1]
    ADDQ R9, SI
    ADCQ $0, DX
    ADDQ AX, SI
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x30644e72e131a029, AX    // q[3]
    MULQ R11
    ADDQ SI, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, R13
    MOVQ DX, R10

    // t[N-1] = C + A
    ADDQ R10, R9
    MOVQ R9, SI

    // ---------------------------------------------------------------------------------------------
    // outer loop 2

    // (A,t[0]) := t[0] + x[0]*y[2]
    MOVQ (DI), AX                   // x[0]
    MOVQ 16(R8), R12                // y[2]
    MULQ R12                        // x[0] * y[2]
    ADDQ AX, CX
    ADCQ $0, DX
    MOVQ DX, R9

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, R11
    IMULQ CX, R11

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, AX
    MULQ R11
    ADDQ CX, AX
    ADCQ $0, DX
    MOVQ DX, R10

    // for j=1 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A
    //     (C,t[j-1]) := t[j] + m*q[j] + C
    MOVQ 8(DI), AX
    MULQ R12                        // x[1] * y[2]
    ADDQ R9, BX
    ADCQ $0, DX
    ADDQ AX, BX
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x2833e84879b97091, AX    // q[1]
    MULQ R11
    ADDQ BX, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, CX
    MOVQ DX, R10
    MOVQ 16(DI), AX
    MULQ R12                        // x[2] * y[2]
    ADDQ R9, R13
    ADCQ $0, DX
    ADDQ AX, R13
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0xb85045b68181585d, AX    // q[2]
    MULQ R11
    ADDQ R13, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, BX
    MOVQ DX, R10
    MOVQ 24(DI), AX
    MULQ R12                        // x[3] * y[2]
    ADDQ R9, SI
    ADCQ $0, DX
    ADDQ AX, SI
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x30644e72e131a029, AX    // q[3]
    MULQ R11
    ADDQ SI, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, R13
    MOVQ DX, R10

    // t[N-1] = C + A
    ADDQ R10, R9
    MOVQ R9, SI

    // ---------------------------------------------------------------------------------------------
    // outer loop 3

    // (A,t[0]) := t[0] + x[0]*y[3]
    MOVQ (DI), AX                   // x[0]
    MOVQ 24(R8), R12                // y[3]
    MULQ R12                        // x[0] * y[3]
    ADDQ AX, CX
    ADCQ $0, DX
    MOVQ DX, R9

    // m := t[0]*q'[0] mod W
    MOVQ $0xc2e1f593efffffff, R11
    IMULQ CX, R11

    // C,_ := t[0] + m*q[0]
    MOVQ $0x43e1f593f0000001, AX
    MULQ R11
    ADDQ CX, AX
    ADCQ $0, DX
    MOVQ DX, R10

    // for j=1 to N-1
    //     (A,t[j]) := t[j] + x[j]*y[i] + A
    //     (C,t[j-1]) := t[j] + m*q[j] + C
    MOVQ 8(DI), AX
    MULQ R12                        // x[1] * y[3]
    ADDQ R9, BX
    ADCQ $0, DX
    ADDQ AX, BX
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x2833e84879b97091, AX    // q[1]
    MULQ R11
    ADDQ BX, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, CX
    MOVQ DX, R10
    MOVQ 16(DI), AX
    MULQ R12                        // x[2] * y[3]
    ADDQ R9, R13
    ADCQ $0, DX
    ADDQ AX, R13
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0xb85045b68181585d, AX    // q[2]
    MULQ R11
    ADDQ R13, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, BX
    MOVQ DX, R10
    MOVQ 24(DI), AX
    MULQ R12                        // x[3] * y[3]
    ADDQ R9, SI
    ADCQ $0, DX
    ADDQ AX, SI
    ADCQ $0, DX
    MOVQ DX, R9

    MOVQ $0x30644e72e131a029, AX    // q[3]
    MULQ R11
    ADDQ SI, R10
    ADCQ $0, DX
    ADDQ AX, R10
    ADCQ $0, DX

    MOVQ R10, R13
    MOVQ DX, R10

    // t[N-1] = C + A
    ADDQ R10, R9
    MOVQ R9, SI

    // t = (CX, BX, R13, SI): finish in the shared reduction tail
    JMP reduce
|