@ -0,0 +1,170 @@ |
|||||
|
// +build !amd64
|
||||
|
|
||||
|
// Copyright 2020 ConsenSys AG
|
||||
|
//
|
||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
// you may not use this file except in compliance with the License.
|
||||
|
// You may obtain a copy of the License at
|
||||
|
//
|
||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
//
|
||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
// See the License for the specific language governing permissions and
|
||||
|
// limitations under the License.
|
||||
|
|
||||
|
// Code generated by goff (v0.2.0) DO NOT EDIT
|
||||
|
|
||||
|
// Package ff contains field arithmetic operations
|
||||
|
package ff |
||||
|
|
||||
|
// /!\ WARNING /!\
|
||||
|
// this code has not been audited and is provided as-is. In particular,
|
||||
|
// there are no security guarantees such as constant time implementation
|
||||
|
// or side-channel attack resistance
|
||||
|
// /!\ WARNING /!\
|
||||
|
|
||||
|
import "math/bits" |
||||
|
|
||||
|
// Mul z = x * y mod q
|
||||
|
// see https://hackmd.io/@zkteam/modular_multiplication
|
||||
|
func (z *Element) Mul(x, y *Element) *Element { |
||||
|
|
||||
|
var t [4]uint64 |
||||
|
var c [3]uint64 |
||||
|
{ |
||||
|
// round 0
|
||||
|
v := x[0] |
||||
|
c[1], c[0] = bits.Mul64(v, y[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd1(v, y[1], c[1]) |
||||
|
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd1(v, y[2], c[1]) |
||||
|
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd1(v, y[3], c[1]) |
||||
|
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
{ |
||||
|
// round 1
|
||||
|
v := x[1] |
||||
|
c[1], c[0] = madd1(v, y[0], t[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd2(v, y[1], c[1], t[1]) |
||||
|
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, y[2], c[1], t[2]) |
||||
|
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, y[3], c[1], t[3]) |
||||
|
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
{ |
||||
|
// round 2
|
||||
|
v := x[2] |
||||
|
c[1], c[0] = madd1(v, y[0], t[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd2(v, y[1], c[1], t[1]) |
||||
|
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, y[2], c[1], t[2]) |
||||
|
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, y[3], c[1], t[3]) |
||||
|
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
{ |
||||
|
// round 3
|
||||
|
v := x[3] |
||||
|
c[1], c[0] = madd1(v, y[0], t[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd2(v, y[1], c[1], t[1]) |
||||
|
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, y[2], c[1], t[2]) |
||||
|
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, y[3], c[1], t[3]) |
||||
|
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
|
||||
|
// if z > q --> z -= q
|
||||
|
// note: this is NOT constant time
|
||||
|
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { |
||||
|
var b uint64 |
||||
|
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) |
||||
|
z[1], b = bits.Sub64(z[1], 2896914383306846353, b) |
||||
|
z[2], b = bits.Sub64(z[2], 13281191951274694749, b) |
||||
|
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) |
||||
|
} |
||||
|
return z |
||||
|
} |
||||
|
|
||||
|
// MulAssign z = z * x mod q
|
||||
|
// see https://hackmd.io/@zkteam/modular_multiplication
|
||||
|
func (z *Element) MulAssign(x *Element) *Element { |
||||
|
|
||||
|
var t [4]uint64 |
||||
|
var c [3]uint64 |
||||
|
{ |
||||
|
// round 0
|
||||
|
v := z[0] |
||||
|
c[1], c[0] = bits.Mul64(v, x[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd1(v, x[1], c[1]) |
||||
|
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd1(v, x[2], c[1]) |
||||
|
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd1(v, x[3], c[1]) |
||||
|
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
{ |
||||
|
// round 1
|
||||
|
v := z[1] |
||||
|
c[1], c[0] = madd1(v, x[0], t[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd2(v, x[1], c[1], t[1]) |
||||
|
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, x[2], c[1], t[2]) |
||||
|
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, x[3], c[1], t[3]) |
||||
|
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
{ |
||||
|
// round 2
|
||||
|
v := z[2] |
||||
|
c[1], c[0] = madd1(v, x[0], t[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd2(v, x[1], c[1], t[1]) |
||||
|
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, x[2], c[1], t[2]) |
||||
|
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, x[3], c[1], t[3]) |
||||
|
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
{ |
||||
|
// round 3
|
||||
|
v := z[3] |
||||
|
c[1], c[0] = madd1(v, x[0], t[0]) |
||||
|
m := c[0] * 14042775128853446655 |
||||
|
c[2] = madd0(m, 4891460686036598785, c[0]) |
||||
|
c[1], c[0] = madd2(v, x[1], c[1], t[1]) |
||||
|
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, x[2], c[1], t[2]) |
||||
|
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
||||
|
c[1], c[0] = madd2(v, x[3], c[1], t[3]) |
||||
|
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
||||
|
} |
||||
|
|
||||
|
// if z > q --> z -= q
|
||||
|
// note: this is NOT constant time
|
||||
|
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { |
||||
|
var b uint64 |
||||
|
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) |
||||
|
z[1], b = bits.Sub64(z[1], 2896914383306846353, b) |
||||
|
z[2], b = bits.Sub64(z[2], 13281191951274694749, b) |
||||
|
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) |
||||
|
} |
||||
|
return z |
||||
|
} |
@ -0,0 +1,39 @@ |
|||||
|
// Copyright 2020 ConsenSys AG
|
||||
|
//
|
||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
// you may not use this file except in compliance with the License.
|
||||
|
// You may obtain a copy of the License at
|
||||
|
//
|
||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
//
|
||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
// See the License for the specific language governing permissions and
|
||||
|
// limitations under the License.
|
||||
|
|
||||
|
// Code generated by goff (v0.2.0) DO NOT EDIT
|
||||
|
|
||||
|
// Package ff contains field arithmetic operations
|
||||
|
package ff |
||||
|
|
||||
|
// MulAssignElement sets res = res * y mod q (constant time)
// NOTE(review): the "constant time" wording is inherited; the assembly's final
// reduction takes a conditional branch (JCS) on the borrow — confirm the claim.
|
||||
|
// This is a body-less declaration: the implementation is the assembly routine
// of the same name, which writes the product back into res.
// calling this instead of z.MulAssign(x) is preferred for performance critical path
|
||||
|
//go:noescape
|
||||
|
func MulAssignElement(res, y *Element) |
||||
|
|
||||
|
// Mul z = x * y mod q (constant time)
|
||||
|
// see https://hackmd.io/@zkteam/modular_multiplication
|
||||
|
func (z *Element) Mul(x, y *Element) *Element { |
||||
|
res := *x |
||||
|
MulAssignElement(&res, y) |
||||
|
z.Set(&res) |
||||
|
return z |
||||
|
} |
||||
|
|
||||
|
// MulAssign sets z = z * x mod q and returns z (constant time)
// NOTE(review): "constant time" is inherited wording; the assembly's final
// reduction branches on the borrow flag — confirm.
|
||||
|
// see https://hackmd.io/@zkteam/modular_multiplication
|
||||
|
func (z *Element) MulAssign(x *Element) *Element { |
||||
|
// delegate to the assembly routine, which multiplies z by x in place
MulAssignElement(z, x) |
||||
|
return z |
||||
|
} |
@ -0,0 +1,695 @@ |
|||||
|
// Code generated by goff (v0.2.0) DO NOT EDIT |
||||
|
|
||||
|
#include "textflag.h" |
||||
|
|
||||
|
// func MulAssignElement(res,y *Element)
// montgomery multiplication of res by y
// stores the result in res
//
// Register usage:
//   DI = &res, R8 = &y, R12 = y[i], R11 = m
//   CX, BX, R13, SI = t[0..3], R9 = A (carry of the x*y chain), R10 = C (carry of the m*q chain)
// The routine has a zero-size frame, so the assembler does not save/restore BP;
// BP is therefore left untouched and R13 is used as the fourth accumulator limb.
// res is only read until the final stores after "reduce", so res and y may alias.
TEXT ·MulAssignElement(SB), NOSPLIT, $0-16

	// dereference our parameters
	MOVQ res+0(FP), DI
	MOVQ y+8(FP), R8

	// check if we support adx and mulx
	CMPB ·supportAdx(SB), $1
	JNE no_adx

	// the algorithm is described here
	// https://hackmd.io/@zkteam/modular_multiplication
	// however, to benefit from the ADCX and ADOX carry chains
	// we split the inner loops in 2:
	// for i=0 to N-1
	//    for j=0 to N-1
	//        (A,t[j]) := t[j] + a[j]*b[i] + A
	//    m := t[0]*q'[0] mod W
	//    C,_ := t[0] + m*q[0]
	//    for j=1 to N-1
	//        (C,t[j-1]) := t[j] + m*q[j] + C
	//    t[N-1] = C + A

	// ---------------------------------------------------------------------------------------------
	// outer loop 0

	// clear up the carry flags
	XORQ R9, R9

	// R12 = y[0]
	MOVQ 0(R8), R12

	// for j=0 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A

	// DX = res[0]
	MOVQ 0(DI), DX
	MULXQ R12, CX, R9

	// DX = res[1]
	MOVQ 8(DI), DX
	MOVQ R9, BX
	MULXQ R12, AX, R9
	ADOXQ AX, BX

	// DX = res[2]
	MOVQ 16(DI), DX
	MOVQ R9, R13
	MULXQ R12, AX, R9
	ADOXQ AX, R13

	// DX = res[3]
	MOVQ 24(DI), DX
	MOVQ R9, SI
	MULXQ R12, AX, R9
	ADOXQ AX, SI

	// add the last carries to R9
	MOVQ $0, DX
	ADCXQ DX, R9
	ADOXQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, DX
	MULXQ CX, R11, DX

	// clear the carry flags
	XORQ DX, DX

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, DX
	MULXQ R11, AX, R10
	ADCXQ CX, AX

	// for j=1 to N-1
	//    (C,t[j-1]) := t[j] + m*q[j] + C

	MOVQ $0x2833e84879b97091, DX
	MULXQ R11, AX, DX
	ADCXQ BX, R10
	ADOXQ AX, R10
	MOVQ R10, CX
	MOVQ DX, R10

	MOVQ $0xb85045b68181585d, DX
	MULXQ R11, AX, DX
	ADCXQ R13, R10
	ADOXQ AX, R10
	MOVQ R10, BX
	MOVQ DX, R10

	MOVQ $0x30644e72e131a029, DX
	MULXQ R11, AX, DX
	ADCXQ SI, R10
	ADOXQ AX, R10
	MOVQ R10, R13
	MOVQ $0, AX
	ADCXQ AX, DX
	ADOXQ DX, R9
	MOVQ R9, SI

	// ---------------------------------------------------------------------------------------------
	// outer loop 1

	// clear up the carry flags
	XORQ R9, R9

	// R12 = y[1]
	MOVQ 8(R8), R12

	// for j=0 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A

	// DX = res[0]
	MOVQ 0(DI), DX
	MULXQ R12, AX, R9
	ADOXQ AX, CX

	// DX = res[1]
	MOVQ 8(DI), DX
	ADCXQ R9, BX
	MULXQ R12, AX, R9
	ADOXQ AX, BX

	// DX = res[2]
	MOVQ 16(DI), DX
	ADCXQ R9, R13
	MULXQ R12, AX, R9
	ADOXQ AX, R13

	// DX = res[3]
	MOVQ 24(DI), DX
	ADCXQ R9, SI
	MULXQ R12, AX, R9
	ADOXQ AX, SI

	// add the last carries to R9
	MOVQ $0, DX
	ADCXQ DX, R9
	ADOXQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, DX
	MULXQ CX, R11, DX

	// clear the carry flags
	XORQ DX, DX

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, DX
	MULXQ R11, AX, R10
	ADCXQ CX, AX

	// for j=1 to N-1
	//    (C,t[j-1]) := t[j] + m*q[j] + C

	MOVQ $0x2833e84879b97091, DX
	MULXQ R11, AX, DX
	ADCXQ BX, R10
	ADOXQ AX, R10
	MOVQ R10, CX
	MOVQ DX, R10

	MOVQ $0xb85045b68181585d, DX
	MULXQ R11, AX, DX
	ADCXQ R13, R10
	ADOXQ AX, R10
	MOVQ R10, BX
	MOVQ DX, R10

	MOVQ $0x30644e72e131a029, DX
	MULXQ R11, AX, DX
	ADCXQ SI, R10
	ADOXQ AX, R10
	MOVQ R10, R13
	MOVQ $0, AX
	ADCXQ AX, DX
	ADOXQ DX, R9
	MOVQ R9, SI

	// ---------------------------------------------------------------------------------------------
	// outer loop 2

	// clear up the carry flags
	XORQ R9, R9

	// R12 = y[2]
	MOVQ 16(R8), R12

	// for j=0 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A

	// DX = res[0]
	MOVQ 0(DI), DX
	MULXQ R12, AX, R9
	ADOXQ AX, CX

	// DX = res[1]
	MOVQ 8(DI), DX
	ADCXQ R9, BX
	MULXQ R12, AX, R9
	ADOXQ AX, BX

	// DX = res[2]
	MOVQ 16(DI), DX
	ADCXQ R9, R13
	MULXQ R12, AX, R9
	ADOXQ AX, R13

	// DX = res[3]
	MOVQ 24(DI), DX
	ADCXQ R9, SI
	MULXQ R12, AX, R9
	ADOXQ AX, SI

	// add the last carries to R9
	MOVQ $0, DX
	ADCXQ DX, R9
	ADOXQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, DX
	MULXQ CX, R11, DX

	// clear the carry flags
	XORQ DX, DX

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, DX
	MULXQ R11, AX, R10
	ADCXQ CX, AX

	// for j=1 to N-1
	//    (C,t[j-1]) := t[j] + m*q[j] + C

	MOVQ $0x2833e84879b97091, DX
	MULXQ R11, AX, DX
	ADCXQ BX, R10
	ADOXQ AX, R10
	MOVQ R10, CX
	MOVQ DX, R10

	MOVQ $0xb85045b68181585d, DX
	MULXQ R11, AX, DX
	ADCXQ R13, R10
	ADOXQ AX, R10
	MOVQ R10, BX
	MOVQ DX, R10

	MOVQ $0x30644e72e131a029, DX
	MULXQ R11, AX, DX
	ADCXQ SI, R10
	ADOXQ AX, R10
	MOVQ R10, R13
	MOVQ $0, AX
	ADCXQ AX, DX
	ADOXQ DX, R9
	MOVQ R9, SI

	// ---------------------------------------------------------------------------------------------
	// outer loop 3

	// clear up the carry flags
	XORQ R9, R9

	// R12 = y[3]
	MOVQ 24(R8), R12

	// for j=0 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A

	// DX = res[0]
	MOVQ 0(DI), DX
	MULXQ R12, AX, R9
	ADOXQ AX, CX

	// DX = res[1]
	MOVQ 8(DI), DX
	ADCXQ R9, BX
	MULXQ R12, AX, R9
	ADOXQ AX, BX

	// DX = res[2]
	MOVQ 16(DI), DX
	ADCXQ R9, R13
	MULXQ R12, AX, R9
	ADOXQ AX, R13

	// DX = res[3]
	MOVQ 24(DI), DX
	ADCXQ R9, SI
	MULXQ R12, AX, R9
	ADOXQ AX, SI

	// add the last carries to R9
	MOVQ $0, DX
	ADCXQ DX, R9
	ADOXQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, DX
	MULXQ CX, R11, DX

	// clear the carry flags
	XORQ DX, DX

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, DX
	MULXQ R11, AX, R10
	ADCXQ CX, AX

	// for j=1 to N-1
	//    (C,t[j-1]) := t[j] + m*q[j] + C

	MOVQ $0x2833e84879b97091, DX
	MULXQ R11, AX, DX
	ADCXQ BX, R10
	ADOXQ AX, R10
	MOVQ R10, CX
	MOVQ DX, R10

	MOVQ $0xb85045b68181585d, DX
	MULXQ R11, AX, DX
	ADCXQ R13, R10
	ADOXQ AX, R10
	MOVQ R10, BX
	MOVQ DX, R10

	MOVQ $0x30644e72e131a029, DX
	MULXQ R11, AX, DX
	ADCXQ SI, R10
	ADOXQ AX, R10
	MOVQ R10, R13
	MOVQ $0, AX
	ADCXQ AX, DX
	ADOXQ DX, R9
	MOVQ R9, SI

reduce:
	// final reduction: compute u = t - q and keep u when the subtraction
	// does not borrow (t >= q), otherwise keep t.
	// NOTE(review): the selection is done with a conditional branch (JCS),
	// so the earlier "constant time" wording should be confirmed.
	// first we copy registers storing t in a separate set of registers
	// as SUBQ modifies the 2nd operand
	MOVQ CX, DX
	MOVQ BX, R8
	MOVQ R13, R9
	MOVQ SI, R10
	MOVQ $0x43e1f593f0000001, R11
	SUBQ R11, DX
	MOVQ $0x2833e84879b97091, R11
	SBBQ R11, R8
	MOVQ $0xb85045b68181585d, R11
	SBBQ R11, R9
	MOVQ $0x30644e72e131a029, R11
	SBBQ R11, R10
	JCS t_is_smaller // borrow is set: t < q, we return t

	// no borrow: t >= q, we return u = t - q
	MOVQ DX, (DI)
	MOVQ R8, 8(DI)
	MOVQ R9, 16(DI)
	MOVQ R10, 24(DI)
	RET

t_is_smaller:
	MOVQ CX, 0(DI)
	MOVQ BX, 8(DI)
	MOVQ R13, 16(DI)
	MOVQ SI, 24(DI)
	RET

no_adx:
	// fallback using MULQ / ADDQ / ADCQ only; same register roles as above

	// ---------------------------------------------------------------------------------------------
	// outer loop 0

	// (A,t[0]) := t[0] + x[0]*y[0]
	MOVQ (DI), AX // x[0]
	MOVQ 0(R8), R12
	MULQ R12 // x[0] * y[0]
	MOVQ DX, R9
	MOVQ AX, CX

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, R11
	IMULQ CX, R11

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, AX
	MULQ R11
	ADDQ CX, AX
	ADCQ $0, DX
	MOVQ DX, R10

	// for j=1 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A
	//    (C,t[j-1]) := t[j] + m*q[j] + C
	MOVQ 8(DI), AX
	MULQ R12 // x[1] * y[0]
	MOVQ R9, BX
	ADDQ AX, BX
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x2833e84879b97091, AX
	MULQ R11
	ADDQ BX, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, CX
	MOVQ DX, R10
	MOVQ 16(DI), AX
	MULQ R12 // x[2] * y[0]
	MOVQ R9, R13
	ADDQ AX, R13
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0xb85045b68181585d, AX
	MULQ R11
	ADDQ R13, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, BX
	MOVQ DX, R10
	MOVQ 24(DI), AX
	MULQ R12 // x[3] * y[0]
	MOVQ R9, SI
	ADDQ AX, SI
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x30644e72e131a029, AX
	MULQ R11
	ADDQ SI, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, R13
	MOVQ DX, R10

	ADDQ R10, R9
	MOVQ R9, SI

	// ---------------------------------------------------------------------------------------------
	// outer loop 1

	// (A,t[0]) := t[0] + x[0]*y[1]
	MOVQ (DI), AX // x[0]
	MOVQ 8(R8), R12
	MULQ R12 // x[0] * y[1]
	ADDQ AX, CX
	ADCQ $0, DX
	MOVQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, R11
	IMULQ CX, R11

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, AX
	MULQ R11
	ADDQ CX, AX
	ADCQ $0, DX
	MOVQ DX, R10

	// for j=1 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A
	//    (C,t[j-1]) := t[j] + m*q[j] + C
	MOVQ 8(DI), AX
	MULQ R12 // x[1] * y[1]
	ADDQ R9, BX
	ADCQ $0, DX
	ADDQ AX, BX
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x2833e84879b97091, AX
	MULQ R11
	ADDQ BX, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, CX
	MOVQ DX, R10
	MOVQ 16(DI), AX
	MULQ R12 // x[2] * y[1]
	ADDQ R9, R13
	ADCQ $0, DX
	ADDQ AX, R13
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0xb85045b68181585d, AX
	MULQ R11
	ADDQ R13, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, BX
	MOVQ DX, R10
	MOVQ 24(DI), AX
	MULQ R12 // x[3] * y[1]
	ADDQ R9, SI
	ADCQ $0, DX
	ADDQ AX, SI
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x30644e72e131a029, AX
	MULQ R11
	ADDQ SI, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, R13
	MOVQ DX, R10

	ADDQ R10, R9
	MOVQ R9, SI

	// ---------------------------------------------------------------------------------------------
	// outer loop 2

	// (A,t[0]) := t[0] + x[0]*y[2]
	MOVQ (DI), AX // x[0]
	MOVQ 16(R8), R12
	MULQ R12 // x[0] * y[2]
	ADDQ AX, CX
	ADCQ $0, DX
	MOVQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, R11
	IMULQ CX, R11

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, AX
	MULQ R11
	ADDQ CX, AX
	ADCQ $0, DX
	MOVQ DX, R10

	// for j=1 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A
	//    (C,t[j-1]) := t[j] + m*q[j] + C
	MOVQ 8(DI), AX
	MULQ R12 // x[1] * y[2]
	ADDQ R9, BX
	ADCQ $0, DX
	ADDQ AX, BX
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x2833e84879b97091, AX
	MULQ R11
	ADDQ BX, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, CX
	MOVQ DX, R10
	MOVQ 16(DI), AX
	MULQ R12 // x[2] * y[2]
	ADDQ R9, R13
	ADCQ $0, DX
	ADDQ AX, R13
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0xb85045b68181585d, AX
	MULQ R11
	ADDQ R13, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, BX
	MOVQ DX, R10
	MOVQ 24(DI), AX
	MULQ R12 // x[3] * y[2]
	ADDQ R9, SI
	ADCQ $0, DX
	ADDQ AX, SI
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x30644e72e131a029, AX
	MULQ R11
	ADDQ SI, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, R13
	MOVQ DX, R10

	ADDQ R10, R9
	MOVQ R9, SI

	// ---------------------------------------------------------------------------------------------
	// outer loop 3

	// (A,t[0]) := t[0] + x[0]*y[3]
	MOVQ (DI), AX // x[0]
	MOVQ 24(R8), R12
	MULQ R12 // x[0] * y[3]
	ADDQ AX, CX
	ADCQ $0, DX
	MOVQ DX, R9

	// m := t[0]*q'[0] mod W
	MOVQ $0xc2e1f593efffffff, R11
	IMULQ CX, R11

	// C,_ := t[0] + m*q[0]
	MOVQ $0x43e1f593f0000001, AX
	MULQ R11
	ADDQ CX, AX
	ADCQ $0, DX
	MOVQ DX, R10

	// for j=1 to N-1
	//    (A,t[j]) := t[j] + x[j]*y[i] + A
	//    (C,t[j-1]) := t[j] + m*q[j] + C
	MOVQ 8(DI), AX
	MULQ R12 // x[1] * y[3]
	ADDQ R9, BX
	ADCQ $0, DX
	ADDQ AX, BX
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x2833e84879b97091, AX
	MULQ R11
	ADDQ BX, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, CX
	MOVQ DX, R10
	MOVQ 16(DI), AX
	MULQ R12 // x[2] * y[3]
	ADDQ R9, R13
	ADCQ $0, DX
	ADDQ AX, R13
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0xb85045b68181585d, AX
	MULQ R11
	ADDQ R13, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, BX
	MOVQ DX, R10
	MOVQ 24(DI), AX
	MULQ R12 // x[3] * y[3]
	ADDQ R9, SI
	ADCQ $0, DX
	ADDQ AX, SI
	ADCQ $0, DX
	MOVQ DX, R9

	MOVQ $0x30644e72e131a029, AX
	MULQ R11
	ADDQ SI, R10
	ADCQ $0, DX
	ADDQ AX, R10
	ADCQ $0, DX

	MOVQ R10, R13
	MOVQ DX, R10

	ADDQ R10, R9
	MOVQ R9, SI

	JMP reduce
@ -0,0 +1,93 @@ |
|||||
|
// +build !amd64
|
||||
|
|
||||
|
// Copyright 2020 ConsenSys AG
|
||||
|
//
|
||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
// you may not use this file except in compliance with the License.
|
||||
|
// You may obtain a copy of the License at
|
||||
|
//
|
||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
//
|
||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
// See the License for the specific language governing permissions and
|
||||
|
// limitations under the License.
|
||||
|
|
||||
|
// Code generated by goff (v0.2.0) DO NOT EDIT
|
||||
|
|
||||
|
// Package ff contains field arithmetic operations
|
||||
|
package ff |
||||
|
|
||||
|
// /!\ WARNING /!\
|
||||
|
// this code has not been audited and is provided as-is. In particular,
|
||||
|
// there are no security guarantees such as constant time implementation
|
||||
|
// or side-channel attack resistance
|
||||
|
// /!\ WARNING /!\
|
||||
|
|
||||
|
import "math/bits" |
||||
|
|
||||
|
// Square z = x * x mod q
|
||||
|
// see https://hackmd.io/@zkteam/modular_multiplication
|
||||
|
func (z *Element) Square(x *Element) *Element { |
||||
|
|
||||
|
var p [4]uint64 |
||||
|
|
||||
|
var u, v uint64 |
||||
|
{ |
||||
|
// round 0
|
||||
|
u, p[0] = bits.Mul64(x[0], x[0]) |
||||
|
m := p[0] * 14042775128853446655 |
||||
|
C := madd0(m, 4891460686036598785, p[0]) |
||||
|
var t uint64 |
||||
|
t, u, v = madd1sb(x[0], x[1], u) |
||||
|
C, p[0] = madd2(m, 2896914383306846353, v, C) |
||||
|
t, u, v = madd1s(x[0], x[2], t, u) |
||||
|
C, p[1] = madd2(m, 13281191951274694749, v, C) |
||||
|
_, u, v = madd1s(x[0], x[3], t, u) |
||||
|
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u) |
||||
|
} |
||||
|
{ |
||||
|
// round 1
|
||||
|
m := p[0] * 14042775128853446655 |
||||
|
C := madd0(m, 4891460686036598785, p[0]) |
||||
|
u, v = madd1(x[1], x[1], p[1]) |
||||
|
C, p[0] = madd2(m, 2896914383306846353, v, C) |
||||
|
var t uint64 |
||||
|
t, u, v = madd2sb(x[1], x[2], p[2], u) |
||||
|
C, p[1] = madd2(m, 13281191951274694749, v, C) |
||||
|
_, u, v = madd2s(x[1], x[3], p[3], t, u) |
||||
|
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u) |
||||
|
} |
||||
|
{ |
||||
|
// round 2
|
||||
|
m := p[0] * 14042775128853446655 |
||||
|
C := madd0(m, 4891460686036598785, p[0]) |
||||
|
C, p[0] = madd2(m, 2896914383306846353, p[1], C) |
||||
|
u, v = madd1(x[2], x[2], p[2]) |
||||
|
C, p[1] = madd2(m, 13281191951274694749, v, C) |
||||
|
_, u, v = madd2sb(x[2], x[3], p[3], u) |
||||
|
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u) |
||||
|
} |
||||
|
{ |
||||
|
// round 3
|
||||
|
m := p[0] * 14042775128853446655 |
||||
|
C := madd0(m, 4891460686036598785, p[0]) |
||||
|
C, z[0] = madd2(m, 2896914383306846353, p[1], C) |
||||
|
C, z[1] = madd2(m, 13281191951274694749, p[2], C) |
||||
|
u, v = madd1(x[3], x[3], p[3]) |
||||
|
z[3], z[2] = madd3(m, 3486998266802970665, v, C, u) |
||||
|
} |
||||
|
|
||||
|
// if z > q --> z -= q
|
||||
|
// note: this is NOT constant time
|
||||
|
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { |
||||
|
var b uint64 |
||||
|
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) |
||||
|
z[1], b = bits.Sub64(z[1], 2896914383306846353, b) |
||||
|
z[2], b = bits.Sub64(z[2], 13281191951274694749, b) |
||||
|
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) |
||||
|
} |
||||
|
return z |
||||
|
|
||||
|
} |
@ -0,0 +1,34 @@ |
|||||
|
// Copyright 2020 ConsenSys AG
|
||||
|
//
|
||||
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
|
// you may not use this file except in compliance with the License.
|
||||
|
// You may obtain a copy of the License at
|
||||
|
//
|
||||
|
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
//
|
||||
|
// Unless required by applicable law or agreed to in writing, software
|
||||
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
|
// See the License for the specific language governing permissions and
|
||||
|
// limitations under the License.
|
||||
|
|
||||
|
// Code generated by goff (v0.2.0) DO NOT EDIT
|
||||
|
|
||||
|
// Package ff contains field arithmetic operations
|
||||
|
package ff |
||||
|
|
||||
|
// SquareElement z = x * x mod q
|
||||
|
// calling this instead of z.Square(x) is preferred for performance critical path
|
||||
|
// go - noescape
|
||||
|
// func SquareElement(res,x *Element)
|
||||
|
|
||||
|
// Square sets z = x * x mod q and returns z.
// There is no dedicated squaring routine in use here: the square is computed
// with the general multiplication MulAssignElement.
|
||||
|
// see https://hackmd.io/@zkteam/modular_multiplication
|
||||
|
func (z *Element) Square(x *Element) *Element { |
||||
|
// bring x into z first (skipped when they are already the same element)
// so the in-place multiply below starts from x; assumes Set copies x into z
if z != x { |
||||
|
z.Set(x) |
||||
|
} |
||||
|
// z now holds x, so z * x is x * x; when z == x both arguments alias
MulAssignElement(z, x) |
||||
|
// SquareElement(z, x)  -- disabled: its declaration is commented out in this file
|
||||
|
return z |
||||
|
} |