@ -0,0 +1,170 @@ |
|||
// +build !amd64
|
|||
|
|||
// Copyright 2020 ConsenSys AG
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by goff (v0.2.0) DO NOT EDIT
|
|||
|
|||
// Package ff contains field arithmetic operations
|
|||
package ff |
|||
|
|||
// /!\ WARNING /!\
|
|||
// this code has not been audited and is provided as-is. In particular,
|
|||
// there are no security guarantees such as constant time implementation
|
|||
// or side-channel attack resistance
|
|||
// /!\ WARNING /!\
|
|||
|
|||
import "math/bits" |
|||
|
|||
// Mul z = x * y mod q
|
|||
// see https://hackmd.io/@zkteam/modular_multiplication
|
|||
func (z *Element) Mul(x, y *Element) *Element { |
|||
|
|||
var t [4]uint64 |
|||
var c [3]uint64 |
|||
{ |
|||
// round 0
|
|||
v := x[0] |
|||
c[1], c[0] = bits.Mul64(v, y[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd1(v, y[1], c[1]) |
|||
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd1(v, y[2], c[1]) |
|||
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd1(v, y[3], c[1]) |
|||
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
{ |
|||
// round 1
|
|||
v := x[1] |
|||
c[1], c[0] = madd1(v, y[0], t[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd2(v, y[1], c[1], t[1]) |
|||
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, y[2], c[1], t[2]) |
|||
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, y[3], c[1], t[3]) |
|||
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
{ |
|||
// round 2
|
|||
v := x[2] |
|||
c[1], c[0] = madd1(v, y[0], t[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd2(v, y[1], c[1], t[1]) |
|||
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, y[2], c[1], t[2]) |
|||
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, y[3], c[1], t[3]) |
|||
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
{ |
|||
// round 3
|
|||
v := x[3] |
|||
c[1], c[0] = madd1(v, y[0], t[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd2(v, y[1], c[1], t[1]) |
|||
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, y[2], c[1], t[2]) |
|||
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, y[3], c[1], t[3]) |
|||
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
|
|||
// if z > q --> z -= q
|
|||
// note: this is NOT constant time
|
|||
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { |
|||
var b uint64 |
|||
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) |
|||
z[1], b = bits.Sub64(z[1], 2896914383306846353, b) |
|||
z[2], b = bits.Sub64(z[2], 13281191951274694749, b) |
|||
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) |
|||
} |
|||
return z |
|||
} |
|||
|
|||
// MulAssign z = z * x mod q
|
|||
// see https://hackmd.io/@zkteam/modular_multiplication
|
|||
func (z *Element) MulAssign(x *Element) *Element { |
|||
|
|||
var t [4]uint64 |
|||
var c [3]uint64 |
|||
{ |
|||
// round 0
|
|||
v := z[0] |
|||
c[1], c[0] = bits.Mul64(v, x[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd1(v, x[1], c[1]) |
|||
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd1(v, x[2], c[1]) |
|||
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd1(v, x[3], c[1]) |
|||
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
{ |
|||
// round 1
|
|||
v := z[1] |
|||
c[1], c[0] = madd1(v, x[0], t[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd2(v, x[1], c[1], t[1]) |
|||
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, x[2], c[1], t[2]) |
|||
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, x[3], c[1], t[3]) |
|||
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
{ |
|||
// round 2
|
|||
v := z[2] |
|||
c[1], c[0] = madd1(v, x[0], t[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd2(v, x[1], c[1], t[1]) |
|||
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, x[2], c[1], t[2]) |
|||
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, x[3], c[1], t[3]) |
|||
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
{ |
|||
// round 3
|
|||
v := z[3] |
|||
c[1], c[0] = madd1(v, x[0], t[0]) |
|||
m := c[0] * 14042775128853446655 |
|||
c[2] = madd0(m, 4891460686036598785, c[0]) |
|||
c[1], c[0] = madd2(v, x[1], c[1], t[1]) |
|||
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, x[2], c[1], t[2]) |
|||
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0]) |
|||
c[1], c[0] = madd2(v, x[3], c[1], t[3]) |
|||
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1]) |
|||
} |
|||
|
|||
// if z > q --> z -= q
|
|||
// note: this is NOT constant time
|
|||
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { |
|||
var b uint64 |
|||
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) |
|||
z[1], b = bits.Sub64(z[1], 2896914383306846353, b) |
|||
z[2], b = bits.Sub64(z[2], 13281191951274694749, b) |
|||
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) |
|||
} |
|||
return z |
|||
} |
@ -0,0 +1,39 @@ |
|||
// Copyright 2020 ConsenSys AG
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by goff (v0.2.0) DO NOT EDIT
|
|||
|
|||
// Package ff contains field arithmetic operations
|
|||
package ff |
|||
|
|||
// MulAssignElement sets res = res * y mod q; the result is stored back into res.
// The function is declared here without a body; its implementation is the
// assembly routine TEXT ·MulAssignElement.
// NOTE(review): this was documented as "constant time", but the assembly's final
// reduction takes a data-dependent branch (JCS) — confirm before relying on it.
|
|||
// Calling this directly instead of z.MulAssign(x) is preferred on performance-critical paths.
|
|||
//go:noescape
|
|||
func MulAssignElement(res, y *Element) |
|||
|
|||
// Mul z = x * y mod q (constant time)
|
|||
// see https://hackmd.io/@zkteam/modular_multiplication
|
|||
func (z *Element) Mul(x, y *Element) *Element { |
|||
res := *x |
|||
MulAssignElement(&res, y) |
|||
z.Set(&res) |
|||
return z |
|||
} |
|||
|
|||
// MulAssign sets z = z * x mod q and returns z.
// It forwards to MulAssignElement(z, x), which updates z in place.
// NOTE(review): this was documented as "constant time", but the assembly
// implementation ends with a data-dependent branch (JCS) — confirm before relying on it.
|
|||
// see https://hackmd.io/@zkteam/modular_multiplication
|
|||
func (z *Element) MulAssign(x *Element) *Element { |
|||
// the multiplication happens in place on z
MulAssignElement(z, x) |
|||
return z |
|||
} |
@ -0,0 +1,695 @@ |
|||
// Code generated by goff (v0.2.0) DO NOT EDIT |
|||
|
|||
#include "textflag.h" |
|||
|
|||
// func MulAssignElement(res, y *Element) |
|||
// Montgomery multiplication of res by y; |
|||
// the result is stored back into res. |
// $0-16: no local stack frame, 16 bytes of arguments (the two pointers). |
// Register use, as established by the code below: |
//   DI = res, R8 = y, R12 = y[i] for the current outer loop |
//   CX, BX, BP, SI = t[0], t[1], t[2], t[3] (stored to res[0..3] at reduce) |
//   R9 = A (high word of the x*y chain), R10 = C (high word of the m*q chain) |
//   R11 = m, AX and DX = multiplication scratch |
// NOTE(review): BP is used as a general-purpose register with a $0 frame; the Go |
// amd64 toolchain also uses BP as the frame pointer — confirm this is intended. |
|||
TEXT ·MulAssignElement(SB), NOSPLIT, $0-16 |
|||
|
|||
// load the two pointer arguments |
|||
MOVQ res+0(FP), DI |
|||
MOVQ y+8(FP), R8 |
|||
|
|||
// use the MULX/ADCX/ADOX path only when the package-level supportAdx byte is 1 |
|||
CMPB ·supportAdx(SB), $1 |
|||
JNE no_adx |
|||
|
|||
// the algorithm is described here |
|||
// https://hackmd.io/@zkteam/modular_multiplication |
|||
// however, to benefit from the ADCX and ADOX carry chains |
|||
// we split the inner loops in 2: |
|||
// for i=0 to N-1 |
|||
// for j=0 to N-1 |
|||
// (A,t[j]) := t[j] + a[j]*b[i] + A |
|||
// m := t[0]*q'[0] mod W |
|||
// C,_ := t[0] + m*q[0] |
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
// t[N-1] = C + A |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 0 (t is empty, so the products are moved rather than added) |
|||
|
|||
// clear the carry flags (XOR resets both CF and OF) |
|||
XORQ R9 , R9 |
|||
|
|||
// R12 = y[0] |
|||
MOVQ 0(R8), R12 |
|||
|
|||
// for j=0 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
|
|||
// DX = res[0]; MULX leaves the low word in CX (t[0]) and the high word in R9 |
|||
MOVQ 0(DI), DX |
|||
MULXQ R12, CX , R9 |
|||
|
|||
// DX = res[1] |
|||
MOVQ 8(DI), DX |
|||
MOVQ R9, BX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BX |
|||
|
|||
// DX = res[2] |
|||
MOVQ 16(DI), DX |
|||
MOVQ R9, BP |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BP |
|||
|
|||
// DX = res[3] |
|||
MOVQ 24(DI), DX |
|||
MOVQ R9, SI |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, SI |
|||
|
|||
// fold the pending CF and OF carries into R9 |
|||
MOVQ $0, DX |
|||
ADCXQ DX, R9 |
|||
ADOXQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W (low word kept in R11, high word in DX is discarded) |
|||
MOVQ $0xc2e1f593efffffff, DX |
|||
MULXQ CX,R11, DX |
|||
|
|||
// clear the carry flags |
|||
XORQ DX, DX |
|||
|
|||
// C,_ := t[0] + m*q[0] (only the carry out of the low word is needed) |
|||
MOVQ $0x43e1f593f0000001, DX |
|||
MULXQ R11, AX, R10 |
|||
ADCXQ CX ,AX |
|||
|
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
|
|||
// j=1: new t[0] -> CX |
MOVQ $0x2833e84879b97091, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BX, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
|
|||
// j=2: new t[1] -> BX |
MOVQ $0xb85045b68181585d, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BP, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
|
|||
// j=3: new t[2] -> BP, then t[3] = C + A -> SI |
MOVQ $0x30644e72e131a029, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ SI, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BP |
|||
MOVQ $0, AX |
|||
ADCXQ AX, DX |
|||
ADOXQ DX, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 1 |
|||
|
|||
// clear the carry flags |
|||
XORQ R9 , R9 |
|||
|
|||
// R12 = y[1] |
|||
MOVQ 8(R8), R12 |
|||
|
|||
// for j=0 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
|
|||
// DX = res[0] |
|||
MOVQ 0(DI), DX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, CX |
|||
|
|||
// DX = res[1] |
|||
MOVQ 8(DI), DX |
|||
ADCXQ R9, BX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BX |
|||
|
|||
// DX = res[2] |
|||
MOVQ 16(DI), DX |
|||
ADCXQ R9, BP |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BP |
|||
|
|||
// DX = res[3] |
|||
MOVQ 24(DI), DX |
|||
ADCXQ R9, SI |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, SI |
|||
|
|||
// fold the pending CF and OF carries into R9 |
|||
MOVQ $0, DX |
|||
ADCXQ DX, R9 |
|||
ADOXQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ $0xc2e1f593efffffff, DX |
|||
MULXQ CX,R11, DX |
|||
|
|||
// clear the carry flags |
|||
XORQ DX, DX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, DX |
|||
MULXQ R11, AX, R10 |
|||
ADCXQ CX ,AX |
|||
|
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
|
|||
MOVQ $0x2833e84879b97091, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BX, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
|
|||
MOVQ $0xb85045b68181585d, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BP, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
|
|||
MOVQ $0x30644e72e131a029, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ SI, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BP |
|||
MOVQ $0, AX |
|||
ADCXQ AX, DX |
|||
ADOXQ DX, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 2 |
|||
|
|||
// clear the carry flags |
|||
XORQ R9 , R9 |
|||
|
|||
// R12 = y[2] |
|||
MOVQ 16(R8), R12 |
|||
|
|||
// for j=0 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
|
|||
// DX = res[0] |
|||
MOVQ 0(DI), DX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, CX |
|||
|
|||
// DX = res[1] |
|||
MOVQ 8(DI), DX |
|||
ADCXQ R9, BX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BX |
|||
|
|||
// DX = res[2] |
|||
MOVQ 16(DI), DX |
|||
ADCXQ R9, BP |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BP |
|||
|
|||
// DX = res[3] |
|||
MOVQ 24(DI), DX |
|||
ADCXQ R9, SI |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, SI |
|||
|
|||
// fold the pending CF and OF carries into R9 |
|||
MOVQ $0, DX |
|||
ADCXQ DX, R9 |
|||
ADOXQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ $0xc2e1f593efffffff, DX |
|||
MULXQ CX,R11, DX |
|||
|
|||
// clear the carry flags |
|||
XORQ DX, DX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, DX |
|||
MULXQ R11, AX, R10 |
|||
ADCXQ CX ,AX |
|||
|
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
|
|||
MOVQ $0x2833e84879b97091, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BX, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
|
|||
MOVQ $0xb85045b68181585d, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BP, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
|
|||
MOVQ $0x30644e72e131a029, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ SI, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BP |
|||
MOVQ $0, AX |
|||
ADCXQ AX, DX |
|||
ADOXQ DX, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 3 |
|||
|
|||
// clear the carry flags |
|||
XORQ R9 , R9 |
|||
|
|||
// R12 = y[3] |
|||
MOVQ 24(R8), R12 |
|||
|
|||
// for j=0 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
|
|||
// DX = res[0] |
|||
MOVQ 0(DI), DX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, CX |
|||
|
|||
// DX = res[1] |
|||
MOVQ 8(DI), DX |
|||
ADCXQ R9, BX |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BX |
|||
|
|||
// DX = res[2] |
|||
MOVQ 16(DI), DX |
|||
ADCXQ R9, BP |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, BP |
|||
|
|||
// DX = res[3] |
|||
MOVQ 24(DI), DX |
|||
ADCXQ R9, SI |
|||
MULXQ R12, AX, R9 |
|||
ADOXQ AX, SI |
|||
|
|||
// fold the pending CF and OF carries into R9 |
|||
MOVQ $0, DX |
|||
ADCXQ DX, R9 |
|||
ADOXQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ $0xc2e1f593efffffff, DX |
|||
MULXQ CX,R11, DX |
|||
|
|||
// clear the carry flags |
|||
XORQ DX, DX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, DX |
|||
MULXQ R11, AX, R10 |
|||
ADCXQ CX ,AX |
|||
|
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
|
|||
MOVQ $0x2833e84879b97091, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BX, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
|
|||
MOVQ $0xb85045b68181585d, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ BP, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
|
|||
MOVQ $0x30644e72e131a029, DX |
|||
MULXQ R11, AX, DX |
|||
ADCXQ SI, R10 |
|||
ADOXQ AX, R10 |
|||
MOVQ R10, BP |
|||
MOVQ $0, AX |
|||
ADCXQ AX, DX |
|||
ADOXQ DX, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// reduce: final conditional subtraction. On entry t = (CX, BX, BP, SI) and DI = res. |
// Computes u = t - q in scratch registers and stores u when t >= q, t otherwise. |
reduce: |
|||
// NOTE(review): originally labelled "constant time version", but the JCS below is a |
// data-dependent branch — confirm before relying on constant-time behaviour. |
|||
// first we copy registers storing t in a separate set of registers |
|||
// as SUBQ modifies the 2nd operand |
|||
MOVQ CX, DX |
|||
MOVQ BX, R8 |
|||
MOVQ BP, R9 |
|||
MOVQ SI, R10 |
|||
// u = t - q, limb by limb with borrow propagation (SUBQ then SBBQ) |
MOVQ $0x43e1f593f0000001, R11 |
|||
SUBQ R11, DX |
|||
MOVQ $0x2833e84879b97091, R11 |
|||
SBBQ R11, R8 |
|||
MOVQ $0xb85045b68181585d, R11 |
|||
SBBQ R11, R9 |
|||
MOVQ $0x30644e72e131a029, R11 |
|||
SBBQ R11, R10 |
|||
JCS t_is_smaller // borrow set: t < q, so t itself is the result |
|||
|
|||
// no borrow: t >= q, we return u = t - q |
|||
MOVQ DX, (DI) |
|||
MOVQ R8, 8(DI) |
|||
MOVQ R9, 16(DI) |
|||
MOVQ R10, 24(DI) |
|||
RET |
|||
// t < q: store t unchanged into res |
t_is_smaller: |
|||
MOVQ CX, 0(DI) |
|||
MOVQ BX, 8(DI) |
|||
MOVQ BP, 16(DI) |
|||
MOVQ SI, 24(DI) |
|||
RET |
|||
|
|||
// no_adx: fallback path using MULQ/IMULQ with ADDQ/ADCQ carry handling. |
// It is entered when the supportAdx check at the top of the routine fails. |
// Register use matches the ADX path: DI = res, R8 = y, R12 = y[i], |
// CX, BX, BP, SI = t[0..3], R9 = A, R10 = C, R11 = m; MULQ leaves its result in DX:AX. |
// In the comments below x[j] denotes res[j]. |
no_adx: |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 0 (t is empty, so the products are moved rather than added) |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[0] |
|||
MOVQ (DI), AX // x[0] |
|||
MOVQ 0(R8), R12 |
|||
MULQ R12 // x[0] * y[0] |
|||
MOVQ DX, R9 |
|||
MOVQ AX, CX |
|||
|
|||
// m := t[0]*q'[0] mod W (IMULQ keeps only the low 64 bits) |
|||
MOVQ $0xc2e1f593efffffff, R11 |
|||
IMULQ CX , R11 |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, AX |
|||
MULQ R11 |
|||
ADDQ CX ,AX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R10 |
|||
|
|||
// for j=1 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
MOVQ 8(DI), AX |
|||
MULQ R12 // x[1] * y[0] |
|||
MOVQ R9, BX |
|||
ADDQ AX, BX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x2833e84879b97091, AX |
|||
MULQ R11 |
|||
ADDQ BX, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
MOVQ 16(DI), AX |
|||
MULQ R12 // x[2] * y[0] |
|||
MOVQ R9, BP |
|||
ADDQ AX, BP |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0xb85045b68181585d, AX |
|||
MULQ R11 |
|||
ADDQ BP, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
MOVQ 24(DI), AX |
|||
MULQ R12 // x[3] * y[0] |
|||
MOVQ R9, SI |
|||
ADDQ AX, SI |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x30644e72e131a029, AX |
|||
MULQ R11 |
|||
ADDQ SI, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BP |
|||
MOVQ DX, R10 |
|||
|
|||
// t[N-1] = C + A |
ADDQ R10, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 1 |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[1] |
|||
MOVQ (DI), AX // x[0] |
|||
MOVQ 8(R8), R12 |
|||
MULQ R12 // x[0] * y[1] |
|||
ADDQ AX, CX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ $0xc2e1f593efffffff, R11 |
|||
IMULQ CX , R11 |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, AX |
|||
MULQ R11 |
|||
ADDQ CX ,AX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R10 |
|||
|
|||
// for j=1 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
MOVQ 8(DI), AX |
|||
MULQ R12 // x[1] * y[1] |
|||
ADDQ R9, BX |
|||
ADCQ $0, DX |
|||
ADDQ AX, BX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x2833e84879b97091, AX |
|||
MULQ R11 |
|||
ADDQ BX, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
MOVQ 16(DI), AX |
|||
MULQ R12 // x[2] * y[1] |
|||
ADDQ R9, BP |
|||
ADCQ $0, DX |
|||
ADDQ AX, BP |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0xb85045b68181585d, AX |
|||
MULQ R11 |
|||
ADDQ BP, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
MOVQ 24(DI), AX |
|||
MULQ R12 // x[3] * y[1] |
|||
ADDQ R9, SI |
|||
ADCQ $0, DX |
|||
ADDQ AX, SI |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x30644e72e131a029, AX |
|||
MULQ R11 |
|||
ADDQ SI, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BP |
|||
MOVQ DX, R10 |
|||
|
|||
// t[N-1] = C + A |
ADDQ R10, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 2 |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[2] |
|||
MOVQ (DI), AX // x[0] |
|||
MOVQ 16(R8), R12 |
|||
MULQ R12 // x[0] * y[2] |
|||
ADDQ AX, CX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ $0xc2e1f593efffffff, R11 |
|||
IMULQ CX , R11 |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, AX |
|||
MULQ R11 |
|||
ADDQ CX ,AX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R10 |
|||
|
|||
// for j=1 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
MOVQ 8(DI), AX |
|||
MULQ R12 // x[1] * y[2] |
|||
ADDQ R9, BX |
|||
ADCQ $0, DX |
|||
ADDQ AX, BX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x2833e84879b97091, AX |
|||
MULQ R11 |
|||
ADDQ BX, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
MOVQ 16(DI), AX |
|||
MULQ R12 // x[2] * y[2] |
|||
ADDQ R9, BP |
|||
ADCQ $0, DX |
|||
ADDQ AX, BP |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0xb85045b68181585d, AX |
|||
MULQ R11 |
|||
ADDQ BP, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
MOVQ 24(DI), AX |
|||
MULQ R12 // x[3] * y[2] |
|||
ADDQ R9, SI |
|||
ADCQ $0, DX |
|||
ADDQ AX, SI |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x30644e72e131a029, AX |
|||
MULQ R11 |
|||
ADDQ SI, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BP |
|||
MOVQ DX, R10 |
|||
|
|||
// t[N-1] = C + A |
ADDQ R10, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// --------------------------------------------------------------------------------------------- |
|||
// outer loop 3 |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[3] |
|||
MOVQ (DI), AX // x[0] |
|||
MOVQ 24(R8), R12 |
|||
MULQ R12 // x[0] * y[3] |
|||
ADDQ AX, CX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ $0xc2e1f593efffffff, R11 |
|||
IMULQ CX , R11 |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MOVQ $0x43e1f593f0000001, AX |
|||
MULQ R11 |
|||
ADDQ CX ,AX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R10 |
|||
|
|||
// for j=1 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
MOVQ 8(DI), AX |
|||
MULQ R12 // x[1] * y[3] |
|||
ADDQ R9, BX |
|||
ADCQ $0, DX |
|||
ADDQ AX, BX |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x2833e84879b97091, AX |
|||
MULQ R11 |
|||
ADDQ BX, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, CX |
|||
MOVQ DX, R10 |
|||
MOVQ 16(DI), AX |
|||
MULQ R12 // x[2] * y[3] |
|||
ADDQ R9, BP |
|||
ADCQ $0, DX |
|||
ADDQ AX, BP |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0xb85045b68181585d, AX |
|||
MULQ R11 |
|||
ADDQ BP, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BX |
|||
MOVQ DX, R10 |
|||
MOVQ 24(DI), AX |
|||
MULQ R12 // x[3] * y[3] |
|||
ADDQ R9, SI |
|||
ADCQ $0, DX |
|||
ADDQ AX, SI |
|||
ADCQ $0, DX |
|||
MOVQ DX, R9 |
|||
|
|||
MOVQ $0x30644e72e131a029, AX |
|||
MULQ R11 |
|||
ADDQ SI, R10 |
|||
ADCQ $0, DX |
|||
ADDQ AX, R10 |
|||
ADCQ $0, DX |
|||
|
|||
MOVQ R10, BP |
|||
MOVQ DX, R10 |
|||
|
|||
// t[N-1] = C + A |
ADDQ R10, R9 |
|||
MOVQ R9, SI |
|||
|
|||
// t is complete in (CX, BX, BP, SI): share the final reduction with the ADX path |
JMP reduce |
@ -0,0 +1,93 @@ |
|||
// +build !amd64
|
|||
|
|||
// Copyright 2020 ConsenSys AG
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by goff (v0.2.0) DO NOT EDIT
|
|||
|
|||
// Package ff contains field arithmetic operations
|
|||
package ff |
|||
|
|||
// /!\ WARNING /!\
|
|||
// this code has not been audited and is provided as-is. In particular,
|
|||
// there are no security guarantees such as constant time implementation
|
|||
// or side-channel attack resistance
|
|||
// /!\ WARNING /!\
|
|||
|
|||
import "math/bits" |
|||
|
|||
// Square z = x * x mod q
|
|||
// see https://hackmd.io/@zkteam/modular_multiplication
|
|||
func (z *Element) Square(x *Element) *Element { |
|||
|
|||
var p [4]uint64 |
|||
|
|||
var u, v uint64 |
|||
{ |
|||
// round 0
|
|||
u, p[0] = bits.Mul64(x[0], x[0]) |
|||
m := p[0] * 14042775128853446655 |
|||
C := madd0(m, 4891460686036598785, p[0]) |
|||
var t uint64 |
|||
t, u, v = madd1sb(x[0], x[1], u) |
|||
C, p[0] = madd2(m, 2896914383306846353, v, C) |
|||
t, u, v = madd1s(x[0], x[2], t, u) |
|||
C, p[1] = madd2(m, 13281191951274694749, v, C) |
|||
_, u, v = madd1s(x[0], x[3], t, u) |
|||
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u) |
|||
} |
|||
{ |
|||
// round 1
|
|||
m := p[0] * 14042775128853446655 |
|||
C := madd0(m, 4891460686036598785, p[0]) |
|||
u, v = madd1(x[1], x[1], p[1]) |
|||
C, p[0] = madd2(m, 2896914383306846353, v, C) |
|||
var t uint64 |
|||
t, u, v = madd2sb(x[1], x[2], p[2], u) |
|||
C, p[1] = madd2(m, 13281191951274694749, v, C) |
|||
_, u, v = madd2s(x[1], x[3], p[3], t, u) |
|||
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u) |
|||
} |
|||
{ |
|||
// round 2
|
|||
m := p[0] * 14042775128853446655 |
|||
C := madd0(m, 4891460686036598785, p[0]) |
|||
C, p[0] = madd2(m, 2896914383306846353, p[1], C) |
|||
u, v = madd1(x[2], x[2], p[2]) |
|||
C, p[1] = madd2(m, 13281191951274694749, v, C) |
|||
_, u, v = madd2sb(x[2], x[3], p[3], u) |
|||
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u) |
|||
} |
|||
{ |
|||
// round 3
|
|||
m := p[0] * 14042775128853446655 |
|||
C := madd0(m, 4891460686036598785, p[0]) |
|||
C, z[0] = madd2(m, 2896914383306846353, p[1], C) |
|||
C, z[1] = madd2(m, 13281191951274694749, p[2], C) |
|||
u, v = madd1(x[3], x[3], p[3]) |
|||
z[3], z[2] = madd3(m, 3486998266802970665, v, C, u) |
|||
} |
|||
|
|||
// if z > q --> z -= q
|
|||
// note: this is NOT constant time
|
|||
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { |
|||
var b uint64 |
|||
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) |
|||
z[1], b = bits.Sub64(z[1], 2896914383306846353, b) |
|||
z[2], b = bits.Sub64(z[2], 13281191951274694749, b) |
|||
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b) |
|||
} |
|||
return z |
|||
|
|||
} |
@ -0,0 +1,34 @@ |
|||
// Copyright 2020 ConsenSys AG
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by goff (v0.2.0) DO NOT EDIT
|
|||
|
|||
// Package ff contains field arithmetic operations
|
|||
package ff |
|||
|
|||
// SquareElement z = x * x mod q
|
|||
// calling this instead of z.Square(x) is preferred on performance-critical paths
|
|||
// go - noescape
|
|||
// func SquareElement(res,x *Element)
|
|||
|
|||
// Square z = x * x mod q
|
|||
// see https://hackmd.io/@zkteam/modular_multiplication
|
|||
func (z *Element) Square(x *Element) *Element { |
|||
if z != x { |
|||
z.Set(x) |
|||
} |
|||
MulAssignElement(z, x) |
|||
// SquareElement(z, x)
|
|||
return z |
|||
} |