@ -0,0 +1,24 @@ |
|||
//go:build !noadx
|
|||
// +build !noadx
|
|||
|
|||
// Copyright 2020 ConsenSys Software Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|||
|
|||
package ff |
|||
|
|||
import "golang.org/x/sys/cpu" |
|||
|
|||
// supportAdx records whether the running CPU reports both the ADX and BMI2
// extensions, as exposed by golang.org/x/sys/cpu. The assembly routines of this
// package compare it against 1 to choose between their MULX/ADCX/ADOX code path
// and the generic Go fallback.
var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2 |
@ -0,0 +1,25 @@ |
|||
//go:build noadx
|
|||
// +build noadx
|
|||
|
|||
// Copyright 2020 ConsenSys Software Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|||
|
|||
package ff |
|||
|
|||
// supportAdx is hard-wired to false in builds using the noadx tag, so the
// assembly routines that test it always take their non-ADX branch.
//
// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag
|
|||
// certain errors (like fatal error: missing stackmap)
|
|||
// this ensures we test all asm path.
|
|||
var supportAdx = false |
@ -0,0 +1,43 @@ |
|||
// Copyright 2020 ConsenSys Software Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|||
|
|||
// Package ff contains field arithmetic operations for modulus = 0x30644e...000001.
|
|||
//
|
|||
// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication)
|
|||
//
|
|||
// The modulus is hardcoded in all the operations.
|
|||
//
|
|||
// Field elements are represented as an array, and assumed to be in Montgomery form in all methods:
|
|||
// type Element [4]uint64
|
|||
//
|
|||
// Example API signature
|
|||
// // Mul z = x * y mod q
|
|||
// func (z *Element) Mul(x, y *Element) *Element
|
|||
//
|
|||
// and can be used like so:
|
|||
// var a, b Element
|
|||
// a.SetUint64(2)
|
|||
// b.SetString("984896738")
|
|||
// a.Mul(a, b)
|
|||
// a.Sub(a, a)
|
|||
// .Add(a, b)
|
|||
// .Inverse(a)
|
|||
// b.Exp(b, new(big.Int).SetUint64(42))
|
|||
//
|
|||
// Modulus
|
|||
// 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 // base 16
|
|||
// 21888242871839275222246405745257275088548364400416034343698204186575808495617 // base 10
|
|||
package ff |
@ -0,0 +1,136 @@ |
|||
//go:build gofuzz
|
|||
// +build gofuzz
|
|||
|
|||
// Copyright 2020 ConsenSys Software Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|||
|
|||
package ff |
|||
|
|||
import ( |
|||
"bytes" |
|||
"encoding/binary" |
|||
"io" |
|||
"math/big" |
|||
"math/bits" |
|||
) |
|||
|
|||
// Values Fuzz can hand back to the fuzzing engine. Only fuzzNormal is
// returned by the code in this file.
// NOTE(review): the names appear to follow the go-fuzz return-value
// convention — confirm against the fuzzer in use.
const (
	fuzzDiscard     = -1
	fuzzNormal      = 0
	fuzzInteresting = 1
)
|||
|
|||
// Fuzz arithmetic operations fuzzer
|
|||
func Fuzz(data []byte) int { |
|||
r := bytes.NewReader(data) |
|||
|
|||
var e1, e2 Element |
|||
e1.SetRawBytes(r) |
|||
e2.SetRawBytes(r) |
|||
|
|||
{ |
|||
// mul assembly
|
|||
|
|||
var c, _c Element |
|||
a, _a, b, _b := e1, e1, e2, e2 |
|||
c.Mul(&a, &b) |
|||
_mulGeneric(&_c, &_a, &_b) |
|||
|
|||
if !c.Equal(&_c) { |
|||
panic("mul asm != mul generic on Element") |
|||
} |
|||
} |
|||
|
|||
{ |
|||
// inverse
|
|||
inv := e1 |
|||
inv.Inverse(&inv) |
|||
|
|||
var bInv, b1, b2 big.Int |
|||
e1.ToBigIntRegular(&b1) |
|||
bInv.ModInverse(&b1, Modulus()) |
|||
inv.ToBigIntRegular(&b2) |
|||
|
|||
if b2.Cmp(&bInv) != 0 { |
|||
panic("inverse operation doesn't match big int result") |
|||
} |
|||
} |
|||
|
|||
{ |
|||
// a + -a == 0
|
|||
a, b := e1, e1 |
|||
b.Neg(&b) |
|||
a.Add(&a, &b) |
|||
if !a.IsZero() { |
|||
panic("a + -a != 0") |
|||
} |
|||
} |
|||
|
|||
return fuzzNormal |
|||
|
|||
} |
|||
|
|||
// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader
|
|||
// and interpret it as big endian uint64
|
|||
// used for fuzzing purposes only
|
|||
func (z *Element) SetRawBytes(r io.Reader) { |
|||
|
|||
buf := make([]byte, 8) |
|||
|
|||
for i := 0; i < len(z); i++ { |
|||
if _, err := io.ReadFull(r, buf); err != nil { |
|||
goto eof |
|||
} |
|||
z[i] = binary.BigEndian.Uint64(buf[:]) |
|||
} |
|||
eof: |
|||
z[3] %= qElement[3] |
|||
|
|||
if z.BiggerModulus() { |
|||
var b uint64 |
|||
z[0], b = bits.Sub64(z[0], qElement[0], 0) |
|||
z[1], b = bits.Sub64(z[1], qElement[1], b) |
|||
z[2], b = bits.Sub64(z[2], qElement[2], b) |
|||
z[3], b = bits.Sub64(z[3], qElement[3], b) |
|||
} |
|||
|
|||
return |
|||
} |
|||
|
|||
func (z *Element) BiggerModulus() bool { |
|||
if z[3] > qElement[3] { |
|||
return true |
|||
} |
|||
if z[3] < qElement[3] { |
|||
return false |
|||
} |
|||
|
|||
if z[2] > qElement[2] { |
|||
return true |
|||
} |
|||
if z[2] < qElement[2] { |
|||
return false |
|||
} |
|||
|
|||
if z[1] > qElement[1] { |
|||
return true |
|||
} |
|||
if z[1] < qElement[1] { |
|||
return false |
|||
} |
|||
|
|||
return z[0] >= qElement[0] |
|||
} |
@ -0,0 +1,466 @@ |
|||
// +build amd64_adx |
|||
|
|||
// Copyright 2020 ConsenSys Software Inc. |
|||
// |
|||
// Licensed under the Apache License, Version 2.0 (the "License"); |
|||
// you may not use this file except in compliance with the License. |
|||
// You may obtain a copy of the License at |
|||
// |
|||
// http://www.apache.org/licenses/LICENSE-2.0 |
|||
// |
|||
// Unless required by applicable law or agreed to in writing, software |
|||
// distributed under the License is distributed on an "AS IS" BASIS, |
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
// See the License for the specific language governing permissions and |
|||
// limitations under the License. |
|||
|
|||
#include "textflag.h" |
|||
#include "funcdata.h" |
|||
|
|||
// q<> holds the modulus as four 64-bit limbs in read-only data, least
// significant limb at offset 0.
// modulus q |
|||
DATA q<>+0(SB)/8, $0x43e1f593f0000001 |
|||
DATA q<>+8(SB)/8, $0x2833e84879b97091 |
|||
DATA q<>+16(SB)/8, $0xb85045b68181585d |
|||
DATA q<>+24(SB)/8, $0x30644e72e131a029 |
|||
GLOBL q<>(SB), (RODATA+NOPTR), $32 |
|||
|
|||
// qInv0<> is the 64-bit constant the reduction steps below multiply t[0] by
// to obtain m (m := t[0]*q'[0] mod W).
// qInv0 q'[0] |
|||
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff |
|||
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 |
|||
|
|||
// REDUCE performs one conditional subtraction of q on the 4-limb value
// (ra0..ra3), ra0 being the least significant limb: it copies ra into the
// scratch registers (rb0..rb3), computes ra - q with a SUBQ/SBBQ borrow chain
// and, when that subtraction borrowed (carry flag set), restores the saved
// copy. ra therefore ends up as ra - q if ra >= q and unchanged otherwise.
// Clobbers rb0..rb3 and the flags.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ |
|||
MOVQ ra0, rb0; \ |
|||
SUBQ q<>(SB), ra0; \ |
|||
MOVQ ra1, rb1; \ |
|||
SBBQ q<>+8(SB), ra1; \ |
|||
MOVQ ra2, rb2; \ |
|||
SBBQ q<>+16(SB), ra2; \ |
|||
MOVQ ra3, rb3; \ |
|||
SBBQ q<>+24(SB), ra3; \ |
|||
CMOVQCS rb0, ra0; \ |
|||
CMOVQCS rb1, ra1; \ |
|||
CMOVQCS rb2, ra2; \ |
|||
CMOVQCS rb3, ra3; \ |
|||
|
|||
// mul(res, x, y *Element)
//
// mul writes the Montgomery product of x and y to res, using the MULX, ADCX
// and ADOX instructions unconditionally (this file is only built with the
// amd64_adx tag).
//
// The accumulator A lives in R13, not BP: the function is frameless
// (NOSPLIT, $0), so no prologue/epilogue saves and restores the frame pointer
// and BP must be left untouched for the caller.
TEXT ·mul(SB), NOSPLIT, $0-24

	// the algorithm is described here
	// https://hackmd.io/@zkteam/modular_multiplication
	// however, to benefit from the ADCX and ADOX carry chains
	// we split the inner loops in 2:
	// for i=0 to N-1
	// 		for j=0 to N-1
	// 		    (A,t[j])  := t[j] + x[j]*y[i] + A
	// 		m := t[0]*q'[0] mod W
	// 		C,_ := t[0] + m*q[0]
	// 		for j=1 to N-1
	// 		    (C,t[j-1]) := t[j] + m*q[j] + C
	// 		t[N-1] = C + A

	MOVQ x+8(FP), SI

	// x[0] -> DI
	// x[1] -> R8
	// x[2] -> R9
	// x[3] -> R10
	MOVQ 0(SI), DI
	MOVQ 8(SI), R8
	MOVQ 16(SI), R9
	MOVQ 24(SI), R10
	MOVQ y+16(FP), R11

	// A -> R13
	// t[0] -> R14
	// t[1] -> R15
	// t[2] -> CX
	// t[3] -> BX
	// clear the flags
	XORQ AX, AX
	MOVQ 0(R11), DX

	// (A,t[0])  := x[0]*y[0] + A
	MULXQ DI, R14, R15

	// (A,t[1])  := x[1]*y[0] + A
	MULXQ R8, AX, CX
	ADOXQ AX, R15

	// (A,t[2])  := x[2]*y[0] + A
	MULXQ R9, AX, BX
	ADOXQ AX, CX

	// (A,t[3])  := x[3]*y[0] + A
	MULXQ R10, AX, R13
	ADOXQ AX, BX

	// A += carries from ADCXQ and ADOXQ
	MOVQ $0, AX
	ADOXQ AX, R13

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX

	// clear the flags
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R12
	ADCXQ R14, AX
	MOVQ R12, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX

	// t[3] = C + A
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ R13, BX

	// clear the flags
	XORQ AX, AX
	MOVQ 8(R11), DX

	// (A,t[0])  := t[0] + x[0]*y[1] + A
	MULXQ DI, AX, R13
	ADOXQ AX, R14

	// (A,t[1])  := t[1] + x[1]*y[1] + A
	ADCXQ R13, R15
	MULXQ R8, AX, R13
	ADOXQ AX, R15

	// (A,t[2])  := t[2] + x[2]*y[1] + A
	ADCXQ R13, CX
	MULXQ R9, AX, R13
	ADOXQ AX, CX

	// (A,t[3])  := t[3] + x[3]*y[1] + A
	ADCXQ R13, BX
	MULXQ R10, AX, R13
	ADOXQ AX, BX

	// A += carries from ADCXQ and ADOXQ
	MOVQ $0, AX
	ADCXQ AX, R13
	ADOXQ AX, R13

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX

	// clear the flags
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R12
	ADCXQ R14, AX
	MOVQ R12, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX

	// t[3] = C + A
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ R13, BX

	// clear the flags
	XORQ AX, AX
	MOVQ 16(R11), DX

	// (A,t[0])  := t[0] + x[0]*y[2] + A
	MULXQ DI, AX, R13
	ADOXQ AX, R14

	// (A,t[1])  := t[1] + x[1]*y[2] + A
	ADCXQ R13, R15
	MULXQ R8, AX, R13
	ADOXQ AX, R15

	// (A,t[2])  := t[2] + x[2]*y[2] + A
	ADCXQ R13, CX
	MULXQ R9, AX, R13
	ADOXQ AX, CX

	// (A,t[3])  := t[3] + x[3]*y[2] + A
	ADCXQ R13, BX
	MULXQ R10, AX, R13
	ADOXQ AX, BX

	// A += carries from ADCXQ and ADOXQ
	MOVQ $0, AX
	ADCXQ AX, R13
	ADOXQ AX, R13

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX

	// clear the flags
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R12
	ADCXQ R14, AX
	MOVQ R12, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX

	// t[3] = C + A
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ R13, BX

	// clear the flags
	XORQ AX, AX
	MOVQ 24(R11), DX

	// (A,t[0])  := t[0] + x[0]*y[3] + A
	MULXQ DI, AX, R13
	ADOXQ AX, R14

	// (A,t[1])  := t[1] + x[1]*y[3] + A
	ADCXQ R13, R15
	MULXQ R8, AX, R13
	ADOXQ AX, R15

	// (A,t[2])  := t[2] + x[2]*y[3] + A
	ADCXQ R13, CX
	MULXQ R9, AX, R13
	ADOXQ AX, CX

	// (A,t[3])  := t[3] + x[3]*y[3] + A
	ADCXQ R13, BX
	MULXQ R10, AX, R13
	ADOXQ AX, BX

	// A += carries from ADCXQ and ADOXQ
	MOVQ $0, AX
	ADCXQ AX, R13
	ADOXQ AX, R13

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX

	// clear the flags
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R12
	ADCXQ R14, AX
	MOVQ R12, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX

	// t[3] = C + A
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ R13, BX

	// reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11)
	// (A in R13 is dead at this point, so R13 can serve as a temporary)
	REDUCE(R14,R15,CX,BX,R13,SI,R12,R11)

	MOVQ res+0(FP), AX
	MOVQ R14, 0(AX)
	MOVQ R15, 8(AX)
	MOVQ CX, 16(AX)
	MOVQ BX, 24(AX)
	RET
|||
|
|||
// fromMont(res *Element)
//
// fromMont runs the four reduction rounds of the multiplication algorithm with
// y = 1 on res, in place, followed by a final conditional subtraction of q.
//
// The high word of m*q[0] is held in R13, not BP: the function is frameless
// (NOSPLIT, $0), so no prologue/epilogue saves and restores the frame pointer
// and BP must be left untouched for the caller.
TEXT ·fromMont(SB), NOSPLIT, $0-8

	// the algorithm is described here
	// https://hackmd.io/@zkteam/modular_multiplication
	// when y = 1 we have:
	// for i=0 to N-1
	// 		t[i] = x[i]
	// for i=0 to N-1
	// 		m := t[0]*q'[0] mod W
	// 		C,_ := t[0] + m*q[0]
	// 		for j=1 to N-1
	// 		    (C,t[j-1]) := t[j] + m*q[j] + C
	// 		t[N-1] = C
	MOVQ res+0(FP), DX
	MOVQ 0(DX), R14
	MOVQ 8(DX), R15
	MOVQ 16(DX), CX
	MOVQ 24(DX), BX
	XORQ DX, DX

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R13
	ADCXQ R14, AX
	MOVQ R13, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ AX, BX
	XORQ DX, DX

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R13
	ADCXQ R14, AX
	MOVQ R13, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ AX, BX
	XORQ DX, DX

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R13
	ADCXQ R14, AX
	MOVQ R13, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ AX, BX
	XORQ DX, DX

	// m := t[0]*q'[0] mod W
	MOVQ qInv0<>(SB), DX
	IMULQ R14, DX
	XORQ AX, AX

	// C,_ := t[0] + m*q[0]
	MULXQ q<>+0(SB), AX, R13
	ADCXQ R14, AX
	MOVQ R13, R14

	// (C,t[0]) := t[1] + m*q[1] + C
	ADCXQ R15, R14
	MULXQ q<>+8(SB), AX, R15
	ADOXQ AX, R14

	// (C,t[1]) := t[2] + m*q[2] + C
	ADCXQ CX, R15
	MULXQ q<>+16(SB), AX, CX
	ADOXQ AX, R15

	// (C,t[2]) := t[3] + m*q[3] + C
	ADCXQ BX, CX
	MULXQ q<>+24(SB), AX, BX
	ADOXQ AX, CX
	MOVQ $0, AX
	ADCXQ AX, BX
	ADOXQ AX, BX

	// reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9)
	REDUCE(R14,R15,CX,BX,SI,DI,R8,R9)

	MOVQ res+0(FP), AX
	MOVQ R14, 0(AX)
	MOVQ R15, 8(AX)
	MOVQ CX, 16(AX)
	MOVQ BX, 24(AX)
	RET
@ -0,0 +1,488 @@ |
|||
// +build !amd64_adx |
|||
|
|||
// Copyright 2020 ConsenSys Software Inc. |
|||
// |
|||
// Licensed under the Apache License, Version 2.0 (the "License"); |
|||
// you may not use this file except in compliance with the License. |
|||
// You may obtain a copy of the License at |
|||
// |
|||
// http://www.apache.org/licenses/LICENSE-2.0 |
|||
// |
|||
// Unless required by applicable law or agreed to in writing, software |
|||
// distributed under the License is distributed on an "AS IS" BASIS, |
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
// See the License for the specific language governing permissions and |
|||
// limitations under the License. |
|||
|
|||
#include "textflag.h" |
|||
#include "funcdata.h" |
|||
|
|||
// q<> holds the modulus as four 64-bit limbs in read-only data, least
// significant limb at offset 0.
// modulus q |
|||
DATA q<>+0(SB)/8, $0x43e1f593f0000001 |
|||
DATA q<>+8(SB)/8, $0x2833e84879b97091 |
|||
DATA q<>+16(SB)/8, $0xb85045b68181585d |
|||
DATA q<>+24(SB)/8, $0x30644e72e131a029 |
|||
GLOBL q<>(SB), (RODATA+NOPTR), $32 |
|||
|
|||
// qInv0<> is the 64-bit constant the reduction steps below multiply t[0] by
// to obtain m (m := t[0]*q'[0] mod W).
// qInv0 q'[0] |
|||
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff |
|||
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 |
|||
|
|||
// REDUCE performs one conditional subtraction of q on the 4-limb value
// (ra0..ra3), ra0 being the least significant limb: it copies ra into the
// scratch registers (rb0..rb3), computes ra - q with a SUBQ/SBBQ borrow chain
// and, when that subtraction borrowed (carry flag set), restores the saved
// copy. ra therefore ends up as ra - q if ra >= q and unchanged otherwise.
// Clobbers rb0..rb3 and the flags.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ |
|||
MOVQ ra0, rb0; \ |
|||
SUBQ q<>(SB), ra0; \ |
|||
MOVQ ra1, rb1; \ |
|||
SBBQ q<>+8(SB), ra1; \ |
|||
MOVQ ra2, rb2; \ |
|||
SBBQ q<>+16(SB), ra2; \ |
|||
MOVQ ra3, rb3; \ |
|||
SBBQ q<>+24(SB), ra3; \ |
|||
CMOVQCS rb0, ra0; \ |
|||
CMOVQCS rb1, ra1; \ |
|||
CMOVQCS rb2, ra2; \ |
|||
CMOVQCS rb3, ra3; \ |
|||
|
|||
// mul writes the Montgomery product of x and y to res.
// It tests ·supportAdx at run time: when the flag equals 1 the
// MULX/ADCX/ADOX code below runs; otherwise control jumps to l1, which
// forwards the three pointers to ·_mulGeneric through the 24-byte frame.
// mul(res, x, y *Element) |
|||
TEXT ·mul(SB), $24-24 |
|||
|
|||
// the algorithm is described here |
|||
// https://hackmd.io/@zkteam/modular_multiplication |
|||
// however, to benefit from the ADCX and ADOX carry chains |
|||
// we split the inner loops in 2: |
|||
// for i=0 to N-1 |
|||
// for j=0 to N-1 |
|||
// (A,t[j]) := t[j] + x[j]*y[i] + A |
|||
// m := t[0]*q'[0] mod W |
|||
// C,_ := t[0] + m*q[0] |
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
// t[N-1] = C + A |
|||
|
|||
NO_LOCAL_POINTERS |
|||
// run-time dispatch: anything other than supportAdx == 1 takes the generic path at l1
CMPB ·supportAdx(SB), $1 |
|||
JNE l1 |
|||
MOVQ x+8(FP), SI |
|||
|
|||
// x[0] -> DI |
|||
// x[1] -> R8 |
|||
// x[2] -> R9 |
|||
// x[3] -> R10 |
|||
MOVQ 0(SI), DI |
|||
MOVQ 8(SI), R8 |
|||
MOVQ 16(SI), R9 |
|||
MOVQ 24(SI), R10 |
|||
MOVQ y+16(FP), R11 |
|||
|
|||
// A -> BP |
|||
// t[0] -> R14 |
|||
// t[1] -> R15 |
|||
// t[2] -> CX |
|||
// t[3] -> BX |
|||
// clear the flags |
|||
XORQ AX, AX |
|||
MOVQ 0(R11), DX |
|||
|
|||
// (A,t[0]) := x[0]*y[0] + A |
|||
MULXQ DI, R14, R15 |
|||
|
|||
// (A,t[1]) := x[1]*y[0] + A |
|||
MULXQ R8, AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (A,t[2]) := x[2]*y[0] + A |
|||
MULXQ R9, AX, BX |
|||
ADOXQ AX, CX |
|||
|
|||
// (A,t[3]) := x[3]*y[0] + A |
|||
MULXQ R10, AX, BP |
|||
ADOXQ AX, BX |
|||
|
|||
// A += carries from ADCXQ and ADOXQ |
|||
MOVQ $0, AX |
|||
ADOXQ AX, BP |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, R12 |
|||
ADCXQ R14, AX |
|||
MOVQ R12, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
|
|||
// t[3] = C + A |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ BP, BX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
MOVQ 8(R11), DX |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[1] + A |
|||
MULXQ DI, AX, BP |
|||
ADOXQ AX, R14 |
|||
|
|||
// (A,t[1]) := t[1] + x[1]*y[1] + A |
|||
ADCXQ BP, R15 |
|||
MULXQ R8, AX, BP |
|||
ADOXQ AX, R15 |
|||
|
|||
// (A,t[2]) := t[2] + x[2]*y[1] + A |
|||
ADCXQ BP, CX |
|||
MULXQ R9, AX, BP |
|||
ADOXQ AX, CX |
|||
|
|||
// (A,t[3]) := t[3] + x[3]*y[1] + A |
|||
ADCXQ BP, BX |
|||
MULXQ R10, AX, BP |
|||
ADOXQ AX, BX |
|||
|
|||
// A += carries from ADCXQ and ADOXQ |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BP |
|||
ADOXQ AX, BP |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, R12 |
|||
ADCXQ R14, AX |
|||
MOVQ R12, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
|
|||
// t[3] = C + A |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ BP, BX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
MOVQ 16(R11), DX |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[2] + A |
|||
MULXQ DI, AX, BP |
|||
ADOXQ AX, R14 |
|||
|
|||
// (A,t[1]) := t[1] + x[1]*y[2] + A |
|||
ADCXQ BP, R15 |
|||
MULXQ R8, AX, BP |
|||
ADOXQ AX, R15 |
|||
|
|||
// (A,t[2]) := t[2] + x[2]*y[2] + A |
|||
ADCXQ BP, CX |
|||
MULXQ R9, AX, BP |
|||
ADOXQ AX, CX |
|||
|
|||
// (A,t[3]) := t[3] + x[3]*y[2] + A |
|||
ADCXQ BP, BX |
|||
MULXQ R10, AX, BP |
|||
ADOXQ AX, BX |
|||
|
|||
// A += carries from ADCXQ and ADOXQ |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BP |
|||
ADOXQ AX, BP |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, R12 |
|||
ADCXQ R14, AX |
|||
MOVQ R12, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
|
|||
// t[3] = C + A |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ BP, BX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
MOVQ 24(R11), DX |
|||
|
|||
// (A,t[0]) := t[0] + x[0]*y[3] + A |
|||
MULXQ DI, AX, BP |
|||
ADOXQ AX, R14 |
|||
|
|||
// (A,t[1]) := t[1] + x[1]*y[3] + A |
|||
ADCXQ BP, R15 |
|||
MULXQ R8, AX, BP |
|||
ADOXQ AX, R15 |
|||
|
|||
// (A,t[2]) := t[2] + x[2]*y[3] + A |
|||
ADCXQ BP, CX |
|||
MULXQ R9, AX, BP |
|||
ADOXQ AX, CX |
|||
|
|||
// (A,t[3]) := t[3] + x[3]*y[3] + A |
|||
ADCXQ BP, BX |
|||
MULXQ R10, AX, BP |
|||
ADOXQ AX, BX |
|||
|
|||
// A += carries from ADCXQ and ADOXQ |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BP |
|||
ADOXQ AX, BP |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
|
|||
// clear the flags |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, R12 |
|||
ADCXQ R14, AX |
|||
MOVQ R12, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
|
|||
// t[3] = C + A |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ BP, BX |
|||
|
|||
// reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11) |
|||
REDUCE(R14,R15,CX,BX,R13,SI,R12,R11) |
|||
|
|||
MOVQ res+0(FP), AX |
|||
MOVQ R14, 0(AX) |
|||
MOVQ R15, 8(AX) |
|||
MOVQ CX, 16(AX) |
|||
MOVQ BX, 24(AX) |
|||
RET |
|||
|
|||
// generic fallback: copy (res, x, y) into the outgoing argument slots at
// 0(SP), 8(SP), 16(SP) and call the Go implementation
l1: |
|||
MOVQ res+0(FP), AX |
|||
MOVQ AX, (SP) |
|||
MOVQ x+8(FP), AX |
|||
MOVQ AX, 8(SP) |
|||
MOVQ y+16(FP), AX |
|||
MOVQ AX, 16(SP) |
|||
CALL ·_mulGeneric(SB) |
|||
RET |
|||
|
|||
// fromMont(res *Element)
// Runs the four reduction rounds of the multiplication algorithm with y = 1 on
// res, in place, followed by a final conditional subtraction of q.
// It tests ·supportAdx at run time: when the flag equals 1 the
// MULX/ADCX/ADOX code below runs; otherwise control jumps to l2, which passes
// res to ·_fromMontGeneric through the 8-byte frame.
TEXT ·fromMont(SB), $8-8 |
|||
NO_LOCAL_POINTERS |
|||
|
|||
// the algorithm is described here |
|||
// https://hackmd.io/@zkteam/modular_multiplication |
|||
// when y = 1 we have: |
|||
// for i=0 to N-1 |
|||
// t[i] = x[i] |
|||
// for i=0 to N-1 |
|||
// m := t[0]*q'[0] mod W |
|||
// C,_ := t[0] + m*q[0] |
|||
// for j=1 to N-1 |
|||
// (C,t[j-1]) := t[j] + m*q[j] + C |
|||
// t[N-1] = C |
|||
// run-time dispatch: anything other than supportAdx == 1 takes the generic path at l2
CMPB ·supportAdx(SB), $1 |
|||
JNE l2 |
|||
// t[0..3] -> R14, R15, CX, BX
MOVQ res+0(FP), DX |
|||
MOVQ 0(DX), R14 |
|||
MOVQ 8(DX), R15 |
|||
MOVQ 16(DX), CX |
|||
MOVQ 24(DX), BX |
|||
XORQ DX, DX |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, BP |
|||
ADCXQ R14, AX |
|||
MOVQ BP, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ AX, BX |
|||
XORQ DX, DX |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, BP |
|||
ADCXQ R14, AX |
|||
MOVQ BP, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ AX, BX |
|||
XORQ DX, DX |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, BP |
|||
ADCXQ R14, AX |
|||
MOVQ BP, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ AX, BX |
|||
XORQ DX, DX |
|||
|
|||
// m := t[0]*q'[0] mod W |
|||
MOVQ qInv0<>(SB), DX |
|||
IMULQ R14, DX |
|||
XORQ AX, AX |
|||
|
|||
// C,_ := t[0] + m*q[0] |
|||
MULXQ q<>+0(SB), AX, BP |
|||
ADCXQ R14, AX |
|||
MOVQ BP, R14 |
|||
|
|||
// (C,t[0]) := t[1] + m*q[1] + C |
|||
ADCXQ R15, R14 |
|||
MULXQ q<>+8(SB), AX, R15 |
|||
ADOXQ AX, R14 |
|||
|
|||
// (C,t[1]) := t[2] + m*q[2] + C |
|||
ADCXQ CX, R15 |
|||
MULXQ q<>+16(SB), AX, CX |
|||
ADOXQ AX, R15 |
|||
|
|||
// (C,t[2]) := t[3] + m*q[3] + C |
|||
ADCXQ BX, CX |
|||
MULXQ q<>+24(SB), AX, BX |
|||
ADOXQ AX, CX |
|||
MOVQ $0, AX |
|||
ADCXQ AX, BX |
|||
ADOXQ AX, BX |
|||
|
|||
// reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9) |
|||
REDUCE(R14,R15,CX,BX,SI,DI,R8,R9) |
|||
|
|||
MOVQ res+0(FP), AX |
|||
MOVQ R14, 0(AX) |
|||
MOVQ R15, 8(AX) |
|||
MOVQ CX, 16(AX) |
|||
MOVQ BX, 24(AX) |
|||
RET |
|||
|
|||
// generic fallback: pass res in the outgoing argument slot at 0(SP) and call
// the Go implementation
l2: |
|||
MOVQ res+0(FP), AX |
|||
MOVQ AX, (SP) |
|||
CALL ·_fromMontGeneric(SB) |
|||
RET |
@ -0,0 +1,50 @@ |
|||
// Copyright 2020 ConsenSys Software Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|||
|
|||
package ff |
|||
|
|||
// MulBy3 has no Go body; it is implemented in assembly.
// NOTE(review): presumably replaces x with 3*x mod q in place — the assembly
// is not visible here; confirm.
//go:noescape
|
|||
func MulBy3(x *Element) |
|||
|
|||
// MulBy5 has no Go body; it is implemented in assembly.
// NOTE(review): presumably replaces x with 5*x mod q in place — the assembly
// is not visible here; confirm.
//go:noescape
|
|||
func MulBy5(x *Element) |
|||
|
|||
// MulBy13 has no Go body; it is implemented in assembly.
// NOTE(review): presumably replaces x with 13*x mod q in place — the assembly
// is not visible here; confirm.
//go:noescape
|
|||
func MulBy13(x *Element) |
|||
|
|||
// add stores x + y in res, after one conditional subtraction of q
// (assembly implementation).
//go:noescape
|
|||
func add(res, x, y *Element) |
|||
|
|||
// sub stores x - y in res, adding q back when the subtraction borrows
// (assembly implementation).
//go:noescape
|
|||
func sub(res, x, y *Element) |
|||
|
|||
// neg has no Go body; it is implemented in assembly.
// NOTE(review): presumably stores -x mod q in res — confirm against the assembly.
//go:noescape
|
|||
func neg(res, x *Element) |
|||
|
|||
// double is implemented in assembly: it adds x to itself limb by limb with
// carry and applies one conditional subtraction of q.
// NOTE(review): presumably the result is stored in res — confirm against the assembly.
//go:noescape
|
|||
func double(res, x *Element) |
|||
|
|||
// mul stores the Montgomery product of x and y in res (assembly
// implementation; one build variant falls back to _mulGeneric when
// supportAdx is not set).
//go:noescape
|
|||
func mul(res, x, y *Element) |
|||
|
|||
// fromMont applies, in place on res, the reduction rounds of the
// multiplication algorithm with y = 1 (assembly implementation; one build
// variant falls back to _fromMontGeneric when supportAdx is not set).
//go:noescape
|
|||
func fromMont(res *Element) |
|||
|
|||
// reduce has no Go body; it is implemented in assembly.
// NOTE(review): presumably subtracts q from res when res >= q — confirm
// against the assembly.
//go:noescape
|
|||
func reduce(res *Element) |
|||
|
|||
// Butterfly has no Go body; it is implemented in assembly.
// NOTE(review): presumably sets (a, b) = (a + b, a - b) — confirm against the
// assembly.
//go:noescape
|
|||
func Butterfly(a, b *Element) |
@ -0,0 +1,340 @@ |
|||
// Copyright 2020 ConsenSys Software Inc. |
|||
// |
|||
// Licensed under the Apache License, Version 2.0 (the "License"); |
|||
// you may not use this file except in compliance with the License. |
|||
// You may obtain a copy of the License at |
|||
// |
|||
// http://www.apache.org/licenses/LICENSE-2.0 |
|||
// |
|||
// Unless required by applicable law or agreed to in writing, software |
|||
// distributed under the License is distributed on an "AS IS" BASIS, |
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|||
// See the License for the specific language governing permissions and |
|||
// limitations under the License. |
|||
|
|||
#include "textflag.h" |
|||
#include "funcdata.h" |
|||
|
|||
// modulus q |
|||
DATA q<>+0(SB)/8, $0x43e1f593f0000001 |
|||
DATA q<>+8(SB)/8, $0x2833e84879b97091 |
|||
DATA q<>+16(SB)/8, $0xb85045b68181585d |
|||
DATA q<>+24(SB)/8, $0x30644e72e131a029 |
|||
GLOBL q<>(SB), (RODATA+NOPTR), $32 |
|||
|
|||
// qInv0 q'[0] |
|||
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff |
|||
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8 |
|||
|
|||
// REDUCE conditionally subtracts q from the 4-word value held in (ra0..ra3),
// least-significant word first, leaving the result in (ra0..ra3).
// Each word is first saved into the matching scratch register rb0..rb3, then
// q is subtracted with a SUBQ/SBBQ borrow chain. The interleaved MOVQs do not
// modify flags, so the borrow propagates across them. If the final SBBQ leaves
// the carry flag set (the value was smaller than q), CMOVQCS restores the saved
// words; otherwise the subtracted value is kept.
// Clobbers rb0..rb3 and the flags.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \ |
|||
MOVQ ra0, rb0; \ |
|||
SUBQ q<>(SB), ra0; \ |
|||
MOVQ ra1, rb1; \ |
|||
SBBQ q<>+8(SB), ra1; \ |
|||
MOVQ ra2, rb2; \ |
|||
SBBQ q<>+16(SB), ra2; \ |
|||
MOVQ ra3, rb3; \ |
|||
SBBQ q<>+24(SB), ra3; \ |
|||
CMOVQCS rb0, ra0; \ |
|||
CMOVQCS rb1, ra1; \ |
|||
CMOVQCS rb2, ra2; \ |
|||
CMOVQCS rb3, ra3; \ |
|||
|
|||
// add(res, x, y *Element) |
|||
// Sets res = x + y, then subtracts q once if the sum is >= q.
// No stack frame ($0); 24 bytes of arguments (three pointers).
// NOTE(review): the carry out of the top word is ignored — assumes x and y are
// small enough that the 256-bit sum cannot overflow; confirm with callers.
TEXT ·add(SB), NOSPLIT, $0-24 |
|||
// load the four words of x into (CX,BX,SI,DI)
MOVQ x+8(FP), AX |
|||
MOVQ 0(AX), CX |
|||
MOVQ 8(AX), BX |
|||
MOVQ 16(AX), SI |
|||
MOVQ 24(AX), DI |
|||
// add the four words of y, propagating the carry
MOVQ y+16(FP), DX |
|||
ADDQ 0(DX), CX |
|||
ADCQ 8(DX), BX |
|||
ADCQ 16(DX), SI |
|||
ADCQ 24(DX), DI |
|||
|
|||
// reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) |
|||
REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) |
|||
|
|||
// store the result into res
MOVQ res+0(FP), R12 |
|||
MOVQ CX, 0(R12) |
|||
MOVQ BX, 8(R12) |
|||
MOVQ SI, 16(R12) |
|||
MOVQ DI, 24(R12) |
|||
RET |
|||
|
|||
// sub(res, x, y *Element) |
|||
// Sets res = x - y; when the subtraction borrows, q is added back.
// No stack frame ($0); 24 bytes of arguments (three pointers).
TEXT ·sub(SB), NOSPLIT, $0-24 |
|||
// DI = 0, used below as the "add nothing" operand; zeroed before the
// subtraction so that the borrow flag is not disturbed afterwards
XORQ DI, DI |
|||
// load the four words of x into (AX,DX,CX,BX)
MOVQ x+8(FP), SI |
|||
MOVQ 0(SI), AX |
|||
MOVQ 8(SI), DX |
|||
MOVQ 16(SI), CX |
|||
MOVQ 24(SI), BX |
|||
// subtract the four words of y, propagating the borrow
MOVQ y+16(FP), SI |
|||
SUBQ 0(SI), AX |
|||
SBBQ 8(SI), DX |
|||
SBBQ 16(SI), CX |
|||
SBBQ 24(SI), BX |
|||
// (R8,R9,R10,R11) = q; MOVQ leaves the borrow flag untouched
MOVQ $0x43e1f593f0000001, R8 |
|||
MOVQ $0x2833e84879b97091, R9 |
|||
MOVQ $0xb85045b68181585d, R10 |
|||
MOVQ $0x30644e72e131a029, R11 |
|||
// no borrow (carry clear): replace q by 0 so the addition below is a no-op
CMOVQCC DI, R8 |
|||
CMOVQCC DI, R9 |
|||
CMOVQCC DI, R10 |
|||
CMOVQCC DI, R11 |
|||
// add q (or 0) to the difference
ADDQ R8, AX |
|||
ADCQ R9, DX |
|||
ADCQ R10, CX |
|||
ADCQ R11, BX |
|||
// store the result into res
MOVQ res+0(FP), R12 |
|||
MOVQ AX, 0(R12) |
|||
MOVQ DX, 8(R12) |
|||
MOVQ CX, 16(R12) |
|||
MOVQ BX, 24(R12) |
|||
RET |
|||
|
|||
// double(res, x *Element) |
|||
// Sets res = 2*x, then subtracts q once if the doubled value is >= q.
// No stack frame ($0); 16 bytes of arguments (two pointers).
TEXT ·double(SB), NOSPLIT, $0-16 |
|||
// load the four words of x into (DX,CX,BX,SI)
MOVQ x+8(FP), AX |
|||
MOVQ 0(AX), DX |
|||
MOVQ 8(AX), CX |
|||
MOVQ 16(AX), BX |
|||
MOVQ 24(AX), SI |
|||
// double by adding the value to itself, propagating the carry
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// store the result into res
MOVQ res+0(FP), R11 |
|||
MOVQ DX, 0(R11) |
|||
MOVQ CX, 8(R11) |
|||
MOVQ BX, 16(R11) |
|||
MOVQ SI, 24(R11) |
|||
RET |
|||
|
|||
// neg(res, x *Element) |
|||
// Sets res = q - x, except that x == 0 yields res = 0 (not q).
// No stack frame ($0); 16 bytes of arguments (two pointers).
TEXT ·neg(SB), NOSPLIT, $0-16 |
|||
MOVQ res+0(FP), DI |
|||
// load the four words of x into (DX,CX,BX,SI)
MOVQ x+8(FP), AX |
|||
MOVQ 0(AX), DX |
|||
MOVQ 8(AX), CX |
|||
MOVQ 16(AX), BX |
|||
MOVQ 24(AX), SI |
|||
// AX = OR of all words: zero exactly when x is zero
MOVQ DX, AX |
|||
ORQ CX, AX |
|||
ORQ BX, AX |
|||
ORQ SI, AX |
|||
TESTQ AX, AX |
|||
JEQ l1 |
|||
// x != 0: compute q - x word by word, reusing R8 and storing each word as
// soon as it is ready; MOVQ does not alter the borrow carried by SBBQ
MOVQ $0x43e1f593f0000001, R8 |
|||
SUBQ DX, R8 |
|||
MOVQ R8, 0(DI) |
|||
MOVQ $0x2833e84879b97091, R8 |
|||
SBBQ CX, R8 |
|||
MOVQ R8, 8(DI) |
|||
MOVQ $0xb85045b68181585d, R8 |
|||
SBBQ BX, R8 |
|||
MOVQ R8, 16(DI) |
|||
MOVQ $0x30644e72e131a029, R8 |
|||
SBBQ SI, R8 |
|||
MOVQ R8, 24(DI) |
|||
RET |
|||
|
|||
// x == 0: AX is zero here, so this writes res = 0
l1: |
|||
MOVQ AX, 0(DI) |
|||
MOVQ AX, 8(DI) |
|||
MOVQ AX, 16(DI) |
|||
MOVQ AX, 24(DI) |
|||
RET |
|||
|
|||
// reduce(res *Element)
// Subtracts q from res once, in place, if res >= q; otherwise res is unchanged.
// No stack frame ($0); 8 bytes of arguments (one pointer).
TEXT ·reduce(SB), NOSPLIT, $0-8 |
|||
// load the four words of res into (DX,CX,BX,SI)
MOVQ res+0(FP), AX |
|||
MOVQ 0(AX), DX |
|||
MOVQ 8(AX), CX |
|||
MOVQ 16(AX), BX |
|||
MOVQ 24(AX), SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// write the result back over res
MOVQ DX, 0(AX) |
|||
MOVQ CX, 8(AX) |
|||
MOVQ BX, 16(AX) |
|||
MOVQ SI, 24(AX) |
|||
RET |
|||
|
|||
// MulBy3(x *Element) |
|||
// Sets x = 3*x in place, computed as (2*x) + x with a conditional subtraction
// of q after each step.
// No stack frame ($0); 8 bytes of arguments (one pointer).
TEXT ·MulBy3(SB), NOSPLIT, $0-8 |
|||
// load the four words of x into (DX,CX,BX,SI); AX keeps pointing at x
MOVQ x+0(FP), AX |
|||
MOVQ 0(AX), DX |
|||
MOVQ 8(AX), CX |
|||
MOVQ 16(AX), BX |
|||
MOVQ 24(AX), SI |
|||
// 2*x
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// 2*x + x: x is re-read from memory, which has not been written yet
ADDQ 0(AX), DX |
|||
ADCQ 8(AX), CX |
|||
ADCQ 16(AX), BX |
|||
ADCQ 24(AX), SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) |
|||
REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) |
|||
|
|||
// write the result back over x
MOVQ DX, 0(AX) |
|||
MOVQ CX, 8(AX) |
|||
MOVQ BX, 16(AX) |
|||
MOVQ SI, 24(AX) |
|||
RET |
|||
|
|||
// MulBy5(x *Element) |
|||
// Sets x = 5*x in place, computed as 2*(2*x) + x with a conditional
// subtraction of q after each step.
// No stack frame ($0); 8 bytes of arguments (one pointer).
TEXT ·MulBy5(SB), NOSPLIT, $0-8 |
|||
// load the four words of x into (DX,CX,BX,SI); AX keeps pointing at x
MOVQ x+0(FP), AX |
|||
MOVQ 0(AX), DX |
|||
MOVQ 8(AX), CX |
|||
MOVQ 16(AX), BX |
|||
MOVQ 24(AX), SI |
|||
// 2*x
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// 4*x
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) |
|||
REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) |
|||
|
|||
// 4*x + x: x is re-read from memory, which has not been written yet
ADDQ 0(AX), DX |
|||
ADCQ 8(AX), CX |
|||
ADCQ 16(AX), BX |
|||
ADCQ 24(AX), SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (R15,DI,R8,R9) |
|||
REDUCE(DX,CX,BX,SI,R15,DI,R8,R9) |
|||
|
|||
// write the result back over x
MOVQ DX, 0(AX) |
|||
MOVQ CX, 8(AX) |
|||
MOVQ BX, 16(AX) |
|||
MOVQ SI, 24(AX) |
|||
RET |
|||
|
|||
// MulBy13(x *Element) |
|||
// Sets x = 13*x in place, computed as 8*x + 4*x + x: the value is doubled
// three times (keeping a copy of 4*x), then 4*x and x are added, with a
// conditional subtraction of q after each step.
// No stack frame ($0); 8 bytes of arguments (one pointer).
TEXT ·MulBy13(SB), NOSPLIT, $0-8 |
|||
// load the four words of x into (DX,CX,BX,SI); AX keeps pointing at x
MOVQ x+0(FP), AX |
|||
MOVQ 0(AX), DX |
|||
MOVQ 8(AX), CX |
|||
MOVQ 16(AX), BX |
|||
MOVQ 24(AX), SI |
|||
// 2*x
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// 4*x
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14) |
|||
REDUCE(DX,CX,BX,SI,R11,R12,R13,R14) |
|||
|
|||
// keep a copy of the reduced 4*x in (R11,R12,R13,R14)
MOVQ DX, R11 |
|||
MOVQ CX, R12 |
|||
MOVQ BX, R13 |
|||
MOVQ SI, R14 |
|||
// 8*x
ADDQ DX, DX |
|||
ADCQ CX, CX |
|||
ADCQ BX, BX |
|||
ADCQ SI, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// 8*x + 4*x = 12*x
ADDQ R11, DX |
|||
ADCQ R12, CX |
|||
ADCQ R13, BX |
|||
ADCQ R14, SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// 12*x + x: x is re-read from memory, which has not been written yet
ADDQ 0(AX), DX |
|||
ADCQ 8(AX), CX |
|||
ADCQ 16(AX), BX |
|||
ADCQ 24(AX), SI |
|||
|
|||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10) |
|||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10) |
|||
|
|||
// write the result back over x
MOVQ DX, 0(AX) |
|||
MOVQ CX, 8(AX) |
|||
MOVQ BX, 16(AX) |
|||
MOVQ SI, 24(AX) |
|||
RET |
|||
|
|||
// Butterfly(a, b *Element) sets a = a + b; b = a - b |
|||
// Both right-hand sides use the value of a on entry: the difference is formed
// from a copy of a taken before the sum is computed.
// No stack frame ($0); 16 bytes of arguments (two pointers).
TEXT ·Butterfly(SB), NOSPLIT, $0-16 |
|||
// load the four words of a into (CX,BX,SI,DI)
MOVQ a+0(FP), AX |
|||
MOVQ 0(AX), CX |
|||
MOVQ 8(AX), BX |
|||
MOVQ 16(AX), SI |
|||
MOVQ 24(AX), DI |
|||
// second copy of a in (R8,R9,R10,R11) for the subtraction
MOVQ CX, R8 |
|||
MOVQ BX, R9 |
|||
MOVQ SI, R10 |
|||
MOVQ DI, R11 |
|||
// AX = 0, used below as the "add nothing" operand (the pointer to a is
// reloaded before the final store)
XORQ AX, AX |
|||
MOVQ b+8(FP), DX |
|||
// (CX,BX,SI,DI) = a + b, not yet reduced
ADDQ 0(DX), CX |
|||
ADCQ 8(DX), BX |
|||
ADCQ 16(DX), SI |
|||
ADCQ 24(DX), DI |
|||
// (R8,R9,R10,R11) = a - b; the borrow of this chain drives the CMOVs below
SUBQ 0(DX), R8 |
|||
SBBQ 8(DX), R9 |
|||
SBBQ 16(DX), R10 |
|||
SBBQ 24(DX), R11 |
|||
// (R12,R13,R14,R15) = q; MOVQ leaves the borrow flag untouched
MOVQ $0x43e1f593f0000001, R12 |
|||
MOVQ $0x2833e84879b97091, R13 |
|||
MOVQ $0xb85045b68181585d, R14 |
|||
MOVQ $0x30644e72e131a029, R15 |
|||
// no borrow (carry clear): replace q by 0 so the addition below is a no-op
CMOVQCC AX, R12 |
|||
CMOVQCC AX, R13 |
|||
CMOVQCC AX, R14 |
|||
CMOVQCC AX, R15 |
|||
// add q (or 0) to the difference
ADDQ R12, R8 |
|||
ADCQ R13, R9 |
|||
ADCQ R14, R10 |
|||
ADCQ R15, R11 |
|||
// store the difference into b; R8..R11 are free again afterwards
MOVQ R8, 0(DX) |
|||
MOVQ R9, 8(DX) |
|||
MOVQ R10, 16(DX) |
|||
MOVQ R11, 24(DX) |
|||
|
|||
// reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11) |
|||
REDUCE(CX,BX,SI,DI,R8,R9,R10,R11) |
|||
|
|||
// reload the pointer to a (AX was zeroed above) and store the reduced sum
MOVQ a+0(FP), AX |
|||
MOVQ CX, 0(AX) |
|||
MOVQ BX, 8(AX) |
|||
MOVQ SI, 16(AX) |
|||
MOVQ DI, 24(AX) |
|||
RET |
@ -0,0 +1,78 @@ |
|||
//go:build !amd64
|
|||
// +build !amd64
|
|||
|
|||
// Copyright 2020 ConsenSys Software Inc.
|
|||
//
|
|||
// Licensed under the Apache License, Version 2.0 (the "License");
|
|||
// you may not use this file except in compliance with the License.
|
|||
// You may obtain a copy of the License at
|
|||
//
|
|||
// http://www.apache.org/licenses/LICENSE-2.0
|
|||
//
|
|||
// Unless required by applicable law or agreed to in writing, software
|
|||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|||
// See the License for the specific language governing permissions and
|
|||
// limitations under the License.
|
|||
|
|||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
|||
|
|||
package ff |
|||
|
|||
// /!\ WARNING /!\
|
|||
// this code has not been audited and is provided as-is. In particular,
|
|||
// there are no security guarantees such as constant time implementation
|
|||
// or side-channel attack resistance
|
|||
// /!\ WARNING /!\
|
|||
|
|||
// MulBy3 x *= 3
// Fallback used when not building for amd64: forwards to mulByConstant(x, 3),
// which is defined elsewhere in the package.
|
|||
func MulBy3(x *Element) { |
|||
mulByConstant(x, 3) |
|||
} |
|||
|
|||
// MulBy5 x *= 5
// Fallback used when not building for amd64: forwards to mulByConstant(x, 5),
// which is defined elsewhere in the package.
|
|||
func MulBy5(x *Element) { |
|||
mulByConstant(x, 5) |
|||
} |
|||
|
|||
// MulBy13 x *= 13
// Fallback used when not building for amd64: forwards to mulByConstant(x, 13),
// which is defined elsewhere in the package.
|
|||
func MulBy13(x *Element) { |
|||
mulByConstant(x, 13) |
|||
} |
|||
|
|||
// Butterfly sets
|
|||
// a = a + b
|
|||
// b = a - b
// Fallback used when not building for amd64: forwards to _butterflyGeneric,
// which is defined elsewhere in the package.
// NOTE(review): presumably both right-hand sides use the value of a on entry
// — confirm against _butterflyGeneric.
|
|||
func Butterfly(a, b *Element) { |
|||
_butterflyGeneric(a, b) |
|||
} |
|||
|
|||
// mul is the fallback used when not building for amd64: it forwards its
// arguments unchanged to _mulGeneric, which is defined elsewhere in the package.
// NOTE(review): presumably sets z = x * y in Montgomery form — confirm against
// _mulGeneric.
func mul(z, x, y *Element) { |
|||
_mulGeneric(z, x, y) |
|||
} |
|||
|
|||
// fromMont converts z in place (i.e. mutates) from Montgomery to regular representation
|
|||
// by forwarding to _fromMontGeneric (z = z * 1); it has no return value.
|
|||
func fromMont(z *Element) { |
|||
_fromMontGeneric(z) |
|||
} |
|||
|
|||
// add is the fallback used when not building for amd64: it forwards its
// arguments unchanged to _addGeneric, which is defined elsewhere in the package.
// NOTE(review): presumably sets z = x + y mod q — confirm against _addGeneric.
func add(z, x, y *Element) { |
|||
_addGeneric(z, x, y) |
|||
} |
|||
|
|||
// double is the fallback used when not building for amd64: it forwards its
// arguments unchanged to _doubleGeneric, which is defined elsewhere in the package.
// NOTE(review): presumably sets z = 2*x mod q — confirm against _doubleGeneric.
func double(z, x *Element) { |
|||
_doubleGeneric(z, x) |
|||
} |
|||
|
|||
// sub is the fallback used when not building for amd64: it forwards its
// arguments unchanged to _subGeneric, which is defined elsewhere in the package.
// NOTE(review): presumably sets z = x - y mod q — confirm against _subGeneric.
func sub(z, x, y *Element) { |
|||
_subGeneric(z, x, y) |
|||
} |
|||
|
|||
// neg is the fallback used when not building for amd64: it forwards its
// arguments unchanged to _negGeneric, which is defined elsewhere in the package.
// NOTE(review): presumably sets z = -x mod q — confirm against _negGeneric.
func neg(z, x *Element) { |
|||
_negGeneric(z, x) |
|||
} |
|||
|
|||
// reduce is the fallback used when not building for amd64: it forwards z to
// _reduceGeneric, which is defined elsewhere in the package.
// NOTE(review): presumably subtracts q from z once when z >= q — confirm
// against _reduceGeneric.
func reduce(z *Element) { |
|||
_reduceGeneric(z) |
|||
} |
@ -1,6 +0,0 @@ |
|||
package ff |
|||
|
|||
// NewElement returns a pointer to a newly allocated Element built from an
// empty composite literal, i.e. holding the type's zero value.
|
|||
func NewElement() *Element { |
|||
return &Element{} |
|||
} |