mirror of
https://github.com/arnaucube/go-iden3-crypto.git
synced 2026-02-07 03:26:39 +01:00
Faster ff arithmetics (regenerated code with the newest goff) (#43)
This commit is contained in:
committed by
GitHub
parent
f597e20569
commit
69354ae29c
@@ -95,20 +95,20 @@ func (p *PointProjective) Add(q *PointProjective, o *PointProjective) *PointProj
|
||||
c := ff.NewElement().Mul(q.X, o.X)
|
||||
d := ff.NewElement().Mul(q.Y, o.Y)
|
||||
e := ff.NewElement().Mul(Dff, c)
|
||||
e.MulAssign(d)
|
||||
e.Mul(e, d)
|
||||
f := ff.NewElement().Sub(b, e)
|
||||
g := ff.NewElement().Add(b, e)
|
||||
x1y1 := ff.NewElement().Add(q.X, q.Y)
|
||||
x2y2 := ff.NewElement().Add(o.X, o.Y)
|
||||
x3 := ff.NewElement().Mul(x1y1, x2y2)
|
||||
x3.SubAssign(c)
|
||||
x3.SubAssign(d)
|
||||
x3.MulAssign(a)
|
||||
x3.MulAssign(f)
|
||||
x3.Sub(x3, c)
|
||||
x3.Sub(x3, d)
|
||||
x3.Mul(x3, a)
|
||||
x3.Mul(x3, f)
|
||||
ac := ff.NewElement().Mul(Aff, c)
|
||||
y3 := ff.NewElement().Sub(d, ac)
|
||||
y3.MulAssign(a)
|
||||
y3.MulAssign(g)
|
||||
y3.Mul(y3, a)
|
||||
y3.Mul(y3, g)
|
||||
z3 := ff.NewElement().Mul(f, g)
|
||||
|
||||
p.X = x3
|
||||
|
||||
66
ff/arith.go
66
ff/arith.go
@@ -1,4 +1,4 @@
|
||||
// Copyright 2020 ConsenSys AG
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
@@ -12,7 +12,7 @@
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by goff DO NOT EDIT
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
package ff
|
||||
|
||||
@@ -20,15 +20,6 @@ import (
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
// madd adds the 128-bit product a*b to the three-word value (t, u, v),
// where v is the least significant word and t the most significant one,
// and returns the updated (t, u, v). A carry out of t is dropped.
func madd(a, b, t, u, v uint64) (uint64, uint64, uint64) {
	prodHi, prodLo := bits.Mul64(a, b)

	// low word: v + low half of the product
	low, c0 := bits.Add64(prodLo, v, 0)
	// middle word: u + high half of the product + carry from the low word
	mid, c1 := bits.Add64(prodHi, u, c0)
	// top word only absorbs the remaining carry
	top := t + c1

	return top, mid, low
}
|
||||
|
||||
// madd0 hi = a*b + c (discards lo bits)
|
||||
func madd0(a, b, c uint64) (hi uint64) {
|
||||
var carry, lo uint64
|
||||
@@ -58,59 +49,6 @@ func madd2(a, b, c, d uint64) (hi uint64, lo uint64) {
|
||||
return
|
||||
}
|
||||
|
||||
// madd2s computes 2*a*b + c + e as the three-word value (superhi, hi, lo)
// and then adds d into the hi word.
//
// Carries out of hi produced while adding c, e and d are not propagated
// into superhi.
// NOTE(review): d is handed to bits.Add64 as its carry-in, so callers
// presumably pass a carry bit (0 or 1) here — confirm against the callers.
func madd2s(a, b, c, d, e uint64) (superhi, hi, lo uint64) {
	prodHi, prodLo := bits.Mul64(a, b)

	// (superhi, hi, lo) = 2 * (prodHi, prodLo)
	var k uint64
	lo, k = bits.Add64(prodLo, prodLo, 0)
	hi, superhi = bits.Add64(prodHi, prodHi, k)

	// fold c + e into the low word, pushing both possible carries into hi
	ce, k := bits.Add64(c, e, 0)
	hi += k
	lo, k = bits.Add64(lo, ce, 0)
	hi += k

	// finally add d on top of the hi word
	hi, _ = bits.Add64(hi, 0, d)
	return superhi, hi, lo
}
|
||||
|
||||
// madd1s computes 2*a*b + e as the three-word value (superhi, hi, lo)
// and then adds d into the hi word.
//
// Carries out of hi produced while adding e and d are not propagated
// into superhi.
// NOTE(review): d is handed to bits.Add64 as its carry-in, so callers
// presumably pass a carry bit (0 or 1) here — confirm against the callers.
func madd1s(a, b, d, e uint64) (superhi, hi, lo uint64) {
	prodHi, prodLo := bits.Mul64(a, b)

	// (superhi, hi, lo) = 2 * (prodHi, prodLo)
	var k uint64
	lo, k = bits.Add64(prodLo, prodLo, 0)
	hi, superhi = bits.Add64(prodHi, prodHi, k)

	// add e to the low word and carry into hi
	lo, k = bits.Add64(lo, e, 0)
	hi += k

	// finally add d on top of the hi word
	hi, _ = bits.Add64(hi, 0, d)
	return superhi, hi, lo
}
|
||||
|
||||
// madd2sb computes 2*a*b + c + e and returns it as the three-word value
// (superhi, hi, lo), lo being the least significant word.
func madd2sb(a, b, c, e uint64) (superhi, hi, lo uint64) {
	prodHi, prodLo := bits.Mul64(a, b)

	// (superhi, hi, lo) = 2 * (prodHi, prodLo)
	var k uint64
	lo, k = bits.Add64(prodLo, prodLo, 0)
	hi, superhi = bits.Add64(prodHi, prodHi, k)

	// fold c + e into the low word, pushing both possible carries into hi
	ce, k := bits.Add64(c, e, 0)
	hi += k
	lo, k = bits.Add64(lo, ce, 0)
	hi += k

	return superhi, hi, lo
}
|
||||
|
||||
// madd1sb computes 2*a*b + e and returns it as the three-word value
// (superhi, hi, lo), lo being the least significant word.
func madd1sb(a, b, e uint64) (superhi, hi, lo uint64) {
	prodHi, prodLo := bits.Mul64(a, b)

	// (superhi, hi, lo) = 2 * (prodHi, prodLo)
	var k uint64
	lo, k = bits.Add64(prodLo, prodLo, 0)
	hi, superhi = bits.Add64(prodHi, prodHi, k)

	// add e to the low word and carry into hi
	lo, k = bits.Add64(lo, e, 0)
	hi += k

	return superhi, hi, lo
}
|
||||
|
||||
func madd3(a, b, c, d, e uint64) (hi uint64, lo uint64) {
|
||||
var carry uint64
|
||||
hi, lo = bits.Mul64(a, b)
|
||||
|
||||
24
ff/asm.go
Normal file
24
ff/asm.go
Normal file
@@ -0,0 +1,24 @@
|
||||
//go:build !noadx
|
||||
// +build !noadx
|
||||
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
package ff
|
||||
|
||||
import "golang.org/x/sys/cpu"
|
||||
|
||||
var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2
|
||||
25
ff/asm_noadx.go
Normal file
25
ff/asm_noadx.go
Normal file
@@ -0,0 +1,25 @@
|
||||
//go:build noadx
|
||||
// +build noadx
|
||||
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
package ff
|
||||
|
||||
// note: this is needed for test purposes, as dynamically changing supportAdx doesn't flag
|
||||
// certain errors (like fatal error: missing stackmap)
|
||||
// this ensures we test all asm paths.
|
||||
var supportAdx = false
|
||||
43
ff/doc.go
Normal file
43
ff/doc.go
Normal file
@@ -0,0 +1,43 @@
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
// Package ff contains field arithmetic operations for modulus = 0x30644e...000001.
|
||||
//
|
||||
// The API is similar to math/big (big.Int), but the operations are significantly faster (up to 20x for the modular multiplication on amd64, see also https://hackmd.io/@zkteam/modular_multiplication)
|
||||
//
|
||||
// The modulus is hardcoded in all the operations.
|
||||
//
|
||||
// Field elements are represented as an array, and assumed to be in Montgomery form in all methods:
|
||||
// type Element [4]uint64
|
||||
//
|
||||
// Example API signature
|
||||
// // Mul z = x * y mod q
|
||||
// func (z *Element) Mul(x, y *Element) *Element
|
||||
//
|
||||
// and can be used like so:
|
||||
// var a, b Element
|
||||
// a.SetUint64(2)
|
||||
// b.SetString("984896738")
|
||||
// a.Mul(a, b)
|
||||
// a.Sub(a, a)
|
||||
// .Add(a, b)
|
||||
// .Inverse(a)
|
||||
// b.Exp(b, new(big.Int).SetUint64(42))
|
||||
//
|
||||
// Modulus
|
||||
// 0x30644e72e131a029b85045b68181585d2833e84879b9709143e1f593f0000001 // base 16
|
||||
// 21888242871839275222246405745257275088548364400416034343698204186575808495617 // base 10
|
||||
package ff
|
||||
1381
ff/element.go
1381
ff/element.go
File diff suppressed because it is too large
Load Diff
136
ff/element_fuzz.go
Normal file
136
ff/element_fuzz.go
Normal file
@@ -0,0 +1,136 @@
|
||||
//go:build gofuzz
|
||||
// +build gofuzz
|
||||
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
package ff
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"io"
|
||||
"math/big"
|
||||
"math/bits"
|
||||
)
|
||||
|
||||
const (
|
||||
fuzzInteresting = 1
|
||||
fuzzNormal = 0
|
||||
fuzzDiscard = -1
|
||||
)
|
||||
|
||||
// Fuzz arithmetic operations fuzzer
|
||||
func Fuzz(data []byte) int {
|
||||
r := bytes.NewReader(data)
|
||||
|
||||
var e1, e2 Element
|
||||
e1.SetRawBytes(r)
|
||||
e2.SetRawBytes(r)
|
||||
|
||||
{
|
||||
// mul assembly
|
||||
|
||||
var c, _c Element
|
||||
a, _a, b, _b := e1, e1, e2, e2
|
||||
c.Mul(&a, &b)
|
||||
_mulGeneric(&_c, &_a, &_b)
|
||||
|
||||
if !c.Equal(&_c) {
|
||||
panic("mul asm != mul generic on Element")
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// inverse
|
||||
inv := e1
|
||||
inv.Inverse(&inv)
|
||||
|
||||
var bInv, b1, b2 big.Int
|
||||
e1.ToBigIntRegular(&b1)
|
||||
bInv.ModInverse(&b1, Modulus())
|
||||
inv.ToBigIntRegular(&b2)
|
||||
|
||||
if b2.Cmp(&bInv) != 0 {
|
||||
panic("inverse operation doesn't match big int result")
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// a + -a == 0
|
||||
a, b := e1, e1
|
||||
b.Neg(&b)
|
||||
a.Add(&a, &b)
|
||||
if !a.IsZero() {
|
||||
panic("a + -a != 0")
|
||||
}
|
||||
}
|
||||
|
||||
return fuzzNormal
|
||||
|
||||
}
|
||||
|
||||
// SetRawBytes reads up to Bytes (bytes needed to represent Element) from reader
|
||||
// and interpret it as big endian uint64
|
||||
// used for fuzzing purposes only
|
||||
func (z *Element) SetRawBytes(r io.Reader) {
|
||||
|
||||
buf := make([]byte, 8)
|
||||
|
||||
for i := 0; i < len(z); i++ {
|
||||
if _, err := io.ReadFull(r, buf); err != nil {
|
||||
goto eof
|
||||
}
|
||||
z[i] = binary.BigEndian.Uint64(buf[:])
|
||||
}
|
||||
eof:
|
||||
z[3] %= qElement[3]
|
||||
|
||||
if z.BiggerModulus() {
|
||||
var b uint64
|
||||
z[0], b = bits.Sub64(z[0], qElement[0], 0)
|
||||
z[1], b = bits.Sub64(z[1], qElement[1], b)
|
||||
z[2], b = bits.Sub64(z[2], qElement[2], b)
|
||||
z[3], b = bits.Sub64(z[3], qElement[3], b)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (z *Element) BiggerModulus() bool {
|
||||
if z[3] > qElement[3] {
|
||||
return true
|
||||
}
|
||||
if z[3] < qElement[3] {
|
||||
return false
|
||||
}
|
||||
|
||||
if z[2] > qElement[2] {
|
||||
return true
|
||||
}
|
||||
if z[2] < qElement[2] {
|
||||
return false
|
||||
}
|
||||
|
||||
if z[1] > qElement[1] {
|
||||
return true
|
||||
}
|
||||
if z[1] < qElement[1] {
|
||||
return false
|
||||
}
|
||||
|
||||
return z[0] >= qElement[0]
|
||||
}
|
||||
466
ff/element_mul_adx_amd64.s
Normal file
466
ff/element_mul_adx_amd64.s
Normal file
@@ -0,0 +1,466 @@
|
||||
// +build amd64_adx
|
||||
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
|
||||
// modulus q
|
||||
DATA q<>+0(SB)/8, $0x43e1f593f0000001
|
||||
DATA q<>+8(SB)/8, $0x2833e84879b97091
|
||||
DATA q<>+16(SB)/8, $0xb85045b68181585d
|
||||
DATA q<>+24(SB)/8, $0x30644e72e131a029
|
||||
GLOBL q<>(SB), (RODATA+NOPTR), $32
|
||||
|
||||
// qInv0 q'[0]
|
||||
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff
|
||||
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
|
||||
|
||||
// REDUCE conditionally subtracts the modulus q from the 4-limb value held in
// registers (ra0, ra1, ra2, ra3), ra0 being the least significant limb.
// Each limb is first copied into the matching rb register, then q is
// subtracted with a SUBQ/SBBQ borrow chain. If the final subtraction borrows
// (carry flag set), the CMOVQCS instructions restore the saved limbs so the
// value is left unchanged; otherwise the difference is kept.
// rb0..rb3 are scratch registers and are overwritten.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
|
||||
MOVQ ra0, rb0; \
|
||||
SUBQ q<>(SB), ra0; \
|
||||
MOVQ ra1, rb1; \
|
||||
SBBQ q<>+8(SB), ra1; \
|
||||
MOVQ ra2, rb2; \
|
||||
SBBQ q<>+16(SB), ra2; \
|
||||
MOVQ ra3, rb3; \
|
||||
SBBQ q<>+24(SB), ra3; \
|
||||
CMOVQCS rb0, ra0; \
|
||||
CMOVQCS rb1, ra1; \
|
||||
CMOVQCS rb2, ra2; \
|
||||
CMOVQCS rb3, ra3; \
|
||||
|
||||
// mul(res, x, y *Element)
|
||||
TEXT ·mul(SB), NOSPLIT, $0-24
|
||||
|
||||
// the algorithm is described here
|
||||
// https://hackmd.io/@zkteam/modular_multiplication
|
||||
// however, to benefit from the ADCX and ADOX carry chains
|
||||
// we split the inner loops in 2:
|
||||
// for i=0 to N-1
|
||||
// for j=0 to N-1
|
||||
// (A,t[j]) := t[j] + x[j]*y[i] + A
|
||||
// m := t[0]*q'[0] mod W
|
||||
// C,_ := t[0] + m*q[0]
|
||||
// for j=1 to N-1
|
||||
// (C,t[j-1]) := t[j] + m*q[j] + C
|
||||
// t[N-1] = C + A
|
||||
|
||||
MOVQ x+8(FP), SI
|
||||
|
||||
// x[0] -> DI
|
||||
// x[1] -> R8
|
||||
// x[2] -> R9
|
||||
// x[3] -> R10
|
||||
MOVQ 0(SI), DI
|
||||
MOVQ 8(SI), R8
|
||||
MOVQ 16(SI), R9
|
||||
MOVQ 24(SI), R10
|
||||
MOVQ y+16(FP), R11
|
||||
|
||||
// A -> BP
|
||||
// t[0] -> R14
|
||||
// t[1] -> R15
|
||||
// t[2] -> CX
|
||||
// t[3] -> BX
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 0(R11), DX
|
||||
|
||||
// (A,t[0]) := x[0]*y[0] + A
|
||||
MULXQ DI, R14, R15
|
||||
|
||||
// (A,t[1]) := x[1]*y[0] + A
|
||||
MULXQ R8, AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := x[2]*y[0] + A
|
||||
MULXQ R9, AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := x[3]*y[0] + A
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 8(R11), DX
|
||||
|
||||
// (A,t[0]) := t[0] + x[0]*y[1] + A
|
||||
MULXQ DI, AX, BP
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (A,t[1]) := t[1] + x[1]*y[1] + A
|
||||
ADCXQ BP, R15
|
||||
MULXQ R8, AX, BP
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := t[2] + x[2]*y[1] + A
|
||||
ADCXQ BP, CX
|
||||
MULXQ R9, AX, BP
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := t[3] + x[3]*y[1] + A
|
||||
ADCXQ BP, BX
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BP
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 16(R11), DX
|
||||
|
||||
// (A,t[0]) := t[0] + x[0]*y[2] + A
|
||||
MULXQ DI, AX, BP
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (A,t[1]) := t[1] + x[1]*y[2] + A
|
||||
ADCXQ BP, R15
|
||||
MULXQ R8, AX, BP
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := t[2] + x[2]*y[2] + A
|
||||
ADCXQ BP, CX
|
||||
MULXQ R9, AX, BP
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := t[3] + x[3]*y[2] + A
|
||||
ADCXQ BP, BX
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BP
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 24(R11), DX
|
||||
|
||||
// (A,t[0]) := t[0] + x[0]*y[3] + A
|
||||
MULXQ DI, AX, BP
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (A,t[1]) := t[1] + x[1]*y[3] + A
|
||||
ADCXQ BP, R15
|
||||
MULXQ R8, AX, BP
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := t[2] + x[2]*y[3] + A
|
||||
ADCXQ BP, CX
|
||||
MULXQ R9, AX, BP
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := t[3] + x[3]*y[3] + A
|
||||
ADCXQ BP, BX
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BP
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11)
|
||||
REDUCE(R14,R15,CX,BX,R13,SI,R12,R11)
|
||||
|
||||
MOVQ res+0(FP), AX
|
||||
MOVQ R14, 0(AX)
|
||||
MOVQ R15, 8(AX)
|
||||
MOVQ CX, 16(AX)
|
||||
MOVQ BX, 24(AX)
|
||||
RET
|
||||
|
||||
TEXT ·fromMont(SB), NOSPLIT, $0-8
|
||||
|
||||
// the algorithm is described here
|
||||
// https://hackmd.io/@zkteam/modular_multiplication
|
||||
// when y = 1 we have:
|
||||
// for i=0 to N-1
|
||||
// t[i] = x[i]
|
||||
// for i=0 to N-1
|
||||
// m := t[0]*q'[0] mod W
|
||||
// C,_ := t[0] + m*q[0]
|
||||
// for j=1 to N-1
|
||||
// (C,t[j-1]) := t[j] + m*q[j] + C
|
||||
// t[N-1] = C
|
||||
MOVQ res+0(FP), DX
|
||||
MOVQ 0(DX), R14
|
||||
MOVQ 8(DX), R15
|
||||
MOVQ 16(DX), CX
|
||||
MOVQ 24(DX), BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
|
||||
// reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9)
|
||||
REDUCE(R14,R15,CX,BX,SI,DI,R8,R9)
|
||||
|
||||
MOVQ res+0(FP), AX
|
||||
MOVQ R14, 0(AX)
|
||||
MOVQ R15, 8(AX)
|
||||
MOVQ CX, 16(AX)
|
||||
MOVQ BX, 24(AX)
|
||||
RET
|
||||
488
ff/element_mul_amd64.s
Normal file
488
ff/element_mul_amd64.s
Normal file
@@ -0,0 +1,488 @@
|
||||
// +build !amd64_adx
|
||||
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
|
||||
// modulus q
|
||||
DATA q<>+0(SB)/8, $0x43e1f593f0000001
|
||||
DATA q<>+8(SB)/8, $0x2833e84879b97091
|
||||
DATA q<>+16(SB)/8, $0xb85045b68181585d
|
||||
DATA q<>+24(SB)/8, $0x30644e72e131a029
|
||||
GLOBL q<>(SB), (RODATA+NOPTR), $32
|
||||
|
||||
// qInv0 q'[0]
|
||||
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff
|
||||
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
|
||||
|
||||
// REDUCE conditionally subtracts the modulus q from the 4-limb value held in
// registers (ra0, ra1, ra2, ra3), ra0 being the least significant limb.
// Each limb is first copied into the matching rb register, then q is
// subtracted with a SUBQ/SBBQ borrow chain. If the final subtraction borrows
// (carry flag set), the CMOVQCS instructions restore the saved limbs so the
// value is left unchanged; otherwise the difference is kept.
// rb0..rb3 are scratch registers and are overwritten.
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
|
||||
MOVQ ra0, rb0; \
|
||||
SUBQ q<>(SB), ra0; \
|
||||
MOVQ ra1, rb1; \
|
||||
SBBQ q<>+8(SB), ra1; \
|
||||
MOVQ ra2, rb2; \
|
||||
SBBQ q<>+16(SB), ra2; \
|
||||
MOVQ ra3, rb3; \
|
||||
SBBQ q<>+24(SB), ra3; \
|
||||
CMOVQCS rb0, ra0; \
|
||||
CMOVQCS rb1, ra1; \
|
||||
CMOVQCS rb2, ra2; \
|
||||
CMOVQCS rb3, ra3; \
|
||||
|
||||
// mul(res, x, y *Element)
|
||||
TEXT ·mul(SB), $24-24
|
||||
|
||||
// the algorithm is described here
|
||||
// https://hackmd.io/@zkteam/modular_multiplication
|
||||
// however, to benefit from the ADCX and ADOX carry chains
|
||||
// we split the inner loops in 2:
|
||||
// for i=0 to N-1
|
||||
// for j=0 to N-1
|
||||
// (A,t[j]) := t[j] + x[j]*y[i] + A
|
||||
// m := t[0]*q'[0] mod W
|
||||
// C,_ := t[0] + m*q[0]
|
||||
// for j=1 to N-1
|
||||
// (C,t[j-1]) := t[j] + m*q[j] + C
|
||||
// t[N-1] = C + A
|
||||
|
||||
NO_LOCAL_POINTERS
|
||||
CMPB ·supportAdx(SB), $1
|
||||
JNE l1
|
||||
MOVQ x+8(FP), SI
|
||||
|
||||
// x[0] -> DI
|
||||
// x[1] -> R8
|
||||
// x[2] -> R9
|
||||
// x[3] -> R10
|
||||
MOVQ 0(SI), DI
|
||||
MOVQ 8(SI), R8
|
||||
MOVQ 16(SI), R9
|
||||
MOVQ 24(SI), R10
|
||||
MOVQ y+16(FP), R11
|
||||
|
||||
// A -> BP
|
||||
// t[0] -> R14
|
||||
// t[1] -> R15
|
||||
// t[2] -> CX
|
||||
// t[3] -> BX
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 0(R11), DX
|
||||
|
||||
// (A,t[0]) := x[0]*y[0] + A
|
||||
MULXQ DI, R14, R15
|
||||
|
||||
// (A,t[1]) := x[1]*y[0] + A
|
||||
MULXQ R8, AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := x[2]*y[0] + A
|
||||
MULXQ R9, AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := x[3]*y[0] + A
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 8(R11), DX
|
||||
|
||||
// (A,t[0]) := t[0] + x[0]*y[1] + A
|
||||
MULXQ DI, AX, BP
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (A,t[1]) := t[1] + x[1]*y[1] + A
|
||||
ADCXQ BP, R15
|
||||
MULXQ R8, AX, BP
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := t[2] + x[2]*y[1] + A
|
||||
ADCXQ BP, CX
|
||||
MULXQ R9, AX, BP
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := t[3] + x[3]*y[1] + A
|
||||
ADCXQ BP, BX
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BP
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 16(R11), DX
|
||||
|
||||
// (A,t[0]) := t[0] + x[0]*y[2] + A
|
||||
MULXQ DI, AX, BP
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (A,t[1]) := t[1] + x[1]*y[2] + A
|
||||
ADCXQ BP, R15
|
||||
MULXQ R8, AX, BP
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := t[2] + x[2]*y[2] + A
|
||||
ADCXQ BP, CX
|
||||
MULXQ R9, AX, BP
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := t[3] + x[3]*y[2] + A
|
||||
ADCXQ BP, BX
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BP
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
MOVQ 24(R11), DX
|
||||
|
||||
// (A,t[0]) := t[0] + x[0]*y[3] + A
|
||||
MULXQ DI, AX, BP
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (A,t[1]) := t[1] + x[1]*y[3] + A
|
||||
ADCXQ BP, R15
|
||||
MULXQ R8, AX, BP
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (A,t[2]) := t[2] + x[2]*y[3] + A
|
||||
ADCXQ BP, CX
|
||||
MULXQ R9, AX, BP
|
||||
ADOXQ AX, CX
|
||||
|
||||
// (A,t[3]) := t[3] + x[3]*y[3] + A
|
||||
ADCXQ BP, BX
|
||||
MULXQ R10, AX, BP
|
||||
ADOXQ AX, BX
|
||||
|
||||
// A += carries from ADCXQ and ADOXQ
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BP
|
||||
ADOXQ AX, BP
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
|
||||
// clear the flags
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, R12
|
||||
ADCXQ R14, AX
|
||||
MOVQ R12, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
|
||||
// t[3] = C + A
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ BP, BX
|
||||
|
||||
// reduce element(R14,R15,CX,BX) using temp registers (R13,SI,R12,R11)
|
||||
REDUCE(R14,R15,CX,BX,R13,SI,R12,R11)
|
||||
|
||||
MOVQ res+0(FP), AX
|
||||
MOVQ R14, 0(AX)
|
||||
MOVQ R15, 8(AX)
|
||||
MOVQ CX, 16(AX)
|
||||
MOVQ BX, 24(AX)
|
||||
RET
|
||||
|
||||
l1:
|
||||
MOVQ res+0(FP), AX
|
||||
MOVQ AX, (SP)
|
||||
MOVQ x+8(FP), AX
|
||||
MOVQ AX, 8(SP)
|
||||
MOVQ y+16(FP), AX
|
||||
MOVQ AX, 16(SP)
|
||||
CALL ·_mulGeneric(SB)
|
||||
RET
|
||||
|
||||
TEXT ·fromMont(SB), $8-8
|
||||
NO_LOCAL_POINTERS
|
||||
|
||||
// the algorithm is described here
|
||||
// https://hackmd.io/@zkteam/modular_multiplication
|
||||
// when y = 1 we have:
|
||||
// for i=0 to N-1
|
||||
// t[i] = x[i]
|
||||
// for i=0 to N-1
|
||||
// m := t[0]*q'[0] mod W
|
||||
// C,_ := t[0] + m*q[0]
|
||||
// for j=1 to N-1
|
||||
// (C,t[j-1]) := t[j] + m*q[j] + C
|
||||
// t[N-1] = C
|
||||
CMPB ·supportAdx(SB), $1
|
||||
JNE l2
|
||||
MOVQ res+0(FP), DX
|
||||
MOVQ 0(DX), R14
|
||||
MOVQ 8(DX), R15
|
||||
MOVQ 16(DX), CX
|
||||
MOVQ 24(DX), BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
XORQ DX, DX
|
||||
|
||||
// m := t[0]*q'[0] mod W
|
||||
MOVQ qInv0<>(SB), DX
|
||||
IMULQ R14, DX
|
||||
XORQ AX, AX
|
||||
|
||||
// C,_ := t[0] + m*q[0]
|
||||
MULXQ q<>+0(SB), AX, BP
|
||||
ADCXQ R14, AX
|
||||
MOVQ BP, R14
|
||||
|
||||
// (C,t[0]) := t[1] + m*q[1] + C
|
||||
ADCXQ R15, R14
|
||||
MULXQ q<>+8(SB), AX, R15
|
||||
ADOXQ AX, R14
|
||||
|
||||
// (C,t[1]) := t[2] + m*q[2] + C
|
||||
ADCXQ CX, R15
|
||||
MULXQ q<>+16(SB), AX, CX
|
||||
ADOXQ AX, R15
|
||||
|
||||
// (C,t[2]) := t[3] + m*q[3] + C
|
||||
ADCXQ BX, CX
|
||||
MULXQ q<>+24(SB), AX, BX
|
||||
ADOXQ AX, CX
|
||||
MOVQ $0, AX
|
||||
ADCXQ AX, BX
|
||||
ADOXQ AX, BX
|
||||
|
||||
// reduce element(R14,R15,CX,BX) using temp registers (SI,DI,R8,R9)
|
||||
REDUCE(R14,R15,CX,BX,SI,DI,R8,R9)
|
||||
|
||||
MOVQ res+0(FP), AX
|
||||
MOVQ R14, 0(AX)
|
||||
MOVQ R15, 8(AX)
|
||||
MOVQ CX, 16(AX)
|
||||
MOVQ BX, 24(AX)
|
||||
RET
|
||||
|
||||
l2:
|
||||
MOVQ res+0(FP), AX
|
||||
MOVQ AX, (SP)
|
||||
CALL ·_fromMontGeneric(SB)
|
||||
RET
|
||||
50
ff/element_ops_amd64.go
Normal file
50
ff/element_ops_amd64.go
Normal file
@@ -0,0 +1,50 @@
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
package ff
|
||||
|
||||
// MulBy3 multiplies x by 3 in place (x *= 3).
// The body is the assembly routine ·MulBy3 in element_ops_amd64.s.
//go:noescape
|
||||
func MulBy3(x *Element)
|
||||
|
||||
// MulBy5 multiplies x by 5 in place (x *= 5).
// The body is the assembly routine ·MulBy5 in element_ops_amd64.s.
//go:noescape
|
||||
func MulBy5(x *Element)
|
||||
|
||||
// MulBy13 multiplies x by 13 in place (x *= 13).
// The body is the assembly routine ·MulBy13 in element_ops_amd64.s.
//go:noescape
|
||||
func MulBy13(x *Element)
|
||||
|
||||
// add sets res = x + y and then applies the REDUCE step (a conditional
// subtraction of the modulus q). Implemented in element_ops_amd64.s.
//go:noescape
|
||||
func add(res, x, y *Element)
|
||||
|
||||
// sub sets res = x - y, adding the modulus q back when the subtraction
// borrows. Implemented in element_ops_amd64.s.
//go:noescape
|
||||
func sub(res, x, y *Element)
|
||||
|
||||
// neg sets res = q - x, or res = 0 when x is zero.
// Implemented in element_ops_amd64.s.
//go:noescape
|
||||
func neg(res, x *Element)
|
||||
|
||||
// double sets res = x + x and then applies the REDUCE step (a conditional
// subtraction of the modulus q). Implemented in element_ops_amd64.s.
//go:noescape
|
||||
func double(res, x *Element)
|
||||
|
||||
// mul computes res from x and y in assembly (element_ops_amd64.s); one path of
// that routine forwards to _mulGeneric.
// NOTE(review): presumably the Montgomery product x*y mod q — confirm against
// the full assembly routine.
//go:noescape
|
||||
func mul(res, x, y *Element)
|
||||
|
||||
// fromMont converts res in place from Montgomery to regular representation.
// The assembly in element_ops_amd64.s takes the MULX/ADCX/ADOX path when
// supportAdx is 1 and otherwise calls _fromMontGeneric.
//go:noescape
|
||||
func fromMont(res *Element)
|
||||
|
||||
// reduce subtracts the modulus q from res in place when that subtraction does
// not borrow, and leaves res unchanged otherwise.
// Implemented in element_ops_amd64.s.
//go:noescape
|
||||
func reduce(res *Element)
|
||||
|
||||
// Butterfly sets a = a + b and b = a - b, where both results are computed from
// the values a and b held on entry. Implemented in element_ops_amd64.s.
//go:noescape
|
||||
func Butterfly(a, b *Element)
|
||||
340
ff/element_ops_amd64.s
Normal file
340
ff/element_ops_amd64.s
Normal file
@@ -0,0 +1,340 @@
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
|
||||
// modulus q
|
||||
DATA q<>+0(SB)/8, $0x43e1f593f0000001
|
||||
DATA q<>+8(SB)/8, $0x2833e84879b97091
|
||||
DATA q<>+16(SB)/8, $0xb85045b68181585d
|
||||
DATA q<>+24(SB)/8, $0x30644e72e131a029
|
||||
GLOBL q<>(SB), (RODATA+NOPTR), $32
|
||||
|
||||
// qInv0 q'[0]
|
||||
DATA qInv0<>(SB)/8, $0xc2e1f593efffffff
|
||||
GLOBL qInv0<>(SB), (RODATA+NOPTR), $8
|
||||
|
||||
#define REDUCE(ra0, ra1, ra2, ra3, rb0, rb1, rb2, rb3) \
|
||||
MOVQ ra0, rb0; \
|
||||
SUBQ q<>(SB), ra0; \
|
||||
MOVQ ra1, rb1; \
|
||||
SBBQ q<>+8(SB), ra1; \
|
||||
MOVQ ra2, rb2; \
|
||||
SBBQ q<>+16(SB), ra2; \
|
||||
MOVQ ra3, rb3; \
|
||||
SBBQ q<>+24(SB), ra3; \
|
||||
CMOVQCS rb0, ra0; \
|
||||
CMOVQCS rb1, ra1; \
|
||||
CMOVQCS rb2, ra2; \
|
||||
CMOVQCS rb3, ra3; \
|
||||
|
||||
// add(res, x, y *Element)
|
||||
TEXT ·add(SB), NOSPLIT, $0-24
|
||||
MOVQ x+8(FP), AX
|
||||
MOVQ 0(AX), CX
|
||||
MOVQ 8(AX), BX
|
||||
MOVQ 16(AX), SI
|
||||
MOVQ 24(AX), DI
|
||||
MOVQ y+16(FP), DX
|
||||
ADDQ 0(DX), CX
|
||||
ADCQ 8(DX), BX
|
||||
ADCQ 16(DX), SI
|
||||
ADCQ 24(DX), DI
|
||||
|
||||
// reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11)
|
||||
REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)
|
||||
|
||||
MOVQ res+0(FP), R12
|
||||
MOVQ CX, 0(R12)
|
||||
MOVQ BX, 8(R12)
|
||||
MOVQ SI, 16(R12)
|
||||
MOVQ DI, 24(R12)
|
||||
RET
|
||||
|
||||
// sub(res, x, y *Element)
// Computes res = x - y over four 64-bit limbs. When the limb subtraction
// borrows, the modulus q is added back, so the stored value is x - y + q.
|
||||
TEXT ·sub(SB), NOSPLIT, $0-24
|
||||
// DI = 0, used below as the "add nothing" source for CMOVQCC. It is cleared
// here, before the SUBQ/SBBQ chain, so that chain's borrow flag is still
// intact when the CMOVQCC instructions test it.
XORQ DI, DI
|
||||
// Load the four limbs of x into (AX, DX, CX, BX).
MOVQ x+8(FP), SI
|
||||
MOVQ 0(SI), AX
|
||||
MOVQ 8(SI), DX
|
||||
MOVQ 16(SI), CX
|
||||
MOVQ 24(SI), BX
|
||||
// (AX, DX, CX, BX) -= y, propagating the borrow from limb to limb.
MOVQ y+16(FP), SI
|
||||
SUBQ 0(SI), AX
|
||||
SBBQ 8(SI), DX
|
||||
SBBQ 16(SI), CX
|
||||
SBBQ 24(SI), BX
|
||||
// (R8, R9, R10, R11) = q; these immediates match the q<> data table.
MOVQ $0x43e1f593f0000001, R8
|
||||
MOVQ $0x2833e84879b97091, R9
|
||||
MOVQ $0xb85045b68181585d, R10
|
||||
MOVQ $0x30644e72e131a029, R11
|
||||
// No borrow (carry clear): replace q with 0 so the addition below is a no-op.
CMOVQCC DI, R8
|
||||
CMOVQCC DI, R9
|
||||
CMOVQCC DI, R10
|
||||
CMOVQCC DI, R11
|
||||
// Add q (or 0) back to the difference.
ADDQ R8, AX
|
||||
ADCQ R9, DX
|
||||
ADCQ R10, CX
|
||||
ADCQ R11, BX
|
||||
// Store the result into res.
MOVQ res+0(FP), R12
|
||||
MOVQ AX, 0(R12)
|
||||
MOVQ DX, 8(R12)
|
||||
MOVQ CX, 16(R12)
|
||||
MOVQ BX, 24(R12)
|
||||
RET
|
||||
|
||||
// double(res, x *Element)
|
||||
TEXT ·double(SB), NOSPLIT, $0-16
|
||||
MOVQ x+8(FP), AX
|
||||
MOVQ 0(AX), DX
|
||||
MOVQ 8(AX), CX
|
||||
MOVQ 16(AX), BX
|
||||
MOVQ 24(AX), SI
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
MOVQ res+0(FP), R11
|
||||
MOVQ DX, 0(R11)
|
||||
MOVQ CX, 8(R11)
|
||||
MOVQ BX, 16(R11)
|
||||
MOVQ SI, 24(R11)
|
||||
RET
|
||||
|
||||
// neg(res, x *Element)
// Stores res = q - x, except that x == 0 yields res = 0 (handled at l1).
|
||||
TEXT ·neg(SB), NOSPLIT, $0-16
|
||||
MOVQ res+0(FP), DI
|
||||
// Load the four limbs of x into (DX, CX, BX, SI).
MOVQ x+8(FP), AX
|
||||
MOVQ 0(AX), DX
|
||||
MOVQ 8(AX), CX
|
||||
MOVQ 16(AX), BX
|
||||
MOVQ 24(AX), SI
|
||||
// AX = OR of all limbs; it is zero exactly when every limb of x is zero.
MOVQ DX, AX
|
||||
ORQ CX, AX
|
||||
ORQ BX, AX
|
||||
ORQ SI, AX
|
||||
TESTQ AX, AX
|
||||
// x == 0: skip the subtraction and write zeros instead.
JEQ l1
|
||||
// res = q - x, one limb at a time. R8 is reloaded with the next limb of q
// before each SBBQ, which continues the borrow chain started by SUBQ.
MOVQ $0x43e1f593f0000001, R8
|
||||
SUBQ DX, R8
|
||||
MOVQ R8, 0(DI)
|
||||
MOVQ $0x2833e84879b97091, R8
|
||||
SBBQ CX, R8
|
||||
MOVQ R8, 8(DI)
|
||||
MOVQ $0xb85045b68181585d, R8
|
||||
SBBQ BX, R8
|
||||
MOVQ R8, 16(DI)
|
||||
MOVQ $0x30644e72e131a029, R8
|
||||
SBBQ SI, R8
|
||||
MOVQ R8, 24(DI)
|
||||
RET
|
||||
|
||||
// Zero input: AX is 0 on this path (it holds the OR computed above), so the
// four stores write res = 0.
l1:
|
||||
MOVQ AX, 0(DI)
|
||||
MOVQ AX, 8(DI)
|
||||
MOVQ AX, 16(DI)
|
||||
MOVQ AX, 24(DI)
|
||||
RET
|
||||
|
||||
// reduce(res *Element)
// Loads res, applies the REDUCE macro (subtract q and keep the difference only
// when the subtraction does not borrow), and writes the limbs back to res.
TEXT ·reduce(SB), NOSPLIT, $0-8
|
||||
// AX keeps the res pointer for the stores at the end; the macro below is not
// given AX as an operand.
MOVQ res+0(FP), AX
|
||||
MOVQ 0(AX), DX
|
||||
MOVQ 8(AX), CX
|
||||
MOVQ 16(AX), BX
|
||||
MOVQ 24(AX), SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
MOVQ DX, 0(AX)
|
||||
MOVQ CX, 8(AX)
|
||||
MOVQ BX, 16(AX)
|
||||
MOVQ SI, 24(AX)
|
||||
RET
|
||||
|
||||
// MulBy3(x *Element)
|
||||
TEXT ·MulBy3(SB), NOSPLIT, $0-8
|
||||
MOVQ x+0(FP), AX
|
||||
MOVQ 0(AX), DX
|
||||
MOVQ 8(AX), CX
|
||||
MOVQ 16(AX), BX
|
||||
MOVQ 24(AX), SI
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
ADDQ 0(AX), DX
|
||||
ADCQ 8(AX), CX
|
||||
ADCQ 16(AX), BX
|
||||
ADCQ 24(AX), SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
|
||||
REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)
|
||||
|
||||
MOVQ DX, 0(AX)
|
||||
MOVQ CX, 8(AX)
|
||||
MOVQ BX, 16(AX)
|
||||
MOVQ SI, 24(AX)
|
||||
RET
|
||||
|
||||
// MulBy5(x *Element)
|
||||
TEXT ·MulBy5(SB), NOSPLIT, $0-8
|
||||
MOVQ x+0(FP), AX
|
||||
MOVQ 0(AX), DX
|
||||
MOVQ 8(AX), CX
|
||||
MOVQ 16(AX), BX
|
||||
MOVQ 24(AX), SI
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
|
||||
REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)
|
||||
|
||||
ADDQ 0(AX), DX
|
||||
ADCQ 8(AX), CX
|
||||
ADCQ 16(AX), BX
|
||||
ADCQ 24(AX), SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (R15,DI,R8,R9)
|
||||
REDUCE(DX,CX,BX,SI,R15,DI,R8,R9)
|
||||
|
||||
MOVQ DX, 0(AX)
|
||||
MOVQ CX, 8(AX)
|
||||
MOVQ BX, 16(AX)
|
||||
MOVQ SI, 24(AX)
|
||||
RET
|
||||
|
||||
// MulBy13(x *Element)
|
||||
TEXT ·MulBy13(SB), NOSPLIT, $0-8
|
||||
MOVQ x+0(FP), AX
|
||||
MOVQ 0(AX), DX
|
||||
MOVQ 8(AX), CX
|
||||
MOVQ 16(AX), BX
|
||||
MOVQ 24(AX), SI
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (R11,R12,R13,R14)
|
||||
REDUCE(DX,CX,BX,SI,R11,R12,R13,R14)
|
||||
|
||||
MOVQ DX, R11
|
||||
MOVQ CX, R12
|
||||
MOVQ BX, R13
|
||||
MOVQ SI, R14
|
||||
ADDQ DX, DX
|
||||
ADCQ CX, CX
|
||||
ADCQ BX, BX
|
||||
ADCQ SI, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
ADDQ R11, DX
|
||||
ADCQ R12, CX
|
||||
ADCQ R13, BX
|
||||
ADCQ R14, SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
ADDQ 0(AX), DX
|
||||
ADCQ 8(AX), CX
|
||||
ADCQ 16(AX), BX
|
||||
ADCQ 24(AX), SI
|
||||
|
||||
// reduce element(DX,CX,BX,SI) using temp registers (DI,R8,R9,R10)
|
||||
REDUCE(DX,CX,BX,SI,DI,R8,R9,R10)
|
||||
|
||||
MOVQ DX, 0(AX)
|
||||
MOVQ CX, 8(AX)
|
||||
MOVQ BX, 16(AX)
|
||||
MOVQ SI, 24(AX)
|
||||
RET
|
||||
|
||||
// Butterfly(a, b *Element) sets a = a + b; b = a - b (both computed from the values of a and b on entry)
|
||||
TEXT ·Butterfly(SB), NOSPLIT, $0-16
|
||||
MOVQ a+0(FP), AX
|
||||
MOVQ 0(AX), CX
|
||||
MOVQ 8(AX), BX
|
||||
MOVQ 16(AX), SI
|
||||
MOVQ 24(AX), DI
|
||||
MOVQ CX, R8
|
||||
MOVQ BX, R9
|
||||
MOVQ SI, R10
|
||||
MOVQ DI, R11
|
||||
XORQ AX, AX
|
||||
MOVQ b+8(FP), DX
|
||||
ADDQ 0(DX), CX
|
||||
ADCQ 8(DX), BX
|
||||
ADCQ 16(DX), SI
|
||||
ADCQ 24(DX), DI
|
||||
SUBQ 0(DX), R8
|
||||
SBBQ 8(DX), R9
|
||||
SBBQ 16(DX), R10
|
||||
SBBQ 24(DX), R11
|
||||
MOVQ $0x43e1f593f0000001, R12
|
||||
MOVQ $0x2833e84879b97091, R13
|
||||
MOVQ $0xb85045b68181585d, R14
|
||||
MOVQ $0x30644e72e131a029, R15
|
||||
CMOVQCC AX, R12
|
||||
CMOVQCC AX, R13
|
||||
CMOVQCC AX, R14
|
||||
CMOVQCC AX, R15
|
||||
ADDQ R12, R8
|
||||
ADCQ R13, R9
|
||||
ADCQ R14, R10
|
||||
ADCQ R15, R11
|
||||
MOVQ R8, 0(DX)
|
||||
MOVQ R9, 8(DX)
|
||||
MOVQ R10, 16(DX)
|
||||
MOVQ R11, 24(DX)
|
||||
|
||||
// reduce element(CX,BX,SI,DI) using temp registers (R8,R9,R10,R11)
|
||||
REDUCE(CX,BX,SI,DI,R8,R9,R10,R11)
|
||||
|
||||
MOVQ a+0(FP), AX
|
||||
MOVQ CX, 0(AX)
|
||||
MOVQ BX, 8(AX)
|
||||
MOVQ SI, 16(AX)
|
||||
MOVQ DI, 24(AX)
|
||||
RET
|
||||
78
ff/element_ops_noasm.go
Normal file
78
ff/element_ops_noasm.go
Normal file
@@ -0,0 +1,78 @@
|
||||
//go:build !amd64
|
||||
// +build !amd64
|
||||
|
||||
// Copyright 2020 ConsenSys Software Inc.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
// Code generated by consensys/gnark-crypto DO NOT EDIT
|
||||
|
||||
package ff
|
||||
|
||||
// /!\ WARNING /!\
|
||||
// this code has not been audited and is provided as-is. In particular,
|
||||
// there is no security guarantees such as constant time implementation
|
||||
// or side-channel attack resistance
|
||||
// /!\ WARNING /!\
|
||||
|
||||
// MulBy3 multiplies x by 3 in place (x *= 3) by calling mulByConstant.
|
||||
func MulBy3(x *Element) {
|
||||
mulByConstant(x, 3)
|
||||
}
|
||||
|
||||
// MulBy5 multiplies x by 5 in place (x *= 5) by calling mulByConstant.
|
||||
func MulBy5(x *Element) {
|
||||
mulByConstant(x, 5)
|
||||
}
|
||||
|
||||
// MulBy13 multiplies x by 13 in place (x *= 13) by calling mulByConstant.
|
||||
func MulBy13(x *Element) {
|
||||
mulByConstant(x, 13)
|
||||
}
|
||||
|
||||
// Butterfly sets
|
||||
// a = a + b
|
||||
// b = a - b
|
||||
// This build (selected by the !amd64 constraint) forwards to _butterflyGeneric.
func Butterfly(a, b *Element) {
|
||||
_butterflyGeneric(a, b)
|
||||
}
|
||||
|
||||
// mul is the non-amd64 build of mul; it forwards z, x and y to _mulGeneric.
func mul(z, x, y *Element) {
|
||||
_mulGeneric(z, x, y)
|
||||
}
|
||||
|
||||
// fromMont converts z in place (i.e. mutates) from Montgomery to regular representation
|
||||
// by calling _fromMontGeneric; it sets z = z * 1 and has no return value.
|
||||
func fromMont(z *Element) {
|
||||
_fromMontGeneric(z)
|
||||
}
|
||||
|
||||
// add is the non-amd64 build of add; it forwards z, x and y to _addGeneric.
func add(z, x, y *Element) {
|
||||
_addGeneric(z, x, y)
|
||||
}
|
||||
|
||||
// double is the non-amd64 build of double; it forwards z and x to _doubleGeneric.
func double(z, x *Element) {
|
||||
_doubleGeneric(z, x)
|
||||
}
|
||||
|
||||
// sub is the non-amd64 build of sub; it forwards z, x and y to _subGeneric.
func sub(z, x, y *Element) {
|
||||
_subGeneric(z, x, y)
|
||||
}
|
||||
|
||||
// neg is the non-amd64 build of neg; it forwards z and x to _negGeneric.
func neg(z, x *Element) {
|
||||
_negGeneric(z, x)
|
||||
}
|
||||
|
||||
// reduce is the non-amd64 build of reduce; it forwards z to _reduceGeneric.
func reduce(z *Element) {
|
||||
_reduceGeneric(z)
|
||||
}
|
||||
1898
ff/element_test.go
1898
ff/element_test.go
File diff suppressed because it is too large
Load Diff
@@ -1,6 +0,0 @@
|
||||
package ff
|
||||
|
||||
// NewElement returns a new empty *Element
|
||||
func NewElement() *Element {
|
||||
return &Element{}
|
||||
}
|
||||
5
go.mod
5
go.mod
@@ -6,4 +6,9 @@ require (
|
||||
github.com/dchest/blake512 v1.0.0
|
||||
github.com/stretchr/testify v1.7.0
|
||||
golang.org/x/crypto v0.0.0-20211117183948-ae814b36b871
|
||||
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect
|
||||
github.com/davecgh/go-spew v1.1.0 // indirect
|
||||
github.com/leanovate/gopter v0.2.9 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c // indirect
|
||||
)
|
||||
|
||||
4
go.sum
4
go.sum
@@ -2,6 +2,8 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/dchest/blake512 v1.0.0 h1:oDFEQFIqFSeuA34xLtXZ/rWxCXdSjirjzPhey5EUvmA=
|
||||
github.com/dchest/blake512 v1.0.0/go.mod h1:FV1x7xPPLWukZlpDpWQ88rF/SFwZ5qbskrzhLMB92JI=
|
||||
github.com/leanovate/gopter v0.2.9 h1:fQjYxZaynp97ozCzfOyOuAGOU4aU/z37zf/tOujFk7c=
|
||||
github.com/leanovate/gopter v0.2.9/go.mod h1:U2L/78B+KVFIx2VmW6onHJQzXtFb+p5y3y2Sh+Jxxv8=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
@@ -14,6 +16,8 @@ golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7w
|
||||
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1 h1:SrN+KX8Art/Sf4HNj6Zcz06G7VEz+7w9tdXTPOZ7+l4=
|
||||
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e h1:fLOSk5Q00efkSvAm+4xcoXD+RRmLmmulPn5I3Y9F2EM=
|
||||
golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
|
||||
@@ -20,7 +20,7 @@ func zero() *ff.Element {
|
||||
// exp5 performs x^5 mod p
|
||||
// https://eprint.iacr.org/2019/458.pdf page 8
|
||||
func exp5(a *ff.Element) {
|
||||
a.Exp(*a, 5) //nolint:gomnd
|
||||
a.Exp(*a, big.NewInt(5)) //nolint:gomnd
|
||||
}
|
||||
|
||||
// exp5state perform exp5 for whole state
|
||||
|
||||
Reference in New Issue
Block a user