Browse Source

Update to goff v0.2.0

feature/goff-0-2-0
arnaucube 4 years ago
parent
commit
048941e5e0
9 changed files with 1444 additions and 205 deletions
  1. +6
    -1
      ff/arith.go
  2. +161
    -199
      ff/element.go
  3. +170
    -0
      ff/element_mul.go
  4. +39
    -0
      ff/element_mul_amd64.go
  5. +695
    -0
      ff/element_mul_amd64.s
  6. +93
    -0
      ff/element_square.go
  7. +34
    -0
      ff/element_square_amd64.go
  8. +245
    -5
      ff/element_test.go
  9. +1
    -0
      go.mod

+ 6
- 1
ff/arith.go

@ -12,14 +12,19 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// Code generated by goff DO NOT EDIT
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff contains field arithmetic operations
package ff package ff
import ( import (
"math/bits" "math/bits"
"golang.org/x/sys/cpu"
) )
var supportAdx = cpu.X86.HasADX && cpu.X86.HasBMI2
func madd(a, b, t, u, v uint64) (uint64, uint64, uint64) { func madd(a, b, t, u, v uint64) (uint64, uint64, uint64) {
var carry uint64 var carry uint64
hi, lo := bits.Mul64(a, b) hi, lo := bits.Mul64(a, b)

+ 161
- 199
ff/element.go

@ -12,29 +12,33 @@
// See the License for the specific language governing permissions and // See the License for the specific language governing permissions and
// limitations under the License. // limitations under the License.
// field modulus q =
//
// 21888242871839275222246405745257275088548364400416034343698204186575808495617
// Code generated by goff DO NOT EDIT
// goff version: - build:
// Element are assumed to be in Montgomery form in all methods
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff (generated by goff) contains field arithmetics operations
// Package ff contains field arithmetic operations
package ff package ff
// /!\ WARNING /!\
// this code has not been audited and is provided as-is. In particular,
// there is no security guarantees such as constant time implementation
// or side-channel attack resistance
// /!\ WARNING /!\
import ( import (
"crypto/rand" "crypto/rand"
"encoding/binary" "encoding/binary"
"io" "io"
"math/big" "math/big"
"math/bits" "math/bits"
"strconv"
"sync" "sync"
"unsafe" "unsafe"
) )
// Element represents a field element stored on 4 words (uint64) // Element represents a field element stored on 4 words (uint64)
// Element are assumed to be in Montgomery form in all methods // Element are assumed to be in Montgomery form in all methods
// field modulus q =
//
// 21888242871839275222246405745257275088548364400416034343698204186575808495617
type Element [4]uint64 type Element [4]uint64
// ElementLimbs number of 64 bits words needed to represent Element // ElementLimbs number of 64 bits words needed to represent Element
@ -311,6 +315,7 @@ func (z *Element) SetRandom() *Element {
z[3] %= 3486998266802970665 z[3] %= 3486998266802970665
// if z > q --> z -= q // if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64 var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
@ -322,6 +327,38 @@ func (z *Element) SetRandom() *Element {
return z return z
} }
// One returns 1 (in montgommery form)
func One() Element {
var one Element
one.SetOne()
return one
}
// FromInterface converts i1 from uint64, int, string, or Element, big.Int into Element
// panic if provided type is not supported
func FromInterface(i1 interface{}) Element {
var val Element
switch c1 := i1.(type) {
case uint64:
val.SetUint64(c1)
case int:
val.SetString(strconv.Itoa(c1))
case string:
val.SetString(c1)
case big.Int:
val.SetBigInt(&c1)
case Element:
val = c1
case *Element:
val.Set(c1)
default:
panic("invalid type")
}
return val
}
// Add z = x + y mod q // Add z = x + y mod q
func (z *Element) Add(x, y *Element) *Element { func (z *Element) Add(x, y *Element) *Element {
var carry uint64 var carry uint64
@ -332,6 +369,7 @@ func (z *Element) Add(x, y *Element) *Element {
z[3], _ = bits.Add64(x[3], y[3], carry) z[3], _ = bits.Add64(x[3], y[3], carry)
// if z > q --> z -= q // if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64 var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
@ -352,6 +390,7 @@ func (z *Element) AddAssign(x *Element) *Element {
z[3], _ = bits.Add64(z[3], x[3], carry) z[3], _ = bits.Add64(z[3], x[3], carry)
// if z > q --> z -= q // if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64 var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
@ -372,6 +411,7 @@ func (z *Element) Double(x *Element) *Element {
z[3], _ = bits.Add64(x[3], x[3], carry) z[3], _ = bits.Add64(x[3], x[3], carry)
// if z > q --> z -= q // if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64 var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
@ -416,18 +456,31 @@ func (z *Element) SubAssign(x *Element) *Element {
return z return z
} }
// Exp z = x^e mod q
func (z *Element) Exp(x Element, e uint64) *Element {
if e == 0 {
// Exp z = x^exponent mod q
// (not optimized)
// exponent (non-montgomery form) is ordered from least significant word to most significant word
func (z *Element) Exp(x Element, exponent ...uint64) *Element {
r := 0
msb := 0
for i := len(exponent) - 1; i >= 0; i-- {
if exponent[i] == 0 {
r++
} else {
msb = (i * 64) + bits.Len64(exponent[i])
break
}
}
exponent = exponent[:len(exponent)-r]
if len(exponent) == 0 {
return z.SetOne() return z.SetOne()
} }
z.Set(&x) z.Set(&x)
l := bits.Len64(e) - 2
l := msb - 2
for i := l; i >= 0; i-- { for i := l; i >= 0; i-- {
z.Square(z) z.Square(z)
if e&(1<<uint(i)) != 0 {
if exponent[i/64]&(1<<uint(i%64)) != 0 {
z.MulAssign(&x) z.MulAssign(&x)
} }
} }
@ -478,6 +531,7 @@ func (z *Element) FromMont() *Element {
} }
// if z > q --> z -= q // if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) { if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64 var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0) z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
@ -549,6 +603,19 @@ func (z *Element) SetBigInt(v *big.Int) *Element {
zero := big.NewInt(0) zero := big.NewInt(0)
q := elementModulusBigInt() q := elementModulusBigInt()
// fast path
c := v.Cmp(q)
if c == 0 {
return z
} else if c != 1 && v.Cmp(zero) != -1 {
// v should
vBits := v.Bits()
for i := 0; i < len(vBits); i++ {
z[i] = uint64(vBits[i])
}
return z.ToMont()
}
// copy input // copy input
vv := new(big.Int).Set(v) vv := new(big.Int).Set(v)
@ -591,202 +658,97 @@ func (z *Element) SetString(s string) *Element {
return z.SetBigInt(x) return z.SetBigInt(x)
} }
// Mul z = x * y mod q
func (z *Element) Mul(x, y *Element) *Element {
// Legendre returns the Legendre symbol of z (either +1, -1, or 0.)
func (z *Element) Legendre() int {
var l Element
// z^((q-1)/2)
l.Exp(*z,
11669102379873075200,
10671829228508198984,
15863968012492123182,
1743499133401485332,
)
var t [4]uint64
var c [3]uint64
{
// round 0
v := x[0]
c[1], c[0] = bits.Mul64(v, y[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd1(v, y[1], c[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd1(v, y[2], c[1])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd1(v, y[3], c[1])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 1
v := x[1]
c[1], c[0] = madd1(v, y[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, y[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, y[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, y[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 2
v := x[2]
c[1], c[0] = madd1(v, y[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, y[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, y[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, y[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 3
v := x[3]
c[1], c[0] = madd1(v, y[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, y[1], c[1], t[1])
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, y[2], c[1], t[2])
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, y[3], c[1], t[3])
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
if l.IsZero() {
return 0
} }
// if z > q --> z -= q
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
// if l == 1
if (l[3] == 1011752739694698287) && (l[2] == 7381016538464732718) && (l[1] == 3962172157175319849) && (l[0] == 12436184717236109307) {
return 1
} }
return z
return -1
} }
// MulAssign z = z * x mod q
func (z *Element) MulAssign(x *Element) *Element {
var t [4]uint64
var c [3]uint64
{
// round 0
v := z[0]
c[1], c[0] = bits.Mul64(v, x[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd1(v, x[1], c[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd1(v, x[2], c[1])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd1(v, x[3], c[1])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 1
v := z[1]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 2
v := z[2]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
// Sqrt z = √x mod q
// if the square root doesn't exist (x is not a square mod q)
// Sqrt leaves z unchanged and returns nil
func (z *Element) Sqrt(x *Element) *Element {
// q ≡ 1 (mod 4)
// see modSqrtTonelliShanks in math/big/int.go
// using https://www.maa.org/sites/default/files/pdf/upload_library/22/Polya/07468342.di020786.02p0470a.pdf
var y, b, t, w Element
// w = x^((s-1)/2))
w.Exp(*x,
14829091926808964255,
867720185306366531,
688207751544974772,
6495040407,
)
// y = x^((s+1)/2)) = w * x
y.Mul(x, &w)
// b = x^s = w * w * x = y * x
b.Mul(&w, &y)
// g = nonResidue ^ s
var g = Element{
7164790868263648668,
11685701338293206998,
6216421865291908056,
1756667274303109607,
}
r := uint64(28)
// compute legendre symbol
// t = x^((q-1)/2) = r-1 squaring of x^s
t = b
for i := uint64(0); i < r-1; i++ {
t.Square(&t)
}
if t.IsZero() {
return z.SetZero()
} }
{
// round 3
v := z[3]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
if !((t[3] == 1011752739694698287) && (t[2] == 7381016538464732718) && (t[1] == 3962172157175319849) && (t[0] == 12436184717236109307)) {
// t != 1, we don't have a square root
return nil
} }
for {
var m uint64
t = b
// if z > q --> z -= q
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
}
return z
}
// Square z = x * x mod q
func (z *Element) Square(x *Element) *Element {
var p [4]uint64
// for t != 1
for !((t[3] == 1011752739694698287) && (t[2] == 7381016538464732718) && (t[1] == 3962172157175319849) && (t[0] == 12436184717236109307)) {
t.Square(&t)
m++
}
var u, v uint64
{
// round 0
u, p[0] = bits.Mul64(x[0], x[0])
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
var t uint64
t, u, v = madd1sb(x[0], x[1], u)
C, p[0] = madd2(m, 2896914383306846353, v, C)
t, u, v = madd1s(x[0], x[2], t, u)
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd1s(x[0], x[3], t, u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 1
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
u, v = madd1(x[1], x[1], p[1])
C, p[0] = madd2(m, 2896914383306846353, v, C)
var t uint64
t, u, v = madd2sb(x[1], x[2], p[2], u)
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd2s(x[1], x[3], p[3], t, u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 2
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
C, p[0] = madd2(m, 2896914383306846353, p[1], C)
u, v = madd1(x[2], x[2], p[2])
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd2sb(x[2], x[3], p[3], u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 3
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
C, z[0] = madd2(m, 2896914383306846353, p[1], C)
C, z[1] = madd2(m, 13281191951274694749, p[2], C)
u, v = madd1(x[3], x[3], p[3])
z[3], z[2] = madd3(m, 3486998266802970665, v, C, u)
}
if m == 0 {
return z.Set(&y)
}
// t = g^(2^(r-m-1)) mod q
ge := int(r - m - 1)
t = g
for ge > 0 {
t.Square(&t)
ge--
}
// if z > q --> z -= q
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
g.Square(&t)
y.MulAssign(&t)
b.MulAssign(&g)
r = m
} }
return z
} }

+ 170
- 0
ff/element_mul.go

@ -0,0 +1,170 @@
// +build !amd64
// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff contains field arithmetic operations
package ff
// /!\ WARNING /!\
// this code has not been audited and is provided as-is. In particular,
// there is no security guarantees such as constant time implementation
// or side-channel attack resistance
// /!\ WARNING /!\
import "math/bits"
// Mul z = x * y mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Mul(x, y *Element) *Element {
var t [4]uint64
var c [3]uint64
{
// round 0
v := x[0]
c[1], c[0] = bits.Mul64(v, y[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd1(v, y[1], c[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd1(v, y[2], c[1])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd1(v, y[3], c[1])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 1
v := x[1]
c[1], c[0] = madd1(v, y[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, y[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, y[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, y[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 2
v := x[2]
c[1], c[0] = madd1(v, y[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, y[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, y[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, y[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 3
v := x[3]
c[1], c[0] = madd1(v, y[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, y[1], c[1], t[1])
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, y[2], c[1], t[2])
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, y[3], c[1], t[3])
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
// if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
}
return z
}
// MulAssign z = z * x mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) MulAssign(x *Element) *Element {
var t [4]uint64
var c [3]uint64
{
// round 0
v := z[0]
c[1], c[0] = bits.Mul64(v, x[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd1(v, x[1], c[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd1(v, x[2], c[1])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd1(v, x[3], c[1])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 1
v := z[1]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 2
v := z[2]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 3
v := z[3]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
// if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
}
return z
}

+ 39
- 0
ff/element_mul_amd64.go

@ -0,0 +1,39 @@
// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff contains field arithmetic operations
package ff
// MulAssignElement z = z * x mod q (constant time)
// calling this instead of z.MulAssign(x) is prefered for performance critical path
//go:noescape
func MulAssignElement(res, y *Element)
// Mul z = x * y mod q (constant time)
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Mul(x, y *Element) *Element {
res := *x
MulAssignElement(&res, y)
z.Set(&res)
return z
}
// MulAssign z = z * x mod q (constant time)
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) MulAssign(x *Element) *Element {
MulAssignElement(z, x)
return z
}

+ 695
- 0
ff/element_mul_amd64.s

@ -0,0 +1,695 @@
// Code generated by goff (v0.2.0) DO NOT EDIT
#include "textflag.h"
// func MulAssignElement(res,y *Element)
// montgomery multiplication of res by y
// stores the result in res
TEXT ·MulAssignElement(SB), NOSPLIT, $0-16
// dereference our parameters
MOVQ res+0(FP), DI
MOVQ y+8(FP), R8
// check if we support adx and mulx
CMPB ·supportAdx(SB), $1
JNE no_adx
// the algorithm is described here
// https://hackmd.io/@zkteam/modular_multiplication
// however, to benefit from the ADCX and ADOX carry chains
// we split the inner loops in 2:
// for i=0 to N-1
// for j=0 to N-1
// (A,t[j]) := t[j] + a[j]*b[i] + A
// m := t[0]*q'[0] mod W
// C,_ := t[0] + m*q[0]
// for j=1 to N-1
// (C,t[j-1]) := t[j] + m*q[j] + C
// t[N-1] = C + A
// ---------------------------------------------------------------------------------------------
// outter loop 0
// clear up the carry flags
XORQ R9 , R9
// R12 = y[0]
MOVQ 0(R8), R12
// for j=0 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// DX = res[0]
MOVQ 0(DI), DX
MULXQ R12, CX , R9
// DX = res[1]
MOVQ 8(DI), DX
MOVQ R9, BX
MULXQ R12, AX, R9
ADOXQ AX, BX
// DX = res[2]
MOVQ 16(DI), DX
MOVQ R9, BP
MULXQ R12, AX, R9
ADOXQ AX, BP
// DX = res[3]
MOVQ 24(DI), DX
MOVQ R9, SI
MULXQ R12, AX, R9
ADOXQ AX, SI
// add the last carries to R9
MOVQ $0, DX
ADCXQ DX, R9
ADOXQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, DX
MULXQ CX,R11, DX
// clear the carry flags
XORQ DX, DX
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, DX
MULXQ R11, AX, R10
ADCXQ CX ,AX
// for j=1 to N-1
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ $0x2833e84879b97091, DX
MULXQ R11, AX, DX
ADCXQ BX, R10
ADOXQ AX, R10
MOVQ R10, CX
MOVQ DX, R10
MOVQ $0xb85045b68181585d, DX
MULXQ R11, AX, DX
ADCXQ BP, R10
ADOXQ AX, R10
MOVQ R10, BX
MOVQ DX, R10
MOVQ $0x30644e72e131a029, DX
MULXQ R11, AX, DX
ADCXQ SI, R10
ADOXQ AX, R10
MOVQ R10, BP
MOVQ $0, AX
ADCXQ AX, DX
ADOXQ DX, R9
MOVQ R9, SI
// ---------------------------------------------------------------------------------------------
// outter loop 1
// clear up the carry flags
XORQ R9 , R9
// R12 = y[1]
MOVQ 8(R8), R12
// for j=0 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// DX = res[0]
MOVQ 0(DI), DX
MULXQ R12, AX, R9
ADOXQ AX, CX
// DX = res[1]
MOVQ 8(DI), DX
ADCXQ R9, BX
MULXQ R12, AX, R9
ADOXQ AX, BX
// DX = res[2]
MOVQ 16(DI), DX
ADCXQ R9, BP
MULXQ R12, AX, R9
ADOXQ AX, BP
// DX = res[3]
MOVQ 24(DI), DX
ADCXQ R9, SI
MULXQ R12, AX, R9
ADOXQ AX, SI
// add the last carries to R9
MOVQ $0, DX
ADCXQ DX, R9
ADOXQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, DX
MULXQ CX,R11, DX
// clear the carry flags
XORQ DX, DX
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, DX
MULXQ R11, AX, R10
ADCXQ CX ,AX
// for j=1 to N-1
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ $0x2833e84879b97091, DX
MULXQ R11, AX, DX
ADCXQ BX, R10
ADOXQ AX, R10
MOVQ R10, CX
MOVQ DX, R10
MOVQ $0xb85045b68181585d, DX
MULXQ R11, AX, DX
ADCXQ BP, R10
ADOXQ AX, R10
MOVQ R10, BX
MOVQ DX, R10
MOVQ $0x30644e72e131a029, DX
MULXQ R11, AX, DX
ADCXQ SI, R10
ADOXQ AX, R10
MOVQ R10, BP
MOVQ $0, AX
ADCXQ AX, DX
ADOXQ DX, R9
MOVQ R9, SI
// ---------------------------------------------------------------------------------------------
// outter loop 2
// clear up the carry flags
XORQ R9 , R9
// R12 = y[2]
MOVQ 16(R8), R12
// for j=0 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// DX = res[0]
MOVQ 0(DI), DX
MULXQ R12, AX, R9
ADOXQ AX, CX
// DX = res[1]
MOVQ 8(DI), DX
ADCXQ R9, BX
MULXQ R12, AX, R9
ADOXQ AX, BX
// DX = res[2]
MOVQ 16(DI), DX
ADCXQ R9, BP
MULXQ R12, AX, R9
ADOXQ AX, BP
// DX = res[3]
MOVQ 24(DI), DX
ADCXQ R9, SI
MULXQ R12, AX, R9
ADOXQ AX, SI
// add the last carries to R9
MOVQ $0, DX
ADCXQ DX, R9
ADOXQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, DX
MULXQ CX,R11, DX
// clear the carry flags
XORQ DX, DX
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, DX
MULXQ R11, AX, R10
ADCXQ CX ,AX
// for j=1 to N-1
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ $0x2833e84879b97091, DX
MULXQ R11, AX, DX
ADCXQ BX, R10
ADOXQ AX, R10
MOVQ R10, CX
MOVQ DX, R10
MOVQ $0xb85045b68181585d, DX
MULXQ R11, AX, DX
ADCXQ BP, R10
ADOXQ AX, R10
MOVQ R10, BX
MOVQ DX, R10
MOVQ $0x30644e72e131a029, DX
MULXQ R11, AX, DX
ADCXQ SI, R10
ADOXQ AX, R10
MOVQ R10, BP
MOVQ $0, AX
ADCXQ AX, DX
ADOXQ DX, R9
MOVQ R9, SI
// ---------------------------------------------------------------------------------------------
// outter loop 3
// clear up the carry flags
XORQ R9 , R9
// R12 = y[3]
MOVQ 24(R8), R12
// for j=0 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// DX = res[0]
MOVQ 0(DI), DX
MULXQ R12, AX, R9
ADOXQ AX, CX
// DX = res[1]
MOVQ 8(DI), DX
ADCXQ R9, BX
MULXQ R12, AX, R9
ADOXQ AX, BX
// DX = res[2]
MOVQ 16(DI), DX
ADCXQ R9, BP
MULXQ R12, AX, R9
ADOXQ AX, BP
// DX = res[3]
MOVQ 24(DI), DX
ADCXQ R9, SI
MULXQ R12, AX, R9
ADOXQ AX, SI
// add the last carries to R9
MOVQ $0, DX
ADCXQ DX, R9
ADOXQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, DX
MULXQ CX,R11, DX
// clear the carry flags
XORQ DX, DX
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, DX
MULXQ R11, AX, R10
ADCXQ CX ,AX
// for j=1 to N-1
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ $0x2833e84879b97091, DX
MULXQ R11, AX, DX
ADCXQ BX, R10
ADOXQ AX, R10
MOVQ R10, CX
MOVQ DX, R10
MOVQ $0xb85045b68181585d, DX
MULXQ R11, AX, DX
ADCXQ BP, R10
ADOXQ AX, R10
MOVQ R10, BX
MOVQ DX, R10
MOVQ $0x30644e72e131a029, DX
MULXQ R11, AX, DX
ADCXQ SI, R10
ADOXQ AX, R10
MOVQ R10, BP
MOVQ $0, AX
ADCXQ AX, DX
ADOXQ DX, R9
MOVQ R9, SI
reduce:
// reduce, constant time version
// first we copy registers storing t in a separate set of registers
// as SUBQ modifies the 2nd operand
MOVQ CX, DX
MOVQ BX, R8
MOVQ BP, R9
MOVQ SI, R10
MOVQ $0x43e1f593f0000001, R11
SUBQ R11, DX
MOVQ $0x2833e84879b97091, R11
SBBQ R11, R8
MOVQ $0xb85045b68181585d, R11
SBBQ R11, R9
MOVQ $0x30644e72e131a029, R11
SBBQ R11, R10
JCS t_is_smaller // no borrow, we return t
// borrow is set, we return u
MOVQ DX, (DI)
MOVQ R8, 8(DI)
MOVQ R9, 16(DI)
MOVQ R10, 24(DI)
RET
t_is_smaller:
MOVQ CX, 0(DI)
MOVQ BX, 8(DI)
MOVQ BP, 16(DI)
MOVQ SI, 24(DI)
RET
no_adx:
// ---------------------------------------------------------------------------------------------
// outter loop 0
// (A,t[0]) := t[0] + x[0]*y[0]
MOVQ (DI), AX // x[0]
MOVQ 0(R8), R12
MULQ R12 // x[0] * y[0]
MOVQ DX, R9
MOVQ AX, CX
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, R11
IMULQ CX , R11
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, AX
MULQ R11
ADDQ CX ,AX
ADCQ $0, DX
MOVQ DX, R10
// for j=1 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ 8(DI), AX
MULQ R12 // x[1] * y[0]
MOVQ R9, BX
ADDQ AX, BX
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x2833e84879b97091, AX
MULQ R11
ADDQ BX, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, CX
MOVQ DX, R10
MOVQ 16(DI), AX
MULQ R12 // x[2] * y[0]
MOVQ R9, BP
ADDQ AX, BP
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0xb85045b68181585d, AX
MULQ R11
ADDQ BP, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BX
MOVQ DX, R10
MOVQ 24(DI), AX
MULQ R12 // x[3] * y[0]
MOVQ R9, SI
ADDQ AX, SI
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x30644e72e131a029, AX
MULQ R11
ADDQ SI, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BP
MOVQ DX, R10
ADDQ R10, R9
MOVQ R9, SI
// ---------------------------------------------------------------------------------------------
// outter loop 1
// (A,t[0]) := t[0] + x[0]*y[1]
MOVQ (DI), AX // x[0]
MOVQ 8(R8), R12
MULQ R12 // x[0] * y[1]
ADDQ AX, CX
ADCQ $0, DX
MOVQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, R11
IMULQ CX , R11
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, AX
MULQ R11
ADDQ CX ,AX
ADCQ $0, DX
MOVQ DX, R10
// for j=1 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ 8(DI), AX
MULQ R12 // x[1] * y[1]
ADDQ R9, BX
ADCQ $0, DX
ADDQ AX, BX
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x2833e84879b97091, AX
MULQ R11
ADDQ BX, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, CX
MOVQ DX, R10
MOVQ 16(DI), AX
MULQ R12 // x[2] * y[1]
ADDQ R9, BP
ADCQ $0, DX
ADDQ AX, BP
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0xb85045b68181585d, AX
MULQ R11
ADDQ BP, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BX
MOVQ DX, R10
MOVQ 24(DI), AX
MULQ R12 // x[3] * y[1]
ADDQ R9, SI
ADCQ $0, DX
ADDQ AX, SI
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x30644e72e131a029, AX
MULQ R11
ADDQ SI, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BP
MOVQ DX, R10
ADDQ R10, R9
MOVQ R9, SI
// ---------------------------------------------------------------------------------------------
// outter loop 2
// (A,t[0]) := t[0] + x[0]*y[2]
MOVQ (DI), AX // x[0]
MOVQ 16(R8), R12
MULQ R12 // x[0] * y[2]
ADDQ AX, CX
ADCQ $0, DX
MOVQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, R11
IMULQ CX , R11
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, AX
MULQ R11
ADDQ CX ,AX
ADCQ $0, DX
MOVQ DX, R10
// for j=1 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ 8(DI), AX
MULQ R12 // x[1] * y[2]
ADDQ R9, BX
ADCQ $0, DX
ADDQ AX, BX
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x2833e84879b97091, AX
MULQ R11
ADDQ BX, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, CX
MOVQ DX, R10
MOVQ 16(DI), AX
MULQ R12 // x[2] * y[2]
ADDQ R9, BP
ADCQ $0, DX
ADDQ AX, BP
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0xb85045b68181585d, AX
MULQ R11
ADDQ BP, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BX
MOVQ DX, R10
MOVQ 24(DI), AX
MULQ R12 // x[3] * y[2]
ADDQ R9, SI
ADCQ $0, DX
ADDQ AX, SI
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x30644e72e131a029, AX
MULQ R11
ADDQ SI, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BP
MOVQ DX, R10
ADDQ R10, R9
MOVQ R9, SI
// ---------------------------------------------------------------------------------------------
// outter loop 3
// (A,t[0]) := t[0] + x[0]*y[3]
MOVQ (DI), AX // x[0]
MOVQ 24(R8), R12
MULQ R12 // x[0] * y[3]
ADDQ AX, CX
ADCQ $0, DX
MOVQ DX, R9
// m := t[0]*q'[0] mod W
MOVQ $0xc2e1f593efffffff, R11
IMULQ CX , R11
// C,_ := t[0] + m*q[0]
MOVQ $0x43e1f593f0000001, AX
MULQ R11
ADDQ CX ,AX
ADCQ $0, DX
MOVQ DX, R10
// for j=1 to N-1
// (A,t[j]) := t[j] + x[j]*y[i] + A
// (C,t[j-1]) := t[j] + m*q[j] + C
MOVQ 8(DI), AX
MULQ R12 // x[1] * y[3]
ADDQ R9, BX
ADCQ $0, DX
ADDQ AX, BX
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x2833e84879b97091, AX
MULQ R11
ADDQ BX, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, CX
MOVQ DX, R10
MOVQ 16(DI), AX
MULQ R12 // x[2] * y[3]
ADDQ R9, BP
ADCQ $0, DX
ADDQ AX, BP
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0xb85045b68181585d, AX
MULQ R11
ADDQ BP, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BX
MOVQ DX, R10
MOVQ 24(DI), AX
MULQ R12 // x[3] * y[3]
ADDQ R9, SI
ADCQ $0, DX
ADDQ AX, SI
ADCQ $0, DX
MOVQ DX, R9
MOVQ $0x30644e72e131a029, AX
MULQ R11
ADDQ SI, R10
ADCQ $0, DX
ADDQ AX, R10
ADCQ $0, DX
MOVQ R10, BP
MOVQ DX, R10
ADDQ R10, R9
MOVQ R9, SI
JMP reduce

+ 93
- 0
ff/element_square.go

@ -0,0 +1,93 @@
// +build !amd64
// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff contains field arithmetic operations
package ff
// /!\ WARNING /!\
// this code has not been audited and is provided as-is. In particular,
// there is no security guarantees such as constant time implementation
// or side-channel attack resistance
// /!\ WARNING /!\
import "math/bits"
// Square z = x * x mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Square(x *Element) *Element {
var p [4]uint64
var u, v uint64
{
// round 0
u, p[0] = bits.Mul64(x[0], x[0])
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
var t uint64
t, u, v = madd1sb(x[0], x[1], u)
C, p[0] = madd2(m, 2896914383306846353, v, C)
t, u, v = madd1s(x[0], x[2], t, u)
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd1s(x[0], x[3], t, u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 1
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
u, v = madd1(x[1], x[1], p[1])
C, p[0] = madd2(m, 2896914383306846353, v, C)
var t uint64
t, u, v = madd2sb(x[1], x[2], p[2], u)
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd2s(x[1], x[3], p[3], t, u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 2
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
C, p[0] = madd2(m, 2896914383306846353, p[1], C)
u, v = madd1(x[2], x[2], p[2])
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd2sb(x[2], x[3], p[3], u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 3
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
C, z[0] = madd2(m, 2896914383306846353, p[1], C)
C, z[1] = madd2(m, 13281191951274694749, p[2], C)
u, v = madd1(x[3], x[3], p[3])
z[3], z[2] = madd3(m, 3486998266802970665, v, C, u)
}
// if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
}
return z
}

+ 34
- 0
ff/element_square_amd64.go

@ -0,0 +1,34 @@
// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff contains field arithmetic operations
package ff
// SquareElement z = x * x mod q
// calling this instead of z.Square(x) is prefered for performance critical path
// go - noescape
// func SquareElement(res,x *Element)
// Square z = x * x mod q
// see https://hackmd.io/@zkteam/modular_multiplication
func (z *Element) Square(x *Element) *Element {
if z != x {
z.Set(x)
}
MulAssignElement(z, x)
// SquareElement(z, x)
return z
}

+ 245
- 5
ff/element_test.go

@ -1,9 +1,26 @@
// Code generated by goff DO NOT EDIT
// Copyright 2020 ConsenSys AG
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Code generated by goff (v0.2.0) DO NOT EDIT
// Package ff contains field arithmetic operations
package ff package ff
import ( import (
"crypto/rand" "crypto/rand"
"math/big" "math/big"
"math/bits"
mrand "math/rand" mrand "math/rand"
"testing" "testing"
) )
@ -21,7 +38,14 @@ func TestELEMENTCorrectnessAgainstBigInt(t *testing.T) {
modulusMinusOne.Sub(modulus, &one) modulusMinusOne.Sub(modulus, &one)
for i := 0; i < 1000; i++ {
var n int
if testing.Short() {
n = 10
} else {
n = 500
}
for i := 0; i < n; i++ {
// sample 2 random big int // sample 2 random big int
b1, _ := rand.Int(rand.Reader, modulus) b1, _ := rand.Int(rand.Reader, modulus)
@ -57,7 +81,7 @@ func TestELEMENTCorrectnessAgainstBigInt(t *testing.T) {
rbExp := new(big.Int).SetUint64(rExp) rbExp := new(big.Int).SetUint64(rExp)
var bMul, bAdd, bSub, bDiv, bNeg, bLsh, bInv, bExp, bSquare big.Int
var bMul, bAdd, bSub, bDiv, bNeg, bLsh, bInv, bExp, bExp2, bSquare big.Int
// e1 = mont(b1), e2 = mont(b2) // e1 = mont(b1), e2 = mont(b2)
var e1, e2, eMul, eAdd, eSub, eDiv, eNeg, eLsh, eInv, eExp, eSquare, eMulAssign, eSubAssign, eAddAssign Element var e1, e2, eMul, eAdd, eSub, eDiv, eNeg, eLsh, eInv, eExp, eSquare, eMulAssign, eSubAssign, eAddAssign Element
@ -106,12 +130,40 @@ func TestELEMENTCorrectnessAgainstBigInt(t *testing.T) {
cmpEandB(&eNeg, &bNeg, "Neg") cmpEandB(&eNeg, &bNeg, "Neg")
cmpEandB(&eInv, &bInv, "Inv") cmpEandB(&eInv, &bInv, "Inv")
cmpEandB(&eExp, &bExp, "Exp") cmpEandB(&eExp, &bExp, "Exp")
cmpEandB(&eLsh, &bLsh, "Lsh") cmpEandB(&eLsh, &bLsh, "Lsh")
// legendre symbol
if e1.Legendre() != big.Jacobi(b1, modulus) {
t.Fatal("legendre symbol computation failed")
}
if e2.Legendre() != big.Jacobi(b2, modulus) {
t.Fatal("legendre symbol computation failed")
}
// these are slow, killing circle ci
if n <= 5 {
// sqrt
var eSqrt, eExp2 Element
var bSqrt big.Int
bSqrt.ModSqrt(b1, modulus)
eSqrt.Sqrt(&e1)
cmpEandB(&eSqrt, &bSqrt, "Sqrt")
bits := b2.Bits()
exponent := make([]uint64, len(bits))
for k := 0; k < len(bits); k++ {
exponent[k] = uint64(bits[k])
}
eExp2.Exp(e1, exponent...)
bExp2.Exp(b1, b2, modulus)
cmpEandB(&eExp2, &bExp2, "Exp multi words")
}
} }
} }
func TestELEMENTIsRandom(t *testing.T) { func TestELEMENTIsRandom(t *testing.T) {
for i := 0; i < 1000; i++ {
for i := 0; i < 50; i++ {
var x, y Element var x, y Element
x.SetRandom() x.SetRandom()
y.SetRandom() y.SetRandom()
@ -125,7 +177,6 @@ func TestELEMENTIsRandom(t *testing.T) {
// benchmarks // benchmarks
// most benchmarks are rudimentary and should sample a large number of random inputs // most benchmarks are rudimentary and should sample a large number of random inputs
// or be run multiple times to ensure it didn't measure the fastest path of the function // or be run multiple times to ensure it didn't measure the fastest path of the function
// TODO: clean up and push benchmarking branch
var benchResElement Element var benchResElement Element
@ -219,6 +270,15 @@ func BenchmarkSquareELEMENT(b *testing.B) {
} }
} }
func BenchmarkSqrtELEMENT(b *testing.B) {
var a Element
a.SetRandom()
b.ResetTimer()
for i := 0; i < b.N; i++ {
benchResElement.Sqrt(&a)
}
}
func BenchmarkMulAssignELEMENT(b *testing.B) { func BenchmarkMulAssignELEMENT(b *testing.B) {
x := Element{ x := Element{
1997599621687373223, 1997599621687373223,
@ -232,3 +292,183 @@ func BenchmarkMulAssignELEMENT(b *testing.B) {
benchResElement.MulAssign(&x) benchResElement.MulAssign(&x)
} }
} }
func BenchmarkMulAssignASMELEMENT(b *testing.B) {
x := Element{
1997599621687373223,
6052339484930628067,
10108755138030829701,
150537098327114917,
}
benchResElement.SetOne()
b.ResetTimer()
for i := 0; i < b.N; i++ {
MulAssignElement(&benchResElement, &x)
}
}
func TestELEMENTAsm(t *testing.T) {
// ensure ASM implementations matches the ones using math/bits
modulus, _ := new(big.Int).SetString("21888242871839275222246405745257275088548364400416034343698204186575808495617", 10)
for i := 0; i < 500; i++ {
// sample 2 random big int
b1, _ := rand.Int(rand.Reader, modulus)
b2, _ := rand.Int(rand.Reader, modulus)
// e1 = mont(b1), e2 = mont(b2)
var e1, e2, eTestMul, eMulAssign, eSquare, eTestSquare Element
e1.SetBigInt(b1)
e2.SetBigInt(b2)
eTestMul = e1
eTestMul.testMulAssign(&e2)
eMulAssign = e1
eMulAssign.MulAssign(&e2)
if !eTestMul.Equal(&eMulAssign) {
t.Fatal("inconsisntencies between MulAssign and testMulAssign --> check if MulAssign is calling ASM implementaiton on amd64")
}
// square
eSquare.Square(&e1)
eTestSquare.testSquare(&e1)
if !eTestSquare.Equal(&eSquare) {
t.Fatal("inconsisntencies between Square and testSquare --> check if Square is calling ASM implementaiton on amd64")
}
}
}
// this is here for consistency purposes, to ensure MulAssign on AMD64 using asm implementation gives consistent results
func (z *Element) testMulAssign(x *Element) *Element {
var t [4]uint64
var c [3]uint64
{
// round 0
v := z[0]
c[1], c[0] = bits.Mul64(v, x[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd1(v, x[1], c[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd1(v, x[2], c[1])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd1(v, x[3], c[1])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 1
v := z[1]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 2
v := z[2]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], t[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], t[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
t[3], t[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
{
// round 3
v := z[3]
c[1], c[0] = madd1(v, x[0], t[0])
m := c[0] * 14042775128853446655
c[2] = madd0(m, 4891460686036598785, c[0])
c[1], c[0] = madd2(v, x[1], c[1], t[1])
c[2], z[0] = madd2(m, 2896914383306846353, c[2], c[0])
c[1], c[0] = madd2(v, x[2], c[1], t[2])
c[2], z[1] = madd2(m, 13281191951274694749, c[2], c[0])
c[1], c[0] = madd2(v, x[3], c[1], t[3])
z[3], z[2] = madd3(m, 3486998266802970665, c[0], c[2], c[1])
}
// if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
}
return z
}
// this is here for consistency purposes, to ensure Square on AMD64 using asm implementation gives consistent results
func (z *Element) testSquare(x *Element) *Element {
var p [4]uint64
var u, v uint64
{
// round 0
u, p[0] = bits.Mul64(x[0], x[0])
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
var t uint64
t, u, v = madd1sb(x[0], x[1], u)
C, p[0] = madd2(m, 2896914383306846353, v, C)
t, u, v = madd1s(x[0], x[2], t, u)
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd1s(x[0], x[3], t, u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 1
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
u, v = madd1(x[1], x[1], p[1])
C, p[0] = madd2(m, 2896914383306846353, v, C)
var t uint64
t, u, v = madd2sb(x[1], x[2], p[2], u)
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd2s(x[1], x[3], p[3], t, u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 2
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
C, p[0] = madd2(m, 2896914383306846353, p[1], C)
u, v = madd1(x[2], x[2], p[2])
C, p[1] = madd2(m, 13281191951274694749, v, C)
_, u, v = madd2sb(x[2], x[3], p[3], u)
p[3], p[2] = madd3(m, 3486998266802970665, v, C, u)
}
{
// round 3
m := p[0] * 14042775128853446655
C := madd0(m, 4891460686036598785, p[0])
C, z[0] = madd2(m, 2896914383306846353, p[1], C)
C, z[1] = madd2(m, 13281191951274694749, p[2], C)
u, v = madd1(x[3], x[3], p[3])
z[3], z[2] = madd3(m, 3486998266802970665, v, C, u)
}
// if z > q --> z -= q
// note: this is NOT constant time
if !(z[3] < 3486998266802970665 || (z[3] == 3486998266802970665 && (z[2] < 13281191951274694749 || (z[2] == 13281191951274694749 && (z[1] < 2896914383306846353 || (z[1] == 2896914383306846353 && (z[0] < 4891460686036598785))))))) {
var b uint64
z[0], b = bits.Sub64(z[0], 4891460686036598785, 0)
z[1], b = bits.Sub64(z[1], 2896914383306846353, b)
z[2], b = bits.Sub64(z[2], 13281191951274694749, b)
z[3], _ = bits.Sub64(z[3], 3486998266802970665, b)
}
return z
}

+ 1
- 0
go.mod

@ -7,4 +7,5 @@ require (
github.com/ethereum/go-ethereum v1.8.27 github.com/ethereum/go-ethereum v1.8.27
github.com/stretchr/testify v1.3.0 github.com/stretchr/testify v1.3.0
golang.org/x/crypto v0.0.0-20190621222207-cc06ce4a13d4 golang.org/x/crypto v0.0.0-20190621222207-cc06ce4a13d4
golang.org/x/sys v0.0.0-20190412213103-97732733099d
) )

Loading…
Cancel
Save