diff --git a/prover/gextra.go b/prover/gextra.go
index 028c191..37a4ca0 100644
--- a/prover/gextra.go
+++ b/prover/gextra.go
@@ -10,6 +10,7 @@
 type TableG1 struct{
     data []*bn256.G1
 }
+
 func (t TableG1) GetData() []*bn256.G1 {
     return t.data
 }
@@ -56,7 +57,7 @@ func (t *TableG1) NewTableG1(a []*bn256.G1, gsize int){
 }
 
 // Multiply scalar by precomputed table of G1 elements
-func (t *TableG1) MulTableG1(k []*big.Int, gsize int) *bn256.G1 {
+func (t *TableG1) MulTableG1(k []*big.Int, Q_prev *bn256.G1, gsize int) *bn256.G1 {
     // We need at least gsize elements. If not enough, fill with 0
     k_ext := make([]*big.Int, 0)
     k_ext = append(k_ext, k...)
@@ -76,13 +77,17 @@ func (t *TableG1) MulTableG1(k []*big.Int, gsize int) *bn256.G1 {
         if b != 0 {
             // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
             Q.Add(Q, t.data[b])
-        }
+        }
+    }
+    if Q_prev != nil {
+        return Q.Add(Q,Q_prev)
+    } else {
+        return Q
     }
-    return Q
 }
 
 // Multiply scalar by precomputed table of G1 elements without intermediate doubling
-func MulTableNoDoubleG1(t []TableG1, k []*big.Int, gsize int) *bn256.G1 {
+func MulTableNoDoubleG1(t []TableG1, k []*big.Int, Q_prev *bn256.G1, gsize int) *bn256.G1 {
     // We need at least gsize elements. If not enough, fill with 0
     min_nelems := len(t) * gsize
     k_ext := make([]*big.Int, 0)
@@ -107,7 +112,7 @@ func MulTableNoDoubleG1(t []TableG1, k []*big.Int, gsize int) *bn256.G1 {
             if b != 0 {
                 // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
                 Q[i].Add(Q[i], t[j].data[b])
-            }
+            }
         }
     }
 
@@ -118,28 +123,37 @@ func MulTableNoDoubleG1(t []TableG1, k []*big.Int, gsize int) *bn256.G1 {
         R = new(bn256.G1).Add(R,R)
         R.Add(R,Q[i-1])
     }
-    return R
+
+    if Q_prev != nil {
+        return R.Add(R,Q_prev)
+    } else {
+        return R
+    }
 }
 
 // Compute tables within function. This solution should still be faster than std multiplication
 // for gsize = 7
-func ScalarMult(a []*bn256.G1, k []*big.Int, gsize int) *bn256.G1 {
+func ScalarMultG1(a []*bn256.G1, k []*big.Int, Q_prev *bn256.G1, gsize int) *bn256.G1 {
     ntables := int((len(a) + gsize - 1) / gsize)
     table := TableG1{}
     Q:= new(bn256.G1).ScalarBaseMult(new(big.Int))
 
     for i:=0; i<ntables-1; i++ {
         table.NewTableG1( a[i*gsize:(i+1)*gsize], gsize)
-        Q.Add(Q, table.MulTableG1(k[i*gsize:(i+1)*gsize], gsize))
+        Q = table.MulTableG1(k[i*gsize:(i+1)*gsize], Q, gsize)
     }
     table.NewTableG1( a[(ntables-1)*gsize:], gsize)
-    Q.Add(Q, table.MulTableG1(k[(ntables-1)*gsize:], gsize))
+    Q = table.MulTableG1(k[(ntables-1)*gsize:], Q, gsize)
 
-    return Q
+    if Q_prev != nil {
+        return Q.Add(Q, Q_prev)
+    } else {
+        return Q
+    }
 }
@@ -146,2 +160,256 @@ func ScalarMult(a []*bn256.G1, k []*big.Int, gsize int) *bn256.G1 {
 
+// Multiply scalar by precomputed table of G1 elements without intermediate doubling. Table is
+// computed within function
+func ScalarMultNoDoubleG1(a []*bn256.G1, k []*big.Int, Q_prev *bn256.G1, gsize int) *bn256.G1 {
+    ntables := int((len(a) + gsize - 1) / gsize)
+    table := TableG1{}
+
+    // We need at least gsize elements. If not enough, fill with 0
+    min_nelems := ntables * gsize
+    k_ext := make([]*big.Int, 0)
+    k_ext = append(k_ext, k...)
+    for i := len(k); i < min_nelems; i++ {
+        k_ext = append(k_ext,new(big.Int).SetUint64(0))
+    }
+    // Init Adders
+    nbitsQ := cryptoConstants.Q.BitLen()
+    Q := make([]*bn256.G1,nbitsQ)
+
+    for i:=0; i< nbitsQ; i++ {
+        Q[i] = new(bn256.G1).ScalarBaseMult(big.NewInt(0))
+    }
+
+    // Perform bitwise addition
+    for j:=0; j < ntables-1; j++ {
+        table.NewTableG1( a[j*gsize:(j+1)*gsize], gsize)
+        msb := getMsb(k_ext[j*gsize:(j+1)*gsize])
+
+        for i := msb-1; i >= 0; i-- {
+            b := getBit(k_ext[j*gsize:(j+1)*gsize],i)
+            if b != 0 {
+                // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+                Q[i].Add(Q[i], table.data[b])
+            }
+        }
+    }
+    table.NewTableG1( a[(ntables-1)*gsize:], gsize)
+    msb := getMsb(k_ext[(ntables-1)*gsize:])
+
+    for i := msb-1; i >= 0; i-- {
+        b := getBit(k_ext[(ntables-1)*gsize:],i)
+        if b != 0 {
+            // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+            Q[i].Add(Q[i], table.data[b])
+        }
+    }
+
+    // Consolidate Addition
+    R := new(bn256.G1).Set(Q[nbitsQ-1])
+    for i:=nbitsQ-1; i>0; i-- {
+        // TODO. bn256 doesn't export double operation. We will need to fork repo and export it
+        R = new(bn256.G1).Add(R,R)
+        R.Add(R,Q[i-1])
+    }
+    if Q_prev != nil {
+        return R.Add(R,Q_prev)
+    } else {
+        return R
+    }
+}
+
+type TableG2 struct{
+    data []*bn256.G2
+}
+
+func (t TableG2) GetData() []*bn256.G2 {
+    return t.data
+}
+
+// Compute table of gsize elements as:
+//    Table[0] = Inf
+//    Table[1] = a[0]
+//    Table[2] = a[1]
+//    Table[3] = a[0]+a[1]
+//    .....
+//    Table[(1<<gsize)-1] = a[0]+a[1]+...+a[gsize-1]
+func (t *TableG2) NewTableG2(a []*bn256.G2, gsize int){
+    // We need at least gsize elements. If not enough, fill with 0
+    a_ext := make([]*bn256.G2, 0)
+    a_ext = append(a_ext, a...)
+
+    for i := len(a); i < gsize; i++ {
+        a_ext = append(a_ext, new(bn256.G2).ScalarBaseMult(big.NewInt(0)))
+    }
+
+    table := make([]*bn256.G2, 1<<gsize)
+    table[0] = new(bn256.G2).ScalarBaseMult(big.NewInt(0))
+    last_pow2 := 1
+    nelems := 0
+    for i := 1; i < 1<<gsize; i++ {
+        table[i] = new(bn256.G2)
+        // if power of 2
+        if i&(i-1) == 0 {
+            last_pow2 = i
+            table[i].Set(a_ext[nelems])
+            nelems++
+        } else {
+            // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+            table[i].Add(table[last_pow2], table[i-last_pow2])
+        }
+    }
+    t.data = table
+}
+
+// Multiply scalar by precomputed table of G2 elements
+func (t *TableG2) MulTableG2(k []*big.Int, Q_prev *bn256.G2, gsize int) *bn256.G2 {
+    // We need at least gsize elements. If not enough, fill with 0
+    k_ext := make([]*big.Int, 0)
+    k_ext = append(k_ext, k...)
+
+    for i := len(k); i < gsize; i++ {
+        k_ext = append(k_ext,new(big.Int).SetUint64(0))
+    }
+
+    Q := new(bn256.G2).ScalarBaseMult(new(big.Int))
+
+    msb := getMsb(k_ext)
+
+    for i := msb-1; i >= 0; i-- {
+        // TODO. bn256 doesn't export double operation. We will need to fork repo and export it
+        Q = new(bn256.G2).Add(Q,Q)
+        b := getBit(k_ext,i)
+        if b != 0 {
+            // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+            Q.Add(Q, t.data[b])
+        }
+    }
+    if Q_prev != nil {
+        return Q.Add(Q, Q_prev)
+    } else {
+        return Q
+    }
+}
+
+// Multiply scalar by precomputed table of G2 elements without intermediate doubling
+func MulTableNoDoubleG2(t []TableG2, k []*big.Int, Q_prev *bn256.G2, gsize int) *bn256.G2 {
+    // We need at least gsize elements. If not enough, fill with 0
+    min_nelems := len(t) * gsize
+    k_ext := make([]*big.Int, 0)
+    k_ext = append(k_ext, k...)
+    for i := len(k); i < min_nelems; i++ {
+        k_ext = append(k_ext,new(big.Int).SetUint64(0))
+    }
+    // Init Adders
+    nbitsQ := cryptoConstants.Q.BitLen()
+    Q := make([]*bn256.G2,nbitsQ)
+
+    for i:=0; i< nbitsQ; i++ {
+        Q[i] = new(bn256.G2).ScalarBaseMult(big.NewInt(0))
+    }
+
+    // Perform bitwise addition
+    for j:=0; j < len(t); j++ {
+        msb := getMsb(k_ext[j*gsize:(j+1)*gsize])
+
+        for i := msb-1; i >= 0; i-- {
+            b := getBit(k_ext[j*gsize:(j+1)*gsize],i)
+            if b != 0 {
+                // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+                Q[i].Add(Q[i], t[j].data[b])
+            }
+        }
+    }
+
+    // Consolidate Addition
+    R := new(bn256.G2).Set(Q[nbitsQ-1])
+    for i:=nbitsQ-1; i>0; i-- {
+        // TODO. bn256 doesn't export double operation. We will need to fork repo and export it
+        R = new(bn256.G2).Add(R,R)
+        R.Add(R,Q[i-1])
+    }
+    if Q_prev != nil {
+        return R.Add(R,Q_prev)
+    } else {
+        return R
+    }
+}
+
+// Compute tables within function. This solution should still be faster than std multiplication
+// for gsize = 7
+func ScalarMultG2(a []*bn256.G2, k []*big.Int, Q_prev *bn256.G2, gsize int) *bn256.G2 {
+    ntables := int((len(a) + gsize - 1) / gsize)
+    table := TableG2{}
+    Q:= new(bn256.G2).ScalarBaseMult(new(big.Int))
+
+    for i:=0; i<ntables-1; i++ {
+        table.NewTableG2( a[i*gsize:(i+1)*gsize], gsize)
+        Q = table.MulTableG2(k[i*gsize:(i+1)*gsize], Q, gsize)
+    }
+    table.NewTableG2( a[(ntables-1)*gsize:], gsize)
+    Q = table.MulTableG2(k[(ntables-1)*gsize:], Q, gsize)
+
+    if Q_prev != nil {
+        return Q.Add(Q, Q_prev)
+    } else {
+        return Q
+    }
+}
+
+// Multiply scalar by precomputed table of G2 elements without intermediate doubling. Table is
+// computed within function
+func ScalarMultNoDoubleG2(a []*bn256.G2, k []*big.Int, Q_prev *bn256.G2, gsize int) *bn256.G2 {
+    ntables := int((len(a) + gsize - 1) / gsize)
+    table := TableG2{}
+
+    // We need at least gsize elements. If not enough, fill with 0
+    min_nelems := ntables * gsize
+    k_ext := make([]*big.Int, 0)
+    k_ext = append(k_ext, k...)
+    for i := len(k); i < min_nelems; i++ {
+        k_ext = append(k_ext,new(big.Int).SetUint64(0))
+    }
+    // Init Adders
+    nbitsQ := cryptoConstants.Q.BitLen()
+    Q := make([]*bn256.G2,nbitsQ)
+
+    for i:=0; i< nbitsQ; i++ {
+        Q[i] = new(bn256.G2).ScalarBaseMult(big.NewInt(0))
+    }
+
+    // Perform bitwise addition
+    for j:=0; j < ntables-1; j++ {
+        table.NewTableG2( a[j*gsize:(j+1)*gsize], gsize)
+        msb := getMsb(k_ext[j*gsize:(j+1)*gsize])
+
+        for i := msb-1; i >= 0; i-- {
+            b := getBit(k_ext[j*gsize:(j+1)*gsize],i)
+            if b != 0 {
+                // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+                Q[i].Add(Q[i], table.data[b])
+            }
+        }
+    }
+    table.NewTableG2( a[(ntables-1)*gsize:], gsize)
+    msb := getMsb(k_ext[(ntables-1)*gsize:])
+
+    for i := msb-1; i >= 0; i-- {
+        b := getBit(k_ext[(ntables-1)*gsize:],i)
+        if b != 0 {
+            // TODO. bn256 doesn't export mixed addition (Jacobian + Affine), which is more efficient.
+            Q[i].Add(Q[i], table.data[b])
+        }
+    }
+
+    // Consolidate Addition
+    R := new(bn256.G2).Set(Q[nbitsQ-1])
+    for i:=nbitsQ-1; i>0; i-- {
+        // TODO. bn256 doesn't export double operation. We will need to fork repo and export it
+        R = new(bn256.G2).Add(R,R)
+        R.Add(R,Q[i-1])
+    }
+    if Q_prev != nil {
+        return R.Add(R,Q_prev)
+    } else {
+        return R
+    }
+}
 
 // Return most significant bit position in a group of Big Integers
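Reviewer note on the windowing scheme above: `NewTableG1`/`NewTableG2` precompute all `2^gsize` subset sums of a group of `gsize` points, so the Strauss-Shamir main loop does one table lookup per scalar bit instead of up to `gsize` separate additions, and the `MulTableNoDouble*` variants go further by accumulating into one bucket per bit position and folding all doublings into a single final pass. The sketch below only illustrates that indexing, with `big.Int` addition standing in for the bn256 group operation; `buildTable`, `mulTable`, and `trailingZeros` are made-up names for this note, not part of the patch.

```go
package main

import (
	"fmt"
	"math/big"
)

// buildTable precomputes all 2^gsize subset sums of a[0..gsize-1]:
// table[b] is the sum of every a[j] whose bit j is set in b.
func buildTable(a []*big.Int, gsize int) []*big.Int {
	table := make([]*big.Int, 1<<gsize)
	table[0] = big.NewInt(0) // identity ("point at infinity")
	for b := 1; b < 1<<gsize; b++ {
		lsb := b & (-b) // lowest set bit of the index
		if b == lsb {
			table[b] = new(big.Int).Set(a[trailingZeros(lsb)]) // a single input point
		} else {
			table[b] = new(big.Int).Add(table[lsb], table[b^lsb]) // one group addition
		}
	}
	return table
}

func trailingZeros(x int) int {
	n := 0
	for x&1 == 0 {
		x >>= 1
		n++
	}
	return n
}

// mulTable computes sum_j k[j]*a[j] Strauss-Shamir style: one doubling per
// scalar bit, and the i-th bits of all gsize scalars form the table index.
func mulTable(table []*big.Int, k []*big.Int, gsize, nbits int) *big.Int {
	q := big.NewInt(0)
	for i := nbits - 1; i >= 0; i-- {
		q.Add(q, q) // doubling step
		b := 0
		for j := 0; j < gsize; j++ {
			b |= int(k[j].Bit(i)) << j
		}
		q.Add(q, table[b]) // one lookup replaces up to gsize additions
	}
	return q
}

func main() {
	a := []*big.Int{big.NewInt(3), big.NewInt(5), big.NewInt(7)}
	k := []*big.Int{big.NewInt(10), big.NewInt(20), big.NewInt(30)}
	table := buildTable(a, 3)
	fmt.Println(mulTable(table, k, 3, 8)) // 3*10 + 5*20 + 7*30 = 340
}
```

The `Q_prev` parameter the patch threads through every function is what lets successive calls chain: each call folds its result onto the previous partial sum, which is how `ScalarMultG1` and the per-goroutine calls in prover.go avoid separate merge additions.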
diff --git a/prover/gextra_test.go b/prover/gextra_test.go
index d609af2..5836e7e 100644
--- a/prover/gextra_test.go
+++ b/prover/gextra_test.go
@@ -11,7 +11,8 @@ import (
 )
 
 const (
-    N = 50000
+    N1 = 500
+    N2 = 500
 )
 
 func randomBigIntArray(n int) []*big.Int{
@@ -33,14 +34,24 @@ func randomG1Array(n int) []*bn256.G1 {
     return arrayG1
 }
 
+func randomG2Array(n int) []*bn256.G2 {
+    arrayG2 := make([]*bn256.G2, n)
+
+    for i:=0; i<n; i++ {
+        _, arrayG2[i], _ = bn256.RandomG2(rand.Reader)
+    }
+    return arrayG2
+}
+
 func TestTableG1(t *testing.T) {
diff --git a/prover/prover.go b/prover/prover.go
--- a/prover/prover.go
+++ b/prover/prover.go
@@ -100,13 +107,27 @@ func GenerateProof(pk *types.Pk, w types.Witness) (*types.Proof, []*big.Int, err
     for _cpu, _ranges := range ranges(len(w), numcpu) { // split 1
         go func(cpu int, ranges [2]int) {
-            for i := ranges[0]; i < ranges[1]; i++ {
-                proofA[cpu].Add(proofA[cpu], new(bn256.G1).ScalarMult(pk.A[i], w[i]))
-                proofB[cpu].Add(proofB[cpu], new(bn256.G2).ScalarMult(pk.B2[i], w[i]))
-                proofBG1[cpu].Add(proofBG1[cpu], new(bn256.G1).ScalarMult(pk.B1[i], w[i]))
-                if i >= pk.NPublic+1 {
-                    proofC[cpu].Add(proofC[cpu], new(bn256.G1).ScalarMult(pk.C[i], w[i]))
-                }
-            }
+            proofA[cpu] = ScalarMultNoDoubleG1(pk.A[ranges[0]:ranges[1]],
+                w[ranges[0]:ranges[1]],
+                proofA[cpu],
+                gsize)
+            proofB[cpu] = ScalarMultNoDoubleG2(pk.B2[ranges[0]:ranges[1]],
+                w[ranges[0]:ranges[1]],
+                proofB[cpu],
+                gsize)
+            proofBG1[cpu] = ScalarMultNoDoubleG1(pk.B1[ranges[0]:ranges[1]],
+                w[ranges[0]:ranges[1]],
+                proofBG1[cpu],
+                gsize)
+            min_lim := pk.NPublic+1
+            if ranges[0] > pk.NPublic+1 {
+                min_lim = ranges[0]
+            }
+            if ranges[1] > pk.NPublic + 1 {
+                proofC[cpu] = ScalarMultNoDoubleG1(pk.C[min_lim:ranges[1]],
+                    w[min_lim:ranges[1]],
+                    proofC[cpu],
+                    gsize)
+            }
             wg1.Done()
         }(_cpu, _ranges)
     }
@@ -121,9 +142,10 @@ func GenerateProof(pk *types.Pk, w types.Witness) (*types.Proof, []*big.Int, err
     for _cpu, _ranges := range ranges(len(h), numcpu) { // split 2
         go func(cpu int, ranges [2]int) {
-            for i := ranges[0]; i < ranges[1]; i++ {
-                proofC[cpu].Add(proofC[cpu], new(bn256.G1).ScalarMult(pk.HExps[i], h[i]))
-            }
+            proofC[cpu] = ScalarMultNoDoubleG1(pk.HExps[ranges[0]:ranges[1]],
+                h[ranges[0]:ranges[1]],
+                proofC[cpu],
+                gsize)
             wg2.Done()
         }(_cpu, _ranges)
     }
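Reviewer note on the prover.go hunks above: `GenerateProof` splits the witness into one contiguous chunk per CPU, each goroutine now makes a single `ScalarMultNoDouble*` call over its slice (threading its running total through `Q_prev`), and the C term is clamped so only indices above `pk.NPublic` contribute. The snippet below just demonstrates that range-splitting and clamping arithmetic; the `ranges` helper here is a guess at the behaviour of the prover's unexported helper, not a copy of it.

```go
package main

import "fmt"

// ranges is a stand-in for the prover's helper: split [0,n) into `parts`
// contiguous, near-equal chunks (assumed behaviour, not the actual code).
func ranges(n, parts int) [][2]int {
	out := make([][2]int, parts)
	chunk := (n + parts - 1) / parts
	for i := 0; i < parts; i++ {
		lo, hi := i*chunk, (i+1)*chunk
		if hi > n {
			hi = n
		}
		out[i] = [2]int{lo, hi}
	}
	return out
}

func main() {
	nPublic := 3 // plays the role of pk.NPublic in the diff
	for cpu, r := range ranges(10, 4) {
		// A, B and B1 accumulate over the whole chunk; C must skip the
		// public inputs, so its lower bound is clamped to nPublic+1.
		minLim := nPublic + 1
		if r[0] > minLim {
			minLim = r[0]
		}
		if r[1] > nPublic+1 {
			fmt.Printf("cpu %d: A/B over [%d,%d), C over [%d,%d)\n", cpu, r[0], r[1], minLim, r[1])
		} else {
			fmt.Printf("cpu %d: A/B over [%d,%d), C empty\n", cpu, r[0], r[1])
		}
	}
}
```

Passing `proofA[cpu]` back in as `Q_prev` means each goroutine's previous partial result is folded in by the multiplication itself, so the final consolidation only has to add `numcpu` partial proofs together.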
testCircuitGenerateProof(t, "circuit10k") // 10000 constraints - // testCircuitGenerateProof(t, "circuit20k") // 20000 constraints + testCircuitGenerateProof(t, "circuit10k") // 10000 constraints + testCircuitGenerateProof(t, "circuit20k") // 20000 constraints } func testCircuitGenerateProof(t *testing.T, circuit string) { diff --git a/prover/tables.md b/prover/tables.md index bd4ec4a..12c4810 100644 --- a/prover/tables.md +++ b/prover/tables.md @@ -10,12 +10,12 @@ Both options can be combined. In the following table, we show the results of using the naive method, Srauss-Shamir and Strauss-Shamir + No doubling. These last two options are repeated for different table grouping order. -There are 5000 G1 Elliptical Curve Points, and the scalars are 254 bits (BN256 curve). +There are 50000 G1 Elliptical Curve Points, and the scalars are 254 bits (BN256 curve). There may be some concern on the additional size of the tables since they need to be loaded into a smartphone during the proof, and the time required to load these tables may exceed the benefits. If this is a problem, another althernative is to compute the tables during the proof itself. Depending on the Group Size, timing may be better than the naive approach. -| Algorithm | GS / Time | +| Algorithm (G1)| GS / Time | |---|---|---| | Naive | 6.63s | | | | | | | | | Strauss | 13.16s | 9.033s | 6.95s | 5.61s | 4.91s | 4.26s | 3.88s | 3.54 s | 1.44 s | @@ -23,3 +23,27 @@ There may be some concern on the additional size of the tables since they need t | No Doubling | 3.74s | 3.00s | 2.38s | 1.96s | 1.79s | 1.54s | 1.50s | 1.44s| | No Doubling + Table Computation | 6.83s | 5.1s | 4.16s | 3.52s| 3.22s | 3.21s | 3.57s | 4.56s | +There are 5000 G2 Elliptical Curve Points, and the scalars are 254 bits (BN256 curve). + +| Algorithm (G2)| GS / Time | +|---|---|---| +| Naive | 3.55s | | | | | | | | +| Strauss | 3.55s | 2.54s | 1.96s | 1.58s | 1.38s | 1.20s | 1.03s | 937ms | +| Strauss + Table Computation | 3.59s | 2.58s | 2.04s | 1.71s | 1.51s | 1.46s | 1.51s | 1.82s | +| No Doubling | 1.49s | 1.16s | 952ms | 719ms | 661ms | 548ms | 506ms| 444ms | +| No Doubling + Table Computation | 1.55s | 1.21s | 984ms | 841ms | 826ms | 847ms | 1.03s | 1.39s | + +| GS | Extra Disk Space per Constraint (G1)| +|----|--------| +| 2 | 64 B | +| 3 | 106 B | +| 4 | 192 B | +| 5 | 346 B | +| 6 | 618 B | +| 7 | 1106 B | +| 8 | 1984 B | +| 9 | 3577 B | +| N | 2^(N+6)/N - 64 B | + +Extra disk space per constraint in G2 is twice the requirements for G1 +