Browse Source

feat: Plonk optimizations (#39)

* Fixed poseidon hash TODO in fri/fri.go

* optimized goldilocks

* Another optimization

* Down to 16 million

* Finished TODOs
main
puma314 2 years ago
committed by GitHub
parent
commit
89b5a01e4b
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 171 additions and 82 deletions
  1. +4
    -1
      .gitignore
  2. +7
    -13
      README.md
  3. +2
    -2
      benchmark.go
  4. +25
    -45
      fri/fri.go
  5. +80
    -0
      fri/fri_utils.go
  6. +3
    -4
      goldilocks/base.go
  7. +0
    -2
      plonk/plonk.go
  8. +10
    -13
      poseidon/bn254.go
  9. +40
    -2
      types/types.go

+ 4
- 1
.gitignore

@ -15,4 +15,7 @@
# vendor/
gnark-ed25519
gnark.pprof
gnark.pprof
# Output from pprof visualization
verifier.png

+ 7
- 13
README.md

@ -19,20 +19,14 @@ To run the benchmark,
go run benchmark.go
```
Here are relevant numbers from a benchmark run on an M1 Max with 10 CPU cores.
## Profiling
First run the benchmark with profiling turned on
```
11:04:08 INF compiling circuit
11:04:08 INF parsed circuit inputs nbPublic=0 nbSecret=0
11:12:30 INF building constraint system nbConstraints=6740784
Generating witness 2023-03-28 11:12:42.702566 -0700 PDT m=+514.333410376
Running circuit setup 2023-03-28 11:12:42.702666 -0700 PDT m=+514.333509834
Creating proof 2023-03-28 11:18:58.881518 -0700 PDT m=+890.519971543
11:18:59 DBG constraint system solver done backend=groth16 nbConstraints=6740784 took=675.361625
11:19:10 DBG prover done backend=groth16 curve=bn254 nbConstraints=6740784 took=10512.664584
Verifying proof 2023-03-28 11:19:10.169636 -0700 PDT m=+901.808314709
11:19:10 DBG verifier done backend=groth16 curve=bn254 took=6.288792
bn254 2023-03-28 11:19:10.175992 -0700 PDT m=+901.814670834
go run benchmark.go -profile
```
The circuit can be significantly optimized by using more efficient arithmetic for Goldilocks, among other things.
Then use the following command to generate a visualization of the pprof
```
go tool pprof --png gnark.pprof > verifier.png
```

+ 2
- 2
benchmark.go

@ -282,8 +282,8 @@ func groth16Proof(r1cs constraint.ConstraintSystem, circuitName string, dummy bo
}
func main() {
plonky2Circuit := flag.String("plonky2-circuit", "", "plonky2 circuit to benchmark")
proofSystem := flag.String("proof-system", "groth16", "proof system to benchmark")
plonky2Circuit := flag.String("plonky2-circuit", "step", "plonky2 circuit to benchmark")
proofSystem := flag.String("proof-system", "plonk", "proof system to benchmark")
profileCircuit := flag.Bool("profile", true, "profile the circuit")
dummySetup := flag.Bool("dummy", true, "use dummy setup")
saveArtifacts := flag.Bool("save", false, "save circuit artifacts")

+ 25
- 45
fri/fri.go

@ -106,11 +106,15 @@ func (f *Chip) verifyMerkleProofToCapWithCapIndex(
currentDigest := f.poseidonBN254Chip.HashOrNoop(leafData)
for i, sibling := range proof.Siblings {
bit := leafIndexBits[i]
// TODO: Don't need to do two hashes by using a trick that the plonky2 verifier circuit does
// https://github.com/mir-protocol/plonky2/blob/973624f12d2d12d74422b3ea051358b9eaacb050/plonky2/src/gates/poseidon.rs#L298
leftHash := f.poseidonBN254Chip.TwoToOne(sibling, currentDigest)
rightHash := f.poseidonBN254Chip.TwoToOne(currentDigest, sibling)
currentDigest = f.api.Select(bit, leftHash, rightHash)
var inputs poseidon.BN254State
inputs[0] = frontend.Variable(0)
inputs[1] = frontend.Variable(0)
inputs[2] = f.api.Select(bit, sibling, currentDigest)
inputs[3] = f.api.Select(bit, currentDigest, sibling)
state := f.poseidonBN254Chip.Poseidon(inputs)
currentDigest = state[0]
}
// We assume that the cap_height is 4. Create two levels of the Lookup2 circuit
@ -152,29 +156,6 @@ func (f *Chip) verifyInitialProof(xIndexBits []frontend.Variable, proof *variabl
}
}
// We decompose FRI query indices into bits without verifying that the decomposition given by
// the prover is the canonical one. In particular, if `x_index < 2^field_bits - p`, then the
// prover could supply the binary encoding of either `x_index` or `x_index + p`, since they are
// congruent mod `p`. However, this only occurs with probability
//
//	p_ambiguous = (2^field_bits - p) / p
//
// which is small for the field that we use in practice.
//
// In particular, the soundness error of one FRI query is roughly the codeword rate, which
// is much larger than this ambiguous-element probability given any reasonable parameters.
// Thus ambiguous elements contribute a negligible amount to soundness error.
//
// Here we compare the probabilities as a sanity check, to verify the claim above.
func (f *Chip) assertNoncanonicalIndicesOK() {
	modulus := goldilocks.Modulus().Uint64()
	// Count of uint64 values at or above the Goldilocks modulus, i.e. the
	// elements that admit two distinct 64-bit encodings.
	numAmbiguous := uint64(math.MaxUint64) - modulus + 1
	pAmbiguous := float64(numAmbiguous) / float64(modulus)
	// TODO: Check that pAmbiguous value is the same as the one in plonky2 verifier
	queryError := f.friParams.Config.Rate()
	if pAmbiguous >= queryError*1e-5 {
		panic("A non-negligible portion of field elements are in the range that permits non-canonical encodings. Need to do more analysis or enforce canonical encodings.")
	}
}
func (f *Chip) expFromBitsConstBase(
base goldilocks.Element,
exponentBits []frontend.Variable,
@ -209,7 +190,7 @@ func (f *Chip) calculateSubgroupX(
) gl.Variable {
// Compute x from its index
// `subgroup_x` is `subgroup[x_index]`, i.e., the actual field element in the domain.
// TODO - Make these as global values
// OPTIMIZE - Make these as global values
g := gl.NewVariable(gl.MULTIPLICATIVE_GROUP_GENERATOR.Uint64())
base := gl.PrimitiveRootOfUnity(nLog)
@ -343,7 +324,7 @@ func (f *Chip) computeEvaluation(
// The evaluation vector needs to be reordered first. Permute the evals array such that each
// element's new index is the bit reverse of it's original index.
// TODO: Optimization - Since the size of the evals array should be constant (e.g. 2^arityBits),
// OPTIMIZE - Since the size of the evals array should be constant (e.g. 2^arityBits),
// we can just hard code the permutation.
permutedEvals := make([]gl.QuadraticExtensionVariable, len(evals))
for i := uint8(0); i < uint8(len(evals)); i++ {
@ -363,14 +344,14 @@ func (f *Chip) computeEvaluation(
xPoints := make([]gl.QuadraticExtensionVariable, len(evals))
yPoints := permutedEvals
// TODO: Make g_F a constant
// OPTIMIZE: Make g_F a constant
g_F := gl.NewVariable(g.Uint64()).ToQuadraticExtension()
xPoints[0] = gl.QuadraticExtensionVariable{cosetStart, gl.Zero()}
for i := 1; i < len(evals); i++ {
xPoints[i] = f.gl.MulExtension(xPoints[i-1], g_F)
}
// TODO: This is n^2. Is there a way to do this better?
// OPTIMIZE: This is n^2. Is there a way to do this better?
// Compute the barycentric weights
barycentricWeights := make([]gl.QuadraticExtensionVariable, len(xPoints))
for i := 0; i < len(xPoints); i++ {
@ -385,7 +366,7 @@ func (f *Chip) computeEvaluation(
}
}
// Take the inverse of the barycentric weights
// TODO: Can provide a witness to this value
// OPTIMIZE: Can provide a witness to this value
barycentricWeights[i] = f.gl.InverseExtension(barycentricWeights[i])
}
@ -403,7 +384,9 @@ func (f *Chip) verifyQueryRound(
nLog uint64,
roundProof *variables.FriQueryRound,
) {
f.assertNoncanonicalIndicesOK()
// Note assertNoncanonicalIndicesOK does not add any constraints, it's a sanity check on the config
assertNoncanonicalIndicesOK(*f.friParams)
xIndex = f.gl.Reduce(xIndex)
xIndexBits := f.api.ToBinary(xIndex.Limb, 64)[0 : f.friParams.DegreeBits+f.friParams.Config.RateBits]
capIndexBits := xIndexBits[len(xIndexBits)-int(f.friParams.Config.CapHeight):]
@ -511,21 +494,18 @@ func (f *Chip) VerifyFriProof(
initialMerkleCaps []variables.FriMerkleCap,
friProof *variables.FriProof,
) {
// TODO: Check fri config
/* if let Some(max_arity_bits) = params.max_arity_bits() {
self.check_recursion_config::<C>(max_arity_bits);
}
debug_assert_eq!(
params.final_poly_len(),
proof.final_poly.len(),
"Final polynomial has wrong degree."
); */
// Not adding any constraints but a sanity check on the proof shape matching the friParams (constant).
validateFriProofShape(friProof, instance, f.friParams)
// Check POW
f.assertLeadingZeros(friChallenges.FriPowResponse, f.friParams.Config)
// Check that parameters are coherent. Not adding any constraints but a sanity check
// on the proof shape matching the friParams.
if int(f.friParams.Config.NumQueryRounds) != len(friProof.QueryRoundProofs) {
panic("Number of query rounds does not match config.")
}
precomputedReducedEvals := f.fromOpeningsAndAlpha(&openings, friChallenges.FriAlpha)
// Size of the LDE domain.

+ 80
- 0
fri/fri_utils.go

@ -1,7 +1,11 @@
package fri
import (
"math"
"github.com/consensys/gnark-crypto/field/goldilocks"
"github.com/succinctlabs/gnark-plonky2-verifier/types"
"github.com/succinctlabs/gnark-plonky2-verifier/variables"
)
type PolynomialInfo struct {
@ -146,3 +150,79 @@ func friAllPolys(c *types.CommonCircuitData) []PolynomialInfo {
return returnArr
}
// Sanity check (adds no circuit constraints) that the fraction of Goldilocks
// elements admitting a non-canonical 64-bit encoding is negligible relative to
// the FRI query soundness error. 1-1 port of assert_noncanonical_indices_ok
// from fri::recursive_verifier in plonky2; panics if the check fails.
func assertNoncanonicalIndicesOK(friParams types.FriParams) {
	fieldOrder := goldilocks.Modulus().Uint64()
	ambiguousElems := uint64(math.MaxUint64) - fieldOrder + 1
	queryError := friParams.Config.Rate()
	ambiguousProb := float64(ambiguousElems) / float64(fieldOrder)
	if ambiguousProb >= queryError*1e-5 {
		panic("A non-negligible portion of field elements are in the range that permits non-canonical encodings. Need to do more analysis or enforce canonical encodings.")
	}
}
// validateFriProofShape checks that the shapes of the proof variable agree with
// the given (constant) FriParams. It adds no circuit constraints — it panics on
// any mismatch. It's a 1-1 port of validate_fri_proof_shape from
// fri::validate_shape in plonky2.
func validateFriProofShape(proof *variables.FriProof, instance InstanceInfo, params *types.FriParams) {
	// Number of extra salt elements per leaf when an oracle is blinded (hiding).
	const saltSize = 4

	commitPhaseMerkleCaps := proof.CommitPhaseMerkleCaps
	queryRoundProofs := proof.QueryRoundProofs
	finalPoly := proof.FinalPoly

	capHeight := params.Config.CapHeight
	// NOTE: renamed loop variable from `cap` — it shadowed the Go builtin cap().
	for _, merkleCap := range commitPhaseMerkleCaps {
		if 1<<capHeight != len(merkleCap) {
			panic("config cap_height does not match commit_phase_merkle_caps")
		}
	}

	for _, queryRound := range queryRoundProofs {
		initialTreesProof := queryRound.InitialTreesProof
		steps := queryRound.Steps
		if len(initialTreesProof.EvalsProofs) != len(instance.Oracles) {
			panic("eval proofs length is not equal to instance oracles length")
		}

		for i, evalProof := range initialTreesProof.EvalsProofs {
			leaf := evalProof.Elements
			merkleProof := evalProof.MerkleProof
			oracle := instance.Oracles[i]
			// Blinded oracles carry saltSize extra elements per leaf when hiding is on.
			salt := 0
			if oracle.Blinding && params.Hiding {
				salt = saltSize
			}

			if len(leaf) != (int(oracle.NumPolys) + salt) {
				panic("eval proof leaf length doesn't match oracle info")
			}

			// Merkle path length plus cap height must cover the full LDE domain depth.
			if len(merkleProof.Siblings)+int(capHeight) != params.LdeBits() {
				panic("length of merkle proof + capHeight doesn't match lde_bits from params")
			}
		}

		if len(steps) != len(params.ReductionArityBits) {
			panic("length of steps != params.reduction_arity_bits")
		}

		// Each FRI reduction step shrinks the codeword by its arity.
		codewordLenBits := params.LdeBits()
		for i, step := range steps {
			evals := step.Evals
			merkleProof := step.MerkleProof
			arityBits := params.ReductionArityBits[i]
			arity := 1 << arityBits
			codewordLenBits -= int(arityBits)

			if len(evals) != arity {
				panic("len evals doesn't match arity")
			}

			if len(merkleProof.Siblings)+int(capHeight) != codewordLenBits {
				panic("len merkleProof doesn't match codewordLenBits")
			}
		}
	}

	if len(finalPoly.Coeffs) != params.FinalPolyLen() {
		panic("len finalPoly doesn't match params FinalPolyLen")
	}
}

+ 3
- 4
goldilocks/base.go

@ -131,9 +131,8 @@ func (p *Chip) MulAdd(a Variable, b Variable, c Variable) Variable {
quotient := NewVariable(result[0])
remainder := NewVariable(result[1])
lhs := p.api.Mul(a.Limb, b.Limb)
lhs = p.api.Add(lhs, c.Limb)
rhs := p.api.Add(p.api.Mul(quotient.Limb, MODULUS), remainder.Limb)
lhs := p.api.MulAcc(c.Limb, a.Limb, b.Limb)
rhs := p.api.MulAcc(remainder.Limb, MODULUS, quotient.Limb)
p.api.AssertIsEqual(lhs, rhs)
p.RangeCheck(quotient)
@ -144,7 +143,7 @@ func (p *Chip) MulAdd(a Variable, b Variable, c Variable) Variable {
// Multiplies two field elements and adds a third field element, computing x * y + z within the
// Goldilocks field without reducing.
func (p *Chip) MulAddNoReduce(a Variable, b Variable, c Variable) Variable {
return p.AddNoReduce(p.MulNoReduce(a, b), c)
return NewVariable(p.api.MulAcc(c.Limb, a.Limb, b.Limb))
}
// The hint used to compute MulAdd.

+ 0
- 2
plonk/plonk.go

@ -25,8 +25,6 @@ type PlonkChip struct {
}
func NewPlonkChip(api frontend.API, commonData types.CommonCircuitData) *PlonkChip {
// TODO: Should degreeBits be verified that it fits within the field and that degree is within uint64?
// Create the gates based on commonData GateIds
createdGates := []gates.Gate{}
for _, gateId := range commonData.GateIds {

+ 10
- 13
poseidon/bn254.go

@ -47,6 +47,9 @@ func (c *BN254Chip) HashNoPad(input []gl.Variable) BN254HashOut {
frontend.Variable(0),
}
two_to_32 := new(big.Int).SetInt64(1 << 32)
two_to_64 := new(big.Int).Mul(two_to_32, two_to_32)
for i := 0; i < len(input); i += BN254_SPONGE_RATE * 3 {
endI := c.min(len(input), i+BN254_SPONGE_RATE*3)
rateChunk := input[i:endI]
@ -54,13 +57,12 @@ func (c *BN254Chip) HashNoPad(input []gl.Variable) BN254HashOut {
endJ := c.min(len(rateChunk), j+3)
bn254Chunk := rateChunk[j:endJ]
bits := []frontend.Variable{}
inter := frontend.Variable(0)
for k := 0; k < len(bn254Chunk); k++ {
bn254Chunk[k] = c.gl.Reduce(bn254Chunk[k])
bits = append(bits, c.api.ToBinary(bn254Chunk[k].Limb, 64)...)
inter = c.api.MulAcc(inter, bn254Chunk[k].Limb, new(big.Int).Exp(two_to_64, big.NewInt(int64(k)), nil))
}
state[stateIdx+1] = c.api.FromBinary(bits...)
state[stateIdx+1] = inter
}
state = c.Poseidon(state)
@ -75,7 +77,7 @@ func (c *BN254Chip) HashOrNoop(input []gl.Variable) BN254HashOut {
alpha := new(big.Int).SetInt64(1 << 32)
for i, inputElement := range input {
returnVal = c.api.Add(returnVal, c.api.Mul(inputElement, alpha.Exp(alpha, big.NewInt(int64(i)), nil)))
returnVal = c.api.MulAcc(returnVal, inputElement, alpha.Exp(alpha, big.NewInt(int64(i)), nil))
}
return BN254HashOut(returnVal)
@ -145,16 +147,13 @@ func (c *BN254Chip) partialRounds(state BN254State) BN254State {
state[0] = c.exp5(state[0])
state[0] = c.api.Add(state[0], cConstants[(BN254_FULL_ROUNDS/2+1)*BN254_SPONGE_WIDTH+i])
var mul frontend.Variable
newState0 := frontend.Variable(0)
for j := 0; j < BN254_SPONGE_WIDTH; j++ {
mul = c.api.Mul(sConstants[(BN254_SPONGE_WIDTH*2-1)*i+j], state[j])
newState0 = c.api.Add(newState0, mul)
newState0 = c.api.MulAcc(newState0, sConstants[(BN254_SPONGE_WIDTH*2-1)*i+j], state[j])
}
for k := 1; k < BN254_SPONGE_WIDTH; k++ {
mul = c.api.Mul(state[0], sConstants[(BN254_SPONGE_WIDTH*2-1)*i+BN254_SPONGE_WIDTH+k-1])
state[k] = c.api.Add(state[k], mul)
state[k] = c.api.MulAcc(state[k], state[0], sConstants[(BN254_SPONGE_WIDTH*2-1)*i+BN254_SPONGE_WIDTH+k-1])
}
state[0] = newState0
}
@ -186,7 +185,6 @@ func (c *BN254Chip) exp5state(state BN254State) BN254State {
}
func (c *BN254Chip) mix(state_ BN254State, constantMatrix [][]*big.Int) BN254State {
var mul frontend.Variable
var result BN254State
for i := 0; i < BN254_SPONGE_WIDTH; i++ {
@ -195,8 +193,7 @@ func (c *BN254Chip) mix(state_ BN254State, constantMatrix [][]*big.Int) BN254Sta
for i := 0; i < BN254_SPONGE_WIDTH; i++ {
for j := 0; j < BN254_SPONGE_WIDTH; j++ {
mul = c.api.Mul(constantMatrix[j][i], state_[j])
result[i] = c.api.Add(result[i], mul)
result[i] = c.api.MulAcc(result[i], constantMatrix[j][i], state_[j])
}
}

+ 40
- 2
types/types.go

@ -1,13 +1,17 @@
package types
import "github.com/succinctlabs/gnark-plonky2-verifier/plonk/gates"
import (
"github.com/succinctlabs/gnark-plonky2-verifier/plonk/gates"
)
// FriConfig holds the constant FRI configuration parameters, mirroring the
// plonky2 FriConfig struct.
type FriConfig struct {
	// RateBits is the log2 blowup factor of the LDE: LdeBits = DegreeBits + RateBits.
	RateBits uint64
	// CapHeight is the log2 of the Merkle cap size; caps contain 1 << CapHeight entries.
	CapHeight uint64
	// ProofOfWorkBits parameterizes the FRI proof-of-work (grinding) check —
	// presumably the required number of leading zeros of the PoW response;
	// confirm against plonky2's FriConfig.
	ProofOfWorkBits uint64
	// NumQueryRounds is the number of FRI query rounds; it must equal the
	// number of query round proofs carried by a FriProof.
	NumQueryRounds uint64
	// Note that we do not need `reduction_strategy` of type FriReductionStrategy as the plonky2 FriConfig has.
	// reduction_strategy is only used for computing `reduction_arity_bits`, which is serialized in the
	// CommonCircuitData.
}
func (fc *FriConfig) Rate() float64 {
@ -21,6 +25,40 @@ type FriParams struct {
ReductionArityBits []uint64
}
// TotalArities returns the sum of all entries of ReductionArityBits, i.e. the
// total log2 degree reduction across all FRI rounds.
func (p *FriParams) TotalArities() int {
	sum := 0
	for _, bits := range p.ReductionArityBits {
		sum += int(bits)
	}
	return sum
}
// MaxArityBits returns the largest entry of ReductionArityBits, or 0 when
// there are no reduction steps.
func (p *FriParams) MaxArityBits() int {
	largest := 0
	for _, bits := range p.ReductionArityBits {
		if v := int(bits); v > largest {
			largest = v
		}
	}
	return largest
}
// LdeBits returns the log2 size of the LDE domain: DegreeBits + RateBits.
func (p *FriParams) LdeBits() int {
	return int(p.DegreeBits) + int(p.Config.RateBits)
}
// LdeSize returns the number of elements in the LDE domain, 2^LdeBits.
func (p *FriParams) LdeSize() int {
	size := 1 << p.LdeBits()
	return size
}
// FinalPolyBits returns the log2 degree of the final FRI polynomial:
// DegreeBits minus the total arity reduction over all rounds.
func (p *FriParams) FinalPolyBits() int {
	degreeBits := int(p.DegreeBits)
	return degreeBits - p.TotalArities()
}
// FinalPolyLen returns the number of coefficients of the final FRI polynomial,
// 2^FinalPolyBits.
func (p *FriParams) FinalPolyLen() int {
	return 1 << p.FinalPolyBits()
}
type CircuitConfig struct {
NumWires uint64
NumRoutedWires uint64

Loading…
Cancel
Save