arnaucube
/
hermez-node
mirror of https://github.com/arnaucube/hermez-node.git


								package coordinator


								import (

									"context"

									"fmt"

									"os"

									"strings"

									"sync"

									"time"


									ethCommon "github.com/ethereum/go-ethereum/common"

									"github.com/hermeznetwork/hermez-node/batchbuilder"

									"github.com/hermeznetwork/hermez-node/common"

									"github.com/hermeznetwork/hermez-node/db/historydb"

									"github.com/hermeznetwork/hermez-node/db/l2db"

									"github.com/hermeznetwork/hermez-node/eth"

									"github.com/hermeznetwork/hermez-node/log"

									"github.com/hermeznetwork/hermez-node/prover"

									"github.com/hermeznetwork/hermez-node/synchronizer"

									"github.com/hermeznetwork/hermez-node/txprocessor"

									"github.com/hermeznetwork/hermez-node/txselector"

									"github.com/hermeznetwork/tracerr"

								)


								var (

									errLastL1BatchNotSynced = fmt.Errorf("last L1Batch not synced yet")

								)


								const (

									queueLen         = 16

									longWaitDuration = 999 * time.Hour

									zeroDuration     = 0 * time.Second

								)


								// Config contains the Coordinator configuration

								type Config struct {

									// ForgerAddress is the address under which this coordinator is forging

									ForgerAddress ethCommon.Address

									// ConfirmBlocks is the number of confirmation blocks to wait for sent

									// ethereum transactions before forgetting about them

									ConfirmBlocks int64

									// L1BatchTimeoutPerc is the portion of the range before the L1Batch

									// timeout that will trigger a schedule to forge an L1Batch

									L1BatchTimeoutPerc float64

									// StartSlotBlocksDelay is the number of blocks of delay to wait before

									// starting the pipeline when we reach a slot in which we can forge.

									StartSlotBlocksDelay int64

									// ScheduleBatchBlocksAheadCheck is the number of blocks ahead in which

									// the forger address is checked to be allowed to forge (appart from

									// checking the next block), used to decide when to stop scheduling new

									// batches (by stopping the pipeline).

									// For example, if we are at block 10 and ScheduleBatchBlocksAheadCheck

									// is 5, eventhough at block 11 we canForge, the pipeline will be

									// stopped if we can't forge at block 15.

									// This value should be the expected number of blocks it takes between

									// scheduling a batch and having it mined.

									ScheduleBatchBlocksAheadCheck int64

									// SendBatchBlocksMarginCheck is the number of margin blocks ahead in

									// which the coordinator is also checked to be allowed to forge, appart

									// from the next block; used to decide when to stop sending batches to

									// the smart contract.

									// For example, if we are at block 10 and SendBatchBlocksMarginCheck is

									// 5, eventhough at block 11 we canForge, the batch will be discarded

									// if we can't forge at block 15.

									// This value should be the expected number of blocks it takes between

									// sending a batch and having it mined.

									SendBatchBlocksMarginCheck int64

									// EthClientAttempts is the number of attempts to do an eth client RPC

									// call before giving up

									EthClientAttempts int

									// ForgeRetryInterval is the waiting interval between calls forge a

									// batch after an error

									ForgeRetryInterval time.Duration

									// SyncRetryInterval is the waiting interval between calls to the main

									// handler of a synced block after an error

									SyncRetryInterval time.Duration

									// EthClientAttemptsDelay is delay between attempts do do an eth client

									// RPC call

									EthClientAttemptsDelay time.Duration

									// EthTxResendTimeout is the timeout after which a non-mined ethereum

									// transaction will be resent (reusing the nonce) with a newly

									// calculated gas price

									EthTxResendTimeout time.Duration

									// TxManagerCheckInterval is the waiting interval between receipt

									// checks of ethereum transactions in the TxManager

									TxManagerCheckInterval time.Duration

									// DebugBatchPath if set, specifies the path where batchInfo is stored

									// in JSON in every step/update of the pipeline

									DebugBatchPath string

									Purger         PurgerCfg

									// VerifierIdx is the index of the verifier contract registered in the

									// smart contract

									VerifierIdx       uint8

									TxProcessorConfig txprocessor.Config

								}


								func (c *Config) debugBatchStore(batchInfo *BatchInfo) {

									if c.DebugBatchPath != "" {

										if err := batchInfo.DebugStore(c.DebugBatchPath); err != nil {

											log.Warnw("Error storing debug BatchInfo",

												"path", c.DebugBatchPath, "err", err)

										}

									}

								}


								// Coordinator implements the Coordinator type

								type Coordinator struct {

									// State

									pipelineBatchNum common.BatchNum // batchNum from which we started the pipeline

									provers          []prover.Client

									consts           synchronizer.SCConsts

									vars             synchronizer.SCVariables

									stats            synchronizer.Stats

									started          bool


									cfg Config


									historyDB    *historydb.HistoryDB

									l2DB         *l2db.L2DB

									txSelector   *txselector.TxSelector

									batchBuilder *batchbuilder.BatchBuilder


									msgCh  chan interface{}

									ctx    context.Context

									wg     sync.WaitGroup

									cancel context.CancelFunc


									pipeline *Pipeline


									purger    *Purger

									txManager *TxManager

								}


								// NewCoordinator creates a new Coordinator

								func NewCoordinator(cfg Config,

									historyDB *historydb.HistoryDB,

									l2DB *l2db.L2DB,

									txSelector *txselector.TxSelector,

									batchBuilder *batchbuilder.BatchBuilder,

									serverProofs []prover.Client,

									ethClient eth.ClientInterface,

									scConsts *synchronizer.SCConsts,

									initSCVars *synchronizer.SCVariables,

								) (*Coordinator, error) {

									// nolint reason: hardcoded `1.0`, by design the percentage can't be over 100%

									if cfg.L1BatchTimeoutPerc >= 1.0 { //nolint:gomnd

										return nil, tracerr.Wrap(fmt.Errorf("invalid value for Config.L1BatchTimeoutPerc (%v >= 1.0)",

											cfg.L1BatchTimeoutPerc))

									}

									if cfg.EthClientAttempts < 1 {

										return nil, tracerr.Wrap(fmt.Errorf("invalid value for Config.EthClientAttempts (%v < 1)",

											cfg.EthClientAttempts))

									}


									if cfg.DebugBatchPath != "" {

										if err := os.MkdirAll(cfg.DebugBatchPath, 0744); err != nil {

											return nil, tracerr.Wrap(err)

										}

									}


									purger := Purger{

										cfg:                 cfg.Purger,

										lastPurgeBlock:      0,

										lastPurgeBatch:      0,

										lastInvalidateBlock: 0,

										lastInvalidateBatch: 0,

									}


									ctx, cancel := context.WithCancel(context.Background())

									c := Coordinator{

										pipelineBatchNum: -1,

										provers:          serverProofs,

										consts:           *scConsts,

										vars:             *initSCVars,


										cfg: cfg,


										historyDB:    historyDB,

										l2DB:         l2DB,

										txSelector:   txSelector,

										batchBuilder: batchBuilder,


										purger: &purger,


										msgCh: make(chan interface{}),

										ctx:   ctx,

										// wg

										cancel: cancel,

									}

									ctxTimeout, ctxTimeoutCancel := context.WithTimeout(ctx, 1*time.Second)

									defer ctxTimeoutCancel()

									txManager, err := NewTxManager(ctxTimeout, &cfg, ethClient, l2DB, &c,

										scConsts, initSCVars)

									if err != nil {

										return nil, tracerr.Wrap(err)

									}

									c.txManager = txManager

									// Set Eth LastBlockNum to -1 in stats so that stats.Synced() is

									// guaranteed to return false before it's updated with a real stats

									c.stats.Eth.LastBlock.Num = -1

									return &c, nil

								}


								// TxSelector returns the inner TxSelector

								func (c *Coordinator) TxSelector() *txselector.TxSelector {

									return c.txSelector

								}


								// BatchBuilder returns the inner BatchBuilder

								func (c *Coordinator) BatchBuilder() *batchbuilder.BatchBuilder {

									return c.batchBuilder

								}


								func (c *Coordinator) newPipeline(ctx context.Context) (*Pipeline, error) {

									return NewPipeline(ctx, c.cfg, c.historyDB, c.l2DB, c.txSelector,

										c.batchBuilder, c.purger, c.txManager, c.provers, &c.consts)

								}


								// MsgSyncBlock indicates an update to the Synchronizer stats

								type MsgSyncBlock struct {

									Stats   synchronizer.Stats

									Batches []common.BatchData

									// Vars contains each Smart Contract variables if they are updated, or

									// nil if they haven't changed.

									Vars synchronizer.SCVariablesPtr

								}


								// MsgSyncReorg indicates a reorg

								type MsgSyncReorg struct {

									Stats synchronizer.Stats

									Vars  synchronizer.SCVariablesPtr

								}


								// MsgStopPipeline indicates a signal to reset the pipeline

								type MsgStopPipeline struct {

									Reason string

								}


								// SendMsg is a thread safe method to pass a message to the Coordinator

								func (c *Coordinator) SendMsg(ctx context.Context, msg interface{}) {

									select {

									case c.msgCh <- msg:

									case <-ctx.Done():

									}

								}


								func updateSCVars(vars *synchronizer.SCVariables, update synchronizer.SCVariablesPtr) {

									if update.Rollup != nil {

										vars.Rollup = *update.Rollup

									}

									if update.Auction != nil {

										vars.Auction = *update.Auction

									}

									if update.WDelayer != nil {

										vars.WDelayer = *update.WDelayer

									}

								}


								func (c *Coordinator) syncSCVars(vars synchronizer.SCVariablesPtr) {

									updateSCVars(&c.vars, vars)

								}


								func canForge(auctionConstants *common.AuctionConstants, auctionVars *common.AuctionVariables,

									currentSlot *common.Slot, nextSlot *common.Slot, addr ethCommon.Address, blockNum int64) bool {

									var slot *common.Slot

									if currentSlot.StartBlock <= blockNum && blockNum <= currentSlot.EndBlock {

										slot = currentSlot

									} else if nextSlot.StartBlock <= blockNum && blockNum <= nextSlot.EndBlock {

										slot = nextSlot

									} else {

										log.Warnw("Coordinator: requested blockNum for canForge is outside slot",

											"blockNum", blockNum, "currentSlot", currentSlot,

											"nextSlot", nextSlot,

										)

										return false

									}

									anyoneForge := false

									if !slot.ForgerCommitment &&

										auctionConstants.RelativeBlock(blockNum) >= int64(auctionVars.SlotDeadline) {

										log.Debugw("Coordinator: anyone can forge in the current slot (slotDeadline passed)",

											"block", blockNum)

										anyoneForge = true

									}

									if slot.Forger == addr || anyoneForge {

										return true

									}

									return false

								}


								func (c *Coordinator) canForgeAt(blockNum int64) bool {

									return canForge(&c.consts.Auction, &c.vars.Auction,

										&c.stats.Sync.Auction.CurrentSlot, &c.stats.Sync.Auction.NextSlot,

										c.cfg.ForgerAddress, blockNum)

								}


								func (c *Coordinator) canForge() bool {

									blockNum := c.stats.Eth.LastBlock.Num + 1

									return canForge(&c.consts.Auction, &c.vars.Auction,

										&c.stats.Sync.Auction.CurrentSlot, &c.stats.Sync.Auction.NextSlot,

										c.cfg.ForgerAddress, blockNum)

								}


								func (c *Coordinator) syncStats(ctx context.Context, stats *synchronizer.Stats) error {

									nextBlock := c.stats.Eth.LastBlock.Num + 1

									canForge := c.canForgeAt(nextBlock)

									if c.cfg.ScheduleBatchBlocksAheadCheck != 0 && canForge {

										canForge = c.canForgeAt(nextBlock + c.cfg.ScheduleBatchBlocksAheadCheck)

									}

									if c.pipeline == nil {

										relativeBlock := c.consts.Auction.RelativeBlock(nextBlock)

										if canForge && relativeBlock < c.cfg.StartSlotBlocksDelay {

											log.Debugw("Coordinator: delaying pipeline start due to "+

												"relativeBlock (%v) < cfg.StartSlotBlocksDelay (%v)",

												relativeBlock, c.cfg.StartSlotBlocksDelay)

										} else if canForge {

											log.Infow("Coordinator: forging state begin", "block",

												stats.Eth.LastBlock.Num+1, "batch", stats.Sync.LastBatch)

											batchNum := common.BatchNum(stats.Sync.LastBatch)

											var err error

											if c.pipeline, err = c.newPipeline(ctx); err != nil {

												return tracerr.Wrap(err)

											}

											if err := c.pipeline.Start(batchNum, stats, &c.vars); err != nil {

												c.pipeline = nil

												return tracerr.Wrap(err)

											}

											c.pipelineBatchNum = batchNum

										}

									} else {

										if !canForge {

											log.Infow("Coordinator: forging state end", "block", stats.Eth.LastBlock.Num+1)

											c.pipeline.Stop(c.ctx)

											c.pipeline = nil

										}

									}

									if c.pipeline == nil {

										// Mark invalid in Pool due to forged L2Txs

										// for _, batch := range batches {

										// 	if err := c.l2DB.InvalidateOldNonces(

										// 		idxsNonceFromL2Txs(batch.L2Txs), batch.Batch.BatchNum); err != nil {

										// 		return err

										// 	}

										// }

										if c.purger.CanInvalidate(stats.Sync.LastBlock.Num, stats.Sync.LastBatch) {

											if err := c.txSelector.Reset(common.BatchNum(stats.Sync.LastBatch)); err != nil {

												return tracerr.Wrap(err)

											}

										}

										_, err := c.purger.InvalidateMaybe(c.l2DB, c.txSelector.LocalAccountsDB(),

											stats.Sync.LastBlock.Num, stats.Sync.LastBatch)

										if err != nil {

											return tracerr.Wrap(err)

										}

										_, err = c.purger.PurgeMaybe(c.l2DB, stats.Sync.LastBlock.Num, stats.Sync.LastBatch)

										if err != nil {

											return tracerr.Wrap(err)

										}

									}

									return nil

								}


								func (c *Coordinator) handleMsgSyncBlock(ctx context.Context, msg *MsgSyncBlock) error {

									c.stats = msg.Stats

									c.syncSCVars(msg.Vars)

									c.txManager.SetSyncStatsVars(ctx, &msg.Stats, &msg.Vars)

									if c.pipeline != nil {

										c.pipeline.SetSyncStatsVars(ctx, &msg.Stats, &msg.Vars)

									}

									if !c.stats.Synced() {

										return nil

									}

									return c.syncStats(ctx, &c.stats)

								}


								func (c *Coordinator) handleReorg(ctx context.Context, msg *MsgSyncReorg) error {

									c.stats = msg.Stats

									c.syncSCVars(msg.Vars)

									c.txManager.SetSyncStatsVars(ctx, &msg.Stats, &msg.Vars)

									if c.pipeline != nil {

										c.pipeline.SetSyncStatsVars(ctx, &msg.Stats, &msg.Vars)

									}

									if common.BatchNum(c.stats.Sync.LastBatch) < c.pipelineBatchNum {

										// There's been a reorg and the batch from which the pipeline

										// was started was in a block that was discarded.  The batch

										// may not be in the main chain, so we stop the pipeline as a

										// precaution (it will be started again once the node is in

										// sync).

										log.Infow("Coordinator.handleReorg StopPipeline sync.LastBatch < c.pipelineBatchNum",

											"sync.LastBatch", c.stats.Sync.LastBatch,

											"c.pipelineBatchNum", c.pipelineBatchNum)

										if err := c.handleStopPipeline(ctx, "reorg"); err != nil {

											return tracerr.Wrap(err)

										}

									}

									return nil

								}


								func (c *Coordinator) handleStopPipeline(ctx context.Context, reason string) error {

									if err := c.l2DB.Reorg(common.BatchNum(c.stats.Sync.LastBatch)); err != nil {

										return tracerr.Wrap(err)

									}

									if c.pipeline != nil {

										c.pipeline.Stop(c.ctx)

										c.pipeline = nil

									}

									if strings.Contains(reason, common.AuctionErrMsgCannotForge) { //nolint:staticcheck

										// TODO: Check that we are in a slot in which we can't forge

									}

									return nil

								}


								func (c *Coordinator) handleMsg(ctx context.Context, msg interface{}) error {

									switch msg := msg.(type) {

									case MsgSyncBlock:

										if err := c.handleMsgSyncBlock(ctx, &msg); err != nil {

											return tracerr.Wrap(fmt.Errorf("Coordinator.handleMsgSyncBlock error: %w", err))

										}

									case MsgSyncReorg:

										if err := c.handleReorg(ctx, &msg); err != nil {

											return tracerr.Wrap(fmt.Errorf("Coordinator.handleReorg error: %w", err))

										}

									case MsgStopPipeline:

										log.Infow("Coordinator received MsgStopPipeline", "reason", msg.Reason)

										if err := c.handleStopPipeline(ctx, msg.Reason); err != nil {

											return tracerr.Wrap(fmt.Errorf("Coordinator.handleStopPipeline: %w", err))

										}

									default:

										log.Fatalw("Coordinator Unexpected Coordinator msg of type %T: %+v", msg, msg)

									}

									return nil

								}


								// Start the coordinator

								func (c *Coordinator) Start() {

									if c.started {

										log.Fatal("Coordinator already started")

									}

									c.started = true

									c.wg.Add(1)

									go func() {

										c.txManager.Run(c.ctx)

										c.wg.Done()

									}()


									c.wg.Add(1)

									go func() {

										waitDuration := longWaitDuration

										for {

											select {

											case <-c.ctx.Done():

												log.Info("Coordinator done")

												c.wg.Done()

												return

											case msg := <-c.msgCh:

												if err := c.handleMsg(c.ctx, msg); c.ctx.Err() != nil {

													continue

												} else if err != nil {

													log.Errorw("Coordinator.handleMsg", "err", err)

													waitDuration = c.cfg.SyncRetryInterval

													continue

												}

												waitDuration = longWaitDuration

											case <-time.After(waitDuration):

												if !c.stats.Synced() {

													waitDuration = longWaitDuration

													continue

												}

												if err := c.syncStats(c.ctx, &c.stats); c.ctx.Err() != nil {

													continue

												} else if err != nil {

													log.Errorw("Coordinator.syncStats", "err", err)

													waitDuration = c.cfg.SyncRetryInterval

													continue

												}

												waitDuration = longWaitDuration

											}

										}

									}()

								}


								const stopCtxTimeout = 200 * time.Millisecond


								// Stop the coordinator

								func (c *Coordinator) Stop() {

									if !c.started {

										log.Fatal("Coordinator already stopped")

									}

									c.started = false

									log.Infow("Stopping Coordinator...")

									c.cancel()

									c.wg.Wait()

									if c.pipeline != nil {

										ctx, cancel := context.WithTimeout(context.Background(), stopCtxTimeout)

										defer cancel()

										c.pipeline.Stop(ctx)

										c.pipeline = nil

									}

								}