Update coordinator to work better under a real network

- cli / node
    - Update the SIGINT handler so that after 3 SIGINTs the process terminates
      unconditionally (see the SIGINT sketch after this list)
- coordinator
    - Store stats without pointer
    - In all functions that send a value via a channel, check for context done
      to avoid a deadlock (because no process may be reading from the channel,
      which has no queue) when the node is stopped (see the channel-send sketch
      after this list).
    - Abstract `canForge` so that it can be used outside of the `Coordinator`
    - In `canForge`, check the blockNumber against both the current and the next
      slot.
    - Update tests due to smart contract changes in slot handling, and minimum
      bid defaults
    - TxManager
        - Add consts, vars and stats to allow evaluating `canForge`
        - Add `canForge` method (not used yet)
        - Store batch and nonces status (last success and last pending)
        - Track nonces internally instead of relying on the ethereum node (this
          is required to work with ganache when there are pending txs)
        - Handle the (common) case of the receipt not being found after the tx
          is sent.
        - Don't start the main loop until we get an initial message for the
          stats and vars (so that in the loop the stats and vars are set to
          synchronizer values)
        - When a tx fails, check and discard all the failed transactions before
          sending the message to stop the pipeline.  This avoids sending
          consecutive stop-pipeline messages when multiple txs are detected to
          have failed consecutively.  Also, future txs of the same pipeline that
          come after a discarded tx are discarded, and their nonces reused.
        - Robust handling of nonces:
            - If geth returns nonce too low, increase it
            - If geth returns nonce too high, decrease it
            - If geth returns underpriced, increase the gas price
            - If geth returns replace underpriced, increase the gas price
        - Add support for resending transactions after a timeout
        - Store `BatchInfos` in a queue
    - Pipeline
        - When an error is found, stop forging batches and send a message to the
          coordinator to stop the pipeline, including the failed batch number so
          that on restart, non-failed batches are not repeated.
        - When doing a reset of the stateDB, if possible reset from the local
          checkpoint instead of resetting from the synchronizer.  This allows
          resetting from a batch that is valid but not yet sent / synced.
    - Every time a pipeline is started, assign it a number from a counter.  This
      allows the TxManager to ignore batches from stopped pipelines, via a
      message sent by the coordinator.
    - Avoid forging when we haven't reached the rollup genesis block number.
    - Add config parameter `StartSlotBlocksDelay`: StartSlotBlocksDelay is the
      number of blocks to wait before starting the pipeline when we reach a
      slot in which we can forge.
    - When detecting a reorg, only reset the pipeline if the batch from which
      the pipeline started changed and wasn't sent by us.
    - Add config parameter `ScheduleBatchBlocksAheadCheck`:
      ScheduleBatchBlocksAheadCheck is the number of blocks ahead in which the
      forger address is checked to be allowed to forge (apart from checking the
      next block), used to decide when to stop scheduling new batches (by
      stopping the pipeline).  For example, if we are at block 10 and
      ScheduleBatchBlocksAheadCheck is 5, even though at block 11 we canForge,
      the pipeline will be stopped if we can't forge at block 15.  This value
      should be the expected number of blocks it takes between scheduling a
      batch and having it mined.
    - Add config parameter `SendBatchBlocksMarginCheck`:
      SendBatchBlocksMarginCheck is the number of margin blocks ahead in which
      the coordinator is also checked to be allowed to forge, apart from the
      next block; used to decide when to stop sending batches to the smart
      contract.  For example, if we are at block 10 and
      SendBatchBlocksMarginCheck is 5, even though at block 11 we canForge, the
      batch will be discarded if we can't forge at block 15.
    - Add config parameter `TxResendTimeout`: TxResendTimeout is the timeout
      after which a non-mined ethereum transaction will be resent (reusing the
      nonce) with a newly calculated gas price
    - Add config parameter `MaxGasPrice`: MaxGasPrice is the maximum gas price
      allowed for ethereum transactions
    - Add config parameter `NoReuseNonce`: NoReuseNonce disables reusing nonces
      of pending transactions for new replacement transactions.  This is useful
      for testing with Ganache.
    - Extend BatchInfo with more useful information for debugging
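
For the cli/node SIGINT item above, a minimal sketch (not the node's actual
code) of one way to terminate unconditionally after three SIGINTs; the package
name, `handleSIGINT`, and the `stop` callback are illustrative placeholders:

```go
package sketch

import (
	"os"
	"os/signal"
	"syscall"
)

// handleSIGINT starts a graceful shutdown (via stop) on the first SIGINT and
// terminates the process unconditionally on the third one.
func handleSIGINT(stop func()) {
	sigCh := make(chan os.Signal, 3)
	signal.Notify(sigCh, syscall.SIGINT)
	count := 0
	for range sigCh {
		count++
		if count == 1 {
			go stop() // begin graceful shutdown
		} else if count >= 3 {
			os.Exit(1) // force exit after 3 SIGINTs
		}
	}
}
```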
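
For the channel-send item above, a minimal sketch of the select-on-context
pattern, assuming an illustrative `Msg` type and channel; the coordinator's
real methods (such as `SendMsg`) follow the same shape:

```go
package sketch

import "context"

// Msg is an illustrative message type, not the coordinator's real one.
type Msg struct{ Reason string }

// sendMsg delivers msg on msgCh, but gives up when ctx is cancelled so the
// caller never blocks forever once the consumer goroutine has stopped.
func sendMsg(ctx context.Context, msgCh chan<- Msg, msg Msg) {
	select {
	case msgCh <- msg:
	case <-ctx.Done():
	}
}
```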

- eth / ethereum client
    - Add the necessary methods to create the auth object for transactions
      manually, so that the nonce, gas price, gas limit, etc. can be set
      explicitly
    - Update `RollupForgeBatch` to take an auth object as input (so that the
      coordinator can set parameters manually)
- synchronizer
    - In stats, add `NextSlot`
    - In stats, store full last batch instead of just last batch number
    - Instead of calculating a nextSlot from scratch every time, update the
      current struct (only updating the forger info if we are Synced)
    - After every processed batch, check that the calculated StateDB MTRoot
      matches the StateRoot found in the forgeBatch event (see the sketch after
      this list).
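
For the last synchronizer item, a minimal sketch of the root comparison, with
hypothetical variable and function names (the synchronizer's actual code is
not shown in this diff):

```go
package sketch

import (
	"fmt"
	"math/big"
)

// checkBatchStateRoot compares the MT root computed by the local StateDB
// after processing a batch with the StateRoot carried by the forgeBatch
// event, returning an error on mismatch.
func checkBatchStateRoot(batchNum int64, localMTRoot, eventStateRoot *big.Int) error {
	if localMTRoot.Cmp(eventStateRoot) != 0 {
		return fmt.Errorf("batch %d: calculated StateDB MTRoot (%v) != forgeBatch StateRoot (%v)",
			batchNum, localMTRoot, eventStateRoot)
	}
	return nil
}
```
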
Author: Eduard S
Date:   2021-02-16 14:22:51 +01:00
Parent: 26fbeb5c68
Commit: f0e79f3d55

22 changed files with 935 additions and 285 deletions


@@ -47,6 +47,8 @@ type Debug struct {
MineBlockNum int64
// SendBlockNum is the blockNum when the batch was sent to ethereum
SendBlockNum int64
// ResendNum is the number of times the tx has been resent
ResendNum int
// LastScheduledL1BatchBlockNum is the blockNum when the last L1Batch
// was scheduled
LastScheduledL1BatchBlockNum int64
@@ -64,10 +66,17 @@ type Debug struct {
// StartToSendDelay is the delay between starting a batch and sending
// it to ethereum, in seconds
StartToSendDelay float64
// StartToMineDelay is the delay between starting a batch and having
// it mined in seconds
StartToMineDelay float64
// SendToMineDelay is the delay between sending a batch tx and having
// it mined in seconds
SendToMineDelay float64
}
// BatchInfo contains the Batch information
type BatchInfo struct {
PipelineNum int
BatchNum common.BatchNum
ServerProof prover.Client
ZKInputs *common.ZKInputs
@@ -82,9 +91,16 @@ type BatchInfo struct {
CoordIdxs []common.Idx
ForgeBatchArgs *eth.RollupForgeBatchArgs
// FeesInfo
EthTx *types.Transaction
Receipt *types.Receipt
Debug Debug
EthTx *types.Transaction
EthTxErr error
// SendTimestamp the time of batch sent to ethereum
SendTimestamp time.Time
Receipt *types.Receipt
// Fail is true if:
// - The receipt status is failed
// - A previous parent batch is failed
Fail bool
Debug Debug
}
// DebugStore is a debug function to store the BatchInfo as a json text file in


@@ -3,8 +3,8 @@ package coordinator
import (
"context"
"fmt"
"math/big"
"os"
"strings"
"sync"
"time"
@@ -42,6 +42,29 @@ type Config struct {
// L1BatchTimeoutPerc is the portion of the range before the L1Batch
// timeout that will trigger a schedule to forge an L1Batch
L1BatchTimeoutPerc float64
// StartSlotBlocksDelay is the number of blocks of delay to wait before
// starting the pipeline when we reach a slot in which we can forge.
StartSlotBlocksDelay int64
// ScheduleBatchBlocksAheadCheck is the number of blocks ahead in which
// the forger address is checked to be allowed to forge (apart from
// checking the next block), used to decide when to stop scheduling new
// batches (by stopping the pipeline).
// For example, if we are at block 10 and ScheduleBatchBlocksAheadCheck
// is 5, even though at block 11 we canForge, the pipeline will be
// stopped if we can't forge at block 15.
// This value should be the expected number of blocks it takes between
// scheduling a batch and having it mined.
ScheduleBatchBlocksAheadCheck int64
// SendBatchBlocksMarginCheck is the number of margin blocks ahead in
// which the coordinator is also checked to be allowed to forge, apart
// from the next block; used to decide when to stop sending batches to
// the smart contract.
// For example, if we are at block 10 and SendBatchBlocksMarginCheck is
// 5, even though at block 11 we canForge, the batch will be discarded
// if we can't forge at block 15.
// This value should be the expected number of blocks it takes between
// sending a batch and having it mined.
SendBatchBlocksMarginCheck int64
// EthClientAttempts is the number of attempts to do an eth client RPC
// call before giving up
EthClientAttempts int
@@ -54,13 +77,25 @@ type Config struct {
// EthClientAttemptsDelay is the delay between attempts to do an eth client
// RPC call
EthClientAttemptsDelay time.Duration
// EthTxResendTimeout is the timeout after which a non-mined ethereum
// transaction will be resent (reusing the nonce) with a newly
// calculated gas price
EthTxResendTimeout time.Duration
// EthNoReuseNonce disables reusing nonces of pending transactions for
// new replacement transactions
EthNoReuseNonce bool
// MaxGasPrice is the maximum gas price allowed for ethereum
// transactions
MaxGasPrice *big.Int
// TxManagerCheckInterval is the waiting interval between receipt
// checks of ethereum transactions in the TxManager
TxManagerCheckInterval time.Duration
// DebugBatchPath if set, specifies the path where batchInfo is stored
// in JSON in every step/update of the pipeline
DebugBatchPath string
Purger PurgerCfg
DebugBatchPath string
Purger PurgerCfg
// VerifierIdx is the index of the verifier contract registered in the
// smart contract
VerifierIdx uint8
TxProcessorConfig txprocessor.Config
}
@@ -74,15 +109,22 @@ func (c *Config) debugBatchStore(batchInfo *BatchInfo) {
}
}
type fromBatch struct {
BatchNum common.BatchNum
ForgerAddr ethCommon.Address
StateRoot *big.Int
}
// Coordinator implements the Coordinator type
type Coordinator struct {
// State
pipelineBatchNum common.BatchNum // batchNum from which we started the pipeline
provers []prover.Client
consts synchronizer.SCConsts
vars synchronizer.SCVariables
stats synchronizer.Stats
started bool
pipelineNum int // Pipeline sequential number. The first pipeline is 1
pipelineFromBatch fromBatch // batch from which we started the pipeline
provers []prover.Client
consts synchronizer.SCConsts
vars synchronizer.SCVariables
stats synchronizer.Stats
started bool
cfg Config
@@ -96,7 +138,8 @@ type Coordinator struct {
wg sync.WaitGroup
cancel context.CancelFunc
pipeline *Pipeline
pipeline *Pipeline
lastNonFailedBatchNum common.BatchNum
purger *Purger
txManager *TxManager
@@ -139,10 +182,15 @@ func NewCoordinator(cfg Config,
ctx, cancel := context.WithCancel(context.Background())
c := Coordinator{
pipelineBatchNum: -1,
provers: serverProofs,
consts: *scConsts,
vars: *initSCVars,
pipelineNum: 0,
pipelineFromBatch: fromBatch{
BatchNum: 0,
ForgerAddr: ethCommon.Address{},
StateRoot: big.NewInt(0),
},
provers: serverProofs,
consts: *scConsts,
vars: *initSCVars,
cfg: cfg,
@@ -183,8 +231,9 @@ func (c *Coordinator) BatchBuilder() *batchbuilder.BatchBuilder {
}
func (c *Coordinator) newPipeline(ctx context.Context) (*Pipeline, error) {
return NewPipeline(ctx, c.cfg, c.historyDB, c.l2DB, c.txSelector,
c.batchBuilder, c.purger, c.txManager, c.provers, &c.consts)
c.pipelineNum++
return NewPipeline(ctx, c.cfg, c.pipelineNum, c.historyDB, c.l2DB, c.txSelector,
c.batchBuilder, c.purger, c, c.txManager, c.provers, &c.consts)
}
// MsgSyncBlock indicates an update to the Synchronizer stats
@@ -205,6 +254,9 @@ type MsgSyncReorg struct {
// MsgStopPipeline indicates a signal to reset the pipeline
type MsgStopPipeline struct {
Reason string
// FailedBatchNum indicates the first batchNum that failed in the
// pipeline. If FailedBatchNum is 0, it should be ignored.
FailedBatchNum common.BatchNum
}
// SendMsg is a thread safe method to pass a message to the Coordinator
@@ -215,27 +267,36 @@ func (c *Coordinator) SendMsg(ctx context.Context, msg interface{}) {
}
}
func updateSCVars(vars *synchronizer.SCVariables, update synchronizer.SCVariablesPtr) {
if update.Rollup != nil {
vars.Rollup = *update.Rollup
}
if update.Auction != nil {
vars.Auction = *update.Auction
}
if update.WDelayer != nil {
vars.WDelayer = *update.WDelayer
}
}
func (c *Coordinator) syncSCVars(vars synchronizer.SCVariablesPtr) {
if vars.Rollup != nil {
c.vars.Rollup = *vars.Rollup
}
if vars.Auction != nil {
c.vars.Auction = *vars.Auction
}
if vars.WDelayer != nil {
c.vars.WDelayer = *vars.WDelayer
}
updateSCVars(&c.vars, vars)
}
func canForge(auctionConstants *common.AuctionConstants, auctionVars *common.AuctionVariables,
currentSlot *common.Slot, nextSlot *common.Slot, addr ethCommon.Address, blockNum int64) bool {
if blockNum < auctionConstants.GenesisBlockNum {
log.Infow("canForge: requested blockNum is < genesis", "blockNum", blockNum,
"genesis", auctionConstants.GenesisBlockNum)
return false
}
var slot *common.Slot
if currentSlot.StartBlock <= blockNum && blockNum <= currentSlot.EndBlock {
slot = currentSlot
} else if nextSlot.StartBlock <= blockNum && blockNum <= nextSlot.EndBlock {
slot = nextSlot
} else {
log.Warnw("Coordinator: requested blockNum for canForge is outside slot",
log.Warnw("canForge: requested blockNum is outside current and next slot",
"blockNum", blockNum, "currentSlot", currentSlot,
"nextSlot", nextSlot,
)
@@ -244,16 +305,23 @@ func canForge(auctionConstants *common.AuctionConstants, auctionVars *common.Auc
anyoneForge := false
if !slot.ForgerCommitment &&
auctionConstants.RelativeBlock(blockNum) >= int64(auctionVars.SlotDeadline) {
log.Debugw("Coordinator: anyone can forge in the current slot (slotDeadline passed)",
log.Debugw("canForge: anyone can forge in the current slot (slotDeadline passed)",
"block", blockNum)
anyoneForge = true
}
if slot.Forger == addr || anyoneForge {
return true
}
log.Debugw("canForge: can't forge", "slot.Forger", slot.Forger)
return false
}
func (c *Coordinator) canForgeAt(blockNum int64) bool {
return canForge(&c.consts.Auction, &c.vars.Auction,
&c.stats.Sync.Auction.CurrentSlot, &c.stats.Sync.Auction.NextSlot,
c.cfg.ForgerAddress, blockNum)
}
func (c *Coordinator) canForge() bool {
blockNum := c.stats.Eth.LastBlock.Num + 1
return canForge(&c.consts.Auction, &c.vars.Auction,
@@ -262,12 +330,24 @@ func (c *Coordinator) canForge() bool {
}
func (c *Coordinator) syncStats(ctx context.Context, stats *synchronizer.Stats) error {
canForge := c.canForge()
nextBlock := c.stats.Eth.LastBlock.Num + 1
canForge := c.canForgeAt(nextBlock)
if c.cfg.ScheduleBatchBlocksAheadCheck != 0 && canForge {
canForge = c.canForgeAt(nextBlock + c.cfg.ScheduleBatchBlocksAheadCheck)
}
if c.pipeline == nil {
if canForge {
relativeBlock := c.consts.Auction.RelativeBlock(nextBlock)
if canForge && relativeBlock < c.cfg.StartSlotBlocksDelay {
log.Debugf("Coordinator: delaying pipeline start due to "+
"relativeBlock (%v) < cfg.StartSlotBlocksDelay (%v)",
relativeBlock, c.cfg.StartSlotBlocksDelay)
} else if canForge {
log.Infow("Coordinator: forging state begin", "block",
stats.Eth.LastBlock.Num+1, "batch", stats.Sync.LastBatch)
batchNum := common.BatchNum(stats.Sync.LastBatch)
stats.Eth.LastBlock.Num+1, "batch", stats.Sync.LastBatch.BatchNum)
batchNum := stats.Sync.LastBatch.BatchNum
if c.lastNonFailedBatchNum > batchNum {
batchNum = c.lastNonFailedBatchNum
}
var err error
if c.pipeline, err = c.newPipeline(ctx); err != nil {
return tracerr.Wrap(err)
@@ -276,7 +356,6 @@ func (c *Coordinator) syncStats(ctx context.Context, stats *synchronizer.Stats)
c.pipeline = nil
return tracerr.Wrap(err)
}
c.pipelineBatchNum = batchNum
}
} else {
if !canForge {
@@ -286,25 +365,12 @@ func (c *Coordinator) syncStats(ctx context.Context, stats *synchronizer.Stats)
}
}
if c.pipeline == nil {
// Mark invalid in Pool due to forged L2Txs
// for _, batch := range batches {
// if err := c.l2DB.InvalidateOldNonces(
// idxsNonceFromL2Txs(batch.L2Txs), batch.Batch.BatchNum); err != nil {
// return err
// }
// }
if c.purger.CanInvalidate(stats.Sync.LastBlock.Num, stats.Sync.LastBatch) {
if err := c.txSelector.Reset(common.BatchNum(stats.Sync.LastBatch)); err != nil {
return tracerr.Wrap(err)
}
}
_, err := c.purger.InvalidateMaybe(c.l2DB, c.txSelector.LocalAccountsDB(),
stats.Sync.LastBlock.Num, stats.Sync.LastBatch)
if err != nil {
if _, err := c.purger.InvalidateMaybe(c.l2DB, c.txSelector.LocalAccountsDB(),
stats.Sync.LastBlock.Num, int64(stats.Sync.LastBatch.BatchNum)); err != nil {
return tracerr.Wrap(err)
}
_, err = c.purger.PurgeMaybe(c.l2DB, stats.Sync.LastBlock.Num, stats.Sync.LastBatch)
if err != nil {
if _, err := c.purger.PurgeMaybe(c.l2DB, stats.Sync.LastBlock.Num,
int64(stats.Sync.LastBatch.BatchNum)); err != nil {
return tracerr.Wrap(err)
}
}
@@ -331,33 +397,42 @@ func (c *Coordinator) handleReorg(ctx context.Context, msg *MsgSyncReorg) error
if c.pipeline != nil {
c.pipeline.SetSyncStatsVars(ctx, &msg.Stats, &msg.Vars)
}
if common.BatchNum(c.stats.Sync.LastBatch) < c.pipelineBatchNum {
// There's been a reorg and the batch from which the pipeline
// was started was in a block that was discarded. The batch
// may not be in the main chain, so we stop the pipeline as a
// precaution (it will be started again once the node is in
// sync).
log.Infow("Coordinator.handleReorg StopPipeline sync.LastBatch < c.pipelineBatchNum",
"sync.LastBatch", c.stats.Sync.LastBatch,
"c.pipelineBatchNum", c.pipelineBatchNum)
if err := c.handleStopPipeline(ctx, "reorg"); err != nil {
if c.stats.Sync.LastBatch.ForgerAddr != c.cfg.ForgerAddress &&
c.stats.Sync.LastBatch.StateRoot.Cmp(c.pipelineFromBatch.StateRoot) != 0 {
// There's been a reorg and the batch state root from which the
// pipeline was started has changed (probably because it was in
// a block that was discarded), and it was sent by a different
// coordinator than us. That batch may never be in the main
// chain, so we stop the pipeline (it will be started again
// once the node is in sync).
log.Infow("Coordinator.handleReorg StopPipeline sync.LastBatch.ForgerAddr != cfg.ForgerAddr "+
"& sync.LastBatch.StateRoot != pipelineFromBatch.StateRoot",
"sync.LastBatch.StateRoot", c.stats.Sync.LastBatch.StateRoot,
"pipelineFromBatch.StateRoot", c.pipelineFromBatch.StateRoot)
c.txManager.DiscardPipeline(ctx, c.pipelineNum)
if err := c.handleStopPipeline(ctx, "reorg", 0); err != nil {
return tracerr.Wrap(err)
}
}
return nil
}
func (c *Coordinator) handleStopPipeline(ctx context.Context, reason string) error {
// handleStopPipeline handles stopping the pipeline. If failedBatchNum is 0,
// the next pipeline will start from the last state of the synchronizer,
// otherwise, it will start from failedBatchNum-1.
func (c *Coordinator) handleStopPipeline(ctx context.Context, reason string, failedBatchNum common.BatchNum) error {
batchNum := c.stats.Sync.LastBatch.BatchNum
if failedBatchNum != 0 {
batchNum = failedBatchNum - 1
}
if c.pipeline != nil {
c.pipeline.Stop(c.ctx)
c.pipeline = nil
}
if err := c.l2DB.Reorg(common.BatchNum(c.stats.Sync.LastBatch)); err != nil {
if err := c.l2DB.Reorg(batchNum); err != nil {
return tracerr.Wrap(err)
}
if strings.Contains(reason, common.AuctionErrMsgCannotForge) { //nolint:staticcheck
// TODO: Check that we are in a slot in which we can't forge
}
c.lastNonFailedBatchNum = batchNum
return nil
}
@@ -373,7 +448,7 @@ func (c *Coordinator) handleMsg(ctx context.Context, msg interface{}) error {
}
case MsgStopPipeline:
log.Infow("Coordinator received MsgStopPipeline", "reason", msg.Reason)
if err := c.handleStopPipeline(ctx, msg.Reason); err != nil {
if err := c.handleStopPipeline(ctx, msg.Reason, msg.FailedBatchNum); err != nil {
return tracerr.Wrap(fmt.Errorf("Coordinator.handleStopPipeline: %w", err))
}
default:


@@ -261,8 +261,8 @@ func TestCoordinatorFlow(t *testing.T) {
var stats synchronizer.Stats
stats.Eth.LastBlock = *ethClient.CtlLastBlock()
stats.Sync.LastBlock = stats.Eth.LastBlock
stats.Eth.LastBatch = ethClient.CtlLastForgedBatch()
stats.Sync.LastBatch = stats.Eth.LastBatch
stats.Eth.LastBatchNum = ethClient.CtlLastForgedBatch()
stats.Sync.LastBatch.BatchNum = common.BatchNum(stats.Eth.LastBatchNum)
canForge, err := ethClient.AuctionCanForge(forger, blockNum+1)
require.NoError(t, err)
var slot common.Slot
@@ -279,7 +279,7 @@ func TestCoordinatorFlow(t *testing.T) {
// Copy stateDB to synchronizer if there was a new batch
source := fmt.Sprintf("%v/BatchNum%v", batchBuilderDBPath, stats.Sync.LastBatch)
dest := fmt.Sprintf("%v/BatchNum%v", syncDBPath, stats.Sync.LastBatch)
if stats.Sync.LastBatch != 0 {
if stats.Sync.LastBatch.BatchNum != 0 {
if _, err := os.Stat(dest); os.IsNotExist(err) {
log.Infow("Making pebble checkpoint for sync",
"source", source, "dest", dest)


@@ -2,6 +2,7 @@ package coordinator
import (
"context"
"database/sql"
"fmt"
"math/big"
"sync"
@@ -24,19 +25,27 @@ type statsVars struct {
Vars synchronizer.SCVariablesPtr
}
type state struct {
batchNum common.BatchNum
lastScheduledL1BatchBlockNum int64
lastForgeL1TxsNum int64
}
// Pipeline manages the forging of batches with parallel server proofs
type Pipeline struct {
num int
cfg Config
consts synchronizer.SCConsts
// state
batchNum common.BatchNum
lastScheduledL1BatchBlockNum int64
lastForgeL1TxsNum int64
started bool
state state
started bool
rw sync.RWMutex
errAtBatchNum common.BatchNum
proversPool *ProversPool
provers []prover.Client
coord *Coordinator
txManager *TxManager
historyDB *historydb.HistoryDB
l2DB *l2db.L2DB
@@ -53,14 +62,28 @@ type Pipeline struct {
cancel context.CancelFunc
}
func (p *Pipeline) setErrAtBatchNum(batchNum common.BatchNum) {
p.rw.Lock()
defer p.rw.Unlock()
p.errAtBatchNum = batchNum
}
func (p *Pipeline) getErrAtBatchNum() common.BatchNum {
p.rw.RLock()
defer p.rw.RUnlock()
return p.errAtBatchNum
}
// NewPipeline creates a new Pipeline
func NewPipeline(ctx context.Context,
cfg Config,
num int, // Pipeline sequential number
historyDB *historydb.HistoryDB,
l2DB *l2db.L2DB,
txSelector *txselector.TxSelector,
batchBuilder *batchbuilder.BatchBuilder,
purger *Purger,
coord *Coordinator,
txManager *TxManager,
provers []prover.Client,
scConsts *synchronizer.SCConsts,
@@ -79,6 +102,7 @@ func NewPipeline(ctx context.Context,
return nil, tracerr.Wrap(fmt.Errorf("no provers in the pool"))
}
return &Pipeline{
num: num,
cfg: cfg,
historyDB: historyDB,
l2DB: l2DB,
@@ -87,6 +111,7 @@ func NewPipeline(ctx context.Context,
provers: provers,
proversPool: proversPool,
purger: purger,
coord: coord,
txManager: txManager,
consts: *scConsts,
statsVarsCh: make(chan statsVars, queueLen),
@@ -104,33 +129,67 @@ func (p *Pipeline) SetSyncStatsVars(ctx context.Context, stats *synchronizer.Sta
// reset pipeline state
func (p *Pipeline) reset(batchNum common.BatchNum,
stats *synchronizer.Stats, vars *synchronizer.SCVariables) error {
p.batchNum = batchNum
p.lastForgeL1TxsNum = stats.Sync.LastForgeL1TxsNum
p.state = state{
batchNum: batchNum,
lastForgeL1TxsNum: stats.Sync.LastForgeL1TxsNum,
lastScheduledL1BatchBlockNum: 0,
}
p.stats = *stats
p.vars = *vars
p.lastScheduledL1BatchBlockNum = 0
err := p.txSelector.Reset(p.batchNum)
// Reset the StateDB in TxSelector and BatchBuilder from the
// synchronizer only if the checkpoint we reset from either:
// a. Doesn't exist in the TxSelector/BatchBuilder
// b. The batch has already been synced by the synchronizer and has a
// different MTRoot than the BatchBuilder
// Otherwise, reset from the local checkpoint.
// First attempt to reset from local checkpoint if such checkpoint exists
existsTxSelector, err := p.txSelector.LocalAccountsDB().CheckpointExists(p.state.batchNum)
if err != nil {
return tracerr.Wrap(err)
}
err = p.batchBuilder.Reset(p.batchNum, true)
fromSynchronizerTxSelector := !existsTxSelector
if err := p.txSelector.Reset(p.state.batchNum, fromSynchronizerTxSelector); err != nil {
return tracerr.Wrap(err)
}
existsBatchBuilder, err := p.batchBuilder.LocalStateDB().CheckpointExists(p.state.batchNum)
if err != nil {
return tracerr.Wrap(err)
}
fromSynchronizerBatchBuilder := !existsBatchBuilder
if err := p.batchBuilder.Reset(p.state.batchNum, fromSynchronizerBatchBuilder); err != nil {
return tracerr.Wrap(err)
}
// After reset, check that if the batch exists in the historyDB, the
// stateRoot matches with the local one, if not, force a reset from
// synchronizer
batch, err := p.historyDB.GetBatch(p.state.batchNum)
if tracerr.Unwrap(err) == sql.ErrNoRows {
// nothing to do
} else if err != nil {
return tracerr.Wrap(err)
} else {
localStateRoot := p.batchBuilder.LocalStateDB().MT.Root().BigInt()
if batch.StateRoot.Cmp(localStateRoot) != 0 {
log.Debugw("localStateRoot (%v) != historyDB stateRoot (%v). "+
"Forcing reset from Synchronizer", localStateRoot, batch.StateRoot)
// StateRoot from synchronizer doesn't match StateRoot
// from batchBuilder, force a reset from synchronizer
if err := p.txSelector.Reset(p.state.batchNum, true); err != nil {
return tracerr.Wrap(err)
}
if err := p.batchBuilder.Reset(p.state.batchNum, true); err != nil {
return tracerr.Wrap(err)
}
}
}
return nil
}
func (p *Pipeline) syncSCVars(vars synchronizer.SCVariablesPtr) {
if vars.Rollup != nil {
p.vars.Rollup = *vars.Rollup
}
if vars.Auction != nil {
p.vars.Auction = *vars.Auction
}
if vars.WDelayer != nil {
p.vars.WDelayer = *vars.WDelayer
}
updateSCVars(&p.vars, vars)
}
// handleForgeBatch calls p.forgeBatch to forge the batch and get the zkInputs,
@@ -143,7 +202,7 @@ func (p *Pipeline) handleForgeBatch(ctx context.Context, batchNum common.BatchNu
} else if err != nil {
if tracerr.Unwrap(err) == errLastL1BatchNotSynced {
log.Warnw("forgeBatch: scheduled L1Batch too early", "err", err,
"lastForgeL1TxsNum", p.lastForgeL1TxsNum,
"lastForgeL1TxsNum", p.state.lastForgeL1TxsNum,
"syncLastForgeL1TxsNum", p.stats.Sync.LastForgeL1TxsNum)
} else {
log.Errorw("forgeBatch", "err", err)
@@ -199,15 +258,32 @@ func (p *Pipeline) Start(batchNum common.BatchNum,
p.stats = statsVars.Stats
p.syncSCVars(statsVars.Vars)
case <-time.After(waitDuration):
batchNum = p.batchNum + 1
// Once errAtBatchNum != 0, we stop forging
// batches because there's been an error and we
// wait for the pipeline to be stopped.
if p.getErrAtBatchNum() != 0 {
waitDuration = p.cfg.ForgeRetryInterval
continue
}
batchNum = p.state.batchNum + 1
batchInfo, err := p.handleForgeBatch(p.ctx, batchNum)
if p.ctx.Err() != nil {
continue
} else if tracerr.Unwrap(err) == errLastL1BatchNotSynced {
waitDuration = p.cfg.ForgeRetryInterval
continue
} else if err != nil {
waitDuration = p.cfg.SyncRetryInterval
p.setErrAtBatchNum(batchNum)
waitDuration = p.cfg.ForgeRetryInterval
p.coord.SendMsg(p.ctx, MsgStopPipeline{
Reason: fmt.Sprintf(
"Pipeline.handleForgBatch: %v", err),
FailedBatchNum: batchNum,
})
continue
}
p.batchNum = batchNum
p.state.batchNum = batchNum
select {
case batchChSentServerProof <- batchInfo:
case <-p.ctx.Done():
@@ -225,16 +301,28 @@ func (p *Pipeline) Start(batchNum common.BatchNum,
p.wg.Done()
return
case batchInfo := <-batchChSentServerProof:
// Once errAtBatchNum != 0, we stop forging
// batches because there's been an error and we
// wait for the pipeline to be stopped.
if p.getErrAtBatchNum() != 0 {
continue
}
err := p.waitServerProof(p.ctx, batchInfo)
// We are done with this serverProof, add it back to the pool
p.proversPool.Add(p.ctx, batchInfo.ServerProof)
batchInfo.ServerProof = nil
if p.ctx.Err() != nil {
continue
} else if err != nil {
log.Errorw("waitServerProof", "err", err)
p.setErrAtBatchNum(batchInfo.BatchNum)
p.coord.SendMsg(p.ctx, MsgStopPipeline{
Reason: fmt.Sprintf(
"Pipeline.waitServerProof: %v", err),
FailedBatchNum: batchInfo.BatchNum,
})
continue
}
// We are done with this serverProof, add it back to the pool
p.proversPool.Add(p.ctx, batchInfo.ServerProof)
// batchInfo.ServerProof = nil
p.txManager.AddBatch(p.ctx, batchInfo)
}
}
@@ -284,8 +372,8 @@ func (p *Pipeline) forgeBatch(batchNum common.BatchNum) (batchInfo *BatchInfo, e
if err != nil {
return nil, tracerr.Wrap(err)
}
batchInfo = &BatchInfo{BatchNum: batchNum} // to accumulate metadata of the batch
// Structure to accumulate data and metadata of the batch
batchInfo = &BatchInfo{PipelineNum: p.num, BatchNum: batchNum}
batchInfo.Debug.StartTimestamp = time.Now()
batchInfo.Debug.StartBlockNum = p.stats.Eth.LastBlock.Num + 1
@@ -300,22 +388,19 @@ func (p *Pipeline) forgeBatch(batchNum common.BatchNum) (batchInfo *BatchInfo, e
var auths [][]byte
var coordIdxs []common.Idx
// TODO: If there are no txs and we are behind the timeout, skip
// forging a batch and return a particular error that can be handled
// in the loop where handleForgeBatch is called to retry after an
// interval
// 1. Decide if we forge L2Tx or L1+L2Tx
if p.shouldL1L2Batch(batchInfo) {
batchInfo.L1Batch = true
defer func() {
// If there's no error, update the parameters related
// to the last L1Batch forged
if err == nil {
p.lastScheduledL1BatchBlockNum = p.stats.Eth.LastBlock.Num + 1
p.lastForgeL1TxsNum++
}
}()
if p.lastForgeL1TxsNum != p.stats.Sync.LastForgeL1TxsNum {
if p.state.lastForgeL1TxsNum != p.stats.Sync.LastForgeL1TxsNum {
return nil, tracerr.Wrap(errLastL1BatchNotSynced)
}
// 2a: L1+L2 txs
l1UserTxs, err := p.historyDB.GetUnforgedL1UserTxs(p.lastForgeL1TxsNum + 1)
l1UserTxs, err := p.historyDB.GetUnforgedL1UserTxs(p.state.lastForgeL1TxsNum + 1)
if err != nil {
return nil, tracerr.Wrap(err)
}
@@ -324,6 +409,9 @@ func (p *Pipeline) forgeBatch(batchNum common.BatchNum) (batchInfo *BatchInfo, e
if err != nil {
return nil, tracerr.Wrap(err)
}
p.state.lastScheduledL1BatchBlockNum = p.stats.Eth.LastBlock.Num + 1
p.state.lastForgeL1TxsNum++
} else {
// 2b: only L2 txs
coordIdxs, auths, l1CoordTxs, poolL2Txs, discardedL2Txs, err =
@@ -399,12 +487,12 @@ func (p *Pipeline) waitServerProof(ctx context.Context, batchInfo *BatchInfo) er
func (p *Pipeline) shouldL1L2Batch(batchInfo *BatchInfo) bool {
// Take the lastL1BatchBlockNum as the biggest between the last
// scheduled one, and the synchronized one.
lastL1BatchBlockNum := p.lastScheduledL1BatchBlockNum
lastL1BatchBlockNum := p.state.lastScheduledL1BatchBlockNum
if p.stats.Sync.LastL1BatchBlock > lastL1BatchBlockNum {
lastL1BatchBlockNum = p.stats.Sync.LastL1BatchBlock
}
// Set Debug information
batchInfo.Debug.LastScheduledL1BatchBlockNum = p.lastScheduledL1BatchBlockNum
batchInfo.Debug.LastScheduledL1BatchBlockNum = p.state.lastScheduledL1BatchBlockNum
batchInfo.Debug.LastL1BatchBlock = p.stats.Sync.LastL1BatchBlock
batchInfo.Debug.LastL1BatchBlockDelta = p.stats.Eth.LastBlock.Num + 1 - lastL1BatchBlockNum
batchInfo.Debug.L1BatchBlockScheduleDeadline =


@@ -25,6 +25,14 @@ import (
"github.com/stretchr/testify/require"
)
func newBigInt(s string) *big.Int {
v, ok := new(big.Int).SetString(s, 10)
if !ok {
panic(fmt.Errorf("Can't set big.Int from %s", s))
}
return v
}
func TestPipelineShouldL1L2Batch(t *testing.T) {
ethClientSetup := test.NewClientSetupExample()
ethClientSetup.ChainID = big.NewInt(int64(chainID))
@@ -77,7 +85,7 @@ func TestPipelineShouldL1L2Batch(t *testing.T) {
//
// Scheduled L1Batch
//
pipeline.lastScheduledL1BatchBlockNum = startBlock
pipeline.state.lastScheduledL1BatchBlockNum = startBlock
stats.Sync.LastL1BatchBlock = startBlock - 10
// We are one block before the timeout range * 0.5
@@ -128,6 +136,11 @@ func preloadSync(t *testing.T, ethClient *test.Client, sync *synchronizer.Synchr
blocks, err := tc.GenerateBlocksFromInstructions(set)
require.NoError(t, err)
require.NotNil(t, blocks)
// Set StateRoots for batches manually (til doesn't set it)
blocks[0].Rollup.Batches[0].Batch.StateRoot =
newBigInt("0")
blocks[0].Rollup.Batches[1].Batch.StateRoot =
newBigInt("10941365282189107056349764238909072001483688090878331371699519307087372995595")
ethAddTokens(blocks, ethClient)
err = ethClient.CtlAddBlocks(blocks)
@@ -172,7 +185,7 @@ func TestPipelineForgeBatchWithTxs(t *testing.T) {
// users with positive balances
tilCtx := preloadSync(t, ethClient, sync, modules.historyDB, modules.stateDB)
syncStats := sync.Stats()
batchNum := common.BatchNum(syncStats.Sync.LastBatch)
batchNum := syncStats.Sync.LastBatch.BatchNum
syncSCVars := sync.SCVars()
pipeline, err := coord.newPipeline(ctx)


@@ -13,13 +13,23 @@ import (
// PurgerCfg is the purger configuration
type PurgerCfg struct {
// PurgeBatchDelay is the delay between batches to purge outdated transactions
// PurgeBatchDelay is the delay between batches to purge outdated
// transactions. Outdated L2Txs are those that have been forged or
// marked as invalid for longer than the SafetyPeriod and pending L2Txs
// that have been in the pool for longer than TTL once there are
// MaxTxs.
PurgeBatchDelay int64
// InvalidateBatchDelay is the delay between batches to mark invalid transactions
// InvalidateBatchDelay is the delay between batches to mark invalid
// transactions due to nonce lower than the account nonce.
InvalidateBatchDelay int64
// PurgeBlockDelay is the delay between blocks to purge outdated transactions
// PurgeBlockDelay is the delay between blocks to purge outdated
// transactions. Outdated L2Txs are those that have been forged or
// marked as invalid for longer than the SafetyPeriod and pending L2Txs
// that have been in the pool for longer than TTL once there are
// MaxTxs.
PurgeBlockDelay int64
// InvalidateBlockDelay is the delay between blocks to mark invalid transactions
// InvalidateBlockDelay is the delay between blocks to mark invalid
// transactions due to nonce lower than the account nonce.
InvalidateBlockDelay int64
}


@@ -2,6 +2,7 @@ package coordinator
import (
"context"
"errors"
"fmt"
"math/big"
"time"
@@ -9,6 +10,7 @@ import (
"github.com/ethereum/go-ethereum"
"github.com/ethereum/go-ethereum/accounts"
"github.com/ethereum/go-ethereum/accounts/abi/bind"
"github.com/ethereum/go-ethereum/core"
"github.com/ethereum/go-ethereum/core/types"
"github.com/hermeznetwork/hermez-node/common"
"github.com/hermeznetwork/hermez-node/db/l2db"
@@ -35,12 +37,20 @@ type TxManager struct {
vars synchronizer.SCVariables
statsVarsCh chan statsVars
queue []*BatchInfo
discardPipelineCh chan int // int refers to the pipelineNum
minPipelineNum int
queue Queue
// lastSuccessBatch stores the last BatchNum whose forge call was confirmed
lastSuccessBatch common.BatchNum
lastPendingBatch common.BatchNum
lastSuccessNonce uint64
lastPendingNonce uint64
// lastPendingBatch common.BatchNum
// accNonce is the account nonce in the last mined block (due to mined txs)
accNonce uint64
// accNextNonce is the nonce that we should use to send the next tx.
// In some cases this will be a reused nonce of an already pending tx.
accNextNonce uint64
lastSentL1BatchBlockNum int64
}
// NewTxManager creates a new TxManager
@@ -54,26 +64,19 @@ func NewTxManager(ctx context.Context, cfg *Config, ethClient eth.ClientInterfac
if err != nil {
return nil, tracerr.Wrap(err)
}
lastSuccessNonce, err := ethClient.EthNonceAt(ctx, *address, nil)
accNonce, err := ethClient.EthNonceAt(ctx, *address, nil)
if err != nil {
return nil, err
}
lastPendingNonce, err := ethClient.EthPendingNonceAt(ctx, *address)
if err != nil {
return nil, err
}
if lastSuccessNonce != lastPendingNonce {
return nil, tracerr.Wrap(fmt.Errorf("lastSuccessNonce (%v) != lastPendingNonce (%v)",
lastSuccessNonce, lastPendingNonce))
}
log.Infow("TxManager started", "nonce", lastSuccessNonce)
log.Infow("TxManager started", "nonce", accNonce)
return &TxManager{
cfg: *cfg,
ethClient: ethClient,
l2DB: l2DB,
coord: coord,
batchCh: make(chan *BatchInfo, queueLen),
statsVarsCh: make(chan statsVars, queueLen),
cfg: *cfg,
ethClient: ethClient,
l2DB: l2DB,
coord: coord,
batchCh: make(chan *BatchInfo, queueLen),
statsVarsCh: make(chan statsVars, queueLen),
discardPipelineCh: make(chan int, queueLen),
account: accounts.Account{
Address: *address,
},
@@ -82,8 +85,10 @@ func NewTxManager(ctx context.Context, cfg *Config, ethClient eth.ClientInterfac
vars: *initSCVars,
lastSuccessNonce: lastSuccessNonce,
lastPendingNonce: lastPendingNonce,
minPipelineNum: 0,
queue: NewQueue(),
accNonce: accNonce,
accNextNonce: accNonce,
}, nil
}
@@ -104,16 +109,17 @@ func (t *TxManager) SetSyncStatsVars(ctx context.Context, stats *synchronizer.St
}
}
// DiscardPipeline is a thread safe method to notify about a discarded pipeline
// due to a reorg
func (t *TxManager) DiscardPipeline(ctx context.Context, pipelineNum int) {
select {
case t.discardPipelineCh <- pipelineNum:
case <-ctx.Done():
}
}
func (t *TxManager) syncSCVars(vars synchronizer.SCVariablesPtr) {
if vars.Rollup != nil {
t.vars.Rollup = *vars.Rollup
}
if vars.Auction != nil {
t.vars.Auction = *vars.Auction
}
if vars.WDelayer != nil {
t.vars.WDelayer = *vars.WDelayer
}
updateSCVars(&t.vars, vars)
}
// NewAuth generates a new auth object for an ethereum transaction
@@ -123,6 +129,7 @@ func (t *TxManager) NewAuth(ctx context.Context) (*bind.TransactOpts, error) {
return nil, tracerr.Wrap(err)
}
inc := new(big.Int).Set(gasPrice)
// TODO: Replace this by a value of percentage
const gasPriceDiv = 100
inc.Div(inc, new(big.Int).SetUint64(gasPriceDiv))
gasPrice.Add(gasPrice, inc)
@@ -141,29 +148,75 @@ func (t *TxManager) NewAuth(ctx context.Context) (*bind.TransactOpts, error) {
return auth, nil
}
func (t *TxManager) sendRollupForgeBatch(ctx context.Context, batchInfo *BatchInfo) error {
// TODO: Check if we can forge in the next blockNum, abort if we can't
batchInfo.Debug.Status = StatusSent
batchInfo.Debug.SendBlockNum = t.stats.Eth.LastBlock.Num + 1
batchInfo.Debug.SendTimestamp = time.Now()
batchInfo.Debug.StartToSendDelay = batchInfo.Debug.SendTimestamp.Sub(
batchInfo.Debug.StartTimestamp).Seconds()
func (t *TxManager) shouldSendRollupForgeBatch(batchInfo *BatchInfo) error {
nextBlock := t.stats.Eth.LastBlock.Num + 1
if !t.canForgeAt(nextBlock) {
return tracerr.Wrap(fmt.Errorf("can't forge in the next block: %v", nextBlock))
}
if t.mustL1L2Batch(nextBlock) && !batchInfo.L1Batch {
return tracerr.Wrap(fmt.Errorf("can't forge non-L1Batch in the next block: %v", nextBlock))
}
margin := t.cfg.SendBatchBlocksMarginCheck
if margin != 0 {
if !t.canForgeAt(nextBlock + margin) {
return tracerr.Wrap(fmt.Errorf("can't forge after %v blocks: %v",
margin, nextBlock))
}
if t.mustL1L2Batch(nextBlock+margin) && !batchInfo.L1Batch {
return tracerr.Wrap(fmt.Errorf("can't forge non-L1Batch after %v blocks: %v",
margin, nextBlock))
}
}
return nil
}
func addPerc(v *big.Int, p int64) *big.Int {
r := new(big.Int).Set(v)
r.Mul(r, big.NewInt(p))
// nolint reason: to calculate percentages we divide by 100
r.Div(r, big.NewInt(100)) //nolint:gomnd
return r.Add(v, r)
}
func (t *TxManager) sendRollupForgeBatch(ctx context.Context, batchInfo *BatchInfo, resend bool) error {
var ethTx *types.Transaction
var err error
auth, err := t.NewAuth(ctx)
if err != nil {
return tracerr.Wrap(err)
}
auth.Nonce = big.NewInt(int64(t.lastPendingNonce))
t.lastPendingNonce++
auth.Nonce = big.NewInt(int64(t.accNextNonce))
if resend {
auth.Nonce = big.NewInt(int64(batchInfo.EthTx.Nonce()))
}
for attempt := 0; attempt < t.cfg.EthClientAttempts; attempt++ {
if auth.GasPrice.Cmp(t.cfg.MaxGasPrice) > 0 {
return tracerr.Wrap(fmt.Errorf("calculated gasPrice (%v) > maxGasPrice (%v)",
auth.GasPrice, t.cfg.MaxGasPrice))
}
// RollupForgeBatch() calls ethclient.SendTransaction()
ethTx, err = t.ethClient.RollupForgeBatch(batchInfo.ForgeBatchArgs, auth)
if err != nil {
// if strings.Contains(err.Error(), common.AuctionErrMsgCannotForge) {
// log.Errorw("TxManager ethClient.RollupForgeBatch", "err", err,
// "block", t.stats.Eth.LastBlock.Num+1)
// return tracerr.Wrap(err)
// }
if errors.Is(err, core.ErrNonceTooLow) {
log.Warnw("TxManager ethClient.RollupForgeBatch incrementing nonce",
"err", err, "nonce", auth.Nonce, "batchNum", batchInfo.BatchNum)
auth.Nonce.Add(auth.Nonce, big.NewInt(1))
attempt--
} else if errors.Is(err, core.ErrNonceTooHigh) {
log.Warnw("TxManager ethClient.RollupForgeBatch decrementing nonce",
"err", err, "nonce", auth.Nonce, "batchNum", batchInfo.BatchNum)
auth.Nonce.Sub(auth.Nonce, big.NewInt(1))
attempt--
} else if errors.Is(err, core.ErrUnderpriced) {
log.Warnw("TxManager ethClient.RollupForgeBatch incrementing gasPrice",
"err", err, "gasPrice", auth.GasPrice, "batchNum", batchInfo.BatchNum)
auth.GasPrice = addPerc(auth.GasPrice, 10)
attempt--
} else if errors.Is(err, core.ErrReplaceUnderpriced) {
log.Warnw("TxManager ethClient.RollupForgeBatch incrementing gasPrice",
"err", err, "gasPrice", auth.GasPrice, "batchNum", batchInfo.BatchNum)
auth.GasPrice = addPerc(auth.GasPrice, 10)
attempt--
} else if err != nil {
log.Errorw("TxManager ethClient.RollupForgeBatch",
"attempt", attempt, "err", err, "block", t.stats.Eth.LastBlock.Num+1,
"batchNum", batchInfo.BatchNum)
@@ -179,10 +232,29 @@ func (t *TxManager) sendRollupForgeBatch(ctx context.Context, batchInfo *BatchIn
if err != nil {
return tracerr.Wrap(fmt.Errorf("reached max attempts for ethClient.RollupForgeBatch: %w", err))
}
if !resend {
t.accNextNonce = auth.Nonce.Uint64() + 1
}
batchInfo.EthTx = ethTx
log.Infow("TxManager ethClient.RollupForgeBatch", "batch", batchInfo.BatchNum, "tx", ethTx.Hash().Hex())
log.Infow("TxManager ethClient.RollupForgeBatch", "batch", batchInfo.BatchNum, "tx", ethTx.Hash())
now := time.Now()
batchInfo.SendTimestamp = now
if resend {
batchInfo.Debug.ResendNum++
}
batchInfo.Debug.Status = StatusSent
batchInfo.Debug.SendBlockNum = t.stats.Eth.LastBlock.Num + 1
batchInfo.Debug.SendTimestamp = batchInfo.SendTimestamp
batchInfo.Debug.StartToSendDelay = batchInfo.Debug.SendTimestamp.Sub(
batchInfo.Debug.StartTimestamp).Seconds()
t.cfg.debugBatchStore(batchInfo)
t.lastPendingBatch = batchInfo.BatchNum
if !resend {
if batchInfo.L1Batch {
t.lastSentL1BatchBlockNum = t.stats.Eth.LastBlock.Num + 1
}
}
if err := t.l2DB.DoneForging(common.TxIDsFromL2Txs(batchInfo.L2Txs), batchInfo.BatchNum); err != nil {
return tracerr.Wrap(err)
}
@@ -225,13 +297,20 @@ func (t *TxManager) checkEthTransactionReceipt(ctx context.Context, batchInfo *B
func (t *TxManager) handleReceipt(ctx context.Context, batchInfo *BatchInfo) (*int64, error) {
receipt := batchInfo.Receipt
if receipt != nil {
if batchInfo.EthTx.Nonce()+1 > t.accNonce {
t.accNonce = batchInfo.EthTx.Nonce() + 1
}
if receipt.Status == types.ReceiptStatusFailed {
batchInfo.Debug.Status = StatusFailed
t.cfg.debugBatchStore(batchInfo)
_, err := t.ethClient.EthCall(ctx, batchInfo.EthTx, receipt.BlockNumber)
log.Warnw("TxManager receipt status is failed", "tx", receipt.TxHash.Hex(),
log.Warnw("TxManager receipt status is failed", "tx", receipt.TxHash,
"batch", batchInfo.BatchNum, "block", receipt.BlockNumber.Int64(),
"err", err)
batchInfo.EthTxErr = err
if batchInfo.BatchNum <= t.lastSuccessBatch {
t.lastSuccessBatch = batchInfo.BatchNum - 1
}
t.cfg.debugBatchStore(batchInfo)
return nil, tracerr.Wrap(fmt.Errorf(
"ethereum transaction receipt status is failed: %w", err))
} else if receipt.Status == types.ReceiptStatusSuccessful {
@@ -239,6 +318,17 @@ func (t *TxManager) handleReceipt(ctx context.Context, batchInfo *BatchInfo) (*i
batchInfo.Debug.MineBlockNum = receipt.BlockNumber.Int64()
batchInfo.Debug.StartToMineBlocksDelay = batchInfo.Debug.MineBlockNum -
batchInfo.Debug.StartBlockNum
if batchInfo.Debug.StartToMineDelay == 0 {
if block, err := t.ethClient.EthBlockByNumber(ctx,
receipt.BlockNumber.Int64()); err != nil {
log.Warnw("TxManager: ethClient.EthBlockByNumber", "err", err)
} else {
batchInfo.Debug.SendToMineDelay = block.Timestamp.Sub(
batchInfo.Debug.SendTimestamp).Seconds()
batchInfo.Debug.StartToMineDelay = block.Timestamp.Sub(
batchInfo.Debug.StartTimestamp).Seconds()
}
}
t.cfg.debugBatchStore(batchInfo)
if batchInfo.BatchNum > t.lastSuccessBatch {
t.lastSuccessBatch = batchInfo.BatchNum
@@ -250,9 +340,72 @@ func (t *TxManager) handleReceipt(ctx context.Context, batchInfo *BatchInfo) (*i
return nil, nil
}
// TODO:
// - After sending a message: CancelPipeline, stop all consecutive pending Batches (transactions)
// Queue of BatchInfos
type Queue struct {
list []*BatchInfo
// nonceByBatchNum map[common.BatchNum]uint64
next int
}
// NewQueue returns a new queue
func NewQueue() Queue {
return Queue{
list: make([]*BatchInfo, 0),
// nonceByBatchNum: make(map[common.BatchNum]uint64),
next: 0,
}
}
// Len is the length of the queue
func (q *Queue) Len() int {
return len(q.list)
}
// At returns the BatchInfo at position (or nil if position is out of bounds)
func (q *Queue) At(position int) *BatchInfo {
if position >= len(q.list) {
return nil
}
return q.list[position]
}
// Next returns the next BatchInfo (or nil if queue is empty)
func (q *Queue) Next() (int, *BatchInfo) {
if len(q.list) == 0 {
return 0, nil
}
defer func() { q.next = (q.next + 1) % len(q.list) }()
return q.next, q.list[q.next]
}
// Remove removes the BatchInfo at position
func (q *Queue) Remove(position int) {
// batchInfo := q.list[position]
// delete(q.nonceByBatchNum, batchInfo.BatchNum)
q.list = append(q.list[:position], q.list[position+1:]...)
if len(q.list) == 0 {
q.next = 0
} else {
q.next = position % len(q.list)
}
}
// Push adds a new BatchInfo
func (q *Queue) Push(batchInfo *BatchInfo) {
q.list = append(q.list, batchInfo)
// q.nonceByBatchNum[batchInfo.BatchNum] = batchInfo.EthTx.Nonce()
}
// func (q *Queue) NonceByBatchNum(batchNum common.BatchNum) (uint64, bool) {
// nonce, ok := q.nonceByBatchNum[batchNum]
// return nonce, ok
// }
// Run the TxManager
func (t *TxManager) Run(ctx context.Context) {
next := 0
waitDuration := longWaitDuration
var statsVars statsVars
@@ -263,7 +416,7 @@ func (t *TxManager) Run(ctx context.Context) {
t.stats = statsVars.Stats
t.syncSCVars(statsVars.Vars)
log.Infow("TxManager: received initial statsVars",
"block", t.stats.Eth.LastBlock.Num, "batch", t.stats.Eth.LastBatch)
"block", t.stats.Eth.LastBlock.Num, "batch", t.stats.Eth.LastBatchNum)
for {
select {
@@ -273,8 +426,27 @@ func (t *TxManager) Run(ctx context.Context) {
case statsVars := <-t.statsVarsCh:
t.stats = statsVars.Stats
t.syncSCVars(statsVars.Vars)
case pipelineNum := <-t.discardPipelineCh:
t.minPipelineNum = pipelineNum + 1
if err := t.removeBadBatchInfos(ctx); ctx.Err() != nil {
continue
} else if err != nil {
log.Errorw("TxManager: removeBadBatchInfos", "err", err)
continue
}
case batchInfo := <-t.batchCh:
if err := t.sendRollupForgeBatch(ctx, batchInfo); ctx.Err() != nil {
if batchInfo.PipelineNum < t.minPipelineNum {
log.Warnw("TxManager: batchInfo received pipelineNum < minPipelineNum",
"num", batchInfo.PipelineNum, "minNum", t.minPipelineNum)
}
if err := t.shouldSendRollupForgeBatch(batchInfo); err != nil {
log.Warnw("TxManager: shouldSend", "err", err,
"batch", batchInfo.BatchNum)
t.coord.SendMsg(ctx, MsgStopPipeline{
Reason: fmt.Sprintf("forgeBatch shouldSend: %v", err)})
continue
}
if err := t.sendRollupForgeBatch(ctx, batchInfo, false); ctx.Err() != nil {
continue
} else if err != nil {
// If we reach here it's because our ethNode has
@@ -282,19 +454,20 @@ func (t *TxManager) Run(ctx context.Context) {
// ethereum. This could be due to the ethNode
// failure, or an invalid transaction (that
// can't be mined)
t.coord.SendMsg(ctx, MsgStopPipeline{Reason: fmt.Sprintf("forgeBatch send: %v", err)})
log.Warnw("TxManager: forgeBatch send failed", "err", err,
"batch", batchInfo.BatchNum)
t.coord.SendMsg(ctx, MsgStopPipeline{
Reason: fmt.Sprintf("forgeBatch send: %v", err)})
continue
}
t.queue = append(t.queue, batchInfo)
t.queue.Push(batchInfo)
waitDuration = t.cfg.TxManagerCheckInterval
case <-time.After(waitDuration):
if len(t.queue) == 0 {
queuePosition, batchInfo := t.queue.Next()
if batchInfo == nil {
waitDuration = longWaitDuration
continue
}
current := next
next = (current + 1) % len(t.queue)
batchInfo := t.queue[current]
if err := t.checkEthTransactionReceipt(ctx, batchInfo); ctx.Err() != nil {
continue
} else if err != nil { //nolint:staticcheck
@@ -304,7 +477,8 @@ func (t *TxManager) Run(ctx context.Context) {
// if it was not mined, mined and successful or
// mined and failed. This could be due to the
// ethNode failure.
t.coord.SendMsg(ctx, MsgStopPipeline{Reason: fmt.Sprintf("forgeBatch receipt: %v", err)})
t.coord.SendMsg(ctx, MsgStopPipeline{
Reason: fmt.Sprintf("forgeBatch receipt: %v", err)})
}
confirm, err := t.handleReceipt(ctx, batchInfo)
@@ -312,32 +486,106 @@ func (t *TxManager) Run(ctx context.Context) {
continue
} else if err != nil { //nolint:staticcheck
// Transaction was rejected
t.queue = append(t.queue[:current], t.queue[current+1:]...)
if len(t.queue) == 0 {
next = 0
} else {
next = current % len(t.queue)
if err := t.removeBadBatchInfos(ctx); ctx.Err() != nil {
continue
} else if err != nil {
log.Errorw("TxManager: removeBadBatchInfos", "err", err)
continue
}
t.coord.SendMsg(ctx, MsgStopPipeline{Reason: fmt.Sprintf("forgeBatch reject: %v", err)})
t.coord.SendMsg(ctx, MsgStopPipeline{
Reason: fmt.Sprintf("forgeBatch reject: %v", err)})
continue
}
if confirm != nil && *confirm >= t.cfg.ConfirmBlocks {
log.Debugw("TxManager tx for RollupForgeBatch confirmed",
"batch", batchInfo.BatchNum)
t.queue = append(t.queue[:current], t.queue[current+1:]...)
if len(t.queue) == 0 {
next = 0
} else {
next = current % len(t.queue)
now := time.Now()
if !t.cfg.EthNoReuseNonce && confirm == nil &&
now.Sub(batchInfo.SendTimestamp) > t.cfg.EthTxResendTimeout {
log.Infow("TxManager: forgeBatch tx not been mined timeout, resending",
"tx", batchInfo.EthTx.Hash(), "batch", batchInfo.BatchNum)
if err := t.sendRollupForgeBatch(ctx, batchInfo, true); ctx.Err() != nil {
continue
} else if err != nil {
// If we reach here it's because our ethNode has
// been unable to send the transaction to
// ethereum. This could be due to the ethNode
// failure, or an invalid transaction (that
// can't be mined)
log.Warnw("TxManager: forgeBatch resend failed", "err", err,
"batch", batchInfo.BatchNum)
t.coord.SendMsg(ctx, MsgStopPipeline{
Reason: fmt.Sprintf("forgeBatch resend: %v", err)})
continue
}
}
if confirm != nil && *confirm >= t.cfg.ConfirmBlocks {
log.Debugw("TxManager: forgeBatch tx confirmed",
"tx", batchInfo.EthTx.Hash(), "batch", batchInfo.BatchNum)
t.queue.Remove(queuePosition)
}
}
}
}
// nolint reason: this function will be used in the future
//nolint:unused
func (t *TxManager) canForge(stats *synchronizer.Stats, blockNum int64) bool {
func (t *TxManager) removeBadBatchInfos(ctx context.Context) error {
next := 0
for {
batchInfo := t.queue.At(next)
if batchInfo == nil {
break
}
if err := t.checkEthTransactionReceipt(ctx, batchInfo); ctx.Err() != nil {
return nil
} else if err != nil {
// Our ethNode is giving an error different
// than "not found" when getting the receipt
// for the transaction, so we can't figure out
// if it was not mined, mined and successful or
// mined and failed. This could be due to the
// ethNode failure.
next++
continue
}
confirm, err := t.handleReceipt(ctx, batchInfo)
if ctx.Err() != nil {
return nil
} else if err != nil {
// Transaction was rejected
if t.minPipelineNum <= batchInfo.PipelineNum {
t.minPipelineNum = batchInfo.PipelineNum + 1
}
t.queue.Remove(next)
continue
}
// If tx is pending but is from a cancelled pipeline, remove it
// from the queue
if confirm == nil {
if batchInfo.PipelineNum < t.minPipelineNum {
t.queue.Remove(next)
continue
}
}
next++
}
accNonce, err := t.ethClient.EthNonceAt(ctx, t.account.Address, nil)
if err != nil {
return err
}
if !t.cfg.EthNoReuseNonce {
t.accNextNonce = accNonce
}
return nil
}
func (t *TxManager) canForgeAt(blockNum int64) bool {
return canForge(&t.consts.Auction, &t.vars.Auction,
&stats.Sync.Auction.CurrentSlot, &stats.Sync.Auction.NextSlot,
&t.stats.Sync.Auction.CurrentSlot, &t.stats.Sync.Auction.NextSlot,
t.cfg.ForgerAddress, blockNum)
}
func (t *TxManager) mustL1L2Batch(blockNum int64) bool {
lastL1BatchBlockNum := t.lastSentL1BatchBlockNum
if t.stats.Sync.LastL1BatchBlock > lastL1BatchBlockNum {
lastL1BatchBlockNum = t.stats.Sync.LastL1BatchBlock
}
return blockNum-lastL1BatchBlockNum >= t.vars.Rollup.ForgeL1L2BatchTimeout-1
}


@@ -0,0 +1,15 @@
package coordinator
import (
"math/big"
"testing"
"github.com/stretchr/testify/assert"
)
func TestAddPerc(t *testing.T) {
assert.Equal(t, "110", addPerc(big.NewInt(100), 10).String())
assert.Equal(t, "101", addPerc(big.NewInt(100), 1).String())
assert.Equal(t, "12", addPerc(big.NewInt(10), 20).String())
assert.Equal(t, "1500", addPerc(big.NewInt(1000), 50).String())
}