From 3b3d96e07cc38e2a8a7a03d3022b234349621f8b Mon Sep 17 00:00:00 2001 From: Pantani Date: Tue, 23 Mar 2021 01:40:32 -0300 Subject: [PATCH 1/3] create the metrics package for a better app instrumenting --- cli/node/cfg.buidler.toml | 2 +- metric/metric.go | 192 +++++++++++++++++++++++++++++++++++ synchronizer/metrics.go | 44 -------- synchronizer/synchronizer.go | 10 +- txselector/metrics.go | 53 ---------- txselector/txselector.go | 22 ++-- 6 files changed, 211 insertions(+), 112 deletions(-) create mode 100644 metric/metric.go delete mode 100644 synchronizer/metrics.go delete mode 100644 txselector/metrics.go diff --git a/cli/node/cfg.buidler.toml b/cli/node/cfg.buidler.toml index f2d561e..bfd8675 100644 --- a/cli/node/cfg.buidler.toml +++ b/cli/node/cfg.buidler.toml @@ -35,7 +35,7 @@ Symbol = "SUSHI" Addr = "0x6b3595068778dd592e39a122f4f5a5cf09c90fe2" [Debug] -APIAddress = "localhost:12345" +APIAddress = "0.0.0.0:12345" MeddlerLogs = true GinDebugMode = true diff --git a/metric/metric.go b/metric/metric.go new file mode 100644 index 0000000..a3cc1f5 --- /dev/null +++ b/metric/metric.go @@ -0,0 +1,192 @@ +package metric + +import ( + "time" + + "github.com/hermeznetwork/hermez-node/log" + "github.com/prometheus/client_golang/prometheus" +) + +type ( + // Metric represents the metric type + Metric string +) + +const ( + namespaceError = "error" + namespaceSync = "synchronizer" + namespaceTxSelector = "txselector" + namespaceAPI = "api" +) + +var ( + // Errors errors count metric. + Errors = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespaceError, + Name: "errors", + Help: "", + }, []string{"error"}) + + // WaitServerProof duration time to get the calculated + // proof from the server. + WaitServerProof = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespaceSync, + Name: "wait_server_proof", + Help: "", + }, []string{"batch_number", "pipeline_number"}) + + // Reorgs block reorg count + Reorgs = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespaceSync, + Name: "reorgs", + Help: "", + }) + + // LastBlockNum last block synced + LastBlockNum = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceSync, + Name: "synced_last_block_num", + Help: "", + }) + + // EthLastBlockNum last eth block synced + EthLastBlockNum = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceSync, + Name: "eth_last_block_num", + Help: "", + }) + + // LastBatchNum last batch synced + LastBatchNum = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceSync, + Name: "synced_last_batch_num", + Help: "", + }) + + // EthLastBatchNum last eth batch synced + EthLastBatchNum = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceSync, + Name: "eth_last_batch_num", + Help: "", + }) + + // GetL2TxSelection L2 tx selection count + GetL2TxSelection = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespaceTxSelector, + Name: "get_l2_txselection_total", + Help: "", + }) + + // GetL1L2TxSelection L1L2 tx selection count + GetL1L2TxSelection = prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespaceTxSelector, + Name: "get_l1_l2_txselection_total", + Help: "", + }) + + // SelectedL1CoordinatorTxs selected L1 coordinator tx count + SelectedL1CoordinatorTxs = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceTxSelector, + Name: "selected_l1_coordinator_txs", + Help: "", + }) + + // SelectedL1UserTxs selected L1 user tx count + SelectedL1UserTxs = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceTxSelector, + Name: "selected_l1_user_txs", + Help: "", + }) + + // SelectedL2Txs selected L2 tx count + SelectedL2Txs = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceTxSelector, + Name: "selected_l2_txs", + Help: "", + }) + + // DiscardedL2Txs discarded L2 tx count + DiscardedL2Txs = prometheus.NewGauge( + prometheus.GaugeOpts{ + Namespace: namespaceTxSelector, + Name: "discarded_l2_txs", + Help: "", + }) +) + +func init() { + if err := registerCollectors(); err != nil { + log.Error(err) + } +} +func registerCollectors() error { + if err := registerCollector(Errors); err != nil { + return err + } + if err := registerCollector(WaitServerProof); err != nil { + return err + } + if err := registerCollector(Reorgs); err != nil { + return err + } + if err := registerCollector(LastBlockNum); err != nil { + return err + } + if err := registerCollector(LastBatchNum); err != nil { + return err + } + if err := registerCollector(EthLastBlockNum); err != nil { + return err + } + if err := registerCollector(EthLastBatchNum); err != nil { + return err + } + if err := registerCollector(GetL2TxSelection); err != nil { + return err + } + if err := registerCollector(GetL1L2TxSelection); err != nil { + return err + } + if err := registerCollector(SelectedL1CoordinatorTxs); err != nil { + return err + } + if err := registerCollector(SelectedL1UserTxs); err != nil { + return err + } + return registerCollector(DiscardedL2Txs) +} + +func registerCollector(collector prometheus.Collector) error { + err := prometheus.Register(collector) + if err != nil { + if _, ok := err.(prometheus.AlreadyRegisteredError); !ok { + return err + } + } + return nil +} + +// MeasureDuration measure the method execution duration +// and save it into a histogram metric +func MeasureDuration(histogram *prometheus.HistogramVec, start time.Time, lvs ...string) { + duration := time.Since(start) + histogram.WithLabelValues(lvs...).Observe(float64(duration.Milliseconds())) +} + +// CollectError collect the error message and increment +// the error count +func CollectError(err error) { + Errors.With(map[string]string{"error": err.Error()}).Inc() +} diff --git a/synchronizer/metrics.go b/synchronizer/metrics.go deleted file mode 100644 index bff876b..0000000 --- a/synchronizer/metrics.go +++ /dev/null @@ -1,44 +0,0 @@ -package synchronizer - -import "github.com/prometheus/client_golang/prometheus" - -var ( - metricReorgsCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "sync_reorgs", - Help: "", - }, - ) - metricSyncedLastBlockNum = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "sync_synced_last_block_num", - Help: "", - }, - ) - metricEthLastBlockNum = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "sync_eth_last_block_num", - Help: "", - }, - ) - metricSyncedLastBatchNum = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "sync_synced_last_batch_num", - Help: "", - }, - ) - metricEthLastBatchNum = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "sync_eth_last_batch_num", - Help: "", - }, - ) -) - -func init() { - prometheus.MustRegister(metricReorgsCount) - prometheus.MustRegister(metricSyncedLastBlockNum) - prometheus.MustRegister(metricEthLastBlockNum) - prometheus.MustRegister(metricSyncedLastBatchNum) - prometheus.MustRegister(metricEthLastBatchNum) -} diff --git a/synchronizer/synchronizer.go b/synchronizer/synchronizer.go index 0d1b9fb..63bb1d5 100644 --- a/synchronizer/synchronizer.go +++ b/synchronizer/synchronizer.go @@ -15,6 +15,7 @@ import ( "github.com/hermeznetwork/hermez-node/db/statedb" "github.com/hermeznetwork/hermez-node/eth" "github.com/hermeznetwork/hermez-node/log" + "github.com/hermeznetwork/hermez-node/metric" "github.com/hermeznetwork/hermez-node/txprocessor" "github.com/hermeznetwork/tracerr" ) @@ -549,6 +550,7 @@ func (s *Synchronizer) Sync(ctx context.Context, return nil, nil, tracerr.Wrap(err) } discarded := lastSavedBlock.Num - lastDBBlockNum + metric.Reorgs.Inc() return nil, &discarded, nil } } @@ -641,16 +643,16 @@ func (s *Synchronizer) Sync(ctx context.Context, } for _, batchData := range rollupData.Batches { - metricSyncedLastBatchNum.Set(float64(batchData.Batch.BatchNum)) - metricEthLastBatchNum.Set(float64(s.stats.Eth.LastBatchNum)) + metric.LastBatchNum.Set(float64(batchData.Batch.BatchNum)) + metric.EthLastBatchNum.Set(float64(s.stats.Eth.LastBatchNum)) log.Debugw("Synced batch", "syncLastBatch", batchData.Batch.BatchNum, "syncBatchesPerc", s.stats.batchesPerc(batchData.Batch.BatchNum), "ethLastBatch", s.stats.Eth.LastBatchNum, ) } - metricSyncedLastBlockNum.Set(float64(s.stats.Sync.LastBlock.Num)) - metricEthLastBlockNum.Set(float64(s.stats.Eth.LastBlock.Num)) + metric.LastBlockNum.Set(float64(s.stats.Sync.LastBlock.Num)) + metric.EthLastBlockNum.Set(float64(s.stats.Eth.LastBlock.Num)) log.Debugw("Synced block", "syncLastBlockNum", s.stats.Sync.LastBlock.Num, "syncBlocksPerc", s.stats.blocksPerc(), diff --git a/txselector/metrics.go b/txselector/metrics.go deleted file mode 100644 index cb4d780..0000000 --- a/txselector/metrics.go +++ /dev/null @@ -1,53 +0,0 @@ -package txselector - -import "github.com/prometheus/client_golang/prometheus" - -var ( - metricGetL2TxSelection = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "txsel_get_l2_txselecton_total", - Help: "", - }, - ) - metricGetL1L2TxSelection = prometheus.NewCounter( - prometheus.CounterOpts{ - Name: "txsel_get_l1_l2_txselecton_total", - Help: "", - }, - ) - - metricSelectedL1CoordinatorTxs = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "txsel_selected_l1_coordinator_txs", - Help: "", - }, - ) - metricSelectedL1UserTxs = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "txsel_selected_l1_user_txs", - Help: "", - }, - ) - metricSelectedL2Txs = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "txsel_selected_l2_txs", - Help: "", - }, - ) - metricDiscardedL2Txs = prometheus.NewGauge( - prometheus.GaugeOpts{ - Name: "txsel_discarded_l2_txs", - Help: "", - }, - ) -) - -func init() { - prometheus.MustRegister(metricGetL2TxSelection) - prometheus.MustRegister(metricGetL1L2TxSelection) - - prometheus.MustRegister(metricSelectedL1CoordinatorTxs) - prometheus.MustRegister(metricSelectedL1UserTxs) - prometheus.MustRegister(metricSelectedL2Txs) - prometheus.MustRegister(metricDiscardedL2Txs) -} diff --git a/txselector/txselector.go b/txselector/txselector.go index 46338b7..1453367 100644 --- a/txselector/txselector.go +++ b/txselector/txselector.go @@ -13,6 +13,7 @@ import ( "github.com/hermeznetwork/hermez-node/db/l2db" "github.com/hermeznetwork/hermez-node/db/statedb" "github.com/hermeznetwork/hermez-node/log" + "github.com/hermeznetwork/hermez-node/metric" "github.com/hermeznetwork/hermez-node/txprocessor" "github.com/hermeznetwork/tracerr" "github.com/iden3/go-iden3-crypto/babyjub" @@ -123,7 +124,7 @@ func (txsel *TxSelector) coordAccountForTokenID(l1CoordinatorTxs []common.L1Tx, // included in the next batch. func (txsel *TxSelector) GetL2TxSelection(selectionConfig txprocessor.Config) ([]common.Idx, [][]byte, []common.L1Tx, []common.PoolL2Tx, []common.PoolL2Tx, error) { - metricGetL2TxSelection.Inc() + metric.GetL2TxSelection.Inc() coordIdxs, accCreationAuths, _, l1CoordinatorTxs, l2Txs, discardedL2Txs, err := txsel.getL1L2TxSelection(selectionConfig, []common.L1Tx{}) return coordIdxs, accCreationAuths, l1CoordinatorTxs, l2Txs, @@ -141,7 +142,7 @@ func (txsel *TxSelector) GetL2TxSelection(selectionConfig txprocessor.Config) ([ func (txsel *TxSelector) GetL1L2TxSelection(selectionConfig txprocessor.Config, l1UserTxs []common.L1Tx) ([]common.Idx, [][]byte, []common.L1Tx, []common.L1Tx, []common.PoolL2Tx, []common.PoolL2Tx, error) { - metricGetL1L2TxSelection.Inc() + metric.GetL1L2TxSelection.Inc() coordIdxs, accCreationAuths, l1UserTxs, l1CoordinatorTxs, l2Txs, discardedL2Txs, err := txsel.getL1L2TxSelection(selectionConfig, l1UserTxs) return coordIdxs, accCreationAuths, l1UserTxs, l1CoordinatorTxs, l2Txs, @@ -221,10 +222,11 @@ func (txsel *TxSelector) getL1L2TxSelection(selectionConfig txprocessor.Config, return nil, nil, nil, nil, nil, nil, tracerr.Wrap(err) } - metricSelectedL1UserTxs.Set(float64(len(l1UserTxs))) - metricSelectedL1CoordinatorTxs.Set(0) - metricSelectedL2Txs.Set(0) - metricDiscardedL2Txs.Set(float64(len(discardedL2Txs))) + metric.SelectedL1UserTxs.Set(float64(len(l1UserTxs))) + metric.SelectedL1CoordinatorTxs.Set(0) + metric.SelectedL2Txs.Set(0) + metric.DiscardedL2Txs.Set(float64(len(discardedL2Txs))) + return nil, nil, l1UserTxs, nil, nil, discardedL2Txs, nil } @@ -320,10 +322,10 @@ func (txsel *TxSelector) getL1L2TxSelection(selectionConfig txprocessor.Config, return nil, nil, nil, nil, nil, nil, tracerr.Wrap(err) } - metricSelectedL1UserTxs.Set(float64(len(l1UserTxs))) - metricSelectedL1CoordinatorTxs.Set(float64(len(l1CoordinatorTxs))) - metricSelectedL2Txs.Set(float64(len(validTxs))) - metricDiscardedL2Txs.Set(float64(len(discardedL2Txs))) + metric.SelectedL1CoordinatorTxs.Set(float64(len(l1CoordinatorTxs))) + metric.SelectedL1UserTxs.Set(float64(len(l1UserTxs))) + metric.SelectedL2Txs.Set(float64(len(validTxs))) + metric.DiscardedL2Txs.Set(float64(len(discardedL2Txs))) return coordIdxs, accAuths, l1UserTxs, l1CoordinatorTxs, validTxs, discardedL2Txs, nil } From 6d84d143a2a87838f8ccdcd9e00e8d684ea7b429 Mon Sep 17 00:00:00 2001 From: Pantani Date: Tue, 23 Mar 2021 01:40:45 -0300 Subject: [PATCH 2/3] Measure the server proof duration --- coordinator/batch.go | 1 + coordinator/pipeline.go | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/coordinator/batch.go b/coordinator/batch.go index 8df15d8..75f35ad 100644 --- a/coordinator/batch.go +++ b/coordinator/batch.go @@ -80,6 +80,7 @@ type BatchInfo struct { PipelineNum int BatchNum common.BatchNum ServerProof prover.Client + ProofStart time.Time ZKInputs *common.ZKInputs Proof *prover.Proof PublicInputs []*big.Int diff --git a/coordinator/pipeline.go b/coordinator/pipeline.go index aa690b4..939815e 100644 --- a/coordinator/pipeline.go +++ b/coordinator/pipeline.go @@ -5,6 +5,7 @@ import ( "database/sql" "fmt" "math/big" + "strconv" "sync" "time" @@ -14,6 +15,7 @@ import ( "github.com/hermeznetwork/hermez-node/db/l2db" "github.com/hermeznetwork/hermez-node/eth" "github.com/hermeznetwork/hermez-node/log" + "github.com/hermeznetwork/hermez-node/metric" "github.com/hermeznetwork/hermez-node/prover" "github.com/hermeznetwork/hermez-node/synchronizer" "github.com/hermeznetwork/hermez-node/txselector" @@ -246,6 +248,7 @@ func (p *Pipeline) handleForgeBatch(ctx context.Context, // 3. Send the ZKInputs to the proof server batchInfo.ServerProof = serverProof + batchInfo.ProofStart = time.Now() if err := p.sendServerProof(ctx, batchInfo); ctx.Err() != nil { return nil, ctx.Err() } else if err != nil { @@ -602,6 +605,9 @@ func (p *Pipeline) forgeBatch(batchNum common.BatchNum) (batchInfo *BatchInfo, // waitServerProof gets the generated zkProof & sends it to the SmartContract func (p *Pipeline) waitServerProof(ctx context.Context, batchInfo *BatchInfo) error { + defer metric.MeasureDuration(metric.WaitServerProof, batchInfo.ProofStart, + batchInfo.BatchNum.BigInt().String(), strconv.Itoa(batchInfo.PipelineNum)) + proof, pubInputs, err := batchInfo.ServerProof.GetProof(ctx) // blocking call, // until not resolved don't continue. Returns when the proof server has calculated the proof if err != nil { From 9245247ee41864943d8d3168bf5337fb49ed4c19 Mon Sep 17 00:00:00 2001 From: Pantani Date: Thu, 25 Mar 2021 09:31:29 -0300 Subject: [PATCH 3/3] create a gin middleware to collect request metrics and export them to the Prometheus route --- api/api.go | 7 +++++ api/handlers.go | 6 +++- metric/request.go | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 1 deletion(-) create mode 100644 metric/request.go diff --git a/api/api.go b/api/api.go index 926bc86..89f9661 100644 --- a/api/api.go +++ b/api/api.go @@ -7,6 +7,7 @@ import ( "github.com/gin-gonic/gin" "github.com/hermeznetwork/hermez-node/db/historydb" "github.com/hermeznetwork/hermez-node/db/l2db" + "github.com/hermeznetwork/hermez-node/metric" "github.com/hermeznetwork/tracerr" ) @@ -50,6 +51,12 @@ func NewAPI( hermezAddress: consts.HermezAddress, } + middleware, err := metric.PrometheusMiddleware() + if err != nil { + return nil, err + } + server.Use(middleware) + v1 := server.Group("/v1") // Add coordinator endpoints diff --git a/api/handlers.go b/api/handlers.go index 6e9a2e9..aeefd04 100644 --- a/api/handlers.go +++ b/api/handlers.go @@ -8,6 +8,7 @@ import ( "github.com/gin-gonic/gin" "github.com/hermeznetwork/hermez-node/db/historydb" "github.com/hermeznetwork/hermez-node/log" + "github.com/hermeznetwork/hermez-node/metric" "github.com/hermeznetwork/tracerr" "github.com/lib/pq" "github.com/russross/meddler" @@ -46,7 +47,9 @@ var ( func retSQLErr(err error, c *gin.Context) { log.Warnw("HTTP API SQL request error", "err", err) - errMsg := tracerr.Unwrap(err).Error() + unwrapErr := tracerr.Unwrap(err) + metric.CollectError(unwrapErr) + errMsg := unwrapErr.Error() retDupKey := func(errCode pq.ErrorCode) { // https://www.postgresql.org/docs/current/errcodes-appendix.html if errCode == "23505" { @@ -80,6 +83,7 @@ func retSQLErr(err error, c *gin.Context) { func retBadReq(err error, c *gin.Context) { log.Warnw("HTTP API Bad request error", "err", err) + metric.CollectError(err) c.JSON(http.StatusBadRequest, errorMsg{ Message: err.Error(), }) diff --git a/metric/request.go b/metric/request.go new file mode 100644 index 0000000..b9dcf8d --- /dev/null +++ b/metric/request.go @@ -0,0 +1,78 @@ +package metric + +import ( + "strconv" + "time" + + "github.com/gin-gonic/gin" + "github.com/prometheus/client_golang/prometheus" +) + +const ( + favicon = "/favicon.ico" +) + +// Prometheus contains the metrics gathered by the instance and its path +type Prometheus struct { + reqCnt *prometheus.CounterVec + reqDur *prometheus.HistogramVec +} + +// NewPrometheus generates a new set of metrics with a certain subsystem name +func NewPrometheus() (*Prometheus, error) { + reqCnt := prometheus.NewCounterVec( + prometheus.CounterOpts{ + Namespace: namespaceAPI, + Name: "requests_total", + Help: "How many HTTP requests processed, partitioned by status code and HTTP method", + }, + []string{"code", "method", "path"}, + ) + if err := registerCollector(reqCnt); err != nil { + return nil, err + } + reqDur := prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Namespace: namespaceAPI, + Name: "request_duration_seconds", + Help: "The HTTP request latencies in seconds", + }, + []string{"code", "method", "path"}, + ) + if err := registerCollector(reqDur); err != nil { + return nil, err + } + return &Prometheus{ + reqCnt: reqCnt, + reqDur: reqDur, + }, nil +} + +// PrometheusMiddleware creates the prometheus collector and +// defines status handler function for the middleware +func PrometheusMiddleware() (gin.HandlerFunc, error) { + p, err := NewPrometheus() + if err != nil { + return nil, err + } + return p.Middleware(), nil +} + +// Middleware defines status handler function for middleware +func (p *Prometheus) Middleware() gin.HandlerFunc { + return func(c *gin.Context) { + if c.Request.URL.Path == favicon { + c.Next() + return + } + start := time.Now() + c.Next() + + status := strconv.Itoa(c.Writer.Status()) + elapsed := float64(time.Since(start)) / float64(time.Second) + fullPath := c.FullPath() + + p.reqDur.WithLabelValues(status, c.Request.Method, fullPath).Observe(elapsed) + p.reqCnt.WithLabelValues(status, c.Request.Method, fullPath).Inc() + } +}