From 4167583b8d7fde9d72e8f31788cf92affcbf1992 Mon Sep 17 00:00:00 2001 From: arnaucube Date: Sun, 23 May 2021 16:19:04 +0200 Subject: [PATCH] Add dbgStats metrics Add dbgStats metrics to analyze number of Hashes, db Gets, and db Puts. Current benchmarks: ``` CASE A, AddBatch was 8.841700 times faster than without AddBatch nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory dbgStats(hash: 2.044k, dbGet: 1, dbPut: 2.049k) CASE B, AddBatch was 7.678766 times faster than without AddBatch nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory dbgStats(hash: 2.044k, dbGet: 199, dbPut: 2.049k) CASE C, AddBatch was 8.401087 times faster than without AddBatch nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory dbgStats(hash: 2.044k, dbGet: 207, dbPut: 2.049k) CASE D, AddBatch was 2.466346 times faster than without AddBatch nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory dbgStats(hash: 33.884k, dbGet: 30.697k, dbPut: 33.889k) CASE E, AddBatch was 1.958160 times faster than without AddBatch nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory dbgStats(hash: 41.419k, dbGet: 37.558k, dbPut: 41.874k) TestAddBatchBench: nCPU: 4, nLeafs: 50000, hash: Blake2b, db: leveldb Add loop: 10.089858449s dbgStats(hash: 825.285k, dbGet: 788.869k, dbPut: 925.285k) AddBatch: 904.647829ms dbgStats(hash: 122.458k, dbGet: 1, dbPut: 122.463k) TestDbgStats add in loop dbgStats(hash: 141.915k, dbGet: 134.602k, dbPut: 161.915k) addbatch caseA dbgStats(hash: 24.528k, dbGet: 1, dbPut: 24.533k) addbatch caseD dbgStats(hash: 115.506k, dbGet: 97.482k, dbPut: 115.516k) ``` --- addbatch.go | 85 +++++++++++++++++++++-------- addbatch_test.go | 135 +++++++++++++++++++++++++++++++++++++++++------ dbg.go | 67 +++++++++++++++++++++++ tree.go | 54 ++++++++++++++----- vt.go | 15 +++++- 5 files changed, 304 insertions(+), 52 deletions(-) create mode 100644 dbg.go diff --git a/addbatch.go b/addbatch.go index 3d69f4f..6fccc7f 100644 --- a/addbatch.go +++ b/addbatch.go @@ -153,10 +153,6 @@ func (t *Tree) AddBatch(keys, values [][]byte) ([]int, error) { t.Lock() defer t.Unlock() - // when len(keyvalues) is not a power of 2, cut at the biggest power of - // 2 under the len(keys), add those 2**n key-values using the AddBatch - // approach, and then add the remaining key-values using tree.Add. - kvs, err := t.keysValuesToKvs(keys, values) if err != nil { return nil, err @@ -258,7 +254,7 @@ func (t *Tree) AddBatch(keys, values [][]byte) ([]int, error) { func (t *Tree) finalizeAddBatch(nKeys int, invalids []int) ([]int, error) { // store root to db - if err := t.tx.Put(dbKeyRoot, t.root); err != nil { + if err := t.dbPut(dbKeyRoot, t.root); err != nil { return nil, err } @@ -275,7 +271,7 @@ func (t *Tree) finalizeAddBatch(nKeys int, invalids []int) ([]int, error) { } func (t *Tree) caseA(nCPU int, kvs []kv) ([]int, error) { - invalids, err := t.buildTreeBottomUp(nCPU, kvs) + invalids, err := t.buildTreeFromLeafs(nCPU, kvs) if err != nil { return nil, err } @@ -301,17 +297,18 @@ func (t *Tree) caseB(nCPU, l int, kvs []kv) ([]int, error) { var invalids2 []int if nCPU > 1 { - invalids2, err = t.buildTreeBottomUp(nCPU, kvs) + invalids2, err = t.buildTreeFromLeafs(nCPU, kvs) if err != nil { return nil, err } } else { - invalids2, err = t.buildTreeBottomUpSingleThread(l, kvs) + invalids2, err = t.buildTreeFromLeafsSingleThread(l, kvs) if err != nil { return nil, err } } invalids = append(invalids, invalids2...) + return invalids, nil } @@ -323,6 +320,7 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { // 2. use keys at level L as roots of the subtrees under each one subRoots := make([][]byte, nCPU) + dbgStatsPerBucket := make([]*dbgStats, nCPU) txs := make([]db.Tx, nCPU) var wg sync.WaitGroup wg.Add(nCPU) @@ -337,7 +335,8 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { panic(err) // TODO } bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels, - hashFunction: t.hashFunction, root: keysAtL[cpu]} + hashFunction: t.hashFunction, root: keysAtL[cpu], + emptyHash: t.emptyHash, dbg: newDbgStats()} // 3. do CASE B (with 1 cpu) for each key at level L _, err = bucketTree.caseB(1, l, buckets[cpu]) // TODO handle invalids @@ -346,6 +345,7 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { // return nil, err } subRoots[cpu] = bucketTree.root + dbgStatsPerBucket[cpu] = bucketTree.dbg wg.Done() }(i) } @@ -372,6 +372,11 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { invalids = append(invalids, excedents[i].pos) } } + + for i := 0; i < len(dbgStatsPerBucket); i++ { + t.dbg.add(dbgStatsPerBucket[i]) + } + return invalids, nil } @@ -390,6 +395,7 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { subRoots := make([][]byte, nCPU) invalidsInBucket := make([][]int, nCPU) + dbgStatsPerBucket := make([]*dbgStats, nCPU) txs := make([]db.Tx, nCPU) var wg sync.WaitGroup @@ -409,7 +415,8 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { } bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels - l, - hashFunction: t.hashFunction, root: keysAtL[cpu]} + hashFunction: t.hashFunction, root: keysAtL[cpu], + emptyHash: t.emptyHash, dbg: newDbgStats()} for j := 0; j < len(buckets[cpu]); j++ { if err = bucketTree.add(l, buckets[cpu][j].k, buckets[cpu][j].v); err != nil { @@ -417,6 +424,7 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { } } subRoots[cpu] = bucketTree.root + dbgStatsPerBucket[cpu] = bucketTree.dbg wg.Done() }(i) } @@ -440,6 +448,10 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { invalids = append(invalids, invalidsInBucket[i]...) } + for i := 0; i < len(dbgStatsPerBucket); i++ { + t.dbg.add(dbgStatsPerBucket[i]) + } + return invalids, nil } @@ -539,17 +551,18 @@ func (t *Tree) kvsToKeysValues(kvs []kv) ([][]byte, [][]byte) { } */ -// buildTreeBottomUp splits the key-values into n Buckets (where n is the number +// buildTreeFromLeafs splits the key-values into n Buckets (where n is the number // of CPUs), in parallel builds a subtree for each bucket, once all the subtrees // are built, uses the subtrees roots as keys for a new tree, which as result // will have the complete Tree build from bottom to up, where until the // log2(nCPU) level it has been computed in parallel. -func (t *Tree) buildTreeBottomUp(nCPU int, kvs []kv) ([]int, error) { +func (t *Tree) buildTreeFromLeafs(nCPU int, kvs []kv) ([]int, error) { l := int(math.Log2(float64(nCPU))) buckets := splitInBuckets(kvs, nCPU) subRoots := make([][]byte, nCPU) invalidsInBucket := make([][]int, nCPU) + dbgStatsPerBucket := make([]*dbgStats, nCPU) txs := make([]db.Tx, nCPU) var wg sync.WaitGroup @@ -567,14 +580,16 @@ func (t *Tree) buildTreeBottomUp(nCPU int, kvs []kv) ([]int, error) { panic(err) // TODO } bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels, - hashFunction: t.hashFunction, root: t.emptyHash} + hashFunction: t.hashFunction, root: t.emptyHash, + emptyHash: t.emptyHash, dbg: newDbgStats()} - currInvalids, err := bucketTree.buildTreeBottomUpSingleThread(l, buckets[cpu]) + currInvalids, err := bucketTree.buildTreeFromLeafsSingleThread(l, buckets[cpu]) if err != nil { panic(err) // TODO } invalidsInBucket[cpu] = currInvalids subRoots[cpu] = bucketTree.root + dbgStatsPerBucket[cpu] = bucketTree.dbg wg.Done() }(i) } @@ -598,12 +613,16 @@ func (t *Tree) buildTreeBottomUp(nCPU int, kvs []kv) ([]int, error) { invalids = append(invalids, invalidsInBucket[i]...) } + for i := 0; i < len(dbgStatsPerBucket); i++ { + t.dbg.add(dbgStatsPerBucket[i]) + } + return invalids, err } -// buildTreeBottomUpSingleThread builds the tree with the given []kv from bottom +// buildTreeFromLeafsSingleThread builds the tree with the given []kv from bottom // to the root -func (t *Tree) buildTreeBottomUpSingleThread(l int, kvsRaw []kv) ([]int, error) { +func (t *Tree) buildTreeFromLeafsSingleThread(l int, kvsRaw []kv) ([]int, error) { // TODO check that log2(len(leafs)) < t.maxLevels, if not, maxLevels // would be reached and should return error if len(kvsRaw) == 0 { @@ -611,23 +630,27 @@ func (t *Tree) buildTreeBottomUpSingleThread(l int, kvsRaw []kv) ([]int, error) } vt := newVT(t.maxLevels, t.hashFunction) + if t.dbg != nil { + vt.params.dbg = newDbgStats() + } for i := 0; i < len(kvsRaw); i++ { if err := vt.add(l, kvsRaw[i].k, kvsRaw[i].v); err != nil { return nil, err } } - pairs, err := vt.computeHashes() if err != nil { return nil, err } + // store pairs in db for i := 0; i < len(pairs); i++ { - if err := t.tx.Put(pairs[i][0], pairs[i][1]); err != nil { + if err := t.dbPut(pairs[i][0], pairs[i][1]); err != nil { return nil, err } } + t.dbg.add(vt.params.dbg) // set tree.root from the virtual tree root t.root = vt.root.h @@ -654,7 +677,7 @@ func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) { return nil, err } // store k-v to db - if err = t.tx.Put(k, v); err != nil { + if err = t.dbPut(k, v); err != nil { return nil, err } rKs = append(rKs, k) @@ -727,9 +750,25 @@ func combineInKVSet(base, toAdd []kv) ([]kv, []int) { return r, invalids } -// TODO WIP -// func loadDBTreeToVirtualTree() error { -// return nil +// loadVT loads a new virtual tree (vt) from the current Tree, which contains +// the same leafs. +// func (t *Tree) loadVT() (vt, error) { +// vt := newVT(t.maxLevels, t.hashFunction) +// vt.params.dbg = t.dbg +// err := t.Iterate(func(k, v []byte) { +// switch v[0] { +// case PrefixValueEmpty: +// case PrefixValueLeaf: +// leafK, leafV := ReadLeafValue(v) +// if err := vt.add(0, leafK, leafV); err != nil { +// panic(err) +// } +// case PrefixValueIntermediate: +// default: +// } +// }) +// +// return vt, err // } // func computeSimpleAddCost(nLeafs int) int { @@ -738,7 +777,7 @@ func combineInKVSet(base, toAdd []kv) ([]kv, []int) { // return nLvls * int(math.Pow(2, float64(nLvls))) // } // -// func computeBottomUpAddCost(nLeafs int) int { +// func computeFromLeafsAddCost(nLeafs int) int { // // 2^nLvls * 2 - 1 // nLvls := int(math.Log2(float64(nLeafs))) // return (int(math.Pow(2, float64(nLvls))) * 2) - 1 diff --git a/addbatch_test.go b/addbatch_test.go index d23337a..68cbb97 100644 --- a/addbatch_test.go +++ b/addbatch_test.go @@ -14,7 +14,20 @@ import ( "github.com/iden3/go-merkletree/db/memory" ) -var debug = true +var debug = false + +func printTestContext(prefix string, nLeafs int, hashName, dbName string) { + if debug { + fmt.Printf("%snCPU: %d, nLeafs: %d, hash: %s, db: %s\n", + prefix, runtime.NumCPU(), nLeafs, hashName, dbName) + } +} + +func printRes(name string, duration time.Duration) { + if debug { + fmt.Printf("%s: %s \n", name, duration) + } +} func debugTime(descr string, time1, time2 time.Duration) { if debug { @@ -69,6 +82,7 @@ func TestAddBatchCaseA(t *testing.T) { tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon) c.Assert(err, qt.IsNil) defer tree2.db.Close() + tree2.dbgInit() var keys, values [][]byte for i := 0; i < nLeafs; i++ { @@ -81,7 +95,11 @@ func TestAddBatchCaseA(t *testing.T) { indexes, err := tree2.AddBatch(keys, values) c.Assert(err, qt.IsNil) time2 := time.Since(start) - debugTime("CASE A, AddBatch", time1, time2) + if debug { + debugTime("CASE A, AddBatch", time1, time2) + printTestContext(" ", nLeafs, "Poseidon", "memory") + tree2.dbg.print(" ") + } c.Check(len(indexes), qt.Equals, 0) // check that both trees roots are equal @@ -133,7 +151,7 @@ func randomBytes(n int) []byte { return b } -func TestBuildTreeBottomUpSingleThread(t *testing.T) { +func TestBuildTreeFromLeafsSingleThread(t *testing.T) { c := qt.New(t) tree1, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b) c.Assert(err, qt.IsNil) @@ -169,8 +187,8 @@ func TestBuildTreeBottomUpSingleThread(t *testing.T) { tree2.tx, err = tree2.db.NewTx() c.Assert(err, qt.IsNil) - // indexes, err := tree2.buildTreeBottomUpSingleThread(kvs) - indexes, err := tree2.buildTreeBottomUp(4, kvs) + // indexes, err := tree2.buildTreeFromLeafsSingleThread(kvs) + indexes, err := tree2.buildTreeFromLeafs(4, kvs) c.Assert(err, qt.IsNil) // tree1.PrintGraphviz(nil) // tree2.PrintGraphviz(nil) @@ -280,6 +298,7 @@ func TestAddBatchCaseB(t *testing.T) { initialNLeafs := 99 // TMP TODO use const minLeafsThreshold-1 once ready tree1, tree2 := testInit(c, initialNLeafs) + tree2.dbgInit() start := time.Now() for i := initialNLeafs; i < nLeafs; i++ { @@ -303,7 +322,11 @@ func TestAddBatchCaseB(t *testing.T) { indexes, err := tree2.AddBatch(keys, values) c.Assert(err, qt.IsNil) time2 := time.Since(start) - debugTime("CASE B, AddBatch", time1, time2) + if debug { + debugTime("CASE B, AddBatch", time1, time2) + printTestContext(" ", nLeafs, "Poseidon", "memory") + tree2.dbg.print(" ") + } c.Check(len(indexes), qt.Equals, 0) // check that both trees roots are equal @@ -500,6 +523,7 @@ func TestAddBatchCaseC(t *testing.T) { initialNLeafs := 101 // TMP TODO use const minLeafsThreshold+1 once ready tree1, tree2 := testInit(c, initialNLeafs) + tree2.dbgInit() start := time.Now() for i := initialNLeafs; i < nLeafs; i++ { @@ -523,7 +547,11 @@ func TestAddBatchCaseC(t *testing.T) { indexes, err := tree2.AddBatch(keys, values) c.Assert(err, qt.IsNil) time2 := time.Since(start) - debugTime("CASE C, AddBatch", time1, time2) + if debug { + debugTime("CASE C, AddBatch", time1, time2) + printTestContext(" ", nLeafs, "Poseidon", "memory") + tree2.dbg.print(" ") + } c.Check(len(indexes), qt.Equals, 0) // check that both trees roots are equal @@ -537,6 +565,7 @@ func TestAddBatchCaseD(t *testing.T) { initialNLeafs := 900 tree1, tree2 := testInit(c, initialNLeafs) + tree2.dbgInit() start := time.Now() for i := initialNLeafs; i < nLeafs; i++ { @@ -560,7 +589,11 @@ func TestAddBatchCaseD(t *testing.T) { indexes, err := tree2.AddBatch(keys, values) c.Assert(err, qt.IsNil) time2 := time.Since(start) - debugTime("CASE D, AddBatch", time1, time2) + if debug { + debugTime("CASE D, AddBatch", time1, time2) + printTestContext(" ", nLeafs, "Poseidon", "memory") + tree2.dbg.print(" ") + } c.Check(len(indexes), qt.Equals, 0) // check that both trees roots are equal @@ -588,6 +621,7 @@ func TestAddBatchCaseE(t *testing.T) { tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon) c.Assert(err, qt.IsNil) defer tree2.db.Close() + tree2.dbgInit() var keys, values [][]byte // add the initial leafs to fill a bit the tree before calling the @@ -617,7 +651,11 @@ func TestAddBatchCaseE(t *testing.T) { indexes, err := tree2.AddBatch(keys, values) c.Assert(err, qt.IsNil) time2 := time.Since(start) - debugTime("CASE E, AddBatch", time1, time2) + if debug { + debugTime("CASE E, AddBatch", time1, time2) + printTestContext(" ", nLeafs, "Poseidon", "memory") + tree2.dbg.print(" ") + } c.Check(len(indexes), qt.Equals, 0) // check that both trees roots are equal @@ -636,8 +674,7 @@ func TestFlp2(t *testing.T) { func TestAddBatchBench(t *testing.T) { nLeafs := 50_000 - fmt.Printf("TestAddBatchBench\n nCPU: %d, nLeafs: %d, hash: Blake2b, db: leveldb\n", - runtime.NumCPU(), nLeafs) + printTestContext("TestAddBatchBench: ", nLeafs, "Blake2b", "leveldb") // prepare inputs var ks, vs [][]byte @@ -662,12 +699,16 @@ func benchAdd(t *testing.T, ks, vs [][]byte) { tree, err := NewTree(storage, 140, HashFunctionBlake2b) c.Assert(err, qt.IsNil) + if debug { + tree.dbgInit() + } start := time.Now() for i := 0; i < len(ks); i++ { err = tree.Add(ks[i], vs[i]) c.Assert(err, qt.IsNil) } printRes(" Add loop", time.Since(start)) + tree.dbg.print(" ") } func benchAddBatch(t *testing.T, ks, vs [][]byte) { @@ -679,16 +720,80 @@ func benchAddBatch(t *testing.T, ks, vs [][]byte) { tree, err := NewTree(storage, 140, HashFunctionBlake2b) c.Assert(err, qt.IsNil) + if debug { + tree.dbgInit() + } start := time.Now() invalids, err := tree.AddBatch(ks, vs) printRes(" AddBatch", time.Since(start)) c.Assert(err, qt.IsNil) c.Assert(len(invalids), qt.Equals, 0) + tree.dbg.print(" ") } -func printRes(name string, duration time.Duration) { +func TestDbgStats(t *testing.T) { + c := qt.New(t) + + nLeafs := 10_000 + + // prepare inputs + var ks, vs [][]byte + for i := 0; i < nLeafs; i++ { + k := randomBytes(32) + v := randomBytes(32) + ks = append(ks, k) + vs = append(vs, v) + } + + // 1 + tree1, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b) + c.Assert(err, qt.IsNil) + defer tree1.db.Close() + + tree1.dbgInit() + + for i := 0; i < len(ks); i++ { + err = tree1.Add(ks[i], vs[i]) + c.Assert(err, qt.IsNil) + } + + // 2 + tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b) + c.Assert(err, qt.IsNil) + defer tree2.db.Close() + + tree2.dbgInit() + + invalids, err := tree2.AddBatch(ks, vs) + c.Assert(err, qt.IsNil) + c.Assert(len(invalids), qt.Equals, 0) + + // 3 + tree3, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b) + c.Assert(err, qt.IsNil) + defer tree3.db.Close() + + tree3.dbgInit() + + // add few key-values + // invalids, err = tree3.AddBatch(ks[:], vs[:]) + invalids, err = tree3.AddBatch(ks[:1000], vs[:1000]) + c.Assert(err, qt.IsNil) + c.Assert(len(invalids), qt.Equals, 0) + + // add the rest of key-values + invalids, err = tree3.AddBatch(ks[1000:], vs[1000:]) + c.Assert(err, qt.IsNil) + c.Assert(len(invalids), qt.Equals, 0) + + c.Check(tree2.Root(), qt.DeepEquals, tree1.Root()) + c.Check(tree3.Root(), qt.DeepEquals, tree1.Root()) + if debug { - fmt.Printf(" %s: %s \n", name, duration) + fmt.Println("TestDbgStats") + tree1.dbg.print(" add in loop ") + tree2.dbg.print(" addbatch caseA ") + tree3.dbg.print(" addbatch caseD ") } } @@ -714,10 +819,10 @@ func printRes(name string, duration time.Duration) { // func TestComputeCosts(t *testing.T) { // fmt.Println(computeSimpleAddCost(10)) -// fmt.Println(computeBottomUpAddCost(10)) +// fmt.Println(computeFromLeafsAddCost(10)) // // fmt.Println(computeSimpleAddCost(1024)) -// fmt.Println(computeBottomUpAddCost(1024)) +// fmt.Println(computeFromLeafsAddCost(1024)) // } // TODO test tree with nLeafs > minLeafsThreshold, but that at level L, there is diff --git a/dbg.go b/dbg.go new file mode 100644 index 0000000..8f0cc2e --- /dev/null +++ b/dbg.go @@ -0,0 +1,67 @@ +package arbo + +import "fmt" + +// dbgStats is for debug purposes +type dbgStats struct { + hash int + dbGet int + dbPut int +} + +func (t *Tree) dbgInit() { + t.dbg = newDbgStats() +} + +func newDbgStats() *dbgStats { + return &dbgStats{ + hash: 0, + dbGet: 0, + dbPut: 0, + } +} + +func (d *dbgStats) incHash() { + if d == nil { + return + } + d.hash++ +} + +func (d *dbgStats) incDbGet() { + if d == nil { + return + } + d.dbGet++ +} + +func (d *dbgStats) incDbPut() { + if d == nil { + return + } + d.dbPut++ +} + +func (d *dbgStats) add(d2 *dbgStats) { + if d == nil || d2 == nil { + return + } + d.hash += d2.hash + d.dbGet += d2.dbGet + d.dbPut += d2.dbPut +} + +func (d *dbgStats) print(prefix string) { + if d == nil { + return + } + fmt.Printf("%sdbgStats(hash: %s, dbGet: %s, dbPut: %s)\n", + prefix, formatK(d.hash), formatK(d.dbGet), formatK(d.dbPut)) +} + +func formatK(v int) string { + if v/1000 > 0 { + return fmt.Sprintf("%.3fk", float64(v)/1000) //nolint:gomnd + } + return fmt.Sprintf("%d", v) +} diff --git a/tree.go b/tree.go index ce45bc0..24106bd 100644 --- a/tree.go +++ b/tree.go @@ -57,7 +57,11 @@ type Tree struct { root []byte hashFunction HashFunction - emptyHash []byte + // TODO in the methods that use it, check if emptyHash param is len>0 + // (check if it has been initialized) + emptyHash []byte + + dbg *dbgStats } // NewTree returns a new Tree, if there is a Tree still in the given storage, it @@ -76,7 +80,7 @@ func NewTree(storage db.Storage, maxLevels int, hash HashFunction) (*Tree, error return nil, err } t.root = t.emptyHash - if err = t.tx.Put(dbKeyRoot, t.root); err != nil { + if err = t.dbPut(dbKeyRoot, t.root); err != nil { return nil, err } if err = t.setNLeafs(0); err != nil { @@ -132,7 +136,7 @@ func (t *Tree) Add(k, v []byte) error { return err } // store root to db - if err := t.tx.Put(dbKeyRoot, t.root); err != nil { + if err := t.dbPut(dbKeyRoot, t.root); err != nil { return err } // update nLeafs @@ -156,12 +160,12 @@ func (t *Tree) add(fromLvl int, k, v []byte) error { return err } - leafKey, leafValue, err := newLeafValue(t.hashFunction, k, v) + leafKey, leafValue, err := t.newLeafValue(k, v) if err != nil { return err } - if err := t.tx.Put(leafKey, leafValue); err != nil { + if err := t.dbPut(leafKey, leafValue); err != nil { return err } @@ -186,6 +190,7 @@ func (t *Tree) down(newKey, currKey []byte, siblings [][]byte, if currLvl > t.maxLevels-1 { return nil, nil, nil, fmt.Errorf("max level") } + var err error var currValue []byte if bytes.Equal(currKey, t.emptyHash) { @@ -205,6 +210,9 @@ func (t *Tree) down(newKey, currKey []byte, siblings [][]byte, panic("should not be reached, as the 'if' above should avoid reaching this point") // TMP case PrefixValueLeaf: // leaf if bytes.Equal(newKey, currKey) { + // TODO move this error msg to const & add test that + // checks that adding a repeated key this error is + // returned return nil, nil, nil, fmt.Errorf("key already exists") } @@ -275,18 +283,18 @@ func (t *Tree) up(key []byte, siblings [][]byte, path []bool, currLvl, toLvl int var k, v []byte var err error if path[currLvl+toLvl] { - k, v, err = newIntermediate(t.hashFunction, siblings[currLvl], key) + k, v, err = t.newIntermediate(siblings[currLvl], key) if err != nil { return nil, err } } else { - k, v, err = newIntermediate(t.hashFunction, key, siblings[currLvl]) + k, v, err = t.newIntermediate(key, siblings[currLvl]) if err != nil { return nil, err } } // store k-v to db - if err = t.tx.Put(k, v); err != nil { + if err = t.dbPut(k, v); err != nil { return nil, err } @@ -298,6 +306,11 @@ func (t *Tree) up(key []byte, siblings [][]byte, path []bool, currLvl, toLvl int return t.up(k, siblings, path, currLvl-1, toLvl) } +func (t *Tree) newLeafValue(k, v []byte) ([]byte, []byte, error) { + t.dbg.incHash() + return newLeafValue(t.hashFunction, k, v) +} + func newLeafValue(hashFunc HashFunction, k, v []byte) ([]byte, []byte, error) { leafKey, err := hashFunc.Hash(k, v, []byte{1}) if err != nil { @@ -326,6 +339,11 @@ func ReadLeafValue(b []byte) ([]byte, []byte) { return k, v } +func (t *Tree) newIntermediate(l, r []byte) ([]byte, []byte, error) { + t.dbg.incHash() + return newIntermediate(t.hashFunction, l, r) +} + func newIntermediate(hashFunc HashFunction, l, r []byte) ([]byte, []byte, error) { b := make([]byte, PrefixValueLen+hashFunc.Len()*2) b[0] = 2 @@ -392,12 +410,12 @@ func (t *Tree) Update(k, v []byte) error { return fmt.Errorf("key %s does not exist", hex.EncodeToString(k)) } - leafKey, leafValue, err := newLeafValue(t.hashFunction, k, v) + leafKey, leafValue, err := t.newLeafValue(k, v) if err != nil { return err } - if err := t.tx.Put(leafKey, leafValue); err != nil { + if err := t.dbPut(leafKey, leafValue); err != nil { return err } @@ -413,7 +431,7 @@ func (t *Tree) Update(k, v []byte) error { t.root = root // store root to db - if err := t.tx.Put(dbKeyRoot, t.root); err != nil { + if err := t.dbPut(dbKeyRoot, t.root); err != nil { return err } return t.tx.Commit() @@ -535,7 +553,8 @@ func (t *Tree) Get(k []byte) ([]byte, []byte, error) { } leafK, leafV := ReadLeafValue(value) if !bytes.Equal(k, leafK) { - panic(fmt.Errorf("%s != %s", BytesToBigInt(k), BytesToBigInt(leafK))) + panic(fmt.Errorf("Tree.Get error: keys doesn't match, %s != %s", + BytesToBigInt(k), BytesToBigInt(leafK))) } return leafK, leafV, nil @@ -577,11 +596,20 @@ func CheckProof(hashFunc HashFunction, k, v, root, packedSiblings []byte) (bool, return false, nil } +func (t *Tree) dbPut(k, v []byte) error { + if t.tx == nil { + return fmt.Errorf("dbPut error: no db Tx") + } + t.dbg.incDbPut() + return t.tx.Put(k, v) +} + func (t *Tree) dbGet(k []byte) ([]byte, error) { // if key is empty, return empty as value if bytes.Equal(k, t.emptyHash) { return t.emptyHash, nil } + t.dbg.incDbGet() v, err := t.db.Get(k) if err == nil { @@ -609,7 +637,7 @@ func (t *Tree) incNLeafs(nLeafs int) error { func (t *Tree) setNLeafs(nLeafs int) error { b := make([]byte, 8) binary.LittleEndian.PutUint64(b, uint64(nLeafs)) - if err := t.tx.Put(dbKeyNLeafs, b); err != nil { + if err := t.dbPut(dbKeyNLeafs, b); err != nil { return err } return nil diff --git a/vt.go b/vt.go index 304480f..a8b4740 100644 --- a/vt.go +++ b/vt.go @@ -24,11 +24,13 @@ type params struct { maxLevels int hashFunction HashFunction emptyHash []byte + dbg *dbgStats } // vt stands for virtual tree. It's a tree that does not have any computed hash // while placing the leafs. Once all the leafs are placed, it computes all the -// hashes. In this way, each node hash is only computed one time. +// hashes. In this way, each node hash is only computed one time (at the end) +// and the tree is computed in memory. type vt struct { root *node params *params @@ -45,6 +47,15 @@ func newVT(maxLevels int, hash HashFunction) vt { } } +// WIP +// func (t *vt) addBatch(fromLvl int, k, v []byte) error { +// // parallelize adding leafs in the virtual tree +// nCPU := flp2(runtime.NumCPU()) +// l := int(math.Log2(float64(nCPU))) +// +// return nil +// } + func (t *vt) add(fromLvl int, k, v []byte) error { leaf := newLeafNode(t.params, k, v) if t.root == nil { @@ -205,6 +216,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error) t := n.typ() switch t { case vtLeaf: + p.dbg.incHash() leafKey, leafValue, err := newLeafValue(p.hashFunction, n.k, n.v) if err != nil { return pairs, err @@ -235,6 +247,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error) } // once the sub nodes are computed, can compute the current node // hash + p.dbg.incHash() k, v, err := newIntermediate(p.hashFunction, n.l.h, n.r.h) if err != nil { return nil, err