Browse Source

Add dbgStats metrics

Add dbgStats metrics to analyze the number of hashes, db Gets, and db Puts.

Current benchmarks:
```
CASE A, AddBatch was 8.841700 times faster than without AddBatch
	nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory
	dbgStats(hash: 2.044k, dbGet: 1, dbPut: 2.049k)
CASE B, AddBatch was 7.678766 times faster than without AddBatch
	nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory
	dbgStats(hash: 2.044k, dbGet: 199, dbPut: 2.049k)
CASE C, AddBatch was 8.401087 times faster than without AddBatch
	nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory
	dbgStats(hash: 2.044k, dbGet: 207, dbPut: 2.049k)
CASE D, AddBatch was 2.466346 times faster than without AddBatch
	nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory
	dbgStats(hash: 33.884k, dbGet: 30.697k, dbPut: 33.889k)
CASE E, AddBatch was 1.958160 times faster than without AddBatch
	nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory
	dbgStats(hash: 41.419k, dbGet: 37.558k, dbPut: 41.874k)
TestAddBatchBench: nCPU: 4, nLeafs: 50000, hash: Blake2b, db: leveldb
	Add loop:	10.089858449s
		dbgStats(hash: 825.285k, dbGet: 788.869k, dbPut: 925.285k)
	AddBatch:	904.647829ms
		dbgStats(hash: 122.458k, dbGet: 1, dbPut: 122.463k)
TestDbgStats
	add in loop    dbgStats(hash: 141.915k, dbGet: 134.602k, dbPut: 161.915k)
	addbatch caseA dbgStats(hash: 24.528k, dbGet: 1, dbPut: 24.533k)
	addbatch caseD dbgStats(hash: 115.506k, dbGet: 97.482k, dbPut: 115.516k)
```
master
arnaucube 3 years ago
parent
commit
4167583b8d
5 changed files with 304 additions and 52 deletions
  1. +62
    -23
      addbatch.go
  2. +120
    -15
      addbatch_test.go
  3. +67
    -0
      dbg.go
  4. +41
    -13
      tree.go
  5. +14
    -1
      vt.go

+ 62
- 23
addbatch.go

@ -153,10 +153,6 @@ func (t *Tree) AddBatch(keys, values [][]byte) ([]int, error) {
t.Lock() t.Lock()
defer t.Unlock() defer t.Unlock()
// when len(keyvalues) is not a power of 2, cut at the biggest power of
// 2 under the len(keys), add those 2**n key-values using the AddBatch
// approach, and then add the remaining key-values using tree.Add.
kvs, err := t.keysValuesToKvs(keys, values) kvs, err := t.keysValuesToKvs(keys, values)
if err != nil { if err != nil {
return nil, err return nil, err
@ -258,7 +254,7 @@ func (t *Tree) AddBatch(keys, values [][]byte) ([]int, error) {
func (t *Tree) finalizeAddBatch(nKeys int, invalids []int) ([]int, error) { func (t *Tree) finalizeAddBatch(nKeys int, invalids []int) ([]int, error) {
// store root to db // store root to db
if err := t.tx.Put(dbKeyRoot, t.root); err != nil {
if err := t.dbPut(dbKeyRoot, t.root); err != nil {
return nil, err return nil, err
} }
@ -275,7 +271,7 @@ func (t *Tree) finalizeAddBatch(nKeys int, invalids []int) ([]int, error) {
} }
func (t *Tree) caseA(nCPU int, kvs []kv) ([]int, error) { func (t *Tree) caseA(nCPU int, kvs []kv) ([]int, error) {
invalids, err := t.buildTreeBottomUp(nCPU, kvs)
invalids, err := t.buildTreeFromLeafs(nCPU, kvs)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -301,17 +297,18 @@ func (t *Tree) caseB(nCPU, l int, kvs []kv) ([]int, error) {
var invalids2 []int var invalids2 []int
if nCPU > 1 { if nCPU > 1 {
invalids2, err = t.buildTreeBottomUp(nCPU, kvs)
invalids2, err = t.buildTreeFromLeafs(nCPU, kvs)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} else { } else {
invalids2, err = t.buildTreeBottomUpSingleThread(l, kvs)
invalids2, err = t.buildTreeFromLeafsSingleThread(l, kvs)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
invalids = append(invalids, invalids2...) invalids = append(invalids, invalids2...)
return invalids, nil return invalids, nil
} }
@ -323,6 +320,7 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
// 2. use keys at level L as roots of the subtrees under each one // 2. use keys at level L as roots of the subtrees under each one
subRoots := make([][]byte, nCPU) subRoots := make([][]byte, nCPU)
dbgStatsPerBucket := make([]*dbgStats, nCPU)
txs := make([]db.Tx, nCPU) txs := make([]db.Tx, nCPU)
var wg sync.WaitGroup var wg sync.WaitGroup
wg.Add(nCPU) wg.Add(nCPU)
@ -337,7 +335,8 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
panic(err) // TODO panic(err) // TODO
} }
bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels, bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels,
hashFunction: t.hashFunction, root: keysAtL[cpu]}
hashFunction: t.hashFunction, root: keysAtL[cpu],
emptyHash: t.emptyHash, dbg: newDbgStats()}
// 3. do CASE B (with 1 cpu) for each key at level L // 3. do CASE B (with 1 cpu) for each key at level L
_, err = bucketTree.caseB(1, l, buckets[cpu]) // TODO handle invalids _, err = bucketTree.caseB(1, l, buckets[cpu]) // TODO handle invalids
@ -346,6 +345,7 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
// return nil, err // return nil, err
} }
subRoots[cpu] = bucketTree.root subRoots[cpu] = bucketTree.root
dbgStatsPerBucket[cpu] = bucketTree.dbg
wg.Done() wg.Done()
}(i) }(i)
} }
@ -372,6 +372,11 @@ func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
invalids = append(invalids, excedents[i].pos) invalids = append(invalids, excedents[i].pos)
} }
} }
for i := 0; i < len(dbgStatsPerBucket); i++ {
t.dbg.add(dbgStatsPerBucket[i])
}
return invalids, nil return invalids, nil
} }
@ -390,6 +395,7 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
subRoots := make([][]byte, nCPU) subRoots := make([][]byte, nCPU)
invalidsInBucket := make([][]int, nCPU) invalidsInBucket := make([][]int, nCPU)
dbgStatsPerBucket := make([]*dbgStats, nCPU)
txs := make([]db.Tx, nCPU) txs := make([]db.Tx, nCPU)
var wg sync.WaitGroup var wg sync.WaitGroup
@ -409,7 +415,8 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
} }
bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels - l, bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels - l,
hashFunction: t.hashFunction, root: keysAtL[cpu]}
hashFunction: t.hashFunction, root: keysAtL[cpu],
emptyHash: t.emptyHash, dbg: newDbgStats()}
for j := 0; j < len(buckets[cpu]); j++ { for j := 0; j < len(buckets[cpu]); j++ {
if err = bucketTree.add(l, buckets[cpu][j].k, buckets[cpu][j].v); err != nil { if err = bucketTree.add(l, buckets[cpu][j].k, buckets[cpu][j].v); err != nil {
@ -417,6 +424,7 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
} }
} }
subRoots[cpu] = bucketTree.root subRoots[cpu] = bucketTree.root
dbgStatsPerBucket[cpu] = bucketTree.dbg
wg.Done() wg.Done()
}(i) }(i)
} }
@ -440,6 +448,10 @@ func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) {
invalids = append(invalids, invalidsInBucket[i]...) invalids = append(invalids, invalidsInBucket[i]...)
} }
for i := 0; i < len(dbgStatsPerBucket); i++ {
t.dbg.add(dbgStatsPerBucket[i])
}
return invalids, nil return invalids, nil
} }
@ -539,17 +551,18 @@ func (t *Tree) kvsToKeysValues(kvs []kv) ([][]byte, [][]byte) {
} }
*/ */
// buildTreeBottomUp splits the key-values into n Buckets (where n is the number
// buildTreeFromLeafs splits the key-values into n Buckets (where n is the number
// of CPUs), in parallel builds a subtree for each bucket, once all the subtrees // of CPUs), in parallel builds a subtree for each bucket, once all the subtrees
// are built, uses the subtrees roots as keys for a new tree, which as result // are built, uses the subtrees roots as keys for a new tree, which as result
// will have the complete Tree build from bottom to up, where until the // will have the complete Tree build from bottom to up, where until the
// log2(nCPU) level it has been computed in parallel. // log2(nCPU) level it has been computed in parallel.
func (t *Tree) buildTreeBottomUp(nCPU int, kvs []kv) ([]int, error) {
func (t *Tree) buildTreeFromLeafs(nCPU int, kvs []kv) ([]int, error) {
l := int(math.Log2(float64(nCPU))) l := int(math.Log2(float64(nCPU)))
buckets := splitInBuckets(kvs, nCPU) buckets := splitInBuckets(kvs, nCPU)
subRoots := make([][]byte, nCPU) subRoots := make([][]byte, nCPU)
invalidsInBucket := make([][]int, nCPU) invalidsInBucket := make([][]int, nCPU)
dbgStatsPerBucket := make([]*dbgStats, nCPU)
txs := make([]db.Tx, nCPU) txs := make([]db.Tx, nCPU)
var wg sync.WaitGroup var wg sync.WaitGroup
@ -567,14 +580,16 @@ func (t *Tree) buildTreeBottomUp(nCPU int, kvs []kv) ([]int, error) {
panic(err) // TODO panic(err) // TODO
} }
bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels, bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels,
hashFunction: t.hashFunction, root: t.emptyHash}
hashFunction: t.hashFunction, root: t.emptyHash,
emptyHash: t.emptyHash, dbg: newDbgStats()}
currInvalids, err := bucketTree.buildTreeBottomUpSingleThread(l, buckets[cpu])
currInvalids, err := bucketTree.buildTreeFromLeafsSingleThread(l, buckets[cpu])
if err != nil { if err != nil {
panic(err) // TODO panic(err) // TODO
} }
invalidsInBucket[cpu] = currInvalids invalidsInBucket[cpu] = currInvalids
subRoots[cpu] = bucketTree.root subRoots[cpu] = bucketTree.root
dbgStatsPerBucket[cpu] = bucketTree.dbg
wg.Done() wg.Done()
}(i) }(i)
} }
@ -598,12 +613,16 @@ func (t *Tree) buildTreeBottomUp(nCPU int, kvs []kv) ([]int, error) {
invalids = append(invalids, invalidsInBucket[i]...) invalids = append(invalids, invalidsInBucket[i]...)
} }
for i := 0; i < len(dbgStatsPerBucket); i++ {
t.dbg.add(dbgStatsPerBucket[i])
}
return invalids, err return invalids, err
} }
// buildTreeBottomUpSingleThread builds the tree with the given []kv from bottom
// buildTreeFromLeafsSingleThread builds the tree with the given []kv from bottom
// to the root // to the root
func (t *Tree) buildTreeBottomUpSingleThread(l int, kvsRaw []kv) ([]int, error) {
func (t *Tree) buildTreeFromLeafsSingleThread(l int, kvsRaw []kv) ([]int, error) {
// TODO check that log2(len(leafs)) < t.maxLevels, if not, maxLevels // TODO check that log2(len(leafs)) < t.maxLevels, if not, maxLevels
// would be reached and should return error // would be reached and should return error
if len(kvsRaw) == 0 { if len(kvsRaw) == 0 {
@ -611,23 +630,27 @@ func (t *Tree) buildTreeBottomUpSingleThread(l int, kvsRaw []kv) ([]int, error)
} }
vt := newVT(t.maxLevels, t.hashFunction) vt := newVT(t.maxLevels, t.hashFunction)
if t.dbg != nil {
vt.params.dbg = newDbgStats()
}
for i := 0; i < len(kvsRaw); i++ { for i := 0; i < len(kvsRaw); i++ {
if err := vt.add(l, kvsRaw[i].k, kvsRaw[i].v); err != nil { if err := vt.add(l, kvsRaw[i].k, kvsRaw[i].v); err != nil {
return nil, err return nil, err
} }
} }
pairs, err := vt.computeHashes() pairs, err := vt.computeHashes()
if err != nil { if err != nil {
return nil, err return nil, err
} }
// store pairs in db // store pairs in db
for i := 0; i < len(pairs); i++ { for i := 0; i < len(pairs); i++ {
if err := t.tx.Put(pairs[i][0], pairs[i][1]); err != nil {
if err := t.dbPut(pairs[i][0], pairs[i][1]); err != nil {
return nil, err return nil, err
} }
} }
t.dbg.add(vt.params.dbg)
// set tree.root from the virtual tree root // set tree.root from the virtual tree root
t.root = vt.root.h t.root = vt.root.h
@ -654,7 +677,7 @@ func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) {
return nil, err return nil, err
} }
// store k-v to db // store k-v to db
if err = t.tx.Put(k, v); err != nil {
if err = t.dbPut(k, v); err != nil {
return nil, err return nil, err
} }
rKs = append(rKs, k) rKs = append(rKs, k)
@ -727,9 +750,25 @@ func combineInKVSet(base, toAdd []kv) ([]kv, []int) {
return r, invalids return r, invalids
} }
// TODO WIP
// func loadDBTreeToVirtualTree() error {
// return nil
// loadVT loads a new virtual tree (vt) from the current Tree, which contains
// the same leafs.
// func (t *Tree) loadVT() (vt, error) {
// vt := newVT(t.maxLevels, t.hashFunction)
// vt.params.dbg = t.dbg
// err := t.Iterate(func(k, v []byte) {
// switch v[0] {
// case PrefixValueEmpty:
// case PrefixValueLeaf:
// leafK, leafV := ReadLeafValue(v)
// if err := vt.add(0, leafK, leafV); err != nil {
// panic(err)
// }
// case PrefixValueIntermediate:
// default:
// }
// })
//
// return vt, err
// } // }
// func computeSimpleAddCost(nLeafs int) int { // func computeSimpleAddCost(nLeafs int) int {
@ -738,7 +777,7 @@ func combineInKVSet(base, toAdd []kv) ([]kv, []int) {
// return nLvls * int(math.Pow(2, float64(nLvls))) // return nLvls * int(math.Pow(2, float64(nLvls)))
// } // }
// //
// func computeBottomUpAddCost(nLeafs int) int {
// func computeFromLeafsAddCost(nLeafs int) int {
// // 2^nLvls * 2 - 1 // // 2^nLvls * 2 - 1
// nLvls := int(math.Log2(float64(nLeafs))) // nLvls := int(math.Log2(float64(nLeafs)))
// return (int(math.Pow(2, float64(nLvls))) * 2) - 1 // return (int(math.Pow(2, float64(nLvls))) * 2) - 1

+ 120
- 15
addbatch_test.go

@ -14,7 +14,20 @@ import (
"github.com/iden3/go-merkletree/db/memory" "github.com/iden3/go-merkletree/db/memory"
) )
var debug = true
var debug = false
// printTestContext prints a single line describing the setup of a test run:
// the number of CPUs reported by the runtime, the number of leafs, and the
// names of the hash function and db backend, all preceded by prefix.
// Nothing is printed unless the package-level debug flag is enabled.
func printTestContext(prefix string, nLeafs int, hashName, dbName string) {
	if !debug {
		return
	}
	nCPU := runtime.NumCPU()
	fmt.Printf("%snCPU: %d, nLeafs: %d, hash: %s, db: %s\n",
		prefix, nCPU, nLeafs, hashName, dbName)
}
// printRes prints the given name followed by the measured duration. It is a
// no-op unless the package-level debug flag is enabled.
func printRes(name string, duration time.Duration) {
	if !debug {
		return
	}
	fmt.Printf("%s: %s \n", name, duration)
}
func debugTime(descr string, time1, time2 time.Duration) { func debugTime(descr string, time1, time2 time.Duration) {
if debug { if debug {
@ -69,6 +82,7 @@ func TestAddBatchCaseA(t *testing.T) {
tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon) tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
defer tree2.db.Close() defer tree2.db.Close()
tree2.dbgInit()
var keys, values [][]byte var keys, values [][]byte
for i := 0; i < nLeafs; i++ { for i := 0; i < nLeafs; i++ {
@ -81,7 +95,11 @@ func TestAddBatchCaseA(t *testing.T) {
indexes, err := tree2.AddBatch(keys, values) indexes, err := tree2.AddBatch(keys, values)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
time2 := time.Since(start) time2 := time.Since(start)
debugTime("CASE A, AddBatch", time1, time2)
if debug {
debugTime("CASE A, AddBatch", time1, time2)
printTestContext(" ", nLeafs, "Poseidon", "memory")
tree2.dbg.print(" ")
}
c.Check(len(indexes), qt.Equals, 0) c.Check(len(indexes), qt.Equals, 0)
// check that both trees roots are equal // check that both trees roots are equal
@ -133,7 +151,7 @@ func randomBytes(n int) []byte {
return b return b
} }
func TestBuildTreeBottomUpSingleThread(t *testing.T) {
func TestBuildTreeFromLeafsSingleThread(t *testing.T) {
c := qt.New(t) c := qt.New(t)
tree1, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b) tree1, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
@ -169,8 +187,8 @@ func TestBuildTreeBottomUpSingleThread(t *testing.T) {
tree2.tx, err = tree2.db.NewTx() tree2.tx, err = tree2.db.NewTx()
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
// indexes, err := tree2.buildTreeBottomUpSingleThread(kvs)
indexes, err := tree2.buildTreeBottomUp(4, kvs)
// indexes, err := tree2.buildTreeFromLeafsSingleThread(kvs)
indexes, err := tree2.buildTreeFromLeafs(4, kvs)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
// tree1.PrintGraphviz(nil) // tree1.PrintGraphviz(nil)
// tree2.PrintGraphviz(nil) // tree2.PrintGraphviz(nil)
@ -280,6 +298,7 @@ func TestAddBatchCaseB(t *testing.T) {
initialNLeafs := 99 // TMP TODO use const minLeafsThreshold-1 once ready initialNLeafs := 99 // TMP TODO use const minLeafsThreshold-1 once ready
tree1, tree2 := testInit(c, initialNLeafs) tree1, tree2 := testInit(c, initialNLeafs)
tree2.dbgInit()
start := time.Now() start := time.Now()
for i := initialNLeafs; i < nLeafs; i++ { for i := initialNLeafs; i < nLeafs; i++ {
@ -303,7 +322,11 @@ func TestAddBatchCaseB(t *testing.T) {
indexes, err := tree2.AddBatch(keys, values) indexes, err := tree2.AddBatch(keys, values)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
time2 := time.Since(start) time2 := time.Since(start)
debugTime("CASE B, AddBatch", time1, time2)
if debug {
debugTime("CASE B, AddBatch", time1, time2)
printTestContext(" ", nLeafs, "Poseidon", "memory")
tree2.dbg.print(" ")
}
c.Check(len(indexes), qt.Equals, 0) c.Check(len(indexes), qt.Equals, 0)
// check that both trees roots are equal // check that both trees roots are equal
@ -500,6 +523,7 @@ func TestAddBatchCaseC(t *testing.T) {
initialNLeafs := 101 // TMP TODO use const minLeafsThreshold+1 once ready initialNLeafs := 101 // TMP TODO use const minLeafsThreshold+1 once ready
tree1, tree2 := testInit(c, initialNLeafs) tree1, tree2 := testInit(c, initialNLeafs)
tree2.dbgInit()
start := time.Now() start := time.Now()
for i := initialNLeafs; i < nLeafs; i++ { for i := initialNLeafs; i < nLeafs; i++ {
@ -523,7 +547,11 @@ func TestAddBatchCaseC(t *testing.T) {
indexes, err := tree2.AddBatch(keys, values) indexes, err := tree2.AddBatch(keys, values)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
time2 := time.Since(start) time2 := time.Since(start)
debugTime("CASE C, AddBatch", time1, time2)
if debug {
debugTime("CASE C, AddBatch", time1, time2)
printTestContext(" ", nLeafs, "Poseidon", "memory")
tree2.dbg.print(" ")
}
c.Check(len(indexes), qt.Equals, 0) c.Check(len(indexes), qt.Equals, 0)
// check that both trees roots are equal // check that both trees roots are equal
@ -537,6 +565,7 @@ func TestAddBatchCaseD(t *testing.T) {
initialNLeafs := 900 initialNLeafs := 900
tree1, tree2 := testInit(c, initialNLeafs) tree1, tree2 := testInit(c, initialNLeafs)
tree2.dbgInit()
start := time.Now() start := time.Now()
for i := initialNLeafs; i < nLeafs; i++ { for i := initialNLeafs; i < nLeafs; i++ {
@ -560,7 +589,11 @@ func TestAddBatchCaseD(t *testing.T) {
indexes, err := tree2.AddBatch(keys, values) indexes, err := tree2.AddBatch(keys, values)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
time2 := time.Since(start) time2 := time.Since(start)
debugTime("CASE D, AddBatch", time1, time2)
if debug {
debugTime("CASE D, AddBatch", time1, time2)
printTestContext(" ", nLeafs, "Poseidon", "memory")
tree2.dbg.print(" ")
}
c.Check(len(indexes), qt.Equals, 0) c.Check(len(indexes), qt.Equals, 0)
// check that both trees roots are equal // check that both trees roots are equal
@ -588,6 +621,7 @@ func TestAddBatchCaseE(t *testing.T) {
tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon) tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
defer tree2.db.Close() defer tree2.db.Close()
tree2.dbgInit()
var keys, values [][]byte var keys, values [][]byte
// add the initial leafs to fill a bit the tree before calling the // add the initial leafs to fill a bit the tree before calling the
@ -617,7 +651,11 @@ func TestAddBatchCaseE(t *testing.T) {
indexes, err := tree2.AddBatch(keys, values) indexes, err := tree2.AddBatch(keys, values)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
time2 := time.Since(start) time2 := time.Since(start)
debugTime("CASE E, AddBatch", time1, time2)
if debug {
debugTime("CASE E, AddBatch", time1, time2)
printTestContext(" ", nLeafs, "Poseidon", "memory")
tree2.dbg.print(" ")
}
c.Check(len(indexes), qt.Equals, 0) c.Check(len(indexes), qt.Equals, 0)
// check that both trees roots are equal // check that both trees roots are equal
@ -636,8 +674,7 @@ func TestFlp2(t *testing.T) {
func TestAddBatchBench(t *testing.T) { func TestAddBatchBench(t *testing.T) {
nLeafs := 50_000 nLeafs := 50_000
fmt.Printf("TestAddBatchBench\n nCPU: %d, nLeafs: %d, hash: Blake2b, db: leveldb\n",
runtime.NumCPU(), nLeafs)
printTestContext("TestAddBatchBench: ", nLeafs, "Blake2b", "leveldb")
// prepare inputs // prepare inputs
var ks, vs [][]byte var ks, vs [][]byte
@ -662,12 +699,16 @@ func benchAdd(t *testing.T, ks, vs [][]byte) {
tree, err := NewTree(storage, 140, HashFunctionBlake2b) tree, err := NewTree(storage, 140, HashFunctionBlake2b)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
if debug {
tree.dbgInit()
}
start := time.Now() start := time.Now()
for i := 0; i < len(ks); i++ { for i := 0; i < len(ks); i++ {
err = tree.Add(ks[i], vs[i]) err = tree.Add(ks[i], vs[i])
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
} }
printRes(" Add loop", time.Since(start)) printRes(" Add loop", time.Since(start))
tree.dbg.print(" ")
} }
func benchAddBatch(t *testing.T, ks, vs [][]byte) { func benchAddBatch(t *testing.T, ks, vs [][]byte) {
@ -679,16 +720,80 @@ func benchAddBatch(t *testing.T, ks, vs [][]byte) {
tree, err := NewTree(storage, 140, HashFunctionBlake2b) tree, err := NewTree(storage, 140, HashFunctionBlake2b)
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
if debug {
tree.dbgInit()
}
start := time.Now() start := time.Now()
invalids, err := tree.AddBatch(ks, vs) invalids, err := tree.AddBatch(ks, vs)
printRes(" AddBatch", time.Since(start)) printRes(" AddBatch", time.Since(start))
c.Assert(err, qt.IsNil) c.Assert(err, qt.IsNil)
c.Assert(len(invalids), qt.Equals, 0) c.Assert(len(invalids), qt.Equals, 0)
tree.dbg.print(" ")
} }
func printRes(name string, duration time.Duration) {
func TestDbgStats(t *testing.T) {
c := qt.New(t)
nLeafs := 10_000
// prepare inputs
var ks, vs [][]byte
for i := 0; i < nLeafs; i++ {
k := randomBytes(32)
v := randomBytes(32)
ks = append(ks, k)
vs = append(vs, v)
}
// 1
tree1, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b)
c.Assert(err, qt.IsNil)
defer tree1.db.Close()
tree1.dbgInit()
for i := 0; i < len(ks); i++ {
err = tree1.Add(ks[i], vs[i])
c.Assert(err, qt.IsNil)
}
// 2
tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b)
c.Assert(err, qt.IsNil)
defer tree2.db.Close()
tree2.dbgInit()
invalids, err := tree2.AddBatch(ks, vs)
c.Assert(err, qt.IsNil)
c.Assert(len(invalids), qt.Equals, 0)
// 3
tree3, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionBlake2b)
c.Assert(err, qt.IsNil)
defer tree3.db.Close()
tree3.dbgInit()
// add few key-values
// invalids, err = tree3.AddBatch(ks[:], vs[:])
invalids, err = tree3.AddBatch(ks[:1000], vs[:1000])
c.Assert(err, qt.IsNil)
c.Assert(len(invalids), qt.Equals, 0)
// add the rest of key-values
invalids, err = tree3.AddBatch(ks[1000:], vs[1000:])
c.Assert(err, qt.IsNil)
c.Assert(len(invalids), qt.Equals, 0)
c.Check(tree2.Root(), qt.DeepEquals, tree1.Root())
c.Check(tree3.Root(), qt.DeepEquals, tree1.Root())
if debug { if debug {
fmt.Printf(" %s: %s \n", name, duration)
fmt.Println("TestDbgStats")
tree1.dbg.print(" add in loop ")
tree2.dbg.print(" addbatch caseA ")
tree3.dbg.print(" addbatch caseD ")
} }
} }
@ -714,10 +819,10 @@ func printRes(name string, duration time.Duration) {
// func TestComputeCosts(t *testing.T) { // func TestComputeCosts(t *testing.T) {
// fmt.Println(computeSimpleAddCost(10)) // fmt.Println(computeSimpleAddCost(10))
// fmt.Println(computeBottomUpAddCost(10))
// fmt.Println(computeFromLeafsAddCost(10))
// //
// fmt.Println(computeSimpleAddCost(1024)) // fmt.Println(computeSimpleAddCost(1024))
// fmt.Println(computeBottomUpAddCost(1024))
// fmt.Println(computeFromLeafsAddCost(1024))
// } // }
// TODO test tree with nLeafs > minLeafsThreshold, but that at level L, there is // TODO test tree with nLeafs > minLeafsThreshold, but that at level L, there is

+ 67
- 0
dbg.go

@ -0,0 +1,67 @@
package arbo
import "fmt"
// dbgStats is a set of counters kept for debug purposes only: hash counts
// hash computations, dbGet counts reads from the db and dbPut counts writes
// to the db. The methods of *dbgStats accept a nil receiver, in which case
// they do nothing.
type dbgStats struct {
	hash, dbGet, dbPut int
}
// dbgInit enables the debug counters of the Tree by assigning it a fresh,
// zeroed dbgStats. Any counters previously attached to the Tree are replaced.
func (t *Tree) dbgInit() {
t.dbg = newDbgStats()
}
// newDbgStats returns a pointer to a new dbgStats with every counter at zero.
func newDbgStats() *dbgStats {
	// the zero value of the struct already has all counters set to 0
	return &dbgStats{}
}
// incHash adds one to the hash counter. Calling it on a nil receiver is
// allowed and does nothing, so callers need not check whether stats are
// enabled.
func (d *dbgStats) incHash() {
	if d != nil {
		d.hash++
	}
}
// incDbGet adds one to the dbGet counter. Calling it on a nil receiver is
// allowed and does nothing.
func (d *dbgStats) incDbGet() {
	if d != nil {
		d.dbGet++
	}
}
// incDbPut adds one to the dbPut counter. Calling it on a nil receiver is
// allowed and does nothing.
func (d *dbgStats) incDbPut() {
	if d != nil {
		d.dbPut++
	}
}
// add accumulates the counters of d2 into d, field by field. If either d or
// d2 is nil the call is a no-op; d2 is never modified.
func (d *dbgStats) add(d2 *dbgStats) {
	if d != nil && d2 != nil {
		d.hash += d2.hash
		d.dbGet += d2.dbGet
		d.dbPut += d2.dbPut
	}
}
// print writes the three counters to standard output on one line, preceded
// by prefix, with each value rendered through formatK. Calling it on a nil
// receiver prints nothing.
func (d *dbgStats) print(prefix string) {
	if d == nil {
		return
	}
	hashes := formatK(d.hash)
	gets := formatK(d.dbGet)
	puts := formatK(d.dbPut)
	fmt.Printf("%sdbgStats(hash: %s, dbGet: %s, dbPut: %s)\n",
		prefix, hashes, gets, puts)
}
// formatK renders v for the stats output. Values of 1000 or more are shown in
// thousands with three decimals and a "k" suffix (e.g. 2044 -> "2.044k");
// any smaller value (including negatives) is printed as a plain integer.
func formatK(v int) string {
	const kilo = 1000
	if v < kilo {
		return fmt.Sprintf("%d", v)
	}
	return fmt.Sprintf("%.3fk", float64(v)/kilo)
}

+ 41
- 13
tree.go

@ -57,7 +57,11 @@ type Tree struct {
root []byte root []byte
hashFunction HashFunction hashFunction HashFunction
emptyHash []byte
// TODO in the methods that use it, check if emptyHash param is len>0
// (check if it has been initialized)
emptyHash []byte
dbg *dbgStats
} }
// NewTree returns a new Tree, if there is a Tree still in the given storage, it // NewTree returns a new Tree, if there is a Tree still in the given storage, it
@ -76,7 +80,7 @@ func NewTree(storage db.Storage, maxLevels int, hash HashFunction) (*Tree, error
return nil, err return nil, err
} }
t.root = t.emptyHash t.root = t.emptyHash
if err = t.tx.Put(dbKeyRoot, t.root); err != nil {
if err = t.dbPut(dbKeyRoot, t.root); err != nil {
return nil, err return nil, err
} }
if err = t.setNLeafs(0); err != nil { if err = t.setNLeafs(0); err != nil {
@ -132,7 +136,7 @@ func (t *Tree) Add(k, v []byte) error {
return err return err
} }
// store root to db // store root to db
if err := t.tx.Put(dbKeyRoot, t.root); err != nil {
if err := t.dbPut(dbKeyRoot, t.root); err != nil {
return err return err
} }
// update nLeafs // update nLeafs
@ -156,12 +160,12 @@ func (t *Tree) add(fromLvl int, k, v []byte) error {
return err return err
} }
leafKey, leafValue, err := newLeafValue(t.hashFunction, k, v)
leafKey, leafValue, err := t.newLeafValue(k, v)
if err != nil { if err != nil {
return err return err
} }
if err := t.tx.Put(leafKey, leafValue); err != nil {
if err := t.dbPut(leafKey, leafValue); err != nil {
return err return err
} }
@ -186,6 +190,7 @@ func (t *Tree) down(newKey, currKey []byte, siblings [][]byte,
if currLvl > t.maxLevels-1 { if currLvl > t.maxLevels-1 {
return nil, nil, nil, fmt.Errorf("max level") return nil, nil, nil, fmt.Errorf("max level")
} }
var err error var err error
var currValue []byte var currValue []byte
if bytes.Equal(currKey, t.emptyHash) { if bytes.Equal(currKey, t.emptyHash) {
@ -205,6 +210,9 @@ func (t *Tree) down(newKey, currKey []byte, siblings [][]byte,
panic("should not be reached, as the 'if' above should avoid reaching this point") // TMP panic("should not be reached, as the 'if' above should avoid reaching this point") // TMP
case PrefixValueLeaf: // leaf case PrefixValueLeaf: // leaf
if bytes.Equal(newKey, currKey) { if bytes.Equal(newKey, currKey) {
// TODO move this error msg to a const & add a test that
// checks that this error is returned when adding a
// repeated key
return nil, nil, nil, fmt.Errorf("key already exists") return nil, nil, nil, fmt.Errorf("key already exists")
} }
@ -275,18 +283,18 @@ func (t *Tree) up(key []byte, siblings [][]byte, path []bool, currLvl, toLvl int
var k, v []byte var k, v []byte
var err error var err error
if path[currLvl+toLvl] { if path[currLvl+toLvl] {
k, v, err = newIntermediate(t.hashFunction, siblings[currLvl], key)
k, v, err = t.newIntermediate(siblings[currLvl], key)
if err != nil { if err != nil {
return nil, err return nil, err
} }
} else { } else {
k, v, err = newIntermediate(t.hashFunction, key, siblings[currLvl])
k, v, err = t.newIntermediate(key, siblings[currLvl])
if err != nil { if err != nil {
return nil, err return nil, err
} }
} }
// store k-v to db // store k-v to db
if err = t.tx.Put(k, v); err != nil {
if err = t.dbPut(k, v); err != nil {
return nil, err return nil, err
} }
@ -298,6 +306,11 @@ func (t *Tree) up(key []byte, siblings [][]byte, path []bool, currLvl, toLvl int
return t.up(k, siblings, path, currLvl-1, toLvl) return t.up(k, siblings, path, currLvl-1, toLvl)
} }
// newLeafValue is the Tree-bound variant of the package-level newLeafValue:
// it first increments the Tree's debug hash counter (a no-op when t.dbg is
// nil) and then delegates to newLeafValue using the Tree's hash function,
// returning its results unchanged.
func (t *Tree) newLeafValue(k, v []byte) ([]byte, []byte, error) {
t.dbg.incHash()
return newLeafValue(t.hashFunction, k, v)
}
func newLeafValue(hashFunc HashFunction, k, v []byte) ([]byte, []byte, error) { func newLeafValue(hashFunc HashFunction, k, v []byte) ([]byte, []byte, error) {
leafKey, err := hashFunc.Hash(k, v, []byte{1}) leafKey, err := hashFunc.Hash(k, v, []byte{1})
if err != nil { if err != nil {
@ -326,6 +339,11 @@ func ReadLeafValue(b []byte) ([]byte, []byte) {
return k, v return k, v
} }
// newIntermediate is the Tree-bound variant of the package-level
// newIntermediate: it first increments the Tree's debug hash counter (a no-op
// when t.dbg is nil) and then delegates to newIntermediate using the Tree's
// hash function with l and r as the left and right inputs, returning its
// results unchanged.
func (t *Tree) newIntermediate(l, r []byte) ([]byte, []byte, error) {
t.dbg.incHash()
return newIntermediate(t.hashFunction, l, r)
}
func newIntermediate(hashFunc HashFunction, l, r []byte) ([]byte, []byte, error) { func newIntermediate(hashFunc HashFunction, l, r []byte) ([]byte, []byte, error) {
b := make([]byte, PrefixValueLen+hashFunc.Len()*2) b := make([]byte, PrefixValueLen+hashFunc.Len()*2)
b[0] = 2 b[0] = 2
@ -392,12 +410,12 @@ func (t *Tree) Update(k, v []byte) error {
return fmt.Errorf("key %s does not exist", hex.EncodeToString(k)) return fmt.Errorf("key %s does not exist", hex.EncodeToString(k))
} }
leafKey, leafValue, err := newLeafValue(t.hashFunction, k, v)
leafKey, leafValue, err := t.newLeafValue(k, v)
if err != nil { if err != nil {
return err return err
} }
if err := t.tx.Put(leafKey, leafValue); err != nil {
if err := t.dbPut(leafKey, leafValue); err != nil {
return err return err
} }
@ -413,7 +431,7 @@ func (t *Tree) Update(k, v []byte) error {
t.root = root t.root = root
// store root to db // store root to db
if err := t.tx.Put(dbKeyRoot, t.root); err != nil {
if err := t.dbPut(dbKeyRoot, t.root); err != nil {
return err return err
} }
return t.tx.Commit() return t.tx.Commit()
@ -535,7 +553,8 @@ func (t *Tree) Get(k []byte) ([]byte, []byte, error) {
} }
leafK, leafV := ReadLeafValue(value) leafK, leafV := ReadLeafValue(value)
if !bytes.Equal(k, leafK) { if !bytes.Equal(k, leafK) {
panic(fmt.Errorf("%s != %s", BytesToBigInt(k), BytesToBigInt(leafK)))
panic(fmt.Errorf("Tree.Get error: keys doesn't match, %s != %s",
BytesToBigInt(k), BytesToBigInt(leafK)))
} }
return leafK, leafV, nil return leafK, leafV, nil
@ -577,11 +596,20 @@ func CheckProof(hashFunc HashFunction, k, v, root, packedSiblings []byte) (bool,
return false, nil return false, nil
} }
// dbPut stores the key-value pair through the Tree's current db transaction
// (t.tx). It returns an error when the Tree has no transaction; otherwise it
// returns whatever t.tx.Put returns.
func (t *Tree) dbPut(k, v []byte) error {
if t.tx == nil {
return fmt.Errorf("dbPut error: no db Tx")
}
// count the write only once we know a Put will be attempted; incDbPut is a
// no-op when t.dbg is nil
t.dbg.incDbPut()
return t.tx.Put(k, v)
}
func (t *Tree) dbGet(k []byte) ([]byte, error) { func (t *Tree) dbGet(k []byte) ([]byte, error) {
// if key is empty, return empty as value // if key is empty, return empty as value
if bytes.Equal(k, t.emptyHash) { if bytes.Equal(k, t.emptyHash) {
return t.emptyHash, nil return t.emptyHash, nil
} }
t.dbg.incDbGet()
v, err := t.db.Get(k) v, err := t.db.Get(k)
if err == nil { if err == nil {
@ -609,7 +637,7 @@ func (t *Tree) incNLeafs(nLeafs int) error {
func (t *Tree) setNLeafs(nLeafs int) error { func (t *Tree) setNLeafs(nLeafs int) error {
b := make([]byte, 8) b := make([]byte, 8)
binary.LittleEndian.PutUint64(b, uint64(nLeafs)) binary.LittleEndian.PutUint64(b, uint64(nLeafs))
if err := t.tx.Put(dbKeyNLeafs, b); err != nil {
if err := t.dbPut(dbKeyNLeafs, b); err != nil {
return err return err
} }
return nil return nil

+ 14
- 1
vt.go

@ -24,11 +24,13 @@ type params struct {
maxLevels int maxLevels int
hashFunction HashFunction hashFunction HashFunction
emptyHash []byte emptyHash []byte
dbg *dbgStats
} }
// vt stands for virtual tree. It's a tree that does not have any computed hash // vt stands for virtual tree. It's a tree that does not have any computed hash
// while placing the leafs. Once all the leafs are placed, it computes all the // while placing the leafs. Once all the leafs are placed, it computes all the
// hashes. In this way, each node hash is only computed one time.
// hashes. In this way, each node hash is only computed one time (at the end)
// and the tree is computed in memory.
type vt struct { type vt struct {
root *node root *node
params *params params *params
@ -45,6 +47,15 @@ func newVT(maxLevels int, hash HashFunction) vt {
} }
} }
// WIP
// func (t *vt) addBatch(fromLvl int, k, v []byte) error {
// // parallelize adding leafs in the virtual tree
// nCPU := flp2(runtime.NumCPU())
// l := int(math.Log2(float64(nCPU)))
//
// return nil
// }
func (t *vt) add(fromLvl int, k, v []byte) error { func (t *vt) add(fromLvl int, k, v []byte) error {
leaf := newLeafNode(t.params, k, v) leaf := newLeafNode(t.params, k, v)
if t.root == nil { if t.root == nil {
@ -205,6 +216,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
t := n.typ() t := n.typ()
switch t { switch t {
case vtLeaf: case vtLeaf:
p.dbg.incHash()
leafKey, leafValue, err := newLeafValue(p.hashFunction, n.k, n.v) leafKey, leafValue, err := newLeafValue(p.hashFunction, n.k, n.v)
if err != nil { if err != nil {
return pairs, err return pairs, err
@ -235,6 +247,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
} }
// once the sub nodes are computed, can compute the current node // once the sub nodes are computed, can compute the current node
// hash // hash
p.dbg.incHash()
k, v, err := newIntermediate(p.hashFunction, n.l.h, n.r.h) k, v, err := newIntermediate(p.hashFunction, n.l.h, n.r.h)
if err != nil { if err != nil {
return nil, err return nil, err

Loading…
Cancel
Save