AddBatch implement using VirtualTree.AddBatch

Reduces the num of hashes, dbGet and dbPut on cases D & E.

Current benchmarks:
```
CASE A, AddBatch was 9.101855 times faster than without AddBatch
	nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory
	dbgStats(hash: 2.047k, dbGet: 1, dbPut: 2.049k)
CASE B, AddBatch was 8.244078 times faster than without AddBatch
	nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory
	dbgStats(hash: 2.047k, dbGet: 198, dbPut: 2.049k)
CASE C, AddBatch was 8.511131 times faster than without AddBatch
	nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory
	dbgStats(hash: 2.047k, dbGet: 202, dbPut: 2.049k)
CASE D, AddBatch was 8.565696 times faster than without AddBatch
	nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory
	dbgStats(hash: 8.191k, dbGet: 1.800k, dbPut: 8.193k)
CASE E, AddBatch was 8.970056 times faster than without AddBatch
	nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory
	dbgStats(hash: 10.409k, dbGet: 2.668k, dbPut: 10.861k)
TestAddBatchBench: nCPU: 4, nLeafs: 50000, hash: Blake2b, db: leveldb
	Add loop:	10.616145533s
		dbgStats(hash: 824.884k, dbGet: 788.975k, dbPut: 924.884k)
	AddBatch:	942.34815ms
		dbgStats(hash: 122.051k, dbGet: 1, dbPut: 122.053k)
TestDbgStats
	add in loop    dbgStats(hash: 141.911k, dbGet: 134.650k, dbPut: 161.911k)
	addbatch caseA dbgStats(hash: 24.522k, dbGet: 1, dbPut: 24.524k)
	addbatch caseD dbgStats(hash: 26.985k, dbGet: 2.465k, dbPut: 26.989k)
```
This commit is contained in:
2021-05-24 17:05:03 +02:00
parent d09bd605bb
commit f31ed1d2d1
4 changed files with 473 additions and 64 deletions

207
vt.go
View File

@@ -68,41 +68,39 @@ func newVT(maxLevels int, hash HashFunction) vt {
}
}
func (t *vt) addBatch(ks, vs [][]byte) error {
func (t *vt) addBatch(ks, vs [][]byte) ([]int, error) {
// parallelize adding leafs in the virtual tree
nCPU := flp2(runtime.NumCPU())
if nCPU == 1 || len(ks) < nCPU {
// var invalids []int
var invalids []int
for i := 0; i < len(ks); i++ {
if err := t.add(0, ks[i], vs[i]); err != nil {
// invalids = append(invalids, i)
fmt.Println(err) // TODO WIP
invalids = append(invalids, i)
}
}
return nil // TODO invalids
return invalids, nil
}
l := int(math.Log2(float64(nCPU)))
kvs, err := t.params.keysValuesToKvs(ks, vs)
if err != nil {
return err
return nil, err
}
buckets := splitInBuckets(kvs, nCPU)
nodesAtL, err := t.getNodesAtLevel(l)
if err != nil {
return err
return nil, err
}
// fmt.Println("nodesatL pre-E", len(nodesAtL))
if len(nodesAtL) != nCPU {
if len(nodesAtL) != nCPU && t.root != nil {
// CASE E: add one key at each bucket, and then do CASE D
for i := 0; i < len(buckets); i++ {
// add one leaf of the bucket, if there is an error when
// adding the k-v, try to add the next one of the bucket
// (until one is added)
var inserted int
inserted := -1
for j := 0; j < len(buckets[i]); j++ {
if err := t.add(0, buckets[i][j].k, buckets[i][j].v); err == nil {
inserted = j
@@ -111,13 +109,20 @@ func (t *vt) addBatch(ks, vs [][]byte) error {
}
// remove the inserted element from buckets[i]
buckets[i] = append(buckets[i][:inserted], buckets[i][inserted+1:]...)
// fmt.Println("rm-ins", inserted)
if inserted != -1 {
buckets[i] = append(buckets[i][:inserted], buckets[i][inserted+1:]...)
}
}
nodesAtL, err = t.getNodesAtLevel(l)
if err != nil {
return err
return nil, err
}
}
if len(nodesAtL) != nCPU {
fmt.Println("ASDF")
panic("should not happen")
}
subRoots := make([]*node, nCPU)
invalidsInBucket := make([][]int, nCPU)
@@ -141,49 +146,64 @@ func (t *vt) addBatch(ks, vs [][]byte) error {
}
wg.Wait()
var invalids []int
for i := 0; i < len(invalidsInBucket); i++ {
invalids = append(invalids, invalidsInBucket[i]...)
}
newRootNode, err := upFromNodes(subRoots)
if err != nil {
return err
return nil, err
}
t.root = newRootNode
return nil
return invalids, nil
}
func (t *vt) getNodesAtLevel(l int) ([]*node, error) {
if t.root == nil {
return nil, nil
var r []*node
nChilds := int(math.Pow(2, float64(l))) //nolint:gomnd
for i := 0; i < nChilds; i++ {
r = append(r, nil)
}
return r, nil
}
return t.root.getNodesAtLevel(0, l)
}
func (n *node) getNodesAtLevel(currLvl, l int) ([]*node, error) {
var nodes []*node
if n == nil {
var r []*node
nChilds := int(math.Pow(2, float64(l-currLvl))) //nolint:gomnd
for i := 0; i < nChilds; i++ {
r = append(r, nil)
}
return r, nil
}
typ := n.typ()
if currLvl == l && typ != vtEmpty {
nodes = append(nodes, n)
return nodes, nil
return []*node{n}, nil
}
if currLvl >= l {
panic("should not reach this point") // TODO TMP
// return nil, nil
}
if n.l != nil {
nodesL, err := n.l.getNodesAtLevel(currLvl+1, l)
if err != nil {
return nil, err
}
nodes = append(nodes, nodesL...)
var nodes []*node
nodesL, err := n.l.getNodesAtLevel(currLvl+1, l)
if err != nil {
return nil, err
}
if n.r != nil {
nodesR, err := n.r.getNodesAtLevel(currLvl+1, l)
if err != nil {
return nil, err
}
nodes = append(nodes, nodesR...)
nodes = append(nodes, nodesL...)
nodesR, err := n.r.getNodesAtLevel(currLvl+1, l)
if err != nil {
return nil, err
}
nodes = append(nodes, nodesR...)
return nodes, nil
}
@@ -194,9 +214,11 @@ func upFromNodes(ns []*node) (*node, error) {
var res []*node
for i := 0; i < len(ns); i += 2 {
if ns[i].typ() == vtEmpty && ns[i+1].typ() == vtEmpty {
// if ns[i].typ() == vtEmpty && ns[i+1].typ() == vtEmpty {
if ns[i] == nil && ns[i+1] == nil {
// when both sub nodes are empty, the node is also empty
res = append(res, ns[i]) // empty node
continue
}
n := &node{
l: ns[i],
@@ -207,6 +229,56 @@ func upFromNodes(ns []*node) (*node, error) {
return upFromNodes(res)
}
// func upFromNodesComputingHashes(p *params, ns []*node, pairs [][2][]byte) (
// [][2][]byte, *node, error) {
// if len(ns) == 1 {
// return pairs, ns[0], nil
// }
//
// var res []*node
// for i := 0; i < len(ns); i += 2 {
// if ns[i] == nil && ns[i+1] == nil {
// // when both sub nodes are empty, the node is also empty
// res = append(res, ns[i]) // empty node
// continue
// }
// n := &node{
// l: ns[i],
// r: ns[i+1],
// }
//
// if n.l == nil {
// n.l = &node{
// h: p.emptyHash,
// }
// }
// if n.r == nil {
// n.r = &node{
// h: p.emptyHash,
// }
// }
// if n.l.typ() == vtEmpty && n.r.typ() == vtLeaf {
// n = n.r
// }
// if n.r.typ() == vtEmpty && n.l.typ() == vtLeaf {
// n = n.l
// }
//
// // once the sub nodes are computed, can compute the current node
// // hash
// p.dbg.incHash()
// k, v, err := newIntermediate(p.hashFunction, n.l.h, n.r.h)
// if err != nil {
// return nil, nil, err
// }
// n.h = k
// kv := [2][]byte{k, v}
// pairs = append(pairs, kv)
// res = append(res, n)
// }
// return upFromNodesComputingHashes(p, res, pairs)
// }
func (t *vt) add(fromLvl int, k, v []byte) error {
leaf := newLeafNode(t.params, k, v)
if t.root == nil {
@@ -224,13 +296,63 @@ func (t *vt) add(fromLvl int, k, v []byte) error {
// computeHashes should be called after all the vt.add is used, once all the
// leafs are in the tree
func (t *vt) computeHashes() ([][2][]byte, error) {
var pairs [][2][]byte
var err error
// TODO parallelize computeHashes
pairs, err = t.root.computeHashes(t.params, pairs)
nCPU := flp2(runtime.NumCPU())
l := int(math.Log2(float64(nCPU)))
nodesAtL, err := t.getNodesAtLevel(l)
if err != nil {
return pairs, err
return nil, err
}
subRoots := make([]*node, nCPU)
bucketPairs := make([][][2][]byte, nCPU)
dbgStatsPerBucket := make([]*dbgStats, nCPU)
var wg sync.WaitGroup
wg.Add(nCPU)
for i := 0; i < nCPU; i++ {
go func(cpu int) {
bucketVT := newVT(t.params.maxLevels-l, t.params.hashFunction)
bucketVT.params.dbg = newDbgStats()
bucketVT.root = nodesAtL[cpu]
bucketPairs[cpu], err = bucketVT.root.computeHashes(l,
t.params.maxLevels, bucketVT.params, bucketPairs[cpu])
if err != nil {
// TODO WIP
fmt.Println("TODO ERR, err:", err)
panic(err)
}
subRoots[cpu] = bucketVT.root
dbgStatsPerBucket[cpu] = bucketVT.params.dbg
wg.Done()
}(i)
}
wg.Wait()
for i := 0; i < len(dbgStatsPerBucket); i++ {
t.params.dbg.add(dbgStatsPerBucket[i])
}
var pairs [][2][]byte
for i := 0; i < len(bucketPairs); i++ {
pairs = append(pairs, bucketPairs[i]...)
}
nodesAtL, err = t.getNodesAtLevel(l)
if err != nil {
return nil, err
}
for i := 0; i < len(nodesAtL); i++ {
nodesAtL = subRoots
}
pairs, err = t.root.computeHashes(0, l, t.params, pairs)
if err != nil {
return nil, err
}
return pairs, nil
}
@@ -363,7 +485,12 @@ func (n *node) downUntilDivergence(p *params, currLvl int, oldLeaf, newLeaf *nod
}
// returns an array of key-values to store in the db
func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error) {
func (n *node) computeHashes(currLvl, maxLvl int, p *params, pairs [][2][]byte) (
[][2][]byte, error) {
if n == nil || currLvl >= maxLvl {
// no need to compute any hash
return pairs, nil
}
if pairs == nil {
pairs = [][2][]byte{}
}
@@ -381,7 +508,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
pairs = append(pairs, kv)
case vtMid:
if n.l != nil {
pairs, err = n.l.computeHashes(p, pairs)
pairs, err = n.l.computeHashes(currLvl+1, maxLvl, p, pairs)
if err != nil {
return pairs, err
}
@@ -391,7 +518,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
}
}
if n.r != nil {
pairs, err = n.r.computeHashes(p, pairs)
pairs, err = n.r.computeHashes(currLvl+1, maxLvl, p, pairs)
if err != nil {
return pairs, err
}
@@ -410,7 +537,9 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
n.h = k
kv := [2][]byte{k, v}
pairs = append(pairs, kv)
case vtEmpty:
default:
fmt.Println("n.computeHashes type no match", t)
return nil, fmt.Errorf("ERR TMP") // TODO
}