mirror of
https://github.com/arnaucube/arbo.git
synced 2026-01-15 01:41:28 +01:00
AddBatch implement using VirtualTree.AddBatch
Reduces the num of hashes, dbGet and dbPut on cases D & E. Current benchmarks: ``` CASE A, AddBatch was 9.101855 times faster than without AddBatch nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory dbgStats(hash: 2.047k, dbGet: 1, dbPut: 2.049k) CASE B, AddBatch was 8.244078 times faster than without AddBatch nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory dbgStats(hash: 2.047k, dbGet: 198, dbPut: 2.049k) CASE C, AddBatch was 8.511131 times faster than without AddBatch nCPU: 4, nLeafs: 1024, hash: Poseidon, db: memory dbgStats(hash: 2.047k, dbGet: 202, dbPut: 2.049k) CASE D, AddBatch was 8.565696 times faster than without AddBatch nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory dbgStats(hash: 8.191k, dbGet: 1.800k, dbPut: 8.193k) CASE E, AddBatch was 8.970056 times faster than without AddBatch nCPU: 4, nLeafs: 4096, hash: Poseidon, db: memory dbgStats(hash: 10.409k, dbGet: 2.668k, dbPut: 10.861k) TestAddBatchBench: nCPU: 4, nLeafs: 50000, hash: Blake2b, db: leveldb Add loop: 10.616145533s dbgStats(hash: 824.884k, dbGet: 788.975k, dbPut: 924.884k) AddBatch: 942.34815ms dbgStats(hash: 122.051k, dbGet: 1, dbPut: 122.053k) TestDbgStats add in loop dbgStats(hash: 141.911k, dbGet: 134.650k, dbPut: 161.911k) addbatch caseA dbgStats(hash: 24.522k, dbGet: 1, dbPut: 24.524k) addbatch caseD dbgStats(hash: 26.985k, dbGet: 2.465k, dbPut: 26.989k) ```
This commit is contained in:
207
vt.go
207
vt.go
@@ -68,41 +68,39 @@ func newVT(maxLevels int, hash HashFunction) vt {
|
||||
}
|
||||
}
|
||||
|
||||
func (t *vt) addBatch(ks, vs [][]byte) error {
|
||||
func (t *vt) addBatch(ks, vs [][]byte) ([]int, error) {
|
||||
// parallelize adding leafs in the virtual tree
|
||||
nCPU := flp2(runtime.NumCPU())
|
||||
if nCPU == 1 || len(ks) < nCPU {
|
||||
// var invalids []int
|
||||
var invalids []int
|
||||
for i := 0; i < len(ks); i++ {
|
||||
if err := t.add(0, ks[i], vs[i]); err != nil {
|
||||
// invalids = append(invalids, i)
|
||||
fmt.Println(err) // TODO WIP
|
||||
invalids = append(invalids, i)
|
||||
}
|
||||
}
|
||||
return nil // TODO invalids
|
||||
return invalids, nil
|
||||
}
|
||||
|
||||
l := int(math.Log2(float64(nCPU)))
|
||||
|
||||
kvs, err := t.params.keysValuesToKvs(ks, vs)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
|
||||
buckets := splitInBuckets(kvs, nCPU)
|
||||
|
||||
nodesAtL, err := t.getNodesAtLevel(l)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
// fmt.Println("nodesatL pre-E", len(nodesAtL))
|
||||
if len(nodesAtL) != nCPU {
|
||||
if len(nodesAtL) != nCPU && t.root != nil {
|
||||
// CASE E: add one key at each bucket, and then do CASE D
|
||||
for i := 0; i < len(buckets); i++ {
|
||||
// add one leaf of the bucket, if there is an error when
|
||||
// adding the k-v, try to add the next one of the bucket
|
||||
// (until one is added)
|
||||
var inserted int
|
||||
inserted := -1
|
||||
for j := 0; j < len(buckets[i]); j++ {
|
||||
if err := t.add(0, buckets[i][j].k, buckets[i][j].v); err == nil {
|
||||
inserted = j
|
||||
@@ -111,13 +109,20 @@ func (t *vt) addBatch(ks, vs [][]byte) error {
|
||||
}
|
||||
|
||||
// remove the inserted element from buckets[i]
|
||||
buckets[i] = append(buckets[i][:inserted], buckets[i][inserted+1:]...)
|
||||
// fmt.Println("rm-ins", inserted)
|
||||
if inserted != -1 {
|
||||
buckets[i] = append(buckets[i][:inserted], buckets[i][inserted+1:]...)
|
||||
}
|
||||
}
|
||||
nodesAtL, err = t.getNodesAtLevel(l)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
if len(nodesAtL) != nCPU {
|
||||
fmt.Println("ASDF")
|
||||
panic("should not happen")
|
||||
}
|
||||
|
||||
subRoots := make([]*node, nCPU)
|
||||
invalidsInBucket := make([][]int, nCPU)
|
||||
@@ -141,49 +146,64 @@ func (t *vt) addBatch(ks, vs [][]byte) error {
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
var invalids []int
|
||||
for i := 0; i < len(invalidsInBucket); i++ {
|
||||
invalids = append(invalids, invalidsInBucket[i]...)
|
||||
}
|
||||
|
||||
newRootNode, err := upFromNodes(subRoots)
|
||||
if err != nil {
|
||||
return err
|
||||
return nil, err
|
||||
}
|
||||
t.root = newRootNode
|
||||
|
||||
return nil
|
||||
return invalids, nil
|
||||
}
|
||||
|
||||
func (t *vt) getNodesAtLevel(l int) ([]*node, error) {
|
||||
if t.root == nil {
|
||||
return nil, nil
|
||||
var r []*node
|
||||
nChilds := int(math.Pow(2, float64(l))) //nolint:gomnd
|
||||
for i := 0; i < nChilds; i++ {
|
||||
r = append(r, nil)
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
return t.root.getNodesAtLevel(0, l)
|
||||
}
|
||||
|
||||
func (n *node) getNodesAtLevel(currLvl, l int) ([]*node, error) {
|
||||
var nodes []*node
|
||||
if n == nil {
|
||||
var r []*node
|
||||
nChilds := int(math.Pow(2, float64(l-currLvl))) //nolint:gomnd
|
||||
for i := 0; i < nChilds; i++ {
|
||||
r = append(r, nil)
|
||||
}
|
||||
return r, nil
|
||||
}
|
||||
|
||||
typ := n.typ()
|
||||
if currLvl == l && typ != vtEmpty {
|
||||
nodes = append(nodes, n)
|
||||
return nodes, nil
|
||||
return []*node{n}, nil
|
||||
}
|
||||
if currLvl >= l {
|
||||
panic("should not reach this point") // TODO TMP
|
||||
// return nil, nil
|
||||
}
|
||||
|
||||
if n.l != nil {
|
||||
nodesL, err := n.l.getNodesAtLevel(currLvl+1, l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodes = append(nodes, nodesL...)
|
||||
var nodes []*node
|
||||
|
||||
nodesL, err := n.l.getNodesAtLevel(currLvl+1, l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if n.r != nil {
|
||||
nodesR, err := n.r.getNodesAtLevel(currLvl+1, l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodes = append(nodes, nodesR...)
|
||||
nodes = append(nodes, nodesL...)
|
||||
|
||||
nodesR, err := n.r.getNodesAtLevel(currLvl+1, l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodes = append(nodes, nodesR...)
|
||||
|
||||
return nodes, nil
|
||||
}
|
||||
|
||||
@@ -194,9 +214,11 @@ func upFromNodes(ns []*node) (*node, error) {
|
||||
|
||||
var res []*node
|
||||
for i := 0; i < len(ns); i += 2 {
|
||||
if ns[i].typ() == vtEmpty && ns[i+1].typ() == vtEmpty {
|
||||
// if ns[i].typ() == vtEmpty && ns[i+1].typ() == vtEmpty {
|
||||
if ns[i] == nil && ns[i+1] == nil {
|
||||
// when both sub nodes are empty, the node is also empty
|
||||
res = append(res, ns[i]) // empty node
|
||||
continue
|
||||
}
|
||||
n := &node{
|
||||
l: ns[i],
|
||||
@@ -207,6 +229,56 @@ func upFromNodes(ns []*node) (*node, error) {
|
||||
return upFromNodes(res)
|
||||
}
|
||||
|
||||
// func upFromNodesComputingHashes(p *params, ns []*node, pairs [][2][]byte) (
|
||||
// [][2][]byte, *node, error) {
|
||||
// if len(ns) == 1 {
|
||||
// return pairs, ns[0], nil
|
||||
// }
|
||||
//
|
||||
// var res []*node
|
||||
// for i := 0; i < len(ns); i += 2 {
|
||||
// if ns[i] == nil && ns[i+1] == nil {
|
||||
// // when both sub nodes are empty, the node is also empty
|
||||
// res = append(res, ns[i]) // empty node
|
||||
// continue
|
||||
// }
|
||||
// n := &node{
|
||||
// l: ns[i],
|
||||
// r: ns[i+1],
|
||||
// }
|
||||
//
|
||||
// if n.l == nil {
|
||||
// n.l = &node{
|
||||
// h: p.emptyHash,
|
||||
// }
|
||||
// }
|
||||
// if n.r == nil {
|
||||
// n.r = &node{
|
||||
// h: p.emptyHash,
|
||||
// }
|
||||
// }
|
||||
// if n.l.typ() == vtEmpty && n.r.typ() == vtLeaf {
|
||||
// n = n.r
|
||||
// }
|
||||
// if n.r.typ() == vtEmpty && n.l.typ() == vtLeaf {
|
||||
// n = n.l
|
||||
// }
|
||||
//
|
||||
// // once the sub nodes are computed, can compute the current node
|
||||
// // hash
|
||||
// p.dbg.incHash()
|
||||
// k, v, err := newIntermediate(p.hashFunction, n.l.h, n.r.h)
|
||||
// if err != nil {
|
||||
// return nil, nil, err
|
||||
// }
|
||||
// n.h = k
|
||||
// kv := [2][]byte{k, v}
|
||||
// pairs = append(pairs, kv)
|
||||
// res = append(res, n)
|
||||
// }
|
||||
// return upFromNodesComputingHashes(p, res, pairs)
|
||||
// }
|
||||
|
||||
func (t *vt) add(fromLvl int, k, v []byte) error {
|
||||
leaf := newLeafNode(t.params, k, v)
|
||||
if t.root == nil {
|
||||
@@ -224,13 +296,63 @@ func (t *vt) add(fromLvl int, k, v []byte) error {
|
||||
// computeHashes should be called after all the vt.add is used, once all the
|
||||
// leafs are in the tree
|
||||
func (t *vt) computeHashes() ([][2][]byte, error) {
|
||||
var pairs [][2][]byte
|
||||
var err error
|
||||
// TODO parallelize computeHashes
|
||||
pairs, err = t.root.computeHashes(t.params, pairs)
|
||||
|
||||
nCPU := flp2(runtime.NumCPU())
|
||||
l := int(math.Log2(float64(nCPU)))
|
||||
nodesAtL, err := t.getNodesAtLevel(l)
|
||||
if err != nil {
|
||||
return pairs, err
|
||||
return nil, err
|
||||
}
|
||||
subRoots := make([]*node, nCPU)
|
||||
bucketPairs := make([][][2][]byte, nCPU)
|
||||
dbgStatsPerBucket := make([]*dbgStats, nCPU)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(nCPU)
|
||||
for i := 0; i < nCPU; i++ {
|
||||
go func(cpu int) {
|
||||
bucketVT := newVT(t.params.maxLevels-l, t.params.hashFunction)
|
||||
bucketVT.params.dbg = newDbgStats()
|
||||
bucketVT.root = nodesAtL[cpu]
|
||||
|
||||
bucketPairs[cpu], err = bucketVT.root.computeHashes(l,
|
||||
t.params.maxLevels, bucketVT.params, bucketPairs[cpu])
|
||||
if err != nil {
|
||||
// TODO WIP
|
||||
fmt.Println("TODO ERR, err:", err)
|
||||
panic(err)
|
||||
}
|
||||
|
||||
subRoots[cpu] = bucketVT.root
|
||||
dbgStatsPerBucket[cpu] = bucketVT.params.dbg
|
||||
wg.Done()
|
||||
}(i)
|
||||
}
|
||||
wg.Wait()
|
||||
|
||||
for i := 0; i < len(dbgStatsPerBucket); i++ {
|
||||
t.params.dbg.add(dbgStatsPerBucket[i])
|
||||
}
|
||||
|
||||
var pairs [][2][]byte
|
||||
for i := 0; i < len(bucketPairs); i++ {
|
||||
pairs = append(pairs, bucketPairs[i]...)
|
||||
}
|
||||
|
||||
nodesAtL, err = t.getNodesAtLevel(l)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for i := 0; i < len(nodesAtL); i++ {
|
||||
nodesAtL = subRoots
|
||||
}
|
||||
|
||||
pairs, err = t.root.computeHashes(0, l, t.params, pairs)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return pairs, nil
|
||||
}
|
||||
|
||||
@@ -363,7 +485,12 @@ func (n *node) downUntilDivergence(p *params, currLvl int, oldLeaf, newLeaf *nod
|
||||
}
|
||||
|
||||
// returns an array of key-values to store in the db
|
||||
func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error) {
|
||||
func (n *node) computeHashes(currLvl, maxLvl int, p *params, pairs [][2][]byte) (
|
||||
[][2][]byte, error) {
|
||||
if n == nil || currLvl >= maxLvl {
|
||||
// no need to compute any hash
|
||||
return pairs, nil
|
||||
}
|
||||
if pairs == nil {
|
||||
pairs = [][2][]byte{}
|
||||
}
|
||||
@@ -381,7 +508,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
|
||||
pairs = append(pairs, kv)
|
||||
case vtMid:
|
||||
if n.l != nil {
|
||||
pairs, err = n.l.computeHashes(p, pairs)
|
||||
pairs, err = n.l.computeHashes(currLvl+1, maxLvl, p, pairs)
|
||||
if err != nil {
|
||||
return pairs, err
|
||||
}
|
||||
@@ -391,7 +518,7 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
|
||||
}
|
||||
}
|
||||
if n.r != nil {
|
||||
pairs, err = n.r.computeHashes(p, pairs)
|
||||
pairs, err = n.r.computeHashes(currLvl+1, maxLvl, p, pairs)
|
||||
if err != nil {
|
||||
return pairs, err
|
||||
}
|
||||
@@ -410,7 +537,9 @@ func (n *node) computeHashes(p *params, pairs [][2][]byte) ([][2][]byte, error)
|
||||
n.h = k
|
||||
kv := [2][]byte{k, v}
|
||||
pairs = append(pairs, kv)
|
||||
case vtEmpty:
|
||||
default:
|
||||
fmt.Println("n.computeHashes type no match", t)
|
||||
return nil, fmt.Errorf("ERR TMP") // TODO
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user