You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

667 lines
15 KiB

// Package arbo > vt.go implements the Virtual Tree, which computes a tree
// without computing any hash. With the idea of once all the leafs are placed in
// their positions, the hashes can be computed, avoiding computing a node hash
// more than one time.
package arbo
import (
"bytes"
"encoding/hex"
"fmt"
"io"
"math"
"runtime"
"sync"
)
type node struct {
l *node
r *node
k []byte
v []byte
path []bool
h []byte
}
type params struct {
maxLevels int
hashFunction HashFunction
emptyHash []byte
dbg *dbgStats
}
type kv struct {
pos int // original position in the inputted array
keyPath []byte
k []byte
v []byte
}
func (p *params) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) {
if len(ks) != len(vs) {
return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)",
len(ks), len(vs))
}
kvs := make([]kv, len(ks))
for i := 0; i < len(ks); i++ {
keyPath := make([]byte, p.hashFunction.Len())
copy(keyPath[:], ks[i])
kvs[i].pos = i
kvs[i].keyPath = keyPath
kvs[i].k = ks[i]
kvs[i].v = vs[i]
}
return kvs, nil
}
// vt stands for virtual tree. It's a tree that does not have any computed hash
// while placing the leafs. Once all the leafs are placed, it computes all the
// hashes. In this way, each node hash is only computed one time (at the end)
// and the tree is computed in memory.
type vt struct {
root *node
params *params
}
func newVT(maxLevels int, hash HashFunction) vt {
return vt{
root: nil,
params: &params{
maxLevels: maxLevels,
hashFunction: hash,
emptyHash: make([]byte, hash.Len()), // empty
},
}
}
// addBatch adds a batch of key-values to the VirtualTree. Returns an array
// containing the indexes of the keys failed to add. Does not include the
// computation of hashes of the nodes neither the storage of the key-values of
// the tree into the db. After addBatch, vt.computeHashes should be called to
// compute the hashes of all the nodes of the tree.
func (t *vt) addBatch(ks, vs [][]byte) ([]int, error) {
nCPU := flp2(runtime.NumCPU())
if nCPU == 1 || len(ks) < nCPU {
var invalids []int
for i := 0; i < len(ks); i++ {
if err := t.add(0, ks[i], vs[i]); err != nil {
invalids = append(invalids, i)
}
}
return invalids, nil
}
l := int(math.Log2(float64(nCPU)))
kvs, err := t.params.keysValuesToKvs(ks, vs)
if err != nil {
return nil, err
}
buckets := splitInBuckets(kvs, nCPU)
nodesAtL, err := t.getNodesAtLevel(l)
if err != nil {
return nil, err
}
if len(nodesAtL) != nCPU && t.root != nil {
/*
Already populated Tree but Unbalanced
- Need to fill M1 and M2, and then will be able to continue with the flow
- Search for M1 & M2 in the inputed Keys
- Add M1 & M2 to the Tree
- From here can continue with the flow
R
/ \
/ \
/ \
* *
| \
| \
| \
L: M1 * M2 * (where M1 and M2 are empty)
/ | /
/ | /
/ | /
A * *
/ \ | \
/ \ | \
/ \ | \
B * * C
/ \ |\
... ... | \
| \
D E
*/
// add one key at each bucket, and then continue with the flow
for i := 0; i < len(buckets); i++ {
// add one leaf of the bucket, if there is an error when
// adding the k-v, try to add the next one of the bucket
// (until one is added)
inserted := -1
for j := 0; j < len(buckets[i]); j++ {
if err := t.add(0, buckets[i][j].k, buckets[i][j].v); err == nil {
inserted = j
break
}
}
// remove the inserted element from buckets[i]
// fmt.Println("rm-ins", inserted)
if inserted != -1 {
buckets[i] = append(buckets[i][:inserted], buckets[i][inserted+1:]...)
}
}
nodesAtL, err = t.getNodesAtLevel(l)
if err != nil {
return nil, err
}
}
if len(nodesAtL) != nCPU {
panic("should not happen") // TODO TMP
}
subRoots := make([]*node, nCPU)
invalidsInBucket := make([][]int, nCPU)
var wg sync.WaitGroup
wg.Add(nCPU)
for i := 0; i < nCPU; i++ {
go func(cpu int) {
bucketVT := newVT(t.params.maxLevels-l, t.params.hashFunction)
bucketVT.root = nodesAtL[cpu]
for j := 0; j < len(buckets[cpu]); j++ {
if err = bucketVT.add(l, buckets[cpu][j].k, buckets[cpu][j].v); err != nil {
invalidsInBucket[cpu] = append(invalidsInBucket[cpu], buckets[cpu][j].pos)
}
}
subRoots[cpu] = bucketVT.root
wg.Done()
}(i)
}
wg.Wait()
var invalids []int
for i := 0; i < len(invalidsInBucket); i++ {
invalids = append(invalids, invalidsInBucket[i]...)
}
newRootNode, err := upFromNodes(subRoots)
if err != nil {
return nil, err
}
t.root = newRootNode
return invalids, nil
}
func (t *vt) getNodesAtLevel(l int) ([]*node, error) {
if t.root == nil {
var r []*node
nChilds := int(math.Pow(2, float64(l))) //nolint:gomnd
for i := 0; i < nChilds; i++ {
r = append(r, nil)
}
return r, nil
}
return t.root.getNodesAtLevel(0, l)
}
func (n *node) getNodesAtLevel(currLvl, l int) ([]*node, error) {
if n == nil {
var r []*node
nChilds := int(math.Pow(2, float64(l-currLvl))) //nolint:gomnd
for i := 0; i < nChilds; i++ {
r = append(r, nil)
}
return r, nil
}
typ := n.typ()
if currLvl == l && typ != vtEmpty {
return []*node{n}, nil
}
if currLvl >= l {
panic("should not reach this point") // TODO TMP
}
var nodes []*node
nodesL, err := n.l.getNodesAtLevel(currLvl+1, l)
if err != nil {
return nil, err
}
nodes = append(nodes, nodesL...)
nodesR, err := n.r.getNodesAtLevel(currLvl+1, l)
if err != nil {
return nil, err
}
nodes = append(nodes, nodesR...)
return nodes, nil
}
func upFromNodes(ns []*node) (*node, error) {
if len(ns) == 1 {
return ns[0], nil
}
var res []*node
for i := 0; i < len(ns); i += 2 {
if ns[i].typ() == vtEmpty && ns[i+1].typ() == vtEmpty {
// if ns[i] == nil && ns[i+1] == nil {
// when both sub nodes are empty, the node is also empty
res = append(res, ns[i]) // empty node
continue
}
n := &node{
l: ns[i],
r: ns[i+1],
}
res = append(res, n)
}
return upFromNodes(res)
}
func (t *vt) add(fromLvl int, k, v []byte) error {
leaf := newLeafNode(t.params, k, v)
if t.root == nil {
t.root = leaf
return nil
}
if err := t.root.add(t.params, fromLvl, leaf); err != nil {
return err
}
return nil
}
// computeHashes should be called after all the vt.add is used, once all the
// leafs are in the tree
func (t *vt) computeHashes() ([][2][]byte, error) {
var err error
nCPU := flp2(runtime.NumCPU())
l := int(math.Log2(float64(nCPU)))
nodesAtL, err := t.getNodesAtLevel(l)
if err != nil {
return nil, err
}
subRoots := make([]*node, nCPU)
bucketPairs := make([][][2][]byte, nCPU)
dbgStatsPerBucket := make([]*dbgStats, nCPU)
var wg sync.WaitGroup
wg.Add(nCPU)
for i := 0; i < nCPU; i++ {
go func(cpu int) {
bucketVT := newVT(t.params.maxLevels-l, t.params.hashFunction)
bucketVT.params.dbg = newDbgStats()
bucketVT.root = nodesAtL[cpu]
bucketPairs[cpu], err = bucketVT.root.computeHashes(l,
t.params.maxLevels, bucketVT.params, bucketPairs[cpu])
if err != nil {
// TODO WIP
panic("TODO" + err.Error())
}
subRoots[cpu] = bucketVT.root
dbgStatsPerBucket[cpu] = bucketVT.params.dbg
wg.Done()
}(i)
}
wg.Wait()
for i := 0; i < len(dbgStatsPerBucket); i++ {
t.params.dbg.add(dbgStatsPerBucket[i])
}
var pairs [][2][]byte
for i := 0; i < len(bucketPairs); i++ {
pairs = append(pairs, bucketPairs[i]...)
}
nodesAtL, err = t.getNodesAtLevel(l)
if err != nil {
return nil, err
}
for i := 0; i < len(nodesAtL); i++ {
nodesAtL = subRoots
}
pairs, err = t.root.computeHashes(0, l, t.params, pairs)
if err != nil {
return nil, err
}
return pairs, nil
}
func newLeafNode(p *params, k, v []byte) *node {
keyPath := make([]byte, p.hashFunction.Len())
copy(keyPath[:], k)
path := getPath(p.maxLevels, keyPath)
n := &node{
k: k,
v: v,
path: path,
}
return n
}
type virtualNodeType int
const (
vtEmpty = 0 // for convenience uses same value that PrefixValueEmpty
vtLeaf = 1 // for convenience uses same value that PrefixValueLeaf
vtMid = 2 // for convenience uses same value that PrefixValueIntermediate
)
func (n *node) typ() virtualNodeType {
if n == nil {
return vtEmpty // TODO decide if return 'vtEmpty' or an error
}
if n.l == nil && n.r == nil && n.k != nil {
return vtLeaf
}
if n.l != nil || n.r != nil {
return vtMid
}
return vtEmpty
}
func (n *node) add(p *params, currLvl int, leaf *node) error {
if currLvl > p.maxLevels-1 {
return fmt.Errorf("max virtual level %d", currLvl)
}
if n == nil {
// n = leaf // TMP!
return nil
}
t := n.typ()
switch t {
case vtMid:
if leaf.path[currLvl] {
//right
if n.r == nil {
// empty sub-node, add the leaf here
n.r = leaf
return nil
}
if err := n.r.add(p, currLvl+1, leaf); err != nil {
return err
}
} else {
if n.l == nil {
// empty sub-node, add the leaf here
n.l = leaf
return nil
}
if err := n.l.add(p, currLvl+1, leaf); err != nil {
return err
}
}
case vtLeaf:
if bytes.Equal(n.k, leaf.k) {
return fmt.Errorf("key already exists. Existing node: %s, trying to add node: %s",
hex.EncodeToString(n.k), hex.EncodeToString(leaf.k))
}
oldLeaf := &node{
k: n.k,
v: n.v,
path: n.path,
}
// remove values from current node (converting it to mid node)
n.k = nil
n.v = nil
n.h = nil
n.path = nil
if err := n.downUntilDivergence(p, currLvl, oldLeaf, leaf); err != nil {
return err
}
case vtEmpty:
panic(fmt.Errorf("EMPTY %v", n)) // TODO TMP
default:
return fmt.Errorf("ERR") // TODO TMP
}
return nil
}
func (n *node) downUntilDivergence(p *params, currLvl int, oldLeaf, newLeaf *node) error {
if currLvl > p.maxLevels-1 {
return fmt.Errorf("max virtual level %d", currLvl)
}
if oldLeaf.path[currLvl] != newLeaf.path[currLvl] {
// reached divergence in next level
if newLeaf.path[currLvl] {
n.l = oldLeaf
n.r = newLeaf
} else {
n.l = newLeaf
n.r = oldLeaf
}
return nil
}
// no divergence yet, continue going down
if newLeaf.path[currLvl] {
// right
n.r = &node{}
if err := n.r.downUntilDivergence(p, currLvl+1, oldLeaf, newLeaf); err != nil {
return err
}
} else {
// left
n.l = &node{}
if err := n.l.downUntilDivergence(p, currLvl+1, oldLeaf, newLeaf); err != nil {
return err
}
}
return nil
}
func splitInBuckets(kvs []kv, nBuckets int) [][]kv {
buckets := make([][]kv, nBuckets)
// 1. classify the keyvalues into buckets
for i := 0; i < len(kvs); i++ {
pair := kvs[i]
// bucketnum := keyToBucket(pair.k, nBuckets)
bucketnum := keyToBucket(pair.keyPath, nBuckets)
buckets[bucketnum] = append(buckets[bucketnum], pair)
}
return buckets
}
// TODO rename in a more 'real' name (calculate bucket from/for key)
func keyToBucket(k []byte, nBuckets int) int {
nLevels := int(math.Log2(float64(nBuckets)))
b := make([]int, nBuckets)
for i := 0; i < nBuckets; i++ {
b[i] = i
}
r := b
mid := len(r) / 2 //nolint:gomnd
for i := 0; i < nLevels; i++ {
if int(k[i/8]&(1<<(i%8))) != 0 {
r = r[mid:]
mid = len(r) / 2 //nolint:gomnd
} else {
r = r[:mid]
mid = len(r) / 2 //nolint:gomnd
}
}
return r[0]
}
// flp2 computes the floor power of 2, the highest power of 2 under the given
// value.
func flp2(n int) int {
res := 0
for i := n; i >= 1; i-- {
if (i & (i - 1)) == 0 {
res = i
break
}
}
return res
}
// returns an array of key-values to store in the db
func (n *node) computeHashes(currLvl, maxLvl int, p *params, pairs [][2][]byte) (
[][2][]byte, error) {
if n == nil || currLvl >= maxLvl {
// no need to compute any hash
return pairs, nil
}
if pairs == nil {
pairs = [][2][]byte{}
}
var err error
t := n.typ()
switch t {
case vtLeaf:
p.dbg.incHash()
leafKey, leafValue, err := newLeafValue(p.hashFunction, n.k, n.v)
if err != nil {
return pairs, err
}
n.h = leafKey
kv := [2][]byte{leafKey, leafValue}
pairs = append(pairs, kv)
case vtMid:
if n.l != nil {
pairs, err = n.l.computeHashes(currLvl+1, maxLvl, p, pairs)
if err != nil {
return pairs, err
}
} else {
n.l = &node{
h: p.emptyHash,
}
}
if n.r != nil {
pairs, err = n.r.computeHashes(currLvl+1, maxLvl, p, pairs)
if err != nil {
return pairs, err
}
} else {
n.r = &node{
h: p.emptyHash,
}
}
// once the sub nodes are computed, can compute the current node
// hash
p.dbg.incHash()
k, v, err := newIntermediate(p.hashFunction, n.l.h, n.r.h)
if err != nil {
return nil, err
}
n.h = k
kv := [2][]byte{k, v}
pairs = append(pairs, kv)
case vtEmpty:
default:
return nil, fmt.Errorf("ERR:n.computeHashes type (%d) no match", t) // TODO TMP
}
return pairs, nil
}
//nolint:unused
func (t *vt) graphviz(w io.Writer) error {
fmt.Fprintf(w, `digraph hierarchy {
node [fontname=Monospace,fontsize=10,shape=box]
`)
if _, err := t.root.graphviz(w, t.params, 0); err != nil {
return err
}
fmt.Fprintf(w, "}\n")
return nil
}
//nolint:unused
func (n *node) graphviz(w io.Writer, p *params, nEmpties int) (int, error) {
nChars := 4 // TODO move to global constant
if n == nil {
return nEmpties, nil
}
t := n.typ()
switch t {
case vtLeaf:
leafKey, _, err := newLeafValue(p.hashFunction, n.k, n.v)
if err != nil {
return nEmpties, err
}
fmt.Fprintf(w, "\"%p\" [style=filled,label=\"%v\"];\n", n, hex.EncodeToString(leafKey[:nChars]))
fmt.Fprintf(w, "\"%p\" -> {\"k:%v\\nv:%v\"}\n", n,
hex.EncodeToString(n.k[:nChars]),
hex.EncodeToString(n.v[:nChars]))
fmt.Fprintf(w, "\"k:%v\\nv:%v\" [style=dashed]\n",
hex.EncodeToString(n.k[:nChars]),
hex.EncodeToString(n.v[:nChars]))
case vtMid:
fmt.Fprintf(w, "\"%p\" [label=\"\"];\n", n)
lStr := fmt.Sprintf("%p", n.l)
rStr := fmt.Sprintf("%p", n.r)
eStr := ""
if n.l == nil {
lStr = fmt.Sprintf("empty%v", nEmpties)
eStr += fmt.Sprintf("\"%v\" [style=dashed,label=0];\n",
lStr)
nEmpties++
}
if n.r == nil {
rStr = fmt.Sprintf("empty%v", nEmpties)
eStr += fmt.Sprintf("\"%v\" [style=dashed,label=0];\n",
rStr)
nEmpties++
}
fmt.Fprintf(w, "\"%p\" -> {\"%v\" \"%v\"}\n", n, lStr, rStr)
fmt.Fprint(w, eStr)
nEmpties, err := n.l.graphviz(w, p, nEmpties)
if err != nil {
return nEmpties, err
}
nEmpties, err = n.r.graphviz(w, p, nEmpties)
if err != nil {
return nEmpties, err
}
case vtEmpty:
default:
return nEmpties, fmt.Errorf("ERR")
}
return nEmpties, nil
}
//nolint:unused
func (t *vt) printGraphviz() error {
w := bytes.NewBufferString("")
fmt.Fprintf(w,
"--------\nGraphviz:\n")
err := t.graphviz(w)
if err != nil {
fmt.Println(w)
return err
}
fmt.Fprintf(w,
"End of Graphviz --------\n")
fmt.Println(w)
return nil
}