You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

256 lines
5.8 KiB

package arbo
import (
"bytes"
"fmt"
"sort"
)
/*
AddBatch design
===============
CASE A: Empty Tree --> if tree is empty (root==0)
=================================================
- Build the full tree from bottom to top (from all the leaves to the root)
CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < numBuckets
==============================================================================
- Get the Leafs (key & value) (iterate the tree from the current root getting
the leafs)
- Create a new empty Tree
- Do CASE A for the new Tree, giving the already existing key&values (leafs)
from the original Tree + the new key&values to be added from the AddBatch call
R
/ \
A *
/ \
B C
CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=numBuckets)
==============================================================================
- Use A, B, G, F as Roots of subtrees
- Do CASE B for each subtree
- Then go from L to the Root
R
/ \
/ \
/ \
* *
/ | / \
/ | / \
/ | / \
L: A B G D
/ \
/ \
/ \
C *
/ \
/ \
/ \
D E
CASE D: Already populated Tree
==============================
- Use A, B, C, D as subtree
- Sort the Keys in Buckets that share the initial part of the path
- For each subtree add there the new leafs
R
/ \
/ \
/ \
* *
/ | / \
/ | / \
/ | / \
L: A B C D
/\ /\ / \ / \
... ... ... ... ... ...
CASE E: Already populated Tree Unbalanced
=========================================
- Need to fill M1 and M2, and then will be able to use CASE D
- Search for M1 & M2 in the input Keys
- Add M1 & M2 to the Tree
- From here can use CASE D
R
/ \
/ \
/ \
* *
| \
| \
| \
L: M1 * M2 * (where M1 and M2 are empty)
/ | /
/ | /
/ | /
A * *
/ \ | \
/ \ | \
/ \ | \
B * * C
/ \ |\
... ... | \
| \
D E
Algorithm decision
==================
- if nLeafs==0 (root==0): CASE A
- if nLeafs<nBuckets: CASE B
- if nLeafs>=nBuckets && nLeafs < minLeafsThreshold: CASE C
- else: CASE D & CASE E
- Multiple tree.Add calls: O(n log n)
- Used in: cases A, B, C
- Tree from bottom to top: O(log n)
- Used in: cases D, E
*/
// AddBatchOpt is the work-in-progress, more optimized counterpart of the
// AddBatch method. Only CASE A (empty tree) is handled so far: the given keys
// and values are sorted by path and the tree is built from the leafs up to the
// root. For any other tree state an "UNIMPLEMENTED" error is returned.
//
// keys and values must have the same length; otherwise the error produced by
// keysValuesToKvs is returned.
func (t *Tree) AddBatchOpt(keys, values [][]byte) ([]int, error) {
	t.updateAccessTime()

	t.Lock()
	defer t.Unlock()

	// TODO if len(keys) is not a power of 2, add padding of empty
	// keys&values. Maybe when len(keyvalues) is not a power of 2, cut at
	// the biggest power of 2 under the len(keys), add those 2**n key-values
	// using the AddBatch approach, and then add the remaining key-values
	// using tree.Add.

	kvs, err := t.keysValuesToKvs(keys, values)
	if err != nil {
		return nil, err
	}

	if t.tx, err = t.db.NewTx(); err != nil {
		return nil, err
	}

	// the tree is empty (nLeafs==0) when its root is all zeroes
	emptyRoot := make([]byte, t.hashFunction.Len())
	if !bytes.Equal(t.root, emptyRoot) {
		// every case other than CASE A is still pending
		return nil, fmt.Errorf("UNIMPLEMENTED")
	}

	// CASE A: sort keys & values by path and build the whole tree
	sortKvs(kvs)
	return t.buildTreeBottomUp(kvs)
}
// kv bundles a key-value pair with the bookkeeping used while batching.
type kv struct {
	// pos is the original position of the pair in the input arrays
	pos int
	// keyPath is the byte slice the pairs get sorted by
	keyPath []byte
	// k and v hold the key and the value of the pair
	k, v []byte
}
// compareBytes reports whether a sorts strictly before b. Bytes are compared
// from left to right, and within each byte the bits are compared from right
// to left (least significant bit first), with a 0 bit sorting before a 1 bit.
//
// The slices may differ in length: only the common prefix is compared bit by
// bit, and when that prefix is identical the shorter slice sorts first. Equal
// slices return false, as required of a "less" function.
func compareBytes(a, b []byte) bool {
	// never index past the end of the shorter slice
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		diff := a[i] ^ b[i]
		if diff == 0 {
			continue
		}
		// diff & -diff isolates the lowest bit in which the two bytes
		// differ; a sorts first when a is the one with a 0 at that bit.
		lowest := diff & -diff
		return a[i]&lowest == 0
	}
	return len(a) < len(b)
}
// sortKvs reorders kvs in place so that their keyPath fields are in ascending
// order according to compareBytes.
func sortKvs(kvs []kv) {
	byPath := func(a, b int) bool {
		return compareBytes(kvs[a].keyPath, kvs[b].keyPath)
	}
	sort.Slice(kvs, byPath)
}
// keysValuesToKvs pairs each key in ks with the value at the same index in vs
// and returns them as a slice of kv, remembering the original position of
// every pair.
//
// The keyPath of each entry is a fresh slice of t.hashFunction.Len() bytes
// holding a copy of the key (zero padded when the key is shorter, truncated
// when it is longer), so all the paths share the same length and the key
// itself is never aliased by the path.
//
// An error is returned when ks and vs differ in length.
func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) {
	if len(ks) != len(vs) {
		return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)",
			len(ks), len(vs))
	}
	kvs := make([]kv, len(ks))
	for i := range ks {
		keyPath := make([]byte, t.hashFunction.Len())
		copy(keyPath, ks[i])
		kvs[i] = kv{
			pos: i,
			// store the fixed-length path that was just built; the raw
			// key was stored here before and the path was discarded
			keyPath: keyPath,
			k:       ks[i],
			v:       vs[i],
		}
	}
	return kvs, nil
}
// buildTreeBottomUp stores one leaf per entry of kvs in the current
// transaction, computes the upper levels through upFromKeys and sets the
// resulting key as the root of the tree. The returned index slice is always
// nil.
//
// kvs must be sorted by path, and its length is expected to be a power of 2,
// as upFromKeys pairs up the keys of every level until a single one is left.
// TODO return index of failed keyvalues
func (t *Tree) buildTreeBottomUp(kvs []kv) ([]int, error) {
	// db key of every leaf, in the same order as kvs
	leafKeys := make([][]byte, 0, len(kvs))
	for i := range kvs {
		// TODO handle the case where Key&Value == 0
		lk, lv, err := newLeafValue(t.hashFunction, kvs[i].k, kvs[i].v)
		if err != nil {
			return nil, err
		}
		// store the leaf key & value to db
		if err := t.tx.Put(lk, lv); err != nil {
			return nil, err
		}
		leafKeys = append(leafKeys, lk)
	}

	root, err := t.upFromKeys(leafKeys)
	if err != nil {
		return nil, err
	}
	t.root = root
	return nil, nil
}
// upFromKeys computes the levels of the tree above the given keys. Adjacent
// keys are combined pairwise into intermediate nodes, every intermediate node
// is stored in the current transaction, and the process is repeated on the
// resulting keys until a single key is left, which is returned.
//
// len(ks) must be a power of 2 so that every level can be paired up. Any other
// length (including 0) is rejected with an error before anything is written,
// instead of recursing without end or indexing out of range.
func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) {
	if n := len(ks); n == 0 || n&(n-1) != 0 {
		return nil, fmt.Errorf("upFromKeys: number of keys must be a power of 2 (got %d)", n)
	}

	// each pass halves the number of keys, so the length stays a power of 2
	for len(ks) > 1 {
		parents := make([][]byte, 0, len(ks)/2)
		for i := 0; i < len(ks); i += 2 {
			// TODO handle the case where Key&Value == 0
			k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1])
			if err != nil {
				return nil, err
			}
			// store k-v to db
			if err = t.tx.Put(k, v); err != nil {
				return nil, err
			}
			parents = append(parents, k)
		}
		ks = parents
	}
	return ks[0], nil
}