arnaucube
/
arbo
mirror of https://github.com/arnaucube/arbo.git


								package arbo


								import (

									"bytes"

									"fmt"

									"sort"

								)


								/*


								AddBatch design

								===============


								CASE A: Empty Tree --> if tree is empty (root==0)

								=================================================

								- Build the full tree from bottom to top (from all the leaf to the root)


								CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < numBuckets

								==============================================================================

								- Get the Leafs (key & value) (iterate the tree from the current root getting

								the leafs)

								- Create a new empty Tree

								- Do CASE A for the new Tree, giving the already existing key&values (leafs)

								from the original Tree + the new key&values to be added from the AddBatch call


								      R

								     / \

								    A   *

								       / \

								      B   C


								CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=numBuckets)

								==============================================================================

								- Use A, B, G, F as Roots of subtrees

								- Do CASE B for each subtree

								- Then go from L to the Root


								              R

								             /  \

								            /    \

								           /      \

								          *        *

								         / |      / \

								        /  |     /   \

								       /   |    /     \

								L:    A    B   G       D

								              / \

								             /   \

								            /     \

								           C      *

								                 / \

								                /   \

								               /     \

								              D      E


								CASE D: Already populated Tree

								==============================

								- Use A, B, C, D as subtree

								- Sort the Keys in Buckets that share the initial part of the path

								- For each subtree add there the new leafs


								              R

								             /  \

								            /    \

								           /      \

								          *        *

								         / |      / \

								        /  |     /   \

								       /   |    /     \

								L:    A    B   C       D

								     /\   /\  / \     / \

								    ...  ... ... ... ... ...


								CASE E: Already populated Tree Unbalanced

								=========================================

								- Need to fill M1 and M2, and then will be able to use CASE D

									- Search for M1 & M2 in the inputed Keys

									- Add M1 & M2 to the Tree

									- From here can use CASE D


								              R

								             /  \

								            /    \

								           /      \

								          *        *

								           |        \

								           |         \

								           |          \

								L:    M1   *   M2      *        (where M1 and M2 are empty)

								          / |         /

								         /  |        /

								        /   |       /

								       A    *      *

								           / \     | \

								          /   \    |  \

								         /     \   |   \

								        B      *   *   C

								              / \  |\

								           ... ... | \

								                   |  \

								                   D  E


								Algorithm decision

								==================

								- if nLeafs==0 (root==0): CASE A

								- if nLeafs<nBuckets: CASE B

								- if nLeafs>=nBuckets && nLeafs < minLeafsThreshold: CASE C

								- else: CASE D & CASE E


								- Multiple tree.Add calls: O(n log n)

									- Used in: cases A, B, C

								- Tree from bottom to top: O(log n)

									- Used in: cases D, E


								*/


								// AddBatchOpt is the WIP implementation of the AddBatch method in a more

								// optimized approach.

								func (t *Tree) AddBatchOpt(keys, values [][]byte) ([]int, error) {

									t.updateAccessTime()

									t.Lock()

									defer t.Unlock()


									// TODO if len(keys) is not a power of 2, add padding of empty

									// keys&values. Maybe when len(keyvalues) is not a power of 2, cut at

									// the biggest power of 2 under the len(keys), add those 2**n key-values

									// using the AddBatch approach, and then add the remaining key-values

									// using tree.Add.


									kvs, err := t.keysValuesToKvs(keys, values)

									if err != nil {

										return nil, err

									}


									t.tx, err = t.db.NewTx()

									if err != nil {

										return nil, err

									}


									// if nLeafs==0 (root==0): CASE A

									e := make([]byte, t.hashFunction.Len())

									if bytes.Equal(t.root, e) {

										// CASE A

										// sort keys & values by path

										sortKvs(kvs)

										return t.buildTreeBottomUp(kvs)

									}


									return nil, fmt.Errorf("UNIMPLEMENTED")

								}


								type kv struct {

									pos     int // original position in the array

									keyPath []byte

									k       []byte

									v       []byte

								}


								// compareBytes compares byte slices where the bytes are compared from left to

								// right and each byte is compared by bit from right to left

								func compareBytes(a, b []byte) bool {

									// WIP

									for i := 0; i < len(a); i++ {

										for j := 0; j < 8; j++ {

											aBit := a[i] & (1 << j)

											bBit := b[i] & (1 << j)

											if aBit > bBit {

												return false

											} else if aBit < bBit {

												return true

											}

										}

									}

									return false

								}


								// sortKvs sorts the kv by path

								func sortKvs(kvs []kv) {

									sort.Slice(kvs, func(i, j int) bool {

										return compareBytes(kvs[i].keyPath, kvs[j].keyPath)

									})

								}


								func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) {

									if len(ks) != len(vs) {

										return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)",

											len(ks), len(vs))

									}

									kvs := make([]kv, len(ks))

									for i := 0; i < len(ks); i++ {

										keyPath := make([]byte, t.hashFunction.Len())

										copy(keyPath[:], ks[i])

										kvs[i].pos = i

										kvs[i].keyPath = ks[i]

										kvs[i].k = ks[i]

										kvs[i].v = vs[i]

									}


									return kvs, nil

								}


								// keys & values must be sorted by path, and must be length multiple of 2

								// TODO return index of failed keyvaules

								func (t *Tree) buildTreeBottomUp(kvs []kv) ([]int, error) {

									// build the leafs

									leafKeys := make([][]byte, len(kvs))

									for i := 0; i < len(kvs); i++ {

										// TODO handle the case where Key&Value == 0

										leafKey, leafValue, err := newLeafValue(t.hashFunction, kvs[i].k, kvs[i].v)

										if err != nil {

											return nil, err

										}

										// store leafKey & leafValue to db

										if err := t.tx.Put(leafKey, leafValue); err != nil {

											return nil, err

										}

										leafKeys[i] = leafKey

									}

									r, err := t.upFromKeys(leafKeys)

									if err != nil {

										return nil, err

									}

									t.root = r

									return nil, nil

								}


								func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) {

									if len(ks) == 1 {

										return ks[0], nil

									}


									var rKs [][]byte

									for i := 0; i < len(ks); i += 2 {

										// TODO handle the case where Key&Value == 0

										k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1])

										if err != nil {

											return nil, err

										}

										// store k-v to db

										if err = t.tx.Put(k, v); err != nil {

											return nil, err

										}

										rKs = append(rKs, k)

									}

									return t.upFromKeys(rKs)

								}