arnaucube
/
arbo
mirror of https://github.com/arnaucube/arbo.git

package arbo

import (
	"bytes"
	"fmt"
	"sort"
)

/*


AddBatch design
===============


CASE A: Empty Tree --> if tree is empty (root==0)
=================================================
- Build the full tree from bottom to top (from all the leafs up to the root)


CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < numBuckets
==============================================================================
- Get the Leafs (key & value) (iterate the tree from the current root getting
the leafs)
- Create a new empty Tree
- Do CASE A for the new Tree, giving the already existing key&values (leafs)
from the original Tree + the new key&values to be added from the AddBatch call

      R
     / \
    A   *
       / \
      B   C


CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=numBuckets)
==============================================================================
- Use A, B, G, F as Roots of subtrees
- Do CASE B for each subtree
- Then go from L to the Root

              R
             /  \
            /    \
           /      \
          *        *
         / |      / \
        /  |     /   \
       /   |    /     \
L:    A    B   G       D
              / \
             /   \
            /     \
           C      *
                 / \
                /   \
               /     \
              D      E


CASE D: Already populated Tree
==============================
- Use A, B, C, D as subtrees
- Sort the Keys in Buckets that share the initial part of the path
- For each subtree add there the new leafs

              R
             /  \
            /    \
           /      \
          *        *
         / |      / \
        /  |     /   \
       /   |    /     \
L:    A    B   C       D
     /\   /\  / \     / \
    ...  ... ... ... ... ...


CASE E: Already populated Tree Unbalanced
=========================================
- Need to fill M1 and M2, and then will be able to use CASE D
	- Search for M1 & M2 in the input Keys
	- Add M1 & M2 to the Tree
	- From here can use CASE D

              R
             /  \
            /    \
           /      \
          *        *
           |        \
           |         \
           |          \
L:    M1   *   M2      *        (where M1 and M2 are empty)
          / |         /
         /  |        /
        /   |       /
       A    *      *
           / \     | \
          /   \    |  \
         /     \   |   \
        B      *   *   C
              / \  |\
           ... ... | \
                   |  \
                   D  E


Algorithm decision
==================
- if nLeafs==0 (root==0): CASE A
- if nLeafs<nBuckets: CASE B
- if nLeafs>=nBuckets && nLeafs < minLeafsThreshold: CASE C
- else: CASE D & CASE E


- Multiple tree.Add calls: O(n log n)
	- Used in: cases A, B, C
- Tree from bottom to top: O(log n)
	- Used in: cases D, E

*/

// AddBatchOpt is the work-in-progress, optimized counterpart of AddBatch. It
// receives parallel slices of keys and values and, for the time being, only
// supports an empty tree (CASE A of the design notes above): the key-values
// are sorted by path and the whole tree is built from the leafs up to the
// root. For a non-empty tree it returns an "UNIMPLEMENTED" error.
//
// The tree is locked for the whole duration of the call.
func (t *Tree) AddBatchOpt(keys, values [][]byte) ([]int, error) {
	t.updateAccessTime()
	t.Lock()
	defer t.Unlock()

	// TODO if len(keys) is not a power of 2, add padding of empty
	// keys&values. Maybe when len(keyvalues) is not a power of 2, cut at
	// the biggest power of 2 under the len(keys), add those 2**n key-values
	// using the AddBatch approach, and then add the remaining key-values
	// using tree.Add.

	pairs, err := t.keysValuesToKvs(keys, values)
	if err != nil {
		return nil, err
	}

	// NOTE(review): the transaction opened here is neither committed nor
	// discarded anywhere in this function — confirm where that is expected
	// to happen.
	if t.tx, err = t.db.NewTx(); err != nil {
		return nil, err
	}

	// an all-zero root means that the tree has no leafs yet
	emptyRoot := make([]byte, t.hashFunction.Len())
	if !bytes.Equal(t.root, emptyRoot) {
		return nil, fmt.Errorf("UNIMPLEMENTED")
	}

	// CASE A: sort the key-values by path and build the tree bottom-up
	sortKvs(pairs)
	return t.buildTreeBottomUp(pairs)
}

// kv is a single key-value entry of a batch, as produced by keysValuesToKvs,
// together with the data needed to sort it and to relate it back to the
// caller's input.
type kv struct {
	pos     int // original position in the array
	// keyPath is the byte slice that sortKvs orders the entries by.
	keyPath []byte
	// k and v are the key and the value exactly as received from the caller.
	k       []byte
	v       []byte
}

// compareBytes reports whether a sorts strictly before b. Bytes are compared
// from left to right, and within each byte the bits are compared from the
// least significant to the most significant one; at the first bit where the
// two inputs differ, the input holding a 0 sorts first.
//
// The slices may have different lengths: the shorter one is compared as if it
// were padded with trailing zero bytes, so inputs that only differ in trailing
// zero bytes are considered equal. It returns false when a and b are equal,
// which makes it usable as the "less" function of sort.Slice.
func compareBytes(a, b []byte) bool {
	n := len(a)
	if len(b) > n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		// bytes past the end of a slice are treated as zero
		var x, y byte
		if i < len(a) {
			x = a[i]
		}
		if i < len(b) {
			y = b[i]
		}
		if x == y {
			continue
		}
		// isolate the least significant bit in which x and y differ; a
		// sorts first when its bit at that position is 0
		diff := x ^ y
		lowest := diff & -diff
		return x&lowest == 0
	}
	return false
}

// sortKvs reorders kvs in place so that the entries are sorted by their
// keyPath, using the ordering defined by compareBytes.
func sortKvs(kvs []kv) {
	byPath := func(x, y int) bool {
		left, right := kvs[x].keyPath, kvs[y].keyPath
		return compareBytes(left, right)
	}
	sort.Slice(kvs, byPath)
}

// keysValuesToKvs pairs up the given keys and values into a slice of kv,
// keeping the input order and recording each entry's original index in pos.
//
// Each entry's keyPath is a fresh slice of t.hashFunction.Len() bytes holding
// a copy of the key: shorter keys are padded with trailing zero bytes and
// longer keys are truncated, so that all the paths have the same length. The
// k and v fields reference the caller's slices without copying them.
//
// It returns an error if ks and vs have different lengths.
func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) {
	if len(ks) != len(vs) {
		return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)",
			len(ks), len(vs))
	}
	pathLen := t.hashFunction.Len()
	kvs := make([]kv, len(ks))
	for i := range ks {
		keyPath := make([]byte, pathLen)
		copy(keyPath, ks[i])
		// store the fixed-length path, not the raw key
		kvs[i] = kv{pos: i, keyPath: keyPath, k: ks[i], v: vs[i]}
	}

	return kvs, nil
}

// buildTreeBottomUp creates one leaf per entry of kvs, stores the leafs in the
// current transaction (t.tx), builds the intermediate nodes on top of them
// through upFromKeys and sets the resulting key as the new root of the tree.
//
// kvs must be sorted by path, and its length must be a power of two, as
// upFromKeys pairs the nodes two by two at every level. When kvs is empty
// there is nothing to build and the root is left untouched.
//
// The returned []int is currently always nil.
// TODO return index of failed keyvalues
func (t *Tree) buildTreeBottomUp(kvs []kv) ([]int, error) {
	if len(kvs) == 0 {
		// no leafs to add: keep the current root
		return nil, nil
	}

	// build the leafs
	leafKeys := make([][]byte, len(kvs))
	for i := range kvs {
		// TODO handle the case where Key&Value == 0
		leafKey, leafValue, err := newLeafValue(t.hashFunction, kvs[i].k, kvs[i].v)
		if err != nil {
			return nil, err
		}
		// store leafKey & leafValue to db
		if err := t.tx.Put(leafKey, leafValue); err != nil {
			return nil, err
		}
		leafKeys[i] = leafKey
	}

	// go up from the leafs to the root
	root, err := t.upFromKeys(leafKeys)
	if err != nil {
		return nil, err
	}
	t.root = root
	return nil, nil
}

// upFromKeys takes the keys of all the nodes of one level of the tree, ordered
// from left to right, and builds the levels above them: the keys are paired
// two by two into intermediate nodes, which are stored in the current
// transaction (t.tx), and the process is repeated on the parents until a
// single key is left. That key is returned; for a single input key, it is the
// input key itself.
//
// The number of keys must be a power of two, otherwise some level would end up
// with a node without a sibling. This is checked up front, so that an invalid
// input returns an error before anything is written.
func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) {
	if n := len(ks); n == 0 || n&(n-1) != 0 {
		return nil, fmt.Errorf("number of keys must be a power of two, got %d", n)
	}

	for len(ks) > 1 {
		parents := make([][]byte, 0, len(ks)/2)
		for i := 0; i < len(ks); i += 2 {
			// TODO handle the case where Key&Value == 0
			k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1])
			if err != nil {
				return nil, err
			}
			// store k-v to db
			if err = t.tx.Put(k, v); err != nil {
				return nil, err
			}
			parents = append(parents, k)
		}
		ks = parents
	}
	return ks[0], nil
}