| @ -1,821 +0,0 @@ | |||||
| package arbo |  | ||||
| 
 |  | ||||
| import ( |  | ||||
| 	"bytes" |  | ||||
| 	"fmt" |  | ||||
| 	"math" |  | ||||
| 	"runtime" |  | ||||
| 	"sort" |  | ||||
| 	"sync" |  | ||||
| 
 |  | ||||
| 	"github.com/iden3/go-merkletree/db" |  | ||||
| ) |  | ||||
| 
 |  | ||||
| /* |  | ||||
| 
 |  | ||||
| AddBatch design |  | ||||
| =============== |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| CASE A: Empty Tree --> if tree is empty (root==0) |  | ||||
| ================================================= |  | ||||
| - Build the full tree from bottom to top (from all the leaf to the root) |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < minLeafsThreshold |  | ||||
| ============================================================================== |  | ||||
| - Get the Leafs (key & value) (iterate the tree from the current root getting |  | ||||
| the leafs) |  | ||||
| - Create a new empty Tree |  | ||||
| - Do CASE A for the new Tree, giving the already existing key&values (leafs) |  | ||||
| from the original Tree + the new key&values to be added from the AddBatch call |  | ||||
| 
 |  | ||||
| 
 |  | ||||
|        R                 R |  | ||||
|       / \               /  \ |  | ||||
|      A   *             /    \ |  | ||||
|         / \           /      \ |  | ||||
|        B   C         *        * |  | ||||
|                     / |      / \ |  | ||||
|                    /  |     /   \ |  | ||||
|                   /   |    /     \ |  | ||||
|            L:    A    B   G       D |  | ||||
|                          / \ |  | ||||
|                         /   \ |  | ||||
|                        /     \ |  | ||||
|                       C      * |  | ||||
|                             / \ |  | ||||
|                            /   \ |  | ||||
|                           /     \ |  | ||||
|                          ...     ... (nLeafs < minLeafsThreshold) |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=minLeafsThreshold) |  | ||||
| ============================================================================== |  | ||||
| - Use A, B, G, D as Roots of subtrees |  | ||||
| - Do CASE B for each subtree |  | ||||
| - Then go from L to the Root |  | ||||
| 
 |  | ||||
|               R |  | ||||
|              /  \ |  | ||||
|             /    \ |  | ||||
|            /      \ |  | ||||
|           *        * |  | ||||
|          / |      / \ |  | ||||
|         /  |     /   \ |  | ||||
|        /   |    /     \ |  | ||||
| L:    A    B   G       D |  | ||||
|               / \ |  | ||||
|              /   \ |  | ||||
|             /     \ |  | ||||
|            C      * |  | ||||
|                  / \ |  | ||||
|                 /   \ |  | ||||
|                /     \ |  | ||||
|               ...    ... (nLeafs >= minLeafsThreshold) |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| CASE D: Already populated Tree |  | ||||
| ============================== |  | ||||
| - Use A, B, C, D as subtree |  | ||||
| - Sort the Keys in Buckets that share the initial part of the path |  | ||||
| - For each subtree add there the new leafs |  | ||||
| 
 |  | ||||
|               R |  | ||||
|              /  \ |  | ||||
|             /    \ |  | ||||
|            /      \ |  | ||||
|           *        * |  | ||||
|          / |      / \ |  | ||||
|         /  |     /   \ |  | ||||
|        /   |    /     \ |  | ||||
| L:    A    B   C       D |  | ||||
|      /\   /\  / \     / \ |  | ||||
|     ...  ... ... ... ... ... |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| CASE E: Already populated Tree Unbalanced |  | ||||
| ========================================= |  | ||||
| - Need to fill M1 and M2, and then will be able to use CASE D |  | ||||
| 	- Search for M1 & M2 in the inputted Keys |  | ||||
| 	- Add M1 & M2 to the Tree |  | ||||
| 	- From here can use CASE D |  | ||||
| 
 |  | ||||
|               R |  | ||||
|              /  \ |  | ||||
|             /    \ |  | ||||
|            /      \ |  | ||||
|           *        * |  | ||||
|            |        \ |  | ||||
|            |         \ |  | ||||
|            |          \ |  | ||||
| L:    M1   *   M2      *        (where M1 and M2 are empty) |  | ||||
|           / |         / |  | ||||
|          /  |        / |  | ||||
|         /   |       / |  | ||||
|        A    *      * |  | ||||
|            / \     | \ |  | ||||
|           /   \    |  \ |  | ||||
|          /     \   |   \ |  | ||||
|         B      *   *   C |  | ||||
|               / \  |\ |  | ||||
|            ... ... | \ |  | ||||
|                    |  \ |  | ||||
|                    D  E |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| Algorithm decision |  | ||||
| ================== |  | ||||
| - if nLeafs==0 (root==0): CASE A |  | ||||
| - if nLeafs<minLeafsThreshold: CASE B |  | ||||
| - if nLeafs>=minLeafsThreshold && (nLeafs/nBuckets) < minLeafsThreshold: CASE C |  | ||||
| - else: CASE D & CASE E |  | ||||
| 
 |  | ||||
| 
 |  | ||||
| - Multiple tree.Add calls: O(n log n) |  | ||||
| 	- Used in: cases D, E |  | ||||
| - Tree from bottom to top: O(n) |  | ||||
| 	- Used in: cases A, B, C |  | ||||
| 
 |  | ||||
| */ |  | ||||
| 
 |  | ||||
// minLeafsThreshold is the leaf count under which the batch insertion treats
// the tree as "almost empty" (CASE B), and the per-bucket leaf count under
// which CASE C is chosen.
const minLeafsThreshold = 100 // nolint:gomnd // TMP WIP this will be autocalculated
| 
 |  | ||||
| // AddBatch adds a batch of key-values to the Tree. Returns an array containing
 |  | ||||
| // the indexes of the keys failed to add.
 |  | ||||
| func (t *Tree) AddBatch(keys, values [][]byte) ([]int, error) { |  | ||||
| 	t.updateAccessTime() |  | ||||
| 	t.Lock() |  | ||||
| 	defer t.Unlock() |  | ||||
| 
 |  | ||||
| 	vt, err := t.loadVT() |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	invalids, err := vt.addBatch(keys, values) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	pairs, err := vt.computeHashes() |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	t.root = vt.root.h |  | ||||
| 
 |  | ||||
| 	// store pairs in db
 |  | ||||
| 	t.tx, err = t.db.NewTx() |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	for i := 0; i < len(pairs); i++ { |  | ||||
| 		if err := t.dbPut(pairs[i][0], pairs[i][1]); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	return t.finalizeAddBatch(len(keys), invalids) |  | ||||
| } |  | ||||
| 
 |  | ||||
| // AddBatchOLD adds a batch of key-values to the Tree. Returns an array containing
 |  | ||||
| // the indexes of the keys failed to add.
 |  | ||||
| func (t *Tree) AddBatchOLD(keys, values [][]byte) ([]int, error) { |  | ||||
| 	// TODO: support vaules=nil
 |  | ||||
| 	t.updateAccessTime() |  | ||||
| 	t.Lock() |  | ||||
| 	defer t.Unlock() |  | ||||
| 
 |  | ||||
| 	kvs, err := t.keysValuesToKvs(keys, values) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	t.tx, err = t.db.NewTx() |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// if nCPU is not a power of two, cut at the highest power of two under
 |  | ||||
| 	// nCPU
 |  | ||||
| 	nCPU := flp2(runtime.NumCPU()) |  | ||||
| 	l := int(math.Log2(float64(nCPU))) |  | ||||
| 	var invalids []int |  | ||||
| 
 |  | ||||
| 	// CASE A: if nLeafs==0 (root==0)
 |  | ||||
| 	if bytes.Equal(t.root, t.emptyHash) { |  | ||||
| 		invalids, err = t.caseA(nCPU, kvs) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 
 |  | ||||
| 		return t.finalizeAddBatch(len(keys), invalids) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// CASE B: if nLeafs<nBuckets
 |  | ||||
| 	nLeafs, err := t.GetNLeafs() |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	if nLeafs < minLeafsThreshold { // CASE B
 |  | ||||
| 		invalids, err = t.caseB(nCPU, 0, kvs) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 
 |  | ||||
| 		return t.finalizeAddBatch(len(keys), invalids) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	keysAtL, err := t.getKeysAtLevel(l + 1) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// CASE C: if nLeafs>=minLeafsThreshold && (nLeafs/nBuckets) < minLeafsThreshold
 |  | ||||
| 	// available parallelization, will need to be a power of 2 (2**n)
 |  | ||||
| 	if nLeafs >= minLeafsThreshold && |  | ||||
| 		(nLeafs/nCPU) < minLeafsThreshold && |  | ||||
| 		len(keysAtL) == nCPU { |  | ||||
| 		invalids, err = t.caseC(nCPU, l, keysAtL, kvs) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 
 |  | ||||
| 		return t.finalizeAddBatch(len(keys), invalids) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// CASE E
 |  | ||||
| 	if len(keysAtL) != nCPU { |  | ||||
| 		// CASE E: add one key at each bucket, and then do CASE D
 |  | ||||
| 		buckets := splitInBuckets(kvs, nCPU) |  | ||||
| 		kvs = []kv{} |  | ||||
| 		for i := 0; i < len(buckets); i++ { |  | ||||
| 			// add one leaf of the bucket, if there is an error when
 |  | ||||
| 			// adding the k-v, try to add the next one of the bucket
 |  | ||||
| 			// (until one is added)
 |  | ||||
| 			var inserted int |  | ||||
| 			for j := 0; j < len(buckets[i]); j++ { |  | ||||
| 				if err := t.add(0, buckets[i][j].k, buckets[i][j].v); err == nil { |  | ||||
| 					inserted = j |  | ||||
| 					break |  | ||||
| 				} |  | ||||
| 			} |  | ||||
| 
 |  | ||||
| 			// put the buckets elements except the inserted one
 |  | ||||
| 			kvs = append(kvs, buckets[i][:inserted]...) |  | ||||
| 			kvs = append(kvs, buckets[i][inserted+1:]...) |  | ||||
| 		} |  | ||||
| 		keysAtL, err = t.getKeysAtLevel(l + 1) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// CASE D
 |  | ||||
| 	if len(keysAtL) == nCPU { // enter in CASE D if len(keysAtL)=nCPU, if not, CASE E
 |  | ||||
| 		invalidsCaseD, err := t.caseD(nCPU, l, keysAtL, kvs) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 		invalids = append(invalids, invalidsCaseD...) |  | ||||
| 
 |  | ||||
| 		return t.finalizeAddBatch(len(keys), invalids) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	return nil, fmt.Errorf("UNIMPLEMENTED") |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) finalizeAddBatch(nKeys int, invalids []int) ([]int, error) { |  | ||||
| 	// store root to db
 |  | ||||
| 	if err := t.dbPut(dbKeyRoot, t.root); err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// update nLeafs
 |  | ||||
| 	if err := t.incNLeafs(nKeys - len(invalids)); err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// commit db tx
 |  | ||||
| 	if err := t.tx.Commit(); err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	return invalids, nil |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) caseA(nCPU int, kvs []kv) ([]int, error) { |  | ||||
| 	invalids, err := t.buildTreeFromLeafs(nCPU, kvs) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	return invalids, nil |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) caseB(nCPU, l int, kvs []kv) ([]int, error) { |  | ||||
| 	// get already existing keys
 |  | ||||
| 	aKs, aVs, err := t.getLeafs(t.root) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	aKvs, err := t.keysValuesToKvs(aKs, aVs) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	// add already existing key-values to the inputted key-values
 |  | ||||
| 	// kvs = append(kvs, aKvs...)
 |  | ||||
| 	kvs, invalids := combineInKVSet(aKvs, kvs) |  | ||||
| 
 |  | ||||
| 	// proceed with CASE A
 |  | ||||
| 	sortKvs(kvs) |  | ||||
| 
 |  | ||||
| 	var invalids2 []int |  | ||||
| 	if nCPU > 1 { |  | ||||
| 		invalids2, err = t.buildTreeFromLeafs(nCPU, kvs) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} else { |  | ||||
| 		invalids2, err = t.buildTreeFromLeafsSingleThread(l, kvs) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 	invalids = append(invalids, invalids2...) |  | ||||
| 
 |  | ||||
| 	return invalids, nil |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) caseC(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { |  | ||||
| 	// 1. go down until level L (L=log2(nBuckets)): keysAtL
 |  | ||||
| 
 |  | ||||
| 	var excedents []kv |  | ||||
| 	buckets := splitInBuckets(kvs, nCPU) |  | ||||
| 
 |  | ||||
| 	// 2. use keys at level L as roots of the subtrees under each one
 |  | ||||
| 	subRoots := make([][]byte, nCPU) |  | ||||
| 	dbgStatsPerBucket := make([]*dbgStats, nCPU) |  | ||||
| 	txs := make([]db.Tx, nCPU) |  | ||||
| 	var wg sync.WaitGroup |  | ||||
| 	wg.Add(nCPU) |  | ||||
| 	for i := 0; i < nCPU; i++ { |  | ||||
| 		go func(cpu int) { |  | ||||
| 			var err error |  | ||||
| 			txs[cpu], err = t.db.NewTx() |  | ||||
| 			if err != nil { |  | ||||
| 				panic(err) // TODO WIP
 |  | ||||
| 			} |  | ||||
| 			if err := txs[cpu].Add(t.tx); err != nil { |  | ||||
| 				panic(err) // TODO
 |  | ||||
| 			} |  | ||||
| 			bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels, |  | ||||
| 				hashFunction: t.hashFunction, root: keysAtL[cpu], |  | ||||
| 				emptyHash: t.emptyHash, dbg: newDbgStats()} |  | ||||
| 
 |  | ||||
| 			// 3. do CASE B (with 1 cpu) for each key at level L
 |  | ||||
| 			_, err = bucketTree.caseB(1, l, buckets[cpu]) // TODO handle invalids
 |  | ||||
| 			if err != nil { |  | ||||
| 				panic(err) // TODO WIP
 |  | ||||
| 				// return nil, err
 |  | ||||
| 			} |  | ||||
| 			subRoots[cpu] = bucketTree.root |  | ||||
| 			dbgStatsPerBucket[cpu] = bucketTree.dbg |  | ||||
| 			wg.Done() |  | ||||
| 		}(i) |  | ||||
| 	} |  | ||||
| 	wg.Wait() |  | ||||
| 
 |  | ||||
| 	// merge buckets txs into Tree.tx
 |  | ||||
| 	for i := 0; i < len(txs); i++ { |  | ||||
| 		if err := t.tx.Add(txs[i]); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// 4. go upFromKeys from the new roots of the subtrees
 |  | ||||
| 	newRoot, err := t.upFromKeys(subRoots) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	t.root = newRoot |  | ||||
| 
 |  | ||||
| 	// add the key-values that have not been used yet
 |  | ||||
| 	var invalids []int |  | ||||
| 	for i := 0; i < len(excedents); i++ { |  | ||||
| 		if err = t.add(0, excedents[i].k, excedents[i].v); err != nil { |  | ||||
| 			invalids = append(invalids, excedents[i].pos) |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	for i := 0; i < len(dbgStatsPerBucket); i++ { |  | ||||
| 		t.dbg.add(dbgStatsPerBucket[i]) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	return invalids, nil |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) caseD(nCPU, l int, keysAtL [][]byte, kvs []kv) ([]int, error) { |  | ||||
| 	if nCPU == 1 { // CASE D, but with 1 cpu
 |  | ||||
| 		var invalids []int |  | ||||
| 		for i := 0; i < len(kvs); i++ { |  | ||||
| 			if err := t.add(0, kvs[i].k, kvs[i].v); err != nil { |  | ||||
| 				invalids = append(invalids, kvs[i].pos) |  | ||||
| 			} |  | ||||
| 		} |  | ||||
| 		return invalids, nil |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	buckets := splitInBuckets(kvs, nCPU) |  | ||||
| 
 |  | ||||
| 	subRoots := make([][]byte, nCPU) |  | ||||
| 	invalidsInBucket := make([][]int, nCPU) |  | ||||
| 	dbgStatsPerBucket := make([]*dbgStats, nCPU) |  | ||||
| 	txs := make([]db.Tx, nCPU) |  | ||||
| 
 |  | ||||
| 	var wg sync.WaitGroup |  | ||||
| 	wg.Add(nCPU) |  | ||||
| 	for i := 0; i < nCPU; i++ { |  | ||||
| 		go func(cpu int) { |  | ||||
| 			var err error |  | ||||
| 			txs[cpu], err = t.db.NewTx() |  | ||||
| 			if err != nil { |  | ||||
| 				panic(err) // TODO WIP
 |  | ||||
| 			} |  | ||||
| 			// put already existing tx into txs[cpu], as txs[cpu]
 |  | ||||
| 			// needs the pending key-values that are not in tree.db,
 |  | ||||
| 			// but are in tree.tx
 |  | ||||
| 			if err := txs[cpu].Add(t.tx); err != nil { |  | ||||
| 				panic(err) // TODO WIP
 |  | ||||
| 			} |  | ||||
| 
 |  | ||||
| 			bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels - l, |  | ||||
| 				hashFunction: t.hashFunction, root: keysAtL[cpu], |  | ||||
| 				emptyHash: t.emptyHash, dbg: newDbgStats()} // TODO bucketTree.dbg should be optional
 |  | ||||
| 
 |  | ||||
| 			for j := 0; j < len(buckets[cpu]); j++ { |  | ||||
| 				if err = bucketTree.add(l, buckets[cpu][j].k, buckets[cpu][j].v); err != nil { |  | ||||
| 					invalidsInBucket[cpu] = append(invalidsInBucket[cpu], buckets[cpu][j].pos) |  | ||||
| 				} |  | ||||
| 			} |  | ||||
| 			subRoots[cpu] = bucketTree.root |  | ||||
| 			dbgStatsPerBucket[cpu] = bucketTree.dbg |  | ||||
| 			wg.Done() |  | ||||
| 		}(i) |  | ||||
| 	} |  | ||||
| 	wg.Wait() |  | ||||
| 
 |  | ||||
| 	// merge buckets txs into Tree.tx
 |  | ||||
| 	for i := 0; i < len(txs); i++ { |  | ||||
| 		if err := t.tx.Add(txs[i]); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	newRoot, err := t.upFromKeys(subRoots) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	t.root = newRoot |  | ||||
| 
 |  | ||||
| 	var invalids []int |  | ||||
| 	for i := 0; i < len(invalidsInBucket); i++ { |  | ||||
| 		invalids = append(invalids, invalidsInBucket[i]...) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	for i := 0; i < len(dbgStatsPerBucket); i++ { |  | ||||
| 		t.dbg.add(dbgStatsPerBucket[i]) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	return invalids, nil |  | ||||
| } |  | ||||
| 
 |  | ||||
| func splitInBuckets(kvs []kv, nBuckets int) [][]kv { |  | ||||
| 	buckets := make([][]kv, nBuckets) |  | ||||
| 	// 1. classify the keyvalues into buckets
 |  | ||||
| 	for i := 0; i < len(kvs); i++ { |  | ||||
| 		pair := kvs[i] |  | ||||
| 
 |  | ||||
| 		// bucketnum := keyToBucket(pair.k, nBuckets)
 |  | ||||
| 		bucketnum := keyToBucket(pair.keyPath, nBuckets) |  | ||||
| 		buckets[bucketnum] = append(buckets[bucketnum], pair) |  | ||||
| 	} |  | ||||
| 	return buckets |  | ||||
| } |  | ||||
| 
 |  | ||||
// TODO rename in a more 'real' name (calculate bucket from/for key)

// keyToBucket returns the index (in [0, nBuckets)) of the bucket to which the
// key k belongs.
//
// The first log2(nBuckets) bits of k are read (bytes from left to right, bits
// inside each byte from the least significant one); at each bit the current
// range of buckets is halved, keeping the upper half when the bit is set and
// the lower half otherwise. The first index of the remaining range is
// returned.
//
// The range is tracked with an offset and a size instead of materialising a
// slice of indexes for every key. For nBuckets < 2 there is nothing to
// choose and 0 is returned. k must hold at least log2(nBuckets) bits,
// otherwise the indexing panics.
func keyToBucket(k []byte, nBuckets int) int {
	if nBuckets < 2 {
		return 0
	}
	nLevels := int(math.Log2(float64(nBuckets)))

	lo, size := 0, nBuckets
	for i := 0; i < nLevels; i++ {
		half := size / 2 //nolint:gomnd
		if k[i/8]&(1<<(i%8)) != 0 {
			// upper half: [lo+half, lo+size)
			lo += half
			size -= half
		} else {
			// lower half: [lo, lo+half)
			size = half
		}
	}
	return lo
}
| 
 |  | ||||
// kv is a key-value pair of a batch, together with the data needed to place
// it in the tree.
type kv struct {
	// pos is the original position in the array
	pos int
	// keyPath is the path used to bucket and sort the pair; k and v are the
	// key and the value themselves
	keyPath, k, v []byte
}
| 
 |  | ||||
// compareBytes compares byte slices where the bytes are compared from left to
// right and each byte is compared by bit from right to left. It reports
// whether a sorts strictly before b.
//
// Only the common prefix of a and b is indexed, so slices of different length
// do not panic; when one slice is a prefix of the other, the shorter one sorts
// first.
func compareBytes(a, b []byte) bool {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		diff := a[i] ^ b[i]
		if diff == 0 {
			continue
		}
		// isolate the lowest bit in which the two bytes differ: it is
		// the first one found when scanning from right to left, and a
		// sorts first when its bit is the unset one
		lowest := diff & -diff
		return a[i]&lowest == 0
	}
	return len(a) < len(b)
}
| 
 |  | ||||
| // sortKvs sorts the kv by path
 |  | ||||
| func sortKvs(kvs []kv) { |  | ||||
| 	sort.Slice(kvs, func(i, j int) bool { |  | ||||
| 		return compareBytes(kvs[i].keyPath, kvs[j].keyPath) |  | ||||
| 	}) |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) { |  | ||||
| 	if len(ks) != len(vs) { |  | ||||
| 		return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)", |  | ||||
| 			len(ks), len(vs)) |  | ||||
| 	} |  | ||||
| 	kvs := make([]kv, len(ks)) |  | ||||
| 	for i := 0; i < len(ks); i++ { |  | ||||
| 		keyPath := make([]byte, t.hashFunction.Len()) |  | ||||
| 		copy(keyPath[:], ks[i]) |  | ||||
| 		kvs[i].pos = i |  | ||||
| 		kvs[i].keyPath = keyPath |  | ||||
| 		kvs[i].k = ks[i] |  | ||||
| 		kvs[i].v = vs[i] |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	return kvs, nil |  | ||||
| } |  | ||||
| 
 |  | ||||
| /* |  | ||||
| func (t *Tree) kvsToKeysValues(kvs []kv) ([][]byte, [][]byte) { |  | ||||
| 	ks := make([][]byte, len(kvs)) |  | ||||
| 	vs := make([][]byte, len(kvs)) |  | ||||
| 	for i := 0; i < len(kvs); i++ { |  | ||||
| 		ks[i] = kvs[i].k |  | ||||
| 		vs[i] = kvs[i].v |  | ||||
| 	} |  | ||||
| 	return ks, vs |  | ||||
| } |  | ||||
| */ |  | ||||
| 
 |  | ||||
| // buildTreeFromLeafs splits the key-values into n Buckets (where n is the number
 |  | ||||
| // of CPUs), in parallel builds a subtree for each bucket, once all the subtrees
 |  | ||||
| // are built, uses the subtrees roots as keys for a new tree, which as result
 |  | ||||
| // will have the complete Tree build from bottom to up, where until the
 |  | ||||
| // log2(nCPU) level it has been computed in parallel.
 |  | ||||
| func (t *Tree) buildTreeFromLeafs(nCPU int, kvs []kv) ([]int, error) { |  | ||||
| 	l := int(math.Log2(float64(nCPU))) |  | ||||
| 	buckets := splitInBuckets(kvs, nCPU) |  | ||||
| 
 |  | ||||
| 	subRoots := make([][]byte, nCPU) |  | ||||
| 	invalidsInBucket := make([][]int, nCPU) |  | ||||
| 	dbgStatsPerBucket := make([]*dbgStats, nCPU) |  | ||||
| 	txs := make([]db.Tx, nCPU) |  | ||||
| 
 |  | ||||
| 	var wg sync.WaitGroup |  | ||||
| 	wg.Add(nCPU) |  | ||||
| 	for i := 0; i < nCPU; i++ { |  | ||||
| 		go func(cpu int) { |  | ||||
| 			sortKvs(buckets[cpu]) |  | ||||
| 
 |  | ||||
| 			var err error |  | ||||
| 			txs[cpu], err = t.db.NewTx() |  | ||||
| 			if err != nil { |  | ||||
| 				panic(err) // TODO
 |  | ||||
| 			} |  | ||||
| 			if err := txs[cpu].Add(t.tx); err != nil { |  | ||||
| 				panic(err) // TODO
 |  | ||||
| 			} |  | ||||
| 			bucketTree := Tree{tx: txs[cpu], db: t.db, maxLevels: t.maxLevels, |  | ||||
| 				hashFunction: t.hashFunction, root: t.emptyHash, |  | ||||
| 				emptyHash: t.emptyHash, dbg: newDbgStats()} |  | ||||
| 
 |  | ||||
| 			currInvalids, err := bucketTree.buildTreeFromLeafsSingleThread(l, buckets[cpu]) |  | ||||
| 			if err != nil { |  | ||||
| 				panic(err) // TODO
 |  | ||||
| 			} |  | ||||
| 			invalidsInBucket[cpu] = currInvalids |  | ||||
| 			subRoots[cpu] = bucketTree.root |  | ||||
| 			dbgStatsPerBucket[cpu] = bucketTree.dbg |  | ||||
| 			wg.Done() |  | ||||
| 		}(i) |  | ||||
| 	} |  | ||||
| 	wg.Wait() |  | ||||
| 
 |  | ||||
| 	// merge buckets txs into Tree.tx
 |  | ||||
| 	for i := 0; i < len(txs); i++ { |  | ||||
| 		if err := t.tx.Add(txs[i]); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	newRoot, err := t.upFromKeys(subRoots) |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 	t.root = newRoot |  | ||||
| 
 |  | ||||
| 	var invalids []int |  | ||||
| 	for i := 0; i < len(invalidsInBucket); i++ { |  | ||||
| 		invalids = append(invalids, invalidsInBucket[i]...) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	for i := 0; i < len(dbgStatsPerBucket); i++ { |  | ||||
| 		t.dbg.add(dbgStatsPerBucket[i]) |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	return invalids, err |  | ||||
| } |  | ||||
| 
 |  | ||||
| // buildTreeFromLeafsSingleThread builds the tree with the given []kv from bottom
 |  | ||||
| // to the root
 |  | ||||
| func (t *Tree) buildTreeFromLeafsSingleThread(l int, kvsRaw []kv) ([]int, error) { |  | ||||
| 	// TODO check that log2(len(leafs)) < t.maxLevels, if not, maxLevels
 |  | ||||
| 	// would be reached and should return error
 |  | ||||
| 	if len(kvsRaw) == 0 { |  | ||||
| 		return nil, nil |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	vt := newVT(t.maxLevels, t.hashFunction) |  | ||||
| 	if t.dbg != nil { |  | ||||
| 		vt.params.dbg = newDbgStats() |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	for i := 0; i < len(kvsRaw); i++ { |  | ||||
| 		if err := vt.add(l, kvsRaw[i].k, kvsRaw[i].v); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 	pairs, err := vt.computeHashes() |  | ||||
| 	if err != nil { |  | ||||
| 		return nil, err |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	// store pairs in db
 |  | ||||
| 	for i := 0; i < len(pairs); i++ { |  | ||||
| 		if err := t.dbPut(pairs[i][0], pairs[i][1]); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 	t.dbg.add(vt.params.dbg) |  | ||||
| 
 |  | ||||
| 	// set tree.root from the virtual tree root
 |  | ||||
| 	t.root = vt.root.h |  | ||||
| 
 |  | ||||
| 	return nil, nil // TODO invalids
 |  | ||||
| } |  | ||||
| 
 |  | ||||
| // keys & values must be sorted by path, and the array ks must be length
 |  | ||||
| // multiple of 2
 |  | ||||
| func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) { |  | ||||
| 	if len(ks) == 1 { |  | ||||
| 		return ks[0], nil |  | ||||
| 	} |  | ||||
| 
 |  | ||||
| 	var rKs [][]byte |  | ||||
| 	for i := 0; i < len(ks); i += 2 { |  | ||||
| 		if bytes.Equal(ks[i], t.emptyHash) && bytes.Equal(ks[i+1], t.emptyHash) { |  | ||||
| 			// when both sub keys are empty, the key is also empty
 |  | ||||
| 			rKs = append(rKs, t.emptyHash) |  | ||||
| 			continue |  | ||||
| 		} |  | ||||
| 		k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1]) |  | ||||
| 		if err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 		// store k-v to db
 |  | ||||
| 		if err = t.dbPut(k, v); err != nil { |  | ||||
| 			return nil, err |  | ||||
| 		} |  | ||||
| 		rKs = append(rKs, k) |  | ||||
| 	} |  | ||||
| 	return t.upFromKeys(rKs) |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) getLeafs(root []byte) ([][]byte, [][]byte, error) { |  | ||||
| 	var ks, vs [][]byte |  | ||||
| 	err := t.iter(root, func(k, v []byte) { |  | ||||
| 		if v[0] != PrefixValueLeaf { |  | ||||
| 			return |  | ||||
| 		} |  | ||||
| 		leafK, leafV := ReadLeafValue(v) |  | ||||
| 		ks = append(ks, leafK) |  | ||||
| 		vs = append(vs, leafV) |  | ||||
| 	}) |  | ||||
| 	return ks, vs, err |  | ||||
| } |  | ||||
| 
 |  | ||||
| func (t *Tree) getKeysAtLevel(l int) ([][]byte, error) { |  | ||||
| 	var keys [][]byte |  | ||||
| 	err := t.iterWithStop(t.root, 0, func(currLvl int, k, v []byte) bool { |  | ||||
| 		if currLvl == l && !bytes.Equal(k, t.emptyHash) { |  | ||||
| 			keys = append(keys, k) |  | ||||
| 		} |  | ||||
| 		if currLvl >= l { |  | ||||
| 			return true // to stop the iter from going down
 |  | ||||
| 		} |  | ||||
| 		return false |  | ||||
| 	}) |  | ||||
| 
 |  | ||||
| 	return keys, err |  | ||||
| } |  | ||||
| 
 |  | ||||
// flp2 computes the floor power of 2: the highest power of 2 that is not
// greater than the given value. For values under 1 it returns 0.
func flp2(n int) int {
	if n < 1 {
		return 0
	}
	p := 1
	// double while the next power still fits in n; comparing against n/2
	// keeps the doubling from overflowing
	for p <= n/2 {
		p *= 2
	}
	return p
}
| 
 |  | ||||
| // combineInKVSet combines two kv array in one single array without repeated
 |  | ||||
| // keys.
 |  | ||||
| func combineInKVSet(base, toAdd []kv) ([]kv, []int) { |  | ||||
| 	// TODO this is a naive version, this will be implemented in a more
 |  | ||||
| 	// efficient way or through maps, or through sorted binary search
 |  | ||||
| 	r := base |  | ||||
| 	var invalids []int |  | ||||
| 	for i := 0; i < len(toAdd); i++ { |  | ||||
| 		e := false |  | ||||
| 		// check if toAdd[i] exists in the base set
 |  | ||||
| 		for j := 0; j < len(base); j++ { |  | ||||
| 			if bytes.Equal(toAdd[i].k, base[j].k) { |  | ||||
| 				e = true |  | ||||
| 			} |  | ||||
| 		} |  | ||||
| 		if !e { |  | ||||
| 			r = append(r, toAdd[i]) |  | ||||
| 		} else { |  | ||||
| 			invalids = append(invalids, toAdd[i].pos) |  | ||||
| 		} |  | ||||
| 	} |  | ||||
| 	return r, invalids |  | ||||
| } |  | ||||
| 
 |  | ||||
| // loadVT loads a new virtual tree (vt) from the current Tree, which contains
 |  | ||||
| // the same leafs.
 |  | ||||
| func (t *Tree) loadVT() (vt, error) { |  | ||||
| 	vt := newVT(t.maxLevels, t.hashFunction) |  | ||||
| 	vt.params.dbg = t.dbg |  | ||||
| 	err := t.Iterate(func(k, v []byte) { |  | ||||
| 		switch v[0] { |  | ||||
| 		case PrefixValueEmpty: |  | ||||
| 		case PrefixValueLeaf: |  | ||||
| 			leafK, leafV := ReadLeafValue(v) |  | ||||
| 			if err := vt.add(0, leafK, leafV); err != nil { |  | ||||
| 				panic(err) |  | ||||
| 			} |  | ||||
| 		case PrefixValueIntermediate: |  | ||||
| 		default: |  | ||||
| 		} |  | ||||
| 	}) |  | ||||
| 
 |  | ||||
| 	return vt, err |  | ||||
| } |  | ||||
| 
 |  | ||||
| // func computeSimpleAddCost(nLeafs int) int {
 |  | ||||
| //         // nLvls 2^nLvls
 |  | ||||
| //         nLvls := int(math.Log2(float64(nLeafs)))
 |  | ||||
| //         return nLvls * int(math.Pow(2, float64(nLvls)))
 |  | ||||
| // }
 |  | ||||
| //
 |  | ||||
| // func computeFromLeafsAddCost(nLeafs int) int {
 |  | ||||
| //         // 2^nLvls * 2 - 1
 |  | ||||
| //         nLvls := int(math.Log2(float64(nLeafs)))
 |  | ||||
| //         return (int(math.Pow(2, float64(nLvls))) * 2) - 1
 |  | ||||
| // }
 |  | ||||