If the tree is empty, build the full tree from bottom to top (from all the leaves to the root). master
@ -0,0 +1,256 @@ |
|||||
|
package arbo |
||||
|
|
||||
|
import ( |
||||
|
"bytes" |
||||
|
"fmt" |
||||
|
"sort" |
||||
|
) |
||||
|
|
||||
|
/* |
||||
|
|
||||
|
|
||||
|
AddBatch design |
||||
|
=============== |
||||
|
|
||||
|
|
||||
|
CASE A: Empty Tree --> if tree is empty (root==0) |
||||
|
================================================= |
||||
|
- Build the full tree from bottom to top (from all the leaves to the root)
||||
|
|
||||
|
|
||||
|
CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < numBuckets |
||||
|
============================================================================== |
||||
|
- Get the Leafs (key & value) (iterate the tree from the current root getting |
||||
|
the leafs) |
||||
|
- Create a new empty Tree |
||||
|
- Do CASE A for the new Tree, giving the already existing key&values (leafs) |
||||
|
from the original Tree + the new key&values to be added from the AddBatch call |
||||
|
|
||||
|
       R
      / \
     A   *
        / \
       B   C
||||
|
|
||||
|
|
||||
|
CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=numBuckets) |
||||
|
============================================================================== |
||||
|
- Use A, B, G, F as Roots of subtrees |
||||
|
- Do CASE B for each subtree |
||||
|
- Then go from L to the Root |
||||
|
|
||||
|
                  R
                 /  \
                /    \
               /      \
              *        *
             / |      / \
            /  |     /   \
           /   |    /     \
       L: A    B   G       D
                          / \
                         /   \
                        /     \
                       C       *
                              / \
                             /   \
                            /     \
                           D       E
||||
|
|
||||
|
|
||||
|
|
||||
|
CASE D: Already populated Tree |
||||
|
============================== |
||||
|
- Use A, B, C, D as subtree |
||||
|
- Sort the Keys in Buckets that share the initial part of the path |
||||
|
- For each subtree add there the new leafs |
||||
|
|
||||
|
                  R
                 /  \
                /    \
               /      \
              *        *
             / |      / \
            /  |     /   \
           /   |    /     \
       L: A    B   C       D
         /\   /\  / \     / \
        ... ... ... ... ... ...
||||
|
|
||||
|
|
||||
|
CASE E: Already populated Tree Unbalanced |
||||
|
========================================= |
||||
|
- Need to fill M1 and M2, and then will be able to use CASE D |
||||
|
- Search for M1 & M2 in the input Keys
||||
|
- Add M1 & M2 to the Tree |
||||
|
- From here can use CASE D |
||||
|
|
||||
|
                  R
                 /  \
                /    \
               /      \
              *        *
              |         \
              |          \
              |           \
       L: M1  *   M2       * (where M1 and M2 are empty)
             / |          /
            /  |         /
           /   |        /
          A    *       *
              / \      | \
             /   \     |  \
            /     \    |   \
           B       *   *    C
                  / \  |\
               ... ... | \
                       |  \
                       D   E
||||
|
|
||||
|
|
||||
|
|
||||
|
Algorithm decision |
||||
|
================== |
||||
|
- if nLeafs==0 (root==0): CASE A |
||||
|
- if nLeafs<nBuckets: CASE B |
||||
|
- if nLeafs>=nBuckets && nLeafs < minLeafsThreshold: CASE C |
||||
|
- else: CASE D & CASE E |
||||
|
|
||||
|
|
||||
|
- Multiple tree.Add calls: O(n log n) |
||||
|
- Used in: cases A, B, C |
||||
|
- Tree from bottom to top: O(log n) |
||||
|
- Used in: cases D, E |
||||
|
|
||||
|
*/ |
||||
|
|
||||
|
// AddBatchOpt is the WIP implementation of the AddBatch method in a more
|
||||
|
// optimized approach.
|
||||
|
func (t *Tree) AddBatchOpt(keys, values [][]byte) ([]int, error) { |
||||
|
t.updateAccessTime() |
||||
|
t.Lock() |
||||
|
defer t.Unlock() |
||||
|
|
||||
|
// TODO if len(keys) is not a power of 2, add padding of empty
|
||||
|
// keys&values. Maybe when len(keyvalues) is not a power of 2, cut at
|
||||
|
// the biggest power of 2 under the len(keys), add those 2**n key-values
|
||||
|
// using the AddBatch approach, and then add the remaining key-values
|
||||
|
// using tree.Add.
|
||||
|
|
||||
|
kvs, err := t.keysValuesToKvs(keys, values) |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
|
||||
|
t.tx, err = t.db.NewTx() |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
|
||||
|
// if nLeafs==0 (root==0): CASE A
|
||||
|
e := make([]byte, t.hashFunction.Len()) |
||||
|
if bytes.Equal(t.root, e) { |
||||
|
// CASE A
|
||||
|
// sort keys & values by path
|
||||
|
sortKvs(kvs) |
||||
|
return t.buildTreeBottomUp(kvs) |
||||
|
} |
||||
|
|
||||
|
return nil, fmt.Errorf("UNIMPLEMENTED") |
||||
|
} |
||||
|
|
||||
|
// kv is one key-value pair of a batch, together with the data used to sort
// it and to relate it back to the caller's input.
type kv struct {
	// pos is the original position in the array
	pos int
	// keyPath is the byte slice that sortKvs orders the pairs by
	keyPath []byte
	// k and v are the key and the value
	k, v []byte
}
||||
|
|
||||
|
// compareBytes reports whether a sorts before b. Bytes are compared from left
// to right, and within each byte the bits are compared from right to left
// (least significant bit first). At the first bit where the two slices differ,
// the slice holding the 0 bit sorts first.
//
// Only the common prefix of a and b is compared bit by bit, so slices of
// different lengths can be compared safely; when one slice is a prefix of the
// other, the shorter one sorts first. Equal slices return false, which makes
// the function usable as the less function of sort.Slice.
func compareBytes(a, b []byte) bool {
	n := len(a)
	if len(b) < n {
		n = len(b)
	}
	for i := 0; i < n; i++ {
		diff := a[i] ^ b[i]
		if diff == 0 {
			continue
		}
		// diff & -diff isolates the lowest set bit of diff, ie. the first
		// bit (scanning from the right) at which a[i] and b[i] disagree.
		firstDiff := diff & -diff
		return a[i]&firstDiff == 0
	}
	return len(a) < len(b)
}
||||
|
|
||||
|
// sortKvs sorts the kv by path
|
||||
|
func sortKvs(kvs []kv) { |
||||
|
sort.Slice(kvs, func(i, j int) bool { |
||||
|
return compareBytes(kvs[i].keyPath, kvs[j].keyPath) |
||||
|
}) |
||||
|
} |
||||
|
|
||||
|
func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) { |
||||
|
if len(ks) != len(vs) { |
||||
|
return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)", |
||||
|
len(ks), len(vs)) |
||||
|
} |
||||
|
kvs := make([]kv, len(ks)) |
||||
|
for i := 0; i < len(ks); i++ { |
||||
|
keyPath := make([]byte, t.hashFunction.Len()) |
||||
|
copy(keyPath[:], ks[i]) |
||||
|
kvs[i].pos = i |
||||
|
kvs[i].keyPath = ks[i] |
||||
|
kvs[i].k = ks[i] |
||||
|
kvs[i].v = vs[i] |
||||
|
} |
||||
|
|
||||
|
return kvs, nil |
||||
|
} |
||||
|
|
||||
|
// keys & values must be sorted by path, and must be length multiple of 2
|
||||
|
// TODO return index of failed keyvaules
|
||||
|
func (t *Tree) buildTreeBottomUp(kvs []kv) ([]int, error) { |
||||
|
// build the leafs
|
||||
|
leafKeys := make([][]byte, len(kvs)) |
||||
|
for i := 0; i < len(kvs); i++ { |
||||
|
// TODO handle the case where Key&Value == 0
|
||||
|
leafKey, leafValue, err := newLeafValue(t.hashFunction, kvs[i].k, kvs[i].v) |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
// store leafKey & leafValue to db
|
||||
|
if err := t.tx.Put(leafKey, leafValue); err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
leafKeys[i] = leafKey |
||||
|
} |
||||
|
r, err := t.upFromKeys(leafKeys) |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
t.root = r |
||||
|
return nil, nil |
||||
|
} |
||||
|
|
||||
|
func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) { |
||||
|
if len(ks) == 1 { |
||||
|
return ks[0], nil |
||||
|
} |
||||
|
|
||||
|
var rKs [][]byte |
||||
|
for i := 0; i < len(ks); i += 2 { |
||||
|
// TODO handle the case where Key&Value == 0
|
||||
|
k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1]) |
||||
|
if err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
// store k-v to db
|
||||
|
if err = t.tx.Put(k, v); err != nil { |
||||
|
return nil, err |
||||
|
} |
||||
|
rKs = append(rKs, k) |
||||
|
} |
||||
|
return t.upFromKeys(rKs) |
||||
|
} |
@ -0,0 +1,51 @@ |
|||||
|
package arbo |
||||
|
|
||||
|
import ( |
||||
|
"fmt" |
||||
|
"math/big" |
||||
|
"testing" |
||||
|
"time" |
||||
|
|
||||
|
qt "github.com/frankban/quicktest" |
||||
|
"github.com/iden3/go-merkletree/db/memory" |
||||
|
) |
||||
|
|
||||
|
func TestAddBatchCaseA(t *testing.T) { |
||||
|
c := qt.New(t) |
||||
|
|
||||
|
nLeafs := 1024 |
||||
|
|
||||
|
tree, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon) |
||||
|
c.Assert(err, qt.IsNil) |
||||
|
defer tree.db.Close() |
||||
|
|
||||
|
start := time.Now() |
||||
|
for i := 0; i < nLeafs; i++ { |
||||
|
k := BigIntToBytes(big.NewInt(int64(i))) |
||||
|
v := BigIntToBytes(big.NewInt(int64(i * 2))) |
||||
|
if err := tree.Add(k, v); err != nil { |
||||
|
t.Fatal(err) |
||||
|
} |
||||
|
} |
||||
|
fmt.Println(time.Since(start)) |
||||
|
|
||||
|
tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon) |
||||
|
c.Assert(err, qt.IsNil) |
||||
|
defer tree2.db.Close() |
||||
|
|
||||
|
var keys, values [][]byte |
||||
|
for i := 0; i < nLeafs; i++ { |
||||
|
k := BigIntToBytes(big.NewInt(int64(i))) |
||||
|
v := BigIntToBytes(big.NewInt(int64(i * 2))) |
||||
|
keys = append(keys, k) |
||||
|
values = append(values, v) |
||||
|
} |
||||
|
start = time.Now() |
||||
|
indexes, err := tree2.AddBatchOpt(keys, values) |
||||
|
c.Assert(err, qt.IsNil) |
||||
|
fmt.Println(time.Since(start)) |
||||
|
c.Check(len(indexes), qt.Equals, 0) |
||||
|
|
||||
|
// check that both trees roots are equal
|
||||
|
c.Check(tree2.Root(), qt.DeepEquals, tree.Root()) |
||||
|
} |