diff --git a/addbatch.go b/addbatch.go
new file mode 100644
index 0000000..49a4426
--- /dev/null
+++ b/addbatch.go
@@ -0,0 +1,294 @@
+package arbo
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+)
+
+/*
+
+
+AddBatch design
+===============
+
+
+CASE A: Empty Tree --> if tree is empty (root==0)
+=================================================
+- Build the full tree from bottom to top (from all the leafs to the root)
+
+
+CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < numBuckets
+==============================================================================
+- Get the Leafs (key & value) (iterate the tree from the current root getting
+the leafs)
+- Create a new empty Tree
+- Do CASE A for the new Tree, giving the already existing key&values (leafs)
+from the original Tree + the new key&values to be added from the AddBatch call
+
+       R
+      / \
+     A   *
+        / \
+       B   C
+
+
+CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=numBuckets)
+==============================================================================
+- Use A, B, G, F as Roots of subtrees
+- Do CASE B for each subtree
+- Then go from L to the Root
+
+              R
+             / \
+            /   \
+           /     \
+          *       *
+         / |     / \
+        /  |    /   \
+       /   |   /     \
+   L: A    B  G       D
+                     / \
+                    /   \
+                   /     \
+                  C       *
+                         / \
+                        /   \
+                       /     \
+                      D       E
+
+
+
+CASE D: Already populated Tree
+==============================
+- Use A, B, C, D as subtree
+- Sort the Keys in Buckets that share the initial part of the path
+- For each subtree add there the new leafs
+
+              R
+             / \
+            /   \
+           /     \
+          *       *
+         / |     / \
+        /  |    /   \
+       /   |   /     \
+   L: A    B  C       D
+     /\   /\ / \     / \
+   ... ... ... ... ... ...
+
+
+CASE E: Already populated Tree Unbalanced
+=========================================
+- Need to fill M1 and M2, and then will be able to use CASE D
+	- Search for M1 & M2 in the inputted Keys
+	- Add M1 & M2 to the Tree
+	- From here can use CASE D
+
+                R
+               / \
+              /   \
+             /     \
+            *       *
+            |        \
+            |         \
+            |          \
+   L: M1    *    M2     * (where M1 and M2 are empty)
+          / |          /
+         /  |         /
+        /   |        /
+       A    *       *
+           / \      | \
+          /   \     |  \
+         /     \    |   \
+        B       *   *    C
+               / \  |\
+            ... ... | \
+                    |  \
+                    D   E
+
+
+
+Algorithm decision
+==================
+- if nLeafs==0 (root==0): CASE A
+- if nLeafs<nBuckets: CASE B
+- if nLeafs>=nBuckets && nLeafs < minLeafsThreshold: CASE C
+- else: CASE D & CASE E
+
+
+- Multiple tree.Add calls: O(n log n)
+	- Used in: cases A, B, C
+- Tree from bottom to top: O(log n)
+	- Used in: cases D, E
+
+*/
+
+// AddBatchOpt is the WIP implementation of the AddBatch method in a more
+// optimized approach. Only CASE A (empty tree) is implemented so far, and it
+// needs len(keys) to be a power of 2; any other situation returns an error.
+// An empty batch is a no-op. The returned indexes are not populated yet (see
+// the TODO in buildTreeBottomUp).
+func (t *Tree) AddBatchOpt(keys, values [][]byte) ([]int, error) {
+	t.updateAccessTime()
+	t.Lock()
+	defer t.Unlock()
+
+	// TODO if len(keys) is not a power of 2, add padding of empty
+	// keys&values. Maybe when len(keyvalues) is not a power of 2, cut at
+	// the biggest power of 2 under the len(keys), add those 2**n key-values
+	// using the AddBatch approach, and then add the remaining key-values
+	// using tree.Add.
+
+	kvs, err := t.keysValuesToKvs(keys, values)
+	if err != nil {
+		return nil, err
+	}
+	if len(kvs) == 0 {
+		// nothing to add
+		return nil, nil
+	}
+
+	// if nLeafs==0 (root==0): CASE A
+	e := make([]byte, t.hashFunction.Len())
+	if !bytes.Equal(t.root, e) {
+		// checked before opening the tx, so that no tx is left open
+		return nil, fmt.Errorf("UNIMPLEMENTED")
+	}
+
+	t.tx, err = t.db.NewTx()
+	if err != nil {
+		return nil, err
+	}
+
+	// CASE A
+	// sort keys & values by path
+	sortKvs(kvs)
+	return t.buildTreeBottomUp(kvs)
+}
+
+// kv holds a key-value pair to be added to the tree, together with the path
+// of the key and its position in the caller's input.
+type kv struct {
+	pos     int // original position in the array
+	keyPath []byte
+	k       []byte
+	v       []byte
+}
+
+// compareBytes reports whether a sorts before b. Bytes are compared from left
+// to right and, inside each byte, bits are compared from right to left (least
+// significant first), an unset bit sorting before a set one. If one slice is
+// a prefix of the other, the shorter one sorts first.
+func compareBytes(a, b []byte) bool {
+	// WIP
+	n := len(a)
+	if len(b) < n {
+		n = len(b)
+	}
+	for i := 0; i < n; i++ {
+		for j := 0; j < 8; j++ {
+			aBit := a[i] & (1 << j)
+			bBit := b[i] & (1 << j)
+			if aBit != bBit {
+				return aBit < bBit
+			}
+		}
+	}
+	return len(a) < len(b)
+}
+
+// sortKvs sorts the kv by path
+func sortKvs(kvs []kv) {
+	sort.Slice(kvs, func(i, j int) bool {
+		return compareBytes(kvs[i].keyPath, kvs[j].keyPath)
+	})
+}
+
+// keysValuesToKvs pairs each key with its value, keeping the original
+// position of the pair. The keyPath of each entry is the key copied into a
+// zeroed buffer of t.hashFunction.Len() bytes, so that all the paths have the
+// same length. It returns an error if ks and vs differ in length.
+func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) {
+	if len(ks) != len(vs) {
+		return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)",
+			len(ks), len(vs))
+	}
+	kvs := make([]kv, len(ks))
+	for i := 0; i < len(ks); i++ {
+		keyPath := make([]byte, t.hashFunction.Len())
+		copy(keyPath, ks[i])
+		kvs[i].pos = i
+		kvs[i].keyPath = keyPath
+		kvs[i].k = ks[i]
+		kvs[i].v = vs[i]
+	}
+
+	return kvs, nil
+}
+
+// buildTreeBottomUp stores one leaf per kv and builds the intermediate nodes
+// up to the root, which becomes the new t.root. kvs must be sorted by path and
+// its length must be a power of 2, otherwise an error is returned before
+// anything is written. All the writes go through t.tx.
+// NOTE(review): the tx is not committed here nor in AddBatchOpt; confirm
+// where that is expected to happen.
+// TODO return index of failed keyvalues
+func (t *Tree) buildTreeBottomUp(kvs []kv) ([]int, error) {
+	if n := len(kvs); n == 0 || n&(n-1) != 0 {
+		return nil, fmt.Errorf("len(kvs)=%d is not a power of 2", n)
+	}
+
+	// build the leafs
+	leafKeys := make([][]byte, len(kvs))
+	for i := 0; i < len(kvs); i++ {
+		// TODO handle the case where Key&Value == 0
+		leafKey, leafValue, err := newLeafValue(t.hashFunction, kvs[i].k, kvs[i].v)
+		if err != nil {
+			return nil, err
+		}
+		// store leafKey & leafValue to db
+		if err := t.tx.Put(leafKey, leafValue); err != nil {
+			return nil, err
+		}
+		leafKeys[i] = leafKey
+	}
+	r, err := t.upFromKeys(leafKeys)
+	if err != nil {
+		return nil, err
+	}
+	t.root = r
+	return nil, nil
+}
+
+// upFromKeys combines the given node keys by pairs with newIntermediate,
+// storing each resulting node through t.tx, and repeats level by level until a
+// single key is left, which is returned. It returns an error if ks is empty or
+// if some level has an odd number of keys (len(ks) is not a power of 2).
+func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) {
+	if len(ks) == 0 {
+		return nil, fmt.Errorf("upFromKeys: empty keys")
+	}
+
+	for len(ks) > 1 {
+		if len(ks)%2 != 0 {
+			return nil, fmt.Errorf("upFromKeys: odd number of keys (%d)",
+				len(ks))
+		}
+		rKs := make([][]byte, 0, len(ks)/2)
+		for i := 0; i < len(ks); i += 2 {
+			// TODO handle the case where Key&Value == 0
+			k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1])
+			if err != nil {
+				return nil, err
+			}
+			// store k-v to db
+			if err = t.tx.Put(k, v); err != nil {
+				return nil, err
+			}
+			rKs = append(rKs, k)
+		}
+		ks = rKs
+	}
+	return ks[0], nil
+}
diff --git a/addbatch_test.go b/addbatch_test.go
new file mode 100644
index 0000000..64dfb4d
--- /dev/null
+++ b/addbatch_test.go
@@ -0,0 +1,64 @@
+package arbo
+
+import (
+	"fmt"
+	"math/big"
+	"testing"
+	"time"
+
+	qt "github.com/frankban/quicktest"
+	"github.com/iden3/go-merkletree/db/memory"
+)
+
+func TestAddBatchCaseA(t *testing.T) {
+	c := qt.New(t)
+
+	nLeafs := 1024
+
+	tree, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon)
+	c.Assert(err, qt.IsNil)
+	defer tree.db.Close()
+
+	start := time.Now()
+	for i := 0; i < nLeafs; i++ {
+		k := BigIntToBytes(big.NewInt(int64(i)))
+		v := BigIntToBytes(big.NewInt(int64(i * 2)))
+		if err := tree.Add(k, v); err != nil {
+			t.Fatal(err)
+		}
+	}
+	fmt.Println(time.Since(start))
+
+	tree2, err := NewTree(memory.NewMemoryStorage(), 100, HashFunctionPoseidon)
+	c.Assert(err, qt.IsNil)
+	defer tree2.db.Close()
+
+	var keys, values [][]byte
+	for i := 0; i < nLeafs; i++ {
+		k := BigIntToBytes(big.NewInt(int64(i)))
+		v := BigIntToBytes(big.NewInt(int64(i * 2)))
+		keys = append(keys, k)
+		values = append(values, v)
+	}
+	start = time.Now()
+	indexes, err := tree2.AddBatchOpt(keys, values)
+	c.Assert(err, qt.IsNil)
+	fmt.Println(time.Since(start))
+	c.Check(len(indexes), qt.Equals, 0)
+
+	// check that both trees roots are equal
+	c.Check(tree2.Root(), qt.DeepEquals, tree.Root())
+}
+
+func TestCompareBytes(t *testing.T) {
+	c := qt.New(t)
+
+	// bits are compared starting from the least significant one
+	c.Check(compareBytes([]byte{2}, []byte{1}), qt.Equals, true)
+	c.Check(compareBytes([]byte{1}, []byte{2}), qt.Equals, false)
+	c.Check(compareBytes([]byte{1}, []byte{1}), qt.Equals, false)
+
+	// different lengths must not panic: a strict prefix sorts first
+	c.Check(compareBytes([]byte{1}, []byte{1, 0}), qt.Equals, true)
+	c.Check(compareBytes([]byte{1, 0}, []byte{1}), qt.Equals, false)
+}