Implement addBatchInDisk for big trees

Implement addBatchInDisk for big trees, which does not puts the tree in
memory, and works directly over the db data, parallelizing for each CPU.
This commit is contained in:
2021-10-14 16:59:26 +02:00
parent 3f7e769974
commit 6cf1e58d9f
10 changed files with 660 additions and 105 deletions

View File

@@ -11,6 +11,7 @@ import (
qt "github.com/frankban/quicktest"
"go.vocdoni.io/dvote/db"
"go.vocdoni.io/dvote/db/badgerdb"
"go.vocdoni.io/dvote/db/pebbledb"
)
func checkRootBIString(c *qt.C, tree *Tree, expected string) {
@@ -23,11 +24,20 @@ func checkRootBIString(c *qt.C, tree *Tree, expected string) {
func TestDBTx(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
dbBadger, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
wTx := database.WriteTx()
testDBTx(c, dbBadger)
_, err = wTx.Get([]byte("a"))
dbPebble, err := pebbledb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
testDBTx(c, dbPebble)
}
func testDBTx(c *qt.C, database db.Database) {
wTx := database.WriteTx()
defer wTx.Discard()
_, err := wTx.Get([]byte("a"))
c.Assert(err, qt.Equals, db.ErrKeyNotFound)
err = wTx.Set([]byte("a"), []byte("b"))
@@ -60,7 +70,7 @@ func TestAddTestVectors(t *testing.T) {
}
func testAdd(c *qt.C, hashFunc HashFunction, testVectors []string) {
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, hashFunc)
c.Assert(err, qt.IsNil)
@@ -92,7 +102,7 @@ func testAdd(c *qt.C, hashFunc HashFunction, testVectors []string) {
func TestAddBatch(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -110,7 +120,7 @@ func TestAddBatch(t *testing.T) {
checkRootBIString(c, tree,
"296519252211642170490407814696803112091039265640052570497930797516015811235")
database, err = badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err = badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree2, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -133,7 +143,7 @@ func TestAddBatch(t *testing.T) {
func TestAddDifferentOrder(t *testing.T) {
c := qt.New(t)
database1, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database1, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree1, err := NewTree(database1, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -148,7 +158,7 @@ func TestAddDifferentOrder(t *testing.T) {
}
}
database2, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database2, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree2, err := NewTree(database2, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -173,7 +183,7 @@ func TestAddDifferentOrder(t *testing.T) {
func TestAddRepeatedIndex(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -191,7 +201,7 @@ func TestAddRepeatedIndex(t *testing.T) {
func TestUpdate(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -244,7 +254,7 @@ func TestUpdate(t *testing.T) {
func TestAux(t *testing.T) { // TODO split in proper tests
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -283,7 +293,7 @@ func TestAux(t *testing.T) { // TODO split in proper tests
func TestGet(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -307,7 +317,7 @@ func TestGet(t *testing.T) {
func TestGenProofAndVerify(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -339,7 +349,7 @@ func TestGenProofAndVerify(t *testing.T) {
func TestDumpAndImportDump(t *testing.T) {
c := qt.New(t)
database1, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database1, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree1, err := NewTree(database1, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -357,7 +367,7 @@ func TestDumpAndImportDump(t *testing.T) {
e, err := tree1.Dump(nil)
c.Assert(err, qt.IsNil)
database2, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database2, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree2, err := NewTree(database2, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -376,7 +386,7 @@ func TestDumpAndImportDump(t *testing.T) {
func TestRWMutex(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -408,7 +418,7 @@ func TestRWMutex(t *testing.T) {
// TODO UPDATE
// func TestSetGetNLeafs(t *testing.T) {
// c := qt.New(t)
// database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
// database, err := badgerdb.New(db.Options{Path: c.TempDir()})
// c.Assert(err, qt.IsNil)
// tree, err := NewTree(database, 100, HashFunctionPoseidon)
// c.Assert(err, qt.IsNil)
@@ -459,12 +469,12 @@ func TestRWMutex(t *testing.T) {
func TestAddBatchFullyUsed(t *testing.T) {
c := qt.New(t)
database1, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database1, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree1, err := NewTree(database1, 4, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
database2, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database2, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree2, err := NewTree(database2, 4, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -518,7 +528,7 @@ func TestAddBatchFullyUsed(t *testing.T) {
func TestSetRoot(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -574,7 +584,7 @@ func TestSetRoot(t *testing.T) {
func TestSnapshot(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -624,7 +634,7 @@ func TestSnapshot(t *testing.T) {
func TestGetFromSnapshotExpectArboErrKeyNotFound(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 256, HashFunctionPoseidon)
c.Assert(err, qt.IsNil)
@@ -644,7 +654,7 @@ func TestGetFromSnapshotExpectArboErrKeyNotFound(t *testing.T) {
func TestKeyLen(t *testing.T) {
c := qt.New(t)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
// maxLevels is 100, keyPath length = ceil(maxLevels/8) = 13
maxLevels := 100
@@ -678,7 +688,7 @@ func TestKeyLen(t *testing.T) {
// expect errors when adding a key bigger than maximum capacity of the
// tree: ceil(maxLevels/8)
maxLevels = 32
database, err = badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err = badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err = NewTree(database, maxLevels, HashFunctionBlake2b)
c.Assert(err, qt.IsNil)
@@ -748,7 +758,7 @@ func TestKeyLen(t *testing.T) {
func TestKeyLenBiggerThan32(t *testing.T) {
c := qt.New(t)
maxLevels := 264
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, maxLevels, HashFunctionBlake2b)
c.Assert(err, qt.IsNil)
@@ -790,7 +800,7 @@ func BenchmarkAdd(b *testing.B) {
func benchmarkAdd(b *testing.B, hashFunc HashFunction, ks, vs [][]byte) {
c := qt.New(b)
database, err := badgerdb.New(badgerdb.Options{Path: c.TempDir()})
database, err := badgerdb.New(db.Options{Path: c.TempDir()})
c.Assert(err, qt.IsNil)
tree, err := NewTree(database, 140, hashFunc)
c.Assert(err, qt.IsNil)