You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

256 lines
5.8 KiB

  1. package arbo
  2. import (
  3. "bytes"
  4. "fmt"
  5. "sort"
  6. )
  7. /*
  8. AddBatch design
  9. ===============
  10. CASE A: Empty Tree --> if tree is empty (root==0)
  11. =================================================
  12. - Build the full tree from bottom to top (from all the leaves to the root)
  13. CASE B: ALMOST CASE A, Almost empty Tree --> if Tree has numLeafs < numBuckets
  14. ==============================================================================
  15. - Get the Leafs (key & value) (iterate the tree from the current root getting
  16. the leafs)
  17. - Create a new empty Tree
  18. - Do CASE A for the new Tree, giving the already existing key&values (leafs)
  19. from the original Tree + the new key&values to be added from the AddBatch call
  20.       R
  21.      / \
  22.     A   *
  23.        / \
  24.       B   C
  25. CASE C: ALMOST CASE B --> if Tree has few Leafs (but numLeafs>=numBuckets)
  26. ==============================================================================
  27. - Use A, B, G, D as Roots of subtrees
  28. - Do CASE B for each subtree
  29. - Then go from L to the Root
  30.               R
  31.              / \
  32.             /   \
  33.            /     \
  34.           *       *
  35.          / |     / \
  36.         /  |    /   \
  37.        /   |   /     \
  38.   L:  A    B  G       D
  39.                      / \
  40.                     /   \
  41.                    /     \
  42.                   C       *
  43.                          / \
  44.                         /   \
  45.                        /     \
  46.                       D       E
  47. CASE D: Already populated Tree
  48. ==============================
  49. - Use A, B, C, D as subtree
  50. - Sort the Keys in Buckets that share the initial part of the path
  51. - For each subtree add there the new leafs
  52.               R
  53.              / \
  54.             /   \
  55.            /     \
  56.           *       *
  57.          / |     / \
  58.         /  |    /   \
  59.        /   |   /     \
  60.   L:  A    B  C       D
  61.      /\   /\ / \     / \
  62.    ... ... ... ... ... ...
  63. CASE E: Already populated Tree Unbalanced
  64. =========================================
  65. - Need to fill M1 and M2, and then will be able to use CASE D
  66. - Search for M1 & M2 in the input Keys
  67. - Add M1 & M2 to the Tree
  68. - From here can use CASE D
  69.                         R
  70.                        / \
  71.                       /   \
  72.                      /     \
  73.                     *       *
  74.                     |        \
  75.                     |         \
  76.                     |          \
  77.       L:     M1     *     M2    * (where M1 and M2 are empty)
  78.                    / |         /
  79.                   /  |        /
  80.                  /   |       /
  81.                 A    *      *
  82.                     / \     | \
  83.                    /   \    |  \
  84.                   /     \   |   \
  85.                  B       *  *    C
  86.                         / \ |\
  87.                     ... ... | \
  88.                             |  \
  89.                             D   E
  90. Algorithm decision
  91. ==================
  92. - if nLeafs==0 (root==0): CASE A
  93. - if nLeafs<nBuckets: CASE B
  94. - if nLeafs>=nBuckets && nLeafs < minLeafsThreshold: CASE C
  95. - else: CASE D & CASE E
  96. - Multiple tree.Add calls: O(n log n)
  97. - Used in: cases A, B, C
  98. - Tree from bottom to top: O(log n)
  99. - Used in: cases D, E
  100. */
  101. // AddBatchOpt is the WIP implementation of the AddBatch method in a more
  102. // optimized approach.
  103. func (t *Tree) AddBatchOpt(keys, values [][]byte) ([]int, error) {
  104. t.updateAccessTime()
  105. t.Lock()
  106. defer t.Unlock()
  107. // TODO if len(keys) is not a power of 2, add padding of empty
  108. // keys&values. Maybe when len(keyvalues) is not a power of 2, cut at
  109. // the biggest power of 2 under the len(keys), add those 2**n key-values
  110. // using the AddBatch approach, and then add the remaining key-values
  111. // using tree.Add.
  112. kvs, err := t.keysValuesToKvs(keys, values)
  113. if err != nil {
  114. return nil, err
  115. }
  116. t.tx, err = t.db.NewTx()
  117. if err != nil {
  118. return nil, err
  119. }
  120. // if nLeafs==0 (root==0): CASE A
  121. e := make([]byte, t.hashFunction.Len())
  122. if bytes.Equal(t.root, e) {
  123. // CASE A
  124. // sort keys & values by path
  125. sortKvs(kvs)
  126. return t.buildTreeBottomUp(kvs)
  127. }
  128. return nil, fmt.Errorf("UNIMPLEMENTED")
  129. }
  130. type kv struct {
  131. pos int // original position in the array
  132. keyPath []byte
  133. k []byte
  134. v []byte
  135. }
  136. // compareBytes compares byte slices where the bytes are compared from left to
  137. // right and each byte is compared by bit from right to left
  138. func compareBytes(a, b []byte) bool {
  139. // WIP
  140. for i := 0; i < len(a); i++ {
  141. for j := 0; j < 8; j++ {
  142. aBit := a[i] & (1 << j)
  143. bBit := b[i] & (1 << j)
  144. if aBit > bBit {
  145. return false
  146. } else if aBit < bBit {
  147. return true
  148. }
  149. }
  150. }
  151. return false
  152. }
  153. // sortKvs sorts the kv by path
  154. func sortKvs(kvs []kv) {
  155. sort.Slice(kvs, func(i, j int) bool {
  156. return compareBytes(kvs[i].keyPath, kvs[j].keyPath)
  157. })
  158. }
  159. func (t *Tree) keysValuesToKvs(ks, vs [][]byte) ([]kv, error) {
  160. if len(ks) != len(vs) {
  161. return nil, fmt.Errorf("len(keys)!=len(values) (%d!=%d)",
  162. len(ks), len(vs))
  163. }
  164. kvs := make([]kv, len(ks))
  165. for i := 0; i < len(ks); i++ {
  166. keyPath := make([]byte, t.hashFunction.Len())
  167. copy(keyPath[:], ks[i])
  168. kvs[i].pos = i
  169. kvs[i].keyPath = ks[i]
  170. kvs[i].k = ks[i]
  171. kvs[i].v = vs[i]
  172. }
  173. return kvs, nil
  174. }
  175. // keys & values must be sorted by path, and must be length multiple of 2
  176. // TODO return index of failed keyvaules
  177. func (t *Tree) buildTreeBottomUp(kvs []kv) ([]int, error) {
  178. // build the leafs
  179. leafKeys := make([][]byte, len(kvs))
  180. for i := 0; i < len(kvs); i++ {
  181. // TODO handle the case where Key&Value == 0
  182. leafKey, leafValue, err := newLeafValue(t.hashFunction, kvs[i].k, kvs[i].v)
  183. if err != nil {
  184. return nil, err
  185. }
  186. // store leafKey & leafValue to db
  187. if err := t.tx.Put(leafKey, leafValue); err != nil {
  188. return nil, err
  189. }
  190. leafKeys[i] = leafKey
  191. }
  192. r, err := t.upFromKeys(leafKeys)
  193. if err != nil {
  194. return nil, err
  195. }
  196. t.root = r
  197. return nil, nil
  198. }
  199. func (t *Tree) upFromKeys(ks [][]byte) ([]byte, error) {
  200. if len(ks) == 1 {
  201. return ks[0], nil
  202. }
  203. var rKs [][]byte
  204. for i := 0; i < len(ks); i += 2 {
  205. // TODO handle the case where Key&Value == 0
  206. k, v, err := newIntermediate(t.hashFunction, ks[i], ks[i+1])
  207. if err != nil {
  208. return nil, err
  209. }
  210. // store k-v to db
  211. if err = t.tx.Put(k, v); err != nil {
  212. return nil, err
  213. }
  214. rKs = append(rKs, k)
  215. }
  216. return t.upFromKeys(rKs)
  217. }