You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1098 lines
26 KiB

  1. // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com>
  2. // All rights reserved.
  3. //
  4. // Use of this source code is governed by a BSD-style license that can be
  5. // found in the LICENSE file.
  6. package leveldb
  7. import (
  8. "container/list"
  9. "fmt"
  10. "io"
  11. "os"
  12. "runtime"
  13. "strings"
  14. "sync"
  15. "sync/atomic"
  16. "time"
  17. "github.com/syndtr/goleveldb/leveldb/errors"
  18. "github.com/syndtr/goleveldb/leveldb/iterator"
  19. "github.com/syndtr/goleveldb/leveldb/journal"
  20. "github.com/syndtr/goleveldb/leveldb/memdb"
  21. "github.com/syndtr/goleveldb/leveldb/opt"
  22. "github.com/syndtr/goleveldb/leveldb/storage"
  23. "github.com/syndtr/goleveldb/leveldb/table"
  24. "github.com/syndtr/goleveldb/leveldb/util"
  25. )
// DB is a LevelDB database.
//
// A DB instance is safe for concurrent use and must be closed after use
// by calling Close.
type DB struct {
	// Need 64-bit alignment: seq is accessed atomically, so it must be
	// first to guarantee 8-byte alignment on 32-bit platforms.
	seq uint64

	// Stats. Need 64-bit alignment.
	cWriteDelay            int64 // The cumulative duration of write delays
	cWriteDelayN           int32 // The cumulative number of write delays
	aliveSnaps, aliveIters int32 // Alive snapshot/iterator counters; accessed atomically.

	// Session.
	s *session

	// MemDB.
	memMu           sync.RWMutex
	memPool         chan *memdb.DB // Cap-1 pool of reusable memdbs; drained by mpoolDrain.
	mem, frozenMem  *memDB
	journal         *journal.Writer
	journalWriter   storage.Writer
	journalFd       storage.FileDesc
	frozenJournalFd storage.FileDesc
	frozenSeq       uint64

	// Snapshot.
	snapsMu   sync.Mutex
	snapsList *list.List

	// Write.
	batchPool    sync.Pool
	writeMergeC  chan writeMerge
	writeMergedC chan bool
	writeLockC   chan struct{} // Buffered (cap 1); sending acquires the writer lock.
	writeAckC    chan error
	writeDelay   time.Duration
	writeDelayN  int
	tr           *Transaction

	// Compaction.
	compCommitLk     sync.Mutex
	tcompCmdC        chan cCmd
	tcompPauseC      chan chan<- struct{}
	mcompCmdC        chan cCmd
	compErrC         chan error
	compPerErrC      chan error
	compErrSetC      chan error
	compWriteLocking bool
	compStats        cStats
	memdbMaxLevel    int // For testing.

	// Close.
	closeW sync.WaitGroup
	closeC chan struct{}
	closed uint32
	closer io.Closer
}
// openDB finishes opening a database on top of a recovered session: it
// replays journals (read-only or read-write), removes obsolete files,
// and starts the background error/compaction goroutines.
func openDB(s *session) (*DB, error) {
	s.log("db@open opening")
	start := time.Now()
	db := &DB{
		s: s,
		// Initial sequence
		seq: s.stSeqNum,
		// MemDB
		memPool: make(chan *memdb.DB, 1),
		// Snapshot
		snapsList: list.New(),
		// Write
		batchPool:    sync.Pool{New: newBatch},
		writeMergeC:  make(chan writeMerge),
		writeMergedC: make(chan bool),
		writeLockC:   make(chan struct{}, 1),
		writeAckC:    make(chan error),
		// Compaction
		tcompCmdC:   make(chan cCmd),
		tcompPauseC: make(chan chan<- struct{}),
		mcompCmdC:   make(chan cCmd),
		compErrC:    make(chan error),
		compPerErrC: make(chan error),
		compErrSetC: make(chan error),
		// Close
		closeC: make(chan struct{}),
	}

	// Read-only mode.
	readOnly := s.o.GetReadOnly()

	if readOnly {
		// Recover journals (read-only mode).
		if err := db.recoverJournalRO(); err != nil {
			return nil, err
		}
	} else {
		// Recover journals.
		if err := db.recoverJournal(); err != nil {
			return nil, err
		}

		// Remove any obsolete files.
		if err := db.checkAndCleanFiles(); err != nil {
			// Close journal before bailing out, since recoverJournal may
			// have created a fresh one.
			if db.journal != nil {
				db.journal.Close()
				db.journalWriter.Close()
			}
			return nil, err
		}
	}

	// Doesn't need to be included in the wait group.
	go db.compactionError()
	go db.mpoolDrain()

	if readOnly {
		db.SetReadOnly()
	} else {
		// Two goroutines tracked by closeW; Close waits for both to exit.
		db.closeW.Add(2)
		go db.tCompaction()
		go db.mCompaction()
		// go db.jWriter()
	}

	s.logf("db@open done T·%v", time.Since(start))

	// Safety net: close the DB if the caller drops it without calling
	// Close. Close itself clears this finalizer.
	runtime.SetFinalizer(db, (*DB).Close)
	return db, nil
}
  138. // Open opens or creates a DB for the given storage.
  139. // The DB will be created if not exist, unless ErrorIfMissing is true.
  140. // Also, if ErrorIfExist is true and the DB exist Open will returns
  141. // os.ErrExist error.
  142. //
  143. // Open will return an error with type of ErrCorrupted if corruption
  144. // detected in the DB. Use errors.IsCorrupted to test whether an error is
  145. // due to corruption. Corrupted DB can be recovered with Recover function.
  146. //
  147. // The returned DB instance is safe for concurrent use.
  148. // The DB must be closed after use, by calling Close method.
  149. func Open(stor storage.Storage, o *opt.Options) (db *DB, err error) {
  150. s, err := newSession(stor, o)
  151. if err != nil {
  152. return
  153. }
  154. defer func() {
  155. if err != nil {
  156. s.close()
  157. s.release()
  158. }
  159. }()
  160. err = s.recover()
  161. if err != nil {
  162. if !os.IsNotExist(err) || s.o.GetErrorIfMissing() {
  163. return
  164. }
  165. err = s.create()
  166. if err != nil {
  167. return
  168. }
  169. } else if s.o.GetErrorIfExist() {
  170. err = os.ErrExist
  171. return
  172. }
  173. return openDB(s)
  174. }
  175. // OpenFile opens or creates a DB for the given path.
  176. // The DB will be created if not exist, unless ErrorIfMissing is true.
  177. // Also, if ErrorIfExist is true and the DB exist OpenFile will returns
  178. // os.ErrExist error.
  179. //
  180. // OpenFile uses standard file-system backed storage implementation as
  181. // described in the leveldb/storage package.
  182. //
  183. // OpenFile will return an error with type of ErrCorrupted if corruption
  184. // detected in the DB. Use errors.IsCorrupted to test whether an error is
  185. // due to corruption. Corrupted DB can be recovered with Recover function.
  186. //
  187. // The returned DB instance is safe for concurrent use.
  188. // The DB must be closed after use, by calling Close method.
  189. func OpenFile(path string, o *opt.Options) (db *DB, err error) {
  190. stor, err := storage.OpenFile(path, o.GetReadOnly())
  191. if err != nil {
  192. return
  193. }
  194. db, err = Open(stor, o)
  195. if err != nil {
  196. stor.Close()
  197. } else {
  198. db.closer = stor
  199. }
  200. return
  201. }
  202. // Recover recovers and opens a DB with missing or corrupted manifest files
  203. // for the given storage. It will ignore any manifest files, valid or not.
  204. // The DB must already exist or it will returns an error.
  205. // Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
  206. //
  207. // The returned DB instance is safe for concurrent use.
  208. // The DB must be closed after use, by calling Close method.
  209. func Recover(stor storage.Storage, o *opt.Options) (db *DB, err error) {
  210. s, err := newSession(stor, o)
  211. if err != nil {
  212. return
  213. }
  214. defer func() {
  215. if err != nil {
  216. s.close()
  217. s.release()
  218. }
  219. }()
  220. err = recoverTable(s, o)
  221. if err != nil {
  222. return
  223. }
  224. return openDB(s)
  225. }
  226. // RecoverFile recovers and opens a DB with missing or corrupted manifest files
  227. // for the given path. It will ignore any manifest files, valid or not.
  228. // The DB must already exist or it will returns an error.
  229. // Also, Recover will ignore ErrorIfMissing and ErrorIfExist options.
  230. //
  231. // RecoverFile uses standard file-system backed storage implementation as described
  232. // in the leveldb/storage package.
  233. //
  234. // The returned DB instance is safe for concurrent use.
  235. // The DB must be closed after use, by calling Close method.
  236. func RecoverFile(path string, o *opt.Options) (db *DB, err error) {
  237. stor, err := storage.OpenFile(path, false)
  238. if err != nil {
  239. return
  240. }
  241. db, err = Recover(stor, o)
  242. if err != nil {
  243. stor.Close()
  244. } else {
  245. db.closer = stor
  246. }
  247. return
  248. }
// recoverTable rebuilds the manifest from the table files alone: every
// table is scanned; corrupted tables are rebuilt in place or dropped
// (depending on opt.StrictRecovery), survivors are registered at level 0,
// and finally a fresh manifest is created and committed.
func recoverTable(s *session, o *opt.Options) error {
	o = dupOptions(o)
	// Mask StrictReader, lets StrictRecovery doing its job.
	o.Strict &= ^opt.StrictReader

	// Get all tables and sort it by file number.
	fds, err := s.stor.List(storage.TypeTable)
	if err != nil {
		return err
	}
	sortFds(fds)

	var (
		maxSeq                                                            uint64
		recoveredKey, goodKey, corruptedKey, corruptedBlock, droppedTable int

		// We will drop corrupted table.
		strict = o.GetStrict(opt.StrictRecovery)
		noSync = o.GetNoSync()

		rec   = &sessionRecord{}
		bpool = util.NewBufferPool(o.GetBlockSize() + 5)
	)
	// buildTable copies the valid entries of iter into a fresh temporary
	// table file; on failure the temporary file is removed and a zero
	// FileDesc is returned.
	buildTable := func(iter iterator.Iterator) (tmpFd storage.FileDesc, size int64, err error) {
		tmpFd = s.newTemp()
		writer, err := s.stor.Create(tmpFd)
		if err != nil {
			return
		}
		defer func() {
			writer.Close()
			if err != nil {
				s.stor.Remove(tmpFd)
				tmpFd = storage.FileDesc{}
			}
		}()

		// Copy entries.
		tw := table.NewWriter(writer, o)
		for iter.Next() {
			key := iter.Key()
			if validInternalKey(key) {
				err = tw.Append(key, iter.Value())
				if err != nil {
					return
				}
			}
		}
		// Corruption while reading the source table is tolerated here;
		// any other iterator error aborts the rebuild.
		err = iter.Error()
		if err != nil && !errors.IsCorrupted(err) {
			return
		}
		err = tw.Close()
		if err != nil {
			return
		}
		if !noSync {
			err = writer.Sync()
			if err != nil {
				return
			}
		}
		size = int64(tw.BytesLen())
		return
	}
	// recoverTable processes a single table file: it tallies good and
	// corrupted keys, tracks the maximum sequence number and key range,
	// then keeps, rebuilds, or drops the table based on the tallies.
	recoverTable := func(fd storage.FileDesc) error {
		s.logf("table@recovery recovering @%d", fd.Num)
		reader, err := s.stor.Open(fd)
		if err != nil {
			return err
		}
		var closed bool
		defer func() {
			if !closed {
				reader.Close()
			}
		}()

		// Get file size.
		size, err := reader.Seek(0, 2)
		if err != nil {
			return err
		}

		var (
			tSeq                                     uint64
			tgoodKey, tcorruptedKey, tcorruptedBlock int
			imin, imax                               []byte
		)
		tr, err := table.NewReader(reader, size, fd, nil, bpool, o)
		if err != nil {
			return err
		}
		iter := tr.NewIterator(nil, nil)
		if itererr, ok := iter.(iterator.ErrorCallbackSetter); ok {
			// Count block-level corruption as it is encountered.
			itererr.SetErrorCallback(func(err error) {
				if errors.IsCorrupted(err) {
					s.logf("table@recovery block corruption @%d %q", fd.Num, err)
					tcorruptedBlock++
				}
			})
		}

		// Scan the table.
		for iter.Next() {
			key := iter.Key()
			_, seq, _, kerr := parseInternalKey(key)
			if kerr != nil {
				tcorruptedKey++
				continue
			}
			tgoodKey++
			if seq > tSeq {
				tSeq = seq
			}
			// Track the smallest and largest internal keys seen.
			if imin == nil {
				imin = append([]byte{}, key...)
			}
			imax = append(imax[:0], key...)
		}
		if err := iter.Error(); err != nil && !errors.IsCorrupted(err) {
			iter.Release()
			return err
		}
		iter.Release()

		goodKey += tgoodKey
		corruptedKey += tcorruptedKey
		corruptedBlock += tcorruptedBlock

		// In strict mode any corruption drops the whole table.
		if strict && (tcorruptedKey > 0 || tcorruptedBlock > 0) {
			droppedTable++
			s.logf("table@recovery dropped @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
			return nil
		}

		if tgoodKey > 0 {
			if tcorruptedKey > 0 || tcorruptedBlock > 0 {
				// Rebuild the table.
				s.logf("table@recovery rebuilding @%d", fd.Num)
				iter := tr.NewIterator(nil, nil)
				tmpFd, newSize, err := buildTable(iter)
				iter.Release()
				if err != nil {
					return err
				}
				closed = true
				reader.Close()
				if err := s.stor.Rename(tmpFd, fd); err != nil {
					return err
				}
				size = newSize
			}
			if tSeq > maxSeq {
				maxSeq = tSeq
			}
			recoveredKey += tgoodKey
			// Add table to level 0.
			rec.addTable(0, fd.Num, size, imin, imax)
			s.logf("table@recovery recovered @%d Gk·%d Ck·%d Cb·%d S·%d Q·%d", fd.Num, tgoodKey, tcorruptedKey, tcorruptedBlock, size, tSeq)
		} else {
			droppedTable++
			s.logf("table@recovery unrecoverable @%d Ck·%d Cb·%d S·%d", fd.Num, tcorruptedKey, tcorruptedBlock, size)
		}

		return nil
	}

	// Recover all tables.
	if len(fds) > 0 {
		s.logf("table@recovery F·%d", len(fds))

		// Mark file number as used.
		s.markFileNum(fds[len(fds)-1].Num)

		for _, fd := range fds {
			if err := recoverTable(fd); err != nil {
				return err
			}
		}

		s.logf("table@recovery recovered F·%d N·%d Gk·%d Ck·%d Q·%d", len(fds), recoveredKey, goodKey, corruptedKey, maxSeq)
	}

	// Set sequence number.
	rec.setSeqNum(maxSeq)

	// Create new manifest.
	if err := s.create(); err != nil {
		return err
	}

	// Commit.
	return s.commit(rec)
}
// recoverJournal replays every journal at or after the current journal
// number (plus the previous journal, if recorded) into a memdb, flushing
// to level-0 tables whenever the write buffer fills. A journal is only
// removed after the following journal's replay has begun and its memdb
// has been flushed and committed, so a crash mid-recovery cannot lose
// acknowledged writes. Finally a fresh journal is created and committed.
func (db *DB) recoverJournal() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		ofd storage.FileDesc // Obsolete file.
		rec = &sessionRecord{}
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery F·%d", len(fds))

		// Mark file number as used.
		db.s.markFileNum(fds[len(fds)-1].Num)

		var (
			// Options.
			strict      = db.s.o.GetStrict(opt.StrictJournal)
			checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
			writeBuffer = db.s.o.GetWriteBuffer()

			jr       *journal.Reader
			mdb      = memdb.New(db.s.icmp, writeBuffer)
			buf      = &util.Buffer{}
			batchSeq uint64
			batchLen int
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Flush memdb and remove obsolete journal file. This commits
			// the previous journal's contents before its file is deleted.
			if !ofd.Zero() {
				if mdb.Len() > 0 {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}
				}

				rec.setJournalNum(fd.Num)
				rec.setSeqNum(db.seq)
				if err := db.s.commit(rec); err != nil {
					fr.Close()
					return err
				}
				rec.resetAddedTables()

				db.s.stor.Remove(ofd)
				ofd = storage.FileDesc{}
			}

			// Replay journal to memdb.
			mdb.Reset()
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
				if err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batchSeq + uint64(batchLen)

				// Flush it if large enough.
				if mdb.Size() >= writeBuffer {
					if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
						fr.Close()
						return err
					}

					mdb.Reset()
				}
			}

			fr.Close()
			ofd = fd
		}

		// Flush the last memdb.
		if mdb.Len() > 0 {
			if _, err := db.s.flushMemdb(rec, mdb, 0); err != nil {
				return err
			}
		}
	}

	// Create a new journal.
	if _, err := db.newMem(0); err != nil {
		return err
	}

	// Commit.
	rec.setJournalNum(db.journalFd.Num)
	rec.setSeqNum(db.seq)
	if err := db.s.commit(rec); err != nil {
		// Close journal on error.
		if db.journal != nil {
			db.journal.Close()
			db.journalWriter.Close()
		}
		return err
	}

	// Remove the last obsolete journal file.
	if !ofd.Zero() {
		db.s.stor.Remove(ofd)
	}

	return nil
}
// recoverJournalRO replays the relevant journals into a single in-memory
// memdb without writing anything back to storage — no flushes, commits,
// or file removals. Used when the DB is opened in read-only mode.
func (db *DB) recoverJournalRO() error {
	// Get all journals and sort it by file number.
	rawFds, err := db.s.stor.List(storage.TypeJournal)
	if err != nil {
		return err
	}
	sortFds(rawFds)

	// Journals that will be recovered.
	var fds []storage.FileDesc
	for _, fd := range rawFds {
		if fd.Num >= db.s.stJournalNum || fd.Num == db.s.stPrevJournalNum {
			fds = append(fds, fd)
		}
	}

	var (
		// Options.
		strict      = db.s.o.GetStrict(opt.StrictJournal)
		checksum    = db.s.o.GetStrict(opt.StrictJournalChecksum)
		writeBuffer = db.s.o.GetWriteBuffer()

		// All journals are replayed into this one memdb.
		mdb = memdb.New(db.s.icmp, writeBuffer)
	)

	// Recover journals.
	if len(fds) > 0 {
		db.logf("journal@recovery RO·Mode F·%d", len(fds))

		var (
			jr       *journal.Reader
			buf      = &util.Buffer{}
			batchSeq uint64
			batchLen int
		)

		for _, fd := range fds {
			db.logf("journal@recovery recovering @%d", fd.Num)

			fr, err := db.s.stor.Open(fd)
			if err != nil {
				return err
			}

			// Create or reset journal reader instance.
			if jr == nil {
				jr = journal.NewReader(fr, dropper{db.s, fd}, strict, checksum)
			} else {
				jr.Reset(fr, dropper{db.s, fd}, strict, checksum)
			}

			// Replay journal to memdb.
			for {
				r, err := jr.Next()
				if err != nil {
					if err == io.EOF {
						break
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				buf.Reset()
				if _, err := buf.ReadFrom(r); err != nil {
					if err == io.ErrUnexpectedEOF {
						// This is error returned due to corruption, with strict == false.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}
				batchSeq, batchLen, err = decodeBatchToMem(buf.Bytes(), db.seq, mdb)
				if err != nil {
					if !strict && errors.IsCorrupted(err) {
						db.s.logf("journal error: %v (skipped)", err)
						// We won't apply sequence number as it might be corrupted.
						continue
					}

					fr.Close()
					return errors.SetFd(err, fd)
				}

				// Save sequence number.
				db.seq = batchSeq + uint64(batchLen)
			}

			fr.Close()
		}
	}

	// Set memDB.
	db.mem = &memDB{db: db, DB: mdb, ref: 1}

	return nil
}
  642. func memGet(mdb *memdb.DB, ikey internalKey, icmp *iComparer) (ok bool, mv []byte, err error) {
  643. mk, mv, err := mdb.Find(ikey)
  644. if err == nil {
  645. ukey, _, kt, kerr := parseInternalKey(mk)
  646. if kerr != nil {
  647. // Shouldn't have had happen.
  648. panic(kerr)
  649. }
  650. if icmp.uCompare(ukey, ikey.ukey()) == 0 {
  651. if kt == keyTypeDel {
  652. return true, nil, ErrNotFound
  653. }
  654. return true, mv, nil
  655. }
  656. } else if err != ErrNotFound {
  657. return true, nil, err
  658. }
  659. return
  660. }
// get looks key up at sequence number seq: first the optional auxiliary
// memdb/tables (auxm, auxt — used by transactions), then the effective
// and frozen memdbs, and finally the current version's tables.
// Values found in a memdb are returned as fresh copies.
func (db *DB) get(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (value []byte, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, mv, me := memGet(auxm, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		// NOTE(review): defer inside the loop appears intentional — the loop
		// runs at most twice and both memdb references are held until return.
		defer m.decref()

		if ok, mv, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return append([]byte{}, mv...), me
		}
	}

	v := db.s.version()
	value, cSched, err := v.get(auxt, ikey, ro, false)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	return
}
  687. func nilIfNotFound(err error) error {
  688. if err == ErrNotFound {
  689. return nil
  690. }
  691. return err
  692. }
// has reports whether key exists (and is not deleted) at sequence number
// seq. The lookup order mirrors get: auxiliary memdb, effective and
// frozen memdbs, then the current version's tables.
func (db *DB) has(auxm *memdb.DB, auxt tFiles, key []byte, seq uint64, ro *opt.ReadOptions) (ret bool, err error) {
	ikey := makeInternalKey(nil, key, seq, keyTypeSeek)

	if auxm != nil {
		if ok, _, me := memGet(auxm, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	em, fm := db.getMems()
	for _, m := range [...]*memDB{em, fm} {
		if m == nil {
			continue
		}
		// NOTE(review): defer inside the loop appears intentional — the loop
		// runs at most twice and both memdb references are held until return.
		defer m.decref()

		if ok, _, me := memGet(m.DB, ikey, db.s.icmp); ok {
			return me == nil, nilIfNotFound(me)
		}
	}

	v := db.s.version()
	_, cSched, err := v.get(auxt, ikey, ro, true)
	v.release()
	if cSched {
		// Trigger table compaction.
		db.compTrigger(db.tcompCmdC)
	}
	if err == nil {
		ret = true
	} else if err == ErrNotFound {
		// Absence is not an error for has.
		err = nil
	}
	return
}
  724. // Get gets the value for the given key. It returns ErrNotFound if the
  725. // DB does not contains the key.
  726. //
  727. // The returned slice is its own copy, it is safe to modify the contents
  728. // of the returned slice.
  729. // It is safe to modify the contents of the argument after Get returns.
  730. func (db *DB) Get(key []byte, ro *opt.ReadOptions) (value []byte, err error) {
  731. err = db.ok()
  732. if err != nil {
  733. return
  734. }
  735. se := db.acquireSnapshot()
  736. defer db.releaseSnapshot(se)
  737. return db.get(nil, nil, key, se.seq, ro)
  738. }
  739. // Has returns true if the DB does contains the given key.
  740. //
  741. // It is safe to modify the contents of the argument after Has returns.
  742. func (db *DB) Has(key []byte, ro *opt.ReadOptions) (ret bool, err error) {
  743. err = db.ok()
  744. if err != nil {
  745. return
  746. }
  747. se := db.acquireSnapshot()
  748. defer db.releaseSnapshot(se)
  749. return db.has(nil, nil, key, se.seq, ro)
  750. }
  751. // NewIterator returns an iterator for the latest snapshot of the
  752. // underlying DB.
  753. // The returned iterator is not safe for concurrent use, but it is safe to use
  754. // multiple iterators concurrently, with each in a dedicated goroutine.
  755. // It is also safe to use an iterator concurrently with modifying its
  756. // underlying DB. The resultant key/value pairs are guaranteed to be
  757. // consistent.
  758. //
  759. // Slice allows slicing the iterator to only contains keys in the given
  760. // range. A nil Range.Start is treated as a key before all keys in the
  761. // DB. And a nil Range.Limit is treated as a key after all keys in
  762. // the DB.
  763. //
  764. // The iterator must be released after use, by calling Release method.
  765. //
  766. // Also read Iterator documentation of the leveldb/iterator package.
  767. func (db *DB) NewIterator(slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
  768. if err := db.ok(); err != nil {
  769. return iterator.NewEmptyIterator(err)
  770. }
  771. se := db.acquireSnapshot()
  772. defer db.releaseSnapshot(se)
  773. // Iterator holds 'version' lock, 'version' is immutable so snapshot
  774. // can be released after iterator created.
  775. return db.newIterator(nil, nil, se.seq, slice, ro)
  776. }
  777. // GetSnapshot returns a latest snapshot of the underlying DB. A snapshot
  778. // is a frozen snapshot of a DB state at a particular point in time. The
  779. // content of snapshot are guaranteed to be consistent.
  780. //
  781. // The snapshot must be released after use, by calling Release method.
  782. func (db *DB) GetSnapshot() (*Snapshot, error) {
  783. if err := db.ok(); err != nil {
  784. return nil, err
  785. }
  786. return db.newSnapshot(), nil
  787. }
// GetProperty returns value of the given property name.
//
// Property names:
// leveldb.num-files-at-level{n}
// Returns the number of files at level 'n'.
// leveldb.stats
// Returns statistics of the underlying DB.
// leveldb.writedelay
// Returns cumulative write delay caused by compaction.
// leveldb.sstables
// Returns sstables list for each level.
// leveldb.blockpool
// Returns block pool stats.
// leveldb.cachedblock
// Returns size of cached block.
// leveldb.openedtables
// Returns number of opened tables.
// leveldb.alivesnaps
// Returns number of alive snapshots.
// leveldb.aliveiters
// Returns number of alive iterators.
//
// Returns ErrNotFound for an unrecognized property name.
func (db *DB) GetProperty(name string) (value string, err error) {
	err = db.ok()
	if err != nil {
		return
	}

	const prefix = "leveldb."
	if !strings.HasPrefix(name, prefix) {
		return "", ErrNotFound
	}
	p := name[len(prefix):]

	v := db.s.version()
	defer v.release()

	numFilesPrefix := "num-files-at-level"
	switch {
	case strings.HasPrefix(p, numFilesPrefix):
		var level uint
		var rest string
		// Require exactly one parsed item (the level); trailing text makes
		// %s match and n become 2, which is rejected below.
		n, _ := fmt.Sscanf(p[len(numFilesPrefix):], "%d%s", &level, &rest)
		if n != 1 {
			err = ErrNotFound
		} else {
			value = fmt.Sprint(v.tLen(int(level)))
		}
	case p == "stats":
		value = "Compactions\n" +
			" Level | Tables | Size(MB) | Time(sec) | Read(MB) | Write(MB)\n" +
			"-------+------------+---------------+---------------+---------------+---------------\n"
		for level, tables := range v.levels {
			duration, read, write := db.compStats.getStat(level)
			// Skip levels with neither tables nor recorded compaction time.
			if len(tables) == 0 && duration == 0 {
				continue
			}
			value += fmt.Sprintf(" %3d | %10d | %13.5f | %13.5f | %13.5f | %13.5f\n",
				level, len(tables), float64(tables.size())/1048576.0, duration.Seconds(),
				float64(read)/1048576.0, float64(write)/1048576.0)
		}
	case p == "writedelay":
		writeDelayN, writeDelay := atomic.LoadInt32(&db.cWriteDelayN), time.Duration(atomic.LoadInt64(&db.cWriteDelay))
		value = fmt.Sprintf("DelayN:%d Delay:%s", writeDelayN, writeDelay)
	case p == "sstables":
		for level, tables := range v.levels {
			value += fmt.Sprintf("--- level %d ---\n", level)
			for _, t := range tables {
				value += fmt.Sprintf("%d:%d[%q .. %q]\n", t.fd.Num, t.size, t.imin, t.imax)
			}
		}
	case p == "blockpool":
		value = fmt.Sprintf("%v", db.s.tops.bpool)
	case p == "cachedblock":
		if db.s.tops.bcache != nil {
			value = fmt.Sprintf("%d", db.s.tops.bcache.Size())
		} else {
			value = "<nil>"
		}
	case p == "openedtables":
		value = fmt.Sprintf("%d", db.s.tops.cache.Size())
	case p == "alivesnaps":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveSnaps))
	case p == "aliveiters":
		value = fmt.Sprintf("%d", atomic.LoadInt32(&db.aliveIters))
	default:
		err = ErrNotFound
	}

	return
}
  874. // SizeOf calculates approximate sizes of the given key ranges.
  875. // The length of the returned sizes are equal with the length of the given
  876. // ranges. The returned sizes measure storage space usage, so if the user
  877. // data compresses by a factor of ten, the returned sizes will be one-tenth
  878. // the size of the corresponding user data size.
  879. // The results may not include the sizes of recently written data.
  880. func (db *DB) SizeOf(ranges []util.Range) (Sizes, error) {
  881. if err := db.ok(); err != nil {
  882. return nil, err
  883. }
  884. v := db.s.version()
  885. defer v.release()
  886. sizes := make(Sizes, 0, len(ranges))
  887. for _, r := range ranges {
  888. imin := makeInternalKey(nil, r.Start, keyMaxSeq, keyTypeSeek)
  889. imax := makeInternalKey(nil, r.Limit, keyMaxSeq, keyTypeSeek)
  890. start, err := v.offsetOf(imin)
  891. if err != nil {
  892. return nil, err
  893. }
  894. limit, err := v.offsetOf(imax)
  895. if err != nil {
  896. return nil, err
  897. }
  898. var size int64
  899. if limit >= start {
  900. size = limit - start
  901. }
  902. sizes = append(sizes, size)
  903. }
  904. return sizes, nil
  905. }
// Close closes the DB. This will also releases any outstanding snapshot,
// abort any in-flight compaction and discard open transaction.
//
// It is not safe to close a DB until all outstanding iterators are released.
// It is valid to call Close multiple times. Other methods should not be
// called after the DB has been closed.
func (db *DB) Close() error {
	// setClosed flips the closed flag exactly once; later calls get ErrClosed.
	if !db.setClosed() {
		return ErrClosed
	}

	start := time.Now()
	db.log("db@close closing")

	// Clear the finalizer (set in openDB).
	runtime.SetFinalizer(db, nil)

	// Get compaction error, if any is pending; ErrReadOnly is expected in
	// read-only mode and not reported.
	var err error
	select {
	case err = <-db.compErrC:
		if err == ErrReadOnly {
			err = nil
		}
	default:
	}

	// Signal all goroutines.
	close(db.closeC)

	// Discard open transaction.
	if db.tr != nil {
		db.tr.Discard()
	}

	// Acquire writer lock.
	db.writeLockC <- struct{}{}

	// Wait for all goroutines to exit.
	db.closeW.Wait()

	// Closes journal.
	if db.journal != nil {
		db.journal.Close()
		db.journalWriter.Close()
		db.journal = nil
		db.journalWriter = nil
	}

	if db.writeDelayN > 0 {
		db.logf("db@write was delayed N·%d T·%v", db.writeDelayN, db.writeDelay)
	}

	// Close session.
	db.s.close()
	db.logf("db@close done T·%v", time.Since(start))
	db.s.release()

	if db.closer != nil {
		// Report the closer's error only if nothing failed earlier.
		if err1 := db.closer.Close(); err == nil {
			err = err1
		}
		db.closer = nil
	}

	// Clear memdbs.
	db.clearMems()

	return err
}