You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

187 lines
4.9 KiB

  1. // Copyright 2016 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // A faster implementation of filepath.Walk.
  5. //
  6. // filepath.Walk's design necessarily calls os.Lstat on each file,
  7. // even if the caller needs less info. And goimports only needs to know
  8. // the type of each file. The kernel interface provides the type in
  9. // the Readdir call but the standard library ignored it.
  10. // fastwalk_unix.go contains a fork of the syscall routines.
  11. //
  12. // See golang.org/issue/16399
  13. package imports
  14. import (
  15. "errors"
  16. "os"
  17. "path/filepath"
  18. "runtime"
  19. "sync"
  20. )
  var (
  	// traverseLink is the sentinel error a walkFn hands back to fastWalk to
  	// request that a symlink be descended into as though it were a
  	// directory. It is the counterpart of filepath.SkipDir.
  	traverseLink = errors.New("traverse symlink, assuming target is a directory")
  )
  23. // fastWalk walks the file tree rooted at root, calling walkFn for
  24. // each file or directory in the tree, including root.
  25. //
  26. // If fastWalk returns filepath.SkipDir, the directory is skipped.
  27. //
  28. // Unlike filepath.Walk:
  29. // * file stat calls must be done by the user.
  30. // The only provided metadata is the file type, which does not include
  31. // any permission bits.
  32. // * multiple goroutines stat the filesystem concurrently. The provided
  33. // walkFn must be safe for concurrent use.
  34. // * fastWalk can follow symlinks if walkFn returns the traverseLink
  35. // sentinel error. It is the walkFn's responsibility to prevent
  36. // fastWalk from going into symlink cycles.
  37. func fastWalk(root string, walkFn func(path string, typ os.FileMode) error) error {
  38. // TODO(bradfitz): make numWorkers configurable? We used a
  39. // minimum of 4 to give the kernel more info about multiple
  40. // things we want, in hopes its I/O scheduling can take
  41. // advantage of that. Hopefully most are in cache. Maybe 4 is
  42. // even too low of a minimum. Profile more.
  43. numWorkers := 4
  44. if n := runtime.NumCPU(); n > numWorkers {
  45. numWorkers = n
  46. }
  47. // Make sure to wait for all workers to finish, otherwise
  48. // walkFn could still be called after returning. This Wait call
  49. // runs after close(e.donec) below.
  50. var wg sync.WaitGroup
  51. defer wg.Wait()
  52. w := &walker{
  53. fn: walkFn,
  54. enqueuec: make(chan walkItem, numWorkers), // buffered for performance
  55. workc: make(chan walkItem, numWorkers), // buffered for performance
  56. donec: make(chan struct{}),
  57. // buffered for correctness & not leaking goroutines:
  58. resc: make(chan error, numWorkers),
  59. }
  60. defer close(w.donec)
  61. for i := 0; i < numWorkers; i++ {
  62. wg.Add(1)
  63. go w.doWork(&wg)
  64. }
  65. todo := []walkItem{{dir: root}}
  66. out := 0
  67. for {
  68. workc := w.workc
  69. var workItem walkItem
  70. if len(todo) == 0 {
  71. workc = nil
  72. } else {
  73. workItem = todo[len(todo)-1]
  74. }
  75. select {
  76. case workc <- workItem:
  77. todo = todo[:len(todo)-1]
  78. out++
  79. case it := <-w.enqueuec:
  80. todo = append(todo, it)
  81. case err := <-w.resc:
  82. out--
  83. if err != nil {
  84. return err
  85. }
  86. if out == 0 && len(todo) == 0 {
  87. // It's safe to quit here, as long as the buffered
  88. // enqueue channel isn't also readable, which might
  89. // happen if the worker sends both another unit of
  90. // work and its result before the other select was
  91. // scheduled and both w.resc and w.enqueuec were
  92. // readable.
  93. select {
  94. case it := <-w.enqueuec:
  95. todo = append(todo, it)
  96. default:
  97. return nil
  98. }
  99. }
  100. }
  101. }
  102. }
  103. // doWork reads directories as instructed (via workc) and runs the
  104. // user's callback function.
  105. func (w *walker) doWork(wg *sync.WaitGroup) {
  106. defer wg.Done()
  107. for {
  108. select {
  109. case <-w.donec:
  110. return
  111. case it := <-w.workc:
  112. select {
  113. case <-w.donec:
  114. return
  115. case w.resc <- w.walk(it.dir, !it.callbackDone):
  116. }
  117. }
  118. }
  119. }
  120. type walker struct {
  121. fn func(path string, typ os.FileMode) error
  122. donec chan struct{} // closed on fastWalk's return
  123. workc chan walkItem // to workers
  124. enqueuec chan walkItem // from workers
  125. resc chan error // from workers
  126. }
  // walkItem is one unit of work: a directory that still has to be read.
  type walkItem struct {
  	// dir is the path of the directory to read.
  	dir string
  	// callbackDone reports that the user callback has already been invoked
  	// for dir and must not be invoked a second time.
  	callbackDone bool
  }
  131. func (w *walker) enqueue(it walkItem) {
  132. select {
  133. case w.enqueuec <- it:
  134. case <-w.donec:
  135. }
  136. }
  137. func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error {
  138. joined := dirName + string(os.PathSeparator) + baseName
  139. if typ == os.ModeDir {
  140. w.enqueue(walkItem{dir: joined})
  141. return nil
  142. }
  143. err := w.fn(joined, typ)
  144. if typ == os.ModeSymlink {
  145. if err == traverseLink {
  146. // Set callbackDone so we don't call it twice for both the
  147. // symlink-as-symlink and the symlink-as-directory later:
  148. w.enqueue(walkItem{dir: joined, callbackDone: true})
  149. return nil
  150. }
  151. if err == filepath.SkipDir {
  152. // Permit SkipDir on symlinks too.
  153. return nil
  154. }
  155. }
  156. return err
  157. }
  158. func (w *walker) walk(root string, runUserCallback bool) error {
  159. if runUserCallback {
  160. err := w.fn(root, os.ModeDir)
  161. if err == filepath.SkipDir {
  162. return nil
  163. }
  164. if err != nil {
  165. return err
  166. }
  167. }
  168. return readDir(root, w.onDirEnt)
  169. }