|
|
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// A faster implementation of filepath.Walk.
//
// filepath.Walk's design necessarily calls os.Lstat on each file,
// even if the caller needs less info. And goimports only needs to know
// the type of each file. The kernel interface provides the type in
// the Readdir call but the standard library ignored it.
// fastwalk_unix.go contains a fork of the syscall routines.
//
// See golang.org/issue/16399
package imports
import ( "errors" "os" "path/filepath" "runtime" "sync" )
// traverseLink is a sentinel error for fastWalk, similar to filepath.SkipDir.
// A walkFn returns it for a symlink to ask fastWalk to read the link's
// target as though it were a directory.
var traverseLink = errors.New("traverse symlink, assuming target is a directory")
// fastWalk walks the file tree rooted at root, calling walkFn for
// each file or directory in the tree, including root.
//
// If fastWalk returns filepath.SkipDir, the directory is skipped.
//
// Unlike filepath.Walk:
// * file stat calls must be done by the user.
// The only provided metadata is the file type, which does not include
// any permission bits.
// * multiple goroutines stat the filesystem concurrently. The provided
// walkFn must be safe for concurrent use.
// * fastWalk can follow symlinks if walkFn returns the traverseLink
// sentinel error. It is the walkFn's responsibility to prevent
// fastWalk from going into symlink cycles.
func fastWalk(root string, walkFn func(path string, typ os.FileMode) error) error { // TODO(bradfitz): make numWorkers configurable? We used a
// minimum of 4 to give the kernel more info about multiple
// things we want, in hopes its I/O scheduling can take
// advantage of that. Hopefully most are in cache. Maybe 4 is
// even too low of a minimum. Profile more.
numWorkers := 4 if n := runtime.NumCPU(); n > numWorkers { numWorkers = n }
// Make sure to wait for all workers to finish, otherwise
// walkFn could still be called after returning. This Wait call
// runs after close(e.donec) below.
var wg sync.WaitGroup defer wg.Wait()
w := &walker{ fn: walkFn, enqueuec: make(chan walkItem, numWorkers), // buffered for performance
workc: make(chan walkItem, numWorkers), // buffered for performance
donec: make(chan struct{}),
// buffered for correctness & not leaking goroutines:
resc: make(chan error, numWorkers), } defer close(w.donec)
for i := 0; i < numWorkers; i++ { wg.Add(1) go w.doWork(&wg) } todo := []walkItem{{dir: root}} out := 0 for { workc := w.workc var workItem walkItem if len(todo) == 0 { workc = nil } else { workItem = todo[len(todo)-1] } select { case workc <- workItem: todo = todo[:len(todo)-1] out++ case it := <-w.enqueuec: todo = append(todo, it) case err := <-w.resc: out-- if err != nil { return err } if out == 0 && len(todo) == 0 { // It's safe to quit here, as long as the buffered
// enqueue channel isn't also readable, which might
// happen if the worker sends both another unit of
// work and its result before the other select was
// scheduled and both w.resc and w.enqueuec were
// readable.
select { case it := <-w.enqueuec: todo = append(todo, it) default: return nil } } } } }
// doWork reads directories as instructed (via workc) and runs the
// user's callback function.
func (w *walker) doWork(wg *sync.WaitGroup) { defer wg.Done() for { select { case <-w.donec: return case it := <-w.workc: select { case <-w.donec: return case w.resc <- w.walk(it.dir, !it.callbackDone): } } } }
// walker holds the callback and the channels that connect fastWalk's
// coordinating loop with its worker goroutines.
type walker struct {
	fn func(path string, typ os.FileMode) error // the walkFn passed to fastWalk

	donec    chan struct{} // closed on fastWalk's return
	workc    chan walkItem // to workers
	enqueuec chan walkItem // from workers
	resc     chan error    // from workers
}
// walkItem is one unit of work passed between fastWalk and its workers:
// a directory path to be walked.
type walkItem struct {
	dir          string // path handed to walker.walk
	callbackDone bool   // callback already called; don't do it again
}
// enqueue hands it back to the coordinating loop via w.enqueuec. If
// w.donec is closed before the item can be delivered, the item is
// dropped and enqueue returns.
func (w *walker) enqueue(it walkItem) {
	select {
	case <-w.donec:
		// The walk is over; nobody will pick the item up.
	case w.enqueuec <- it:
	}
}
func (w *walker) onDirEnt(dirName, baseName string, typ os.FileMode) error { joined := dirName + string(os.PathSeparator) + baseName if typ == os.ModeDir { w.enqueue(walkItem{dir: joined}) return nil }
err := w.fn(joined, typ) if typ == os.ModeSymlink { if err == traverseLink { // Set callbackDone so we don't call it twice for both the
// symlink-as-symlink and the symlink-as-directory later:
w.enqueue(walkItem{dir: joined, callbackDone: true}) return nil } if err == filepath.SkipDir { // Permit SkipDir on symlinks too.
return nil } } return err }
func (w *walker) walk(root string, runUserCallback bool) error { if runUserCallback { err := w.fn(root, os.ModeDir) if err == filepath.SkipDir { return nil } if err != nil { return err } }
return readDir(root, w.onDirEnt) }
|