// Copyright 2009 The Go Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
// This file contains the infrastructure to create an
|
|
// identifier and full-text index for a set of Go files.
|
|
//
|
|
// Algorithm for identifier index:
|
|
// - traverse all .go files of the file tree specified by root
|
|
// - for each identifier (word) encountered, collect all occurrences (spots)
|
|
// into a list; this produces a list of spots for each word
|
|
// - reduce the lists: from a list of spots to a list of FileRuns,
|
|
// and from a list of FileRuns into a list of PakRuns
|
|
// - make a HitList from the PakRuns
|
|
//
|
|
// Details:
|
|
// - keep two lists per word: one containing package-level declarations
|
|
// that have snippets, and one containing all other spots
|
|
// - keep the snippets in a separate table indexed by snippet index
|
|
// and store the snippet index in place of the line number in a SpotInfo
|
|
// (the line number for spots with snippets is stored in the snippet)
|
|
// - at the end, create lists of alternative spellings for a given
|
|
// word
|
|
//
|
|
// Algorithm for full text index:
|
|
// - concatenate all source code in a byte buffer (in memory)
|
|
// - add the files to a file set in lockstep as they are added to the byte
|
|
// buffer such that a byte buffer offset corresponds to the Pos value for
|
|
// that file location
|
|
// - create a suffix array from the concatenated sources
|
|
//
|
|
// String lookup in full text index:
|
|
// - use the suffix array to lookup a string's offsets - the offsets
|
|
// correspond to the Pos values relative to the file set
|
|
// - translate the Pos values back into file and line information and
|
|
// sort the result
|
|
|
|
package godoc
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"encoding/gob"
|
|
"errors"
|
|
"fmt"
|
|
"go/ast"
|
|
"go/doc"
|
|
"go/parser"
|
|
"go/token"
|
|
"index/suffixarray"
|
|
"io"
|
|
"log"
|
|
"os"
|
|
pathpkg "path"
|
|
"path/filepath"
|
|
"regexp"
|
|
"runtime"
|
|
"sort"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
"unicode"
|
|
|
|
"golang.org/x/tools/godoc/util"
|
|
"golang.org/x/tools/godoc/vfs"
|
|
)
|
|
|
|
// ----------------------------------------------------------------------------
// interfaceSlice is a helper type for sorting interface
// slices according to some slice-specific sort criteria.

// A comparer reports whether x is ordered strictly before y.
type comparer func(x, y interface{}) bool

// interfaceSlice adapts a []interface{} plus a comparer to sort.Interface.
type interfaceSlice struct {
	slice []interface{}
	less  comparer
}

// ----------------------------------------------------------------------------
// RunList

// A RunList is a list of entries that can be sorted according to some
// criteria. A RunList may be compressed by grouping "runs" of entries
// which are equal (according to the sort critera) into a new RunList of
// runs. For instance, a RunList containing pairs (x, y) may be compressed
// into a RunList containing pair runs (x, {y}) where each run consists of
// a list of y's with the same x.
type RunList []interface{}

// sort sorts h in place using the ordering defined by less.
func (h RunList) sort(less comparer) {
	sort.Sort(&interfaceSlice{h, less})
}

func (p *interfaceSlice) Len() int           { return len(p.slice) }
func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
func (p *interfaceSlice) Swap(i, j int)      { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }

// Compress entries which are the same according to a sort criteria
// (specified by less) into "runs".
// newRun is called once per run with the (sub)slice of equal entries
// and produces the run object collected into the result.
func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
	if len(h) == 0 {
		return nil
	}
	// len(h) > 0

	// create runs of entries with equal values
	h.sort(less)

	// for each run, make a new run object and collect them in a new RunList
	var hh RunList
	i, x := 0, h[0]
	for j, y := range h {
		if less(x, y) {
			// y starts a new run: flush the current run h[i:j]
			hh = append(hh, newRun(h[i:j]))
			i, x = j, h[j] // start a new run
		}
	}
	// add final run, if any
	if i < len(h) {
		hh = append(hh, newRun(h[i:]))
	}

	return hh
}
// ----------------------------------------------------------------------------
// KindRun

// Debugging support. Disable to see multiple entries per line.
const removeDuplicates = true

// A KindRun is a run of SpotInfos of the same kind in a given file.
// The kind (3 bits) is stored in each SpotInfo element; to find the
// kind of a KindRun, look at any of its elements.
type KindRun []SpotInfo

// KindRuns are sorted by line number or index. Since the isIndex bit
// is always the same for all infos in one list we can compare lori's.
func (k KindRun) Len() int           { return len(k) }
func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
func (k KindRun) Swap(i, j int)      { k[i], k[j] = k[j], k[i] }

// FileRun contents are sorted by Kind for the reduction into KindRuns.
func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }

// newKindRun allocates a new KindRun from the SpotInfo run h.
// All infos in h have the same kind (they were grouped by lessKind).
func newKindRun(h RunList) interface{} {
	run := make(KindRun, len(h))
	for i, x := range h {
		run[i] = x.(SpotInfo)
	}

	// Spots were sorted by file and kind to create this run.
	// Within this run, sort them by line number or index.
	sort.Sort(run)

	if removeDuplicates {
		// Since both the lori and kind field must be
		// same for duplicates, and since the isIndex
		// bit is always the same for all infos in one
		// list we can simply compare the entire info.
		k := 0
		prev := SpotInfo(1<<32 - 1) // an unlikely value
		for _, x := range run {
			if x != prev {
				// keep only the first of each group of equal infos
				// (run is sorted, so duplicates are adjacent)
				run[k] = x
				k++
				prev = x
			}
		}
		run = run[0:k]
	}

	return run
}
// ----------------------------------------------------------------------------
// FileRun

// A Pak describes a Go package.
type Pak struct {
	Path string // path of directory containing the package
	Name string // package name as declared by package clause
}

// Paks are sorted by name (primary key) and by import path (secondary key).
func (p *Pak) less(q *Pak) bool {
	if p.Name != q.Name {
		return p.Name < q.Name
	}
	return p.Path < q.Path
}

// A File describes a Go file.
type File struct {
	Name string // directory-local file name
	Pak  *Pak   // the package to which the file belongs
}

// Path returns the (slash-separated) file path of f.
func (f *File) Path() string {
	dir := f.Pak.Path
	return pathpkg.Join(dir, f.Name)
}
// A Spot describes a single occurrence of a word.
type Spot struct {
	File *File
	Info SpotInfo
}

// A FileRun is a list of KindRuns belonging to the same file.
type FileRun struct {
	File   *File
	Groups []KindRun
}

// Spots are sorted by file path for the reduction into FileRuns.
func lessSpot(x, y interface{}) bool {
	fx := x.(Spot).File
	fy := y.(Spot).File
	// same as "return fx.Path() < fy.Path()" but w/o computing the file path first
	px := fx.Pak.Path
	py := fy.Pak.Path
	return px < py || px == py && fx.Name < fy.Name
}

// newFileRun allocates a new FileRun from the Spot run h.
// All spots in h belong to the same file (they were grouped by lessSpot).
func newFileRun(h RunList) interface{} {
	file := h[0].(Spot).File

	// reduce the list of Spots into a list of KindRuns
	h1 := make(RunList, len(h))
	for i, x := range h {
		h1[i] = x.(Spot).Info
	}
	h2 := h1.reduce(lessKind, newKindRun)

	// create the FileRun
	groups := make([]KindRun, len(h2))
	for i, x := range h2 {
		groups[i] = x.(KindRun)
	}
	return &FileRun{file, groups}
}
// ----------------------------------------------------------------------------
// PakRun

// A PakRun describes a run of *FileRuns of a package.
type PakRun struct {
	Pak   *Pak
	Files []*FileRun
}

// Sorting support for files within a PakRun.
func (p *PakRun) Len() int           { return len(p.Files) }
func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
func (p *PakRun) Swap(i, j int)      { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }

// FileRuns are sorted by package for the reduction into PakRuns.
func lessFileRun(x, y interface{}) bool {
	return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
}

// newPakRun allocates a new PakRun from the *FileRun run h.
// All file runs in h belong to the same package (grouped by lessFileRun).
func newPakRun(h RunList) interface{} {
	pak := h[0].(*FileRun).File.Pak
	files := make([]*FileRun, len(h))
	for i, x := range h {
		files[i] = x.(*FileRun)
	}
	run := &PakRun{pak, files}
	sort.Sort(run) // files were sorted by package; sort them by file now
	return run
}
// ----------------------------------------------------------------------------
// HitList

// A HitList describes a list of PakRuns.
type HitList []*PakRun

// PakRuns are sorted by package.
func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }

// reduce transforms a RunList of Spots (h0) into a HitList:
// Spots -> FileRuns -> PakRuns -> HitList, sorted by package.
func reduce(h0 RunList) HitList {
	// reduce a list of Spots into a list of FileRuns
	h1 := h0.reduce(lessSpot, newFileRun)
	// reduce a list of FileRuns into a list of PakRuns
	h2 := h1.reduce(lessFileRun, newPakRun)
	// sort the list of PakRuns by package
	h2.sort(lessPakRun)
	// create a HitList
	h := make(HitList, len(h2))
	for i, p := range h2 {
		h[i] = p.(*PakRun)
	}
	return h
}
// filter returns a new HitList created by filtering
|
|
// all PakRuns from h that have a matching pakname.
|
|
func (h HitList) filter(pakname string) HitList {
|
|
var hh HitList
|
|
for _, p := range h {
|
|
if p.Pak.Name == pakname {
|
|
hh = append(hh, p)
|
|
}
|
|
}
|
|
return hh
|
|
}
|
|
|
|
// ----------------------------------------------------------------------------
// AltWords

// A wordPair associates a word's canonical (all lowercase) spelling with
// one spelling of it as it appeared in the source.
type wordPair struct {
	canon string // canonical word spelling (all lowercase)
	alt   string // alternative spelling
}

// An AltWords describes a list of alternative spellings for a
// canonical (all lowercase) spelling of a word.
type AltWords struct {
	Canon string   // canonical word spelling (all lowercase)
	Alts  []string // alternative spelling for the same word
}

// wordPairs are sorted by their canonical spelling.
func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }

// newAltWords allocates a new AltWords from the *wordPair run h.
// All pairs in h share the same canonical spelling (grouped by lessWordPair).
func newAltWords(h RunList) interface{} {
	canon := h[0].(*wordPair).canon
	alts := make([]string, len(h))
	for i, x := range h {
		alts[i] = x.(*wordPair).alt
	}
	return &AltWords{canon, alts}
}
func (a *AltWords) filter(s string) *AltWords {
|
|
var alts []string
|
|
for _, w := range a.Alts {
|
|
if w != s {
|
|
alts = append(alts, w)
|
|
}
|
|
}
|
|
if len(alts) > 0 {
|
|
return &AltWords{a.Canon, alts}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Ident stores information about external identifiers in order to create
// links to package documentation.
type Ident struct {
	Path    string // e.g. "net/http"
	Package string // e.g. "http"
	Name    string // e.g. "NewRequest"
	Doc     string // e.g. "NewRequest returns a new Request..."
}

// byImportCount sorts the given slice of Idents by the import
// counts of the packages to which they belong.
type byImportCount struct {
	Idents      []Ident
	ImportCount map[string]int
}

func (ic byImportCount) Len() int { return len(ic.Idents) }

// Less orders idents by decreasing import count of their package;
// ties are broken by ascending package path.
func (ic byImportCount) Less(i, j int) bool {
	pi, pj := ic.Idents[i].Path, ic.Idents[j].Path
	ci, cj := ic.ImportCount[pi], ic.ImportCount[pj]
	if ci != cj {
		return ci > cj
	}
	return pi < pj
}

func (ic byImportCount) Swap(i, j int) {
	ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
}

// String renders the idents with their package import counts,
// one per line, for debugging.
func (ic byImportCount) String() string {
	var buf bytes.Buffer
	buf.WriteByte('[')
	for _, id := range ic.Idents {
		fmt.Fprintf(&buf, "\n\t%s, %s (%d)", id.Path, id.Name, ic.ImportCount[id.Path])
	}
	buf.WriteString("\n]")
	return buf.String()
}

// filter creates a new Ident list where the results match the given
// package name.
func (ic byImportCount) filter(pakname string) []Ident {
	if ic.Idents == nil {
		return nil
	}
	var matched []Ident
	for _, id := range ic.Idents {
		if id.Package == pakname {
			matched = append(matched, id)
		}
	}
	return matched
}

// top returns the top n identifiers.
func (ic byImportCount) top(n int) []Ident {
	if n < len(ic.Idents) {
		return ic.Idents[:n]
	}
	return ic.Idents
}
// ----------------------------------------------------------------------------
// Indexer

// An IndexResult holds the two per-word spot lists: package-level
// declarations (which carry snippets) and all other occurrences.
type IndexResult struct {
	Decls  RunList // package-level declarations (with snippets)
	Others RunList // all other occurrences
}

// Statistics provides statistics information for an index.
type Statistics struct {
	Bytes int // total size of indexed source files
	Files int // number of indexed source files
	Lines int // number of lines (all files)
	Words int // number of different identifiers
	Spots int // number of identifier occurrences
}

// An Indexer maintains the data structures and provides the machinery
// for indexing .go files under a file tree. It implements the path.Visitor
// interface for walking file trees, and the ast.Visitor interface for
// walking Go ASTs.
type Indexer struct {
	c          *Corpus
	fset       *token.FileSet // file set for all indexed files
	fsOpenGate chan bool      // send pre fs.Open; receive on close

	mu            sync.Mutex              // guards all the following
	sources       bytes.Buffer            // concatenated sources
	strings       map[string]string       // interned string
	packages      map[Pak]*Pak            // interned *Paks
	words         map[string]*IndexResult // RunLists of Spots
	snippets      []*Snippet              // indices are stored in SpotInfos
	current       *token.File             // last file added to file set
	file          *File                   // AST for current file
	decl          ast.Decl                // AST for current decl
	stats         Statistics
	throttle      *util.Throttle
	importCount   map[string]int                 // package path ("net/http") => count
	packagePath   map[string]map[string]bool     // "template" => "text/template" => true
	exports       map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	curPkgExports map[string]SpotKind            // exports of the file currently being walked
	idents        map[SpotKind]map[string][]Ident // kind => name => list of Idents
}
func (x *Indexer) intern(s string) string {
|
|
if s, ok := x.strings[s]; ok {
|
|
return s
|
|
}
|
|
x.strings[s] = s
|
|
return s
|
|
}
|
|
|
|
// lookupPackage returns the canonical *Pak for the package with the
// given directory path and name, creating and caching one on first use.
func (x *Indexer) lookupPackage(path, name string) *Pak {
	// In the source directory tree, more than one package may
	// live in the same directory. For the packages map, construct
	// a key that includes both the directory path and the package
	// name.
	key := Pak{Path: x.intern(path), Name: x.intern(name)}
	pak := x.packages[key]
	if pak == nil {
		pak = &key
		x.packages[key] = pak
	}
	return pak
}
func (x *Indexer) addSnippet(s *Snippet) int {
|
|
index := len(x.snippets)
|
|
x.snippets = append(x.snippets, s)
|
|
return index
|
|
}
|
|
|
|
// visitIdent records one occurrence of id with the given kind.
// Package-level type/func/const/var declarations are also recorded in
// curPkgExports. A nil id is ignored.
func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
	if id == nil {
		return
	}
	name := x.intern(id.Name)

	switch kind {
	case TypeDecl, FuncDecl, ConstDecl, VarDecl:
		x.curPkgExports[name] = kind
	}

	lists, found := x.words[name]
	if !found {
		lists = new(IndexResult)
		x.words[name] = lists
	}

	if kind == Use || x.decl == nil {
		if x.c.IndexGoCode {
			// not a declaration or no snippet required
			info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
			lists.Others = append(lists.Others, Spot{x.file, info})
		}
	} else {
		// a declaration with snippet
		index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
		info := makeSpotInfo(kind, index, true)
		lists.Decls = append(lists.Decls, Spot{x.file, info})
	}

	x.stats.Spots++
}

// visitFieldList indexes the names of each field in flist with the
// given kind and walks each field's type expression.
func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
	for _, f := range flist.List {
		x.decl = nil // no snippets for fields
		for _, name := range f.Names {
			x.visitIdent(kind, name)
		}
		ast.Walk(x, f.Type)
		// ignore tag - not indexed at the moment
	}
}

// visitSpec indexes an import, value, or type spec; kind applies to
// the names of a ValueSpec (ConstDecl or VarDecl).
func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
	switch n := spec.(type) {
	case *ast.ImportSpec:
		x.visitIdent(ImportDecl, n.Name)
		if n.Path != nil {
			if imp, err := strconv.Unquote(n.Path.Value); err == nil {
				// count imports per package path for later ranking
				x.importCount[x.intern(imp)]++
			}
		}

	case *ast.ValueSpec:
		for _, n := range n.Names {
			x.visitIdent(kind, n)
		}
		ast.Walk(x, n.Type)
		for _, v := range n.Values {
			ast.Walk(x, v)
		}

	case *ast.TypeSpec:
		x.visitIdent(TypeDecl, n.Name)
		ast.Walk(x, n.Type)
	}
}

// visitGenDecl indexes all specs of a const or var declaration,
// remembering decl as the current declaration for snippet creation.
func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
	kind := VarDecl
	if decl.Tok == token.CONST {
		kind = ConstDecl
	}
	x.decl = decl
	for _, s := range decl.Specs {
		x.visitSpec(kind, s)
	}
}
// Visit implements ast.Visitor. It dispatches on the node type,
// records identifier occurrences, and returns nil for nodes whose
// children were already walked here (so the walker does not descend
// again); only unknown node kinds return x to continue the walk.
func (x *Indexer) Visit(node ast.Node) ast.Visitor {
	switch n := node.(type) {
	case nil:
		// nothing to do

	case *ast.Ident:
		x.visitIdent(Use, n)

	case *ast.FieldList:
		x.visitFieldList(VarDecl, n)

	case *ast.InterfaceType:
		x.visitFieldList(MethodDecl, n.Methods)

	case *ast.DeclStmt:
		// local declarations should only be *ast.GenDecls;
		// ignore incorrect ASTs
		if decl, ok := n.Decl.(*ast.GenDecl); ok {
			x.decl = nil // no snippets for local declarations
			x.visitGenDecl(decl)
		}

	case *ast.GenDecl:
		x.decl = n
		x.visitGenDecl(n)

	case *ast.FuncDecl:
		kind := FuncDecl
		if n.Recv != nil {
			kind = MethodDecl
			ast.Walk(x, n.Recv)
		}
		x.decl = n
		x.visitIdent(kind, n.Name)
		ast.Walk(x, n.Type)
		if n.Body != nil {
			ast.Walk(x, n.Body)
		}

	case *ast.File:
		x.decl = nil
		x.visitIdent(PackageClause, n.Name)
		for _, d := range n.Decls {
			ast.Walk(x, d)
		}

	default:
		return x
	}

	return nil
}
// addFile adds a file to the index if possible and returns the file set file
// and the file's AST if it was successfully parsed as a Go file. If addFile
// failed (that is, if the file was not added), it returns file == nil.
// f is always closed before returning; goFile selects whether the contents
// are parsed as Go source or only added as text.
func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
	defer f.Close()

	// The file set's base offset and x.sources size must be in lock-step;
	// this permits the direct mapping of suffix array lookup results to
	// to corresponding Pos values.
	//
	// When a file is added to the file set, its offset base increases by
	// the size of the file + 1; and the initial base offset is 1. Add an
	// extra byte to the sources here.
	x.sources.WriteByte(0)

	// If the sources length doesn't match the file set base at this point
	// the file set implementation changed or we have another error.
	base := x.fset.Base()
	if x.sources.Len() != base {
		panic("internal error: file base incorrect")
	}

	// append file contents (src) to x.sources
	if _, err := x.sources.ReadFrom(f); err == nil {
		src := x.sources.Bytes()[base:]

		if goFile {
			// parse the file and in the process add it to the file set
			if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
				file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
				return
			}
			// file has parse errors, and the AST may be incorrect -
			// set lines information explicitly and index as ordinary
			// text file (cannot fall through to the text case below
			// because the file has already been added to the file set
			// by the parser)
			file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
			file.SetLinesForContent(src)
			ast = nil
			return
		}

		if util.IsText(src) {
			// only add the file to the file set (for the full text index)
			file = x.fset.AddFile(filename, x.fset.Base(), len(src))
			file.SetLinesForContent(src)
			return
		}
	}

	// discard possibly added data
	x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
	return
}
// Design note: Using an explicit white list of permitted files for indexing
// makes sure that the important files are included and massively reduces the
// number of files to index. The advantage over a blacklist is that unexpected
// (non-blacklisted) files won't suddenly explode the index.

// Files are whitelisted if they have a file name or extension
// present as key in whitelisted.
var whitelisted = map[string]bool{
	".bash":        true,
	".c":           true,
	".cc":          true,
	".cpp":         true,
	".cxx":         true,
	".css":         true,
	".go":          true,
	".goc":         true,
	".h":           true,
	".hh":          true,
	".hpp":         true,
	".hxx":         true,
	".html":        true,
	".js":          true,
	".out":         true,
	".py":          true,
	".s":           true,
	".sh":          true,
	".txt":         true,
	".xml":         true,
	"AUTHORS":      true,
	"CONTRIBUTORS": true,
	"LICENSE":      true,
	"Makefile":     true,
	"PATENTS":      true,
	"README":       true,
}

// isWhitelisted returns true if a file is on the list
// of "permitted" files for indexing. The filename must
// be the directory-local name of the file.
func isWhitelisted(filename string) bool {
	if ext := pathpkg.Ext(filename); ext != "" {
		return whitelisted[ext]
	}
	// file has no extension - use entire filename
	return whitelisted[filename]
}
// indexDocs extracts package documentation (via go/doc) from astFile and
// records an Ident for the package clause and for each documented
// declaration. Files declaring package main are not indexed.
// dirname is the directory containing the file; filename is its
// directory-local name.
func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
	pkgName := x.intern(astFile.Name.Name)
	if pkgName == "main" {
		return
	}
	// derive the import path by stripping the "/src/" (or "pkg/") prefix
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	astPkg := ast.Package{
		Name: pkgName,
		Files: map[string]*ast.File{
			filename: astFile,
		},
	}
	var m doc.Mode
	docPkg := doc.New(&astPkg, dirname, m)
	// addIdent records one identifier of the given kind under its name,
	// with a synopsis of its doc comment.
	addIdent := func(sk SpotKind, name string, docstr string) {
		if x.idents[sk] == nil {
			x.idents[sk] = make(map[string][]Ident)
		}
		name = x.intern(name)
		x.idents[sk][name] = append(x.idents[sk][name], Ident{
			Path:    pkgPath,
			Package: pkgName,
			Name:    name,
			Doc:     doc.Synopsis(docstr),
		})
	}

	if x.idents[PackageClause] == nil {
		x.idents[PackageClause] = make(map[string][]Ident)
	}
	// List of words under which the package identifier will be stored.
	// This includes the package name and the components of the directory
	// in which it resides.
	words := strings.Split(pathpkg.Dir(pkgPath), "/")
	if words[0] == "." {
		words = []string{}
	}
	name := x.intern(docPkg.Name)
	synopsis := doc.Synopsis(docPkg.Doc)
	words = append(words, name)
	pkgIdent := Ident{
		Path:    pkgPath,
		Package: pkgName,
		Name:    name,
		Doc:     synopsis,
	}
	for _, word := range words {
		word = x.intern(word)
		found := false
		pkgs := x.idents[PackageClause][word]
		for i, p := range pkgs {
			if p.Path == pkgPath {
				// package already recorded under this word (from another
				// file); refresh the synopsis if this file provides one
				if docPkg.Doc != "" {
					p.Doc = synopsis
					pkgs[i] = p
				}
				found = true
				break
			}
		}
		if !found {
			x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
		}
	}

	for _, c := range docPkg.Consts {
		for _, name := range c.Names {
			addIdent(ConstDecl, name, c.Doc)
		}
	}
	for _, t := range docPkg.Types {
		addIdent(TypeDecl, t.Name, t.Doc)
		for _, c := range t.Consts {
			for _, name := range c.Names {
				addIdent(ConstDecl, name, c.Doc)
			}
		}
		for _, v := range t.Vars {
			for _, name := range v.Names {
				addIdent(VarDecl, name, v.Doc)
			}
		}
		for _, f := range t.Funcs {
			addIdent(FuncDecl, f.Name, f.Doc)
		}
		for _, f := range t.Methods {
			addIdent(MethodDecl, f.Name, f.Doc)
			// Change the name of methods to be "<typename>.<methodname>".
			// They will still be indexed as <methodname>.
			idents := x.idents[MethodDecl][f.Name]
			idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
		}
	}
	for _, v := range docPkg.Vars {
		for _, name := range v.Names {
			addIdent(VarDecl, name, v.Doc)
		}
	}
	for _, f := range docPkg.Funcs {
		addIdent(FuncDecl, f.Name, f.Doc)
	}
}
// indexGoFile indexes one successfully parsed Go file: it walks the AST
// for identifier occurrences (if IndexGoCode), records documentation
// idents (if IndexDocs), updates the package-name => package-path map,
// and merges the file's exported symbols into the per-package export map.
func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
	pkgName := astFile.Name.Name

	if x.c.IndexGoCode {
		x.current = file
		pak := x.lookupPackage(dirname, pkgName)
		x.file = &File{filename, pak}
		ast.Walk(x, astFile)
	}

	if x.c.IndexDocs {
		// Test files are already filtered out in visitFile if IndexGoCode and
		// IndexFullText are false. Otherwise, check here.
		isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
			(strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
		if !isTestFile {
			x.indexDocs(dirname, filename, astFile)
		}
	}

	ppKey := x.intern(pkgName)
	if _, ok := x.packagePath[ppKey]; !ok {
		x.packagePath[ppKey] = make(map[string]bool)
	}
	// strip the "/src/" (or "pkg/") prefix to obtain the import path
	pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
	x.packagePath[ppKey][pkgPath] = true

	// Merge in exported symbols found walking this file into
	// the map for that package.
	if len(x.curPkgExports) > 0 {
		dest, ok := x.exports[pkgPath]
		if !ok {
			dest = make(map[string]SpotKind)
			x.exports[pkgPath] = dest
		}
		for k, v := range x.curPkgExports {
			dest[k] = v
		}
	}
}
// visitFile indexes the (non-directory) file described by fi in dirname,
// subject to the corpus' index configuration. It is called from multiple
// goroutines: fsOpenGate bounds the number of concurrently open files,
// and mu serializes all mutation of the indexer state.
func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
	if fi.IsDir() || !x.c.IndexEnabled {
		return
	}

	filename := pathpkg.Join(dirname, fi.Name())
	goFile := isGoFile(fi)

	// Decide whether to index this file based on the most permissive
	// indexing mode that is enabled (full text > Go code > docs).
	switch {
	case x.c.IndexFullText:
		if !isWhitelisted(fi.Name()) {
			return
		}
	case x.c.IndexGoCode:
		if !goFile {
			return
		}
	case x.c.IndexDocs:
		if !goFile ||
			strings.HasSuffix(fi.Name(), "_test.go") ||
			strings.HasPrefix(dirname, "/test/") {
			return
		}
	default:
		// No indexing turned on.
		return
	}

	x.fsOpenGate <- true
	defer func() { <-x.fsOpenGate }()

	// open file
	f, err := x.c.fs.Open(filename)
	if err != nil {
		return
	}

	x.mu.Lock()
	defer x.mu.Unlock()

	x.throttle.Throttle()

	// reset per-file export collection; addFile closes f
	x.curPkgExports = make(map[string]SpotKind)
	file, fast := x.addFile(f, filename, goFile)
	if file == nil {
		return // addFile failed
	}

	if fast != nil {
		x.indexGoFile(dirname, fi.Name(), file, fast)
	}

	// update statistics
	x.stats.Bytes += file.Size()
	x.stats.Files++
	x.stats.Lines += file.LineCount()
}
// indexOptions contains information that affects the contents of an index.
type indexOptions struct {
	// Docs provides documentation search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	Docs bool

	// GoCode provides Go source code search results.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	GoCode bool

	// FullText provides search results from all files.
	// It is only consulted if IndexEnabled is true.
	// The default value is true.
	FullText bool

	// MaxResults optionally specifies the maximum results for indexing.
	// The default is 1000.
	MaxResults int
}

// ----------------------------------------------------------------------------
// Index

// A LookupResult holds the hits for one word, split into package-level
// declarations (with snippets) and all other occurrences.
type LookupResult struct {
	Decls  HitList // package-level declarations (with snippets)
	Others HitList // all other occurrences
}

// An Index is the complete, immutable search index produced by NewIndex.
type Index struct {
	fset        *token.FileSet           // file set used during indexing; nil if no textindex
	suffixes    *suffixarray.Index       // suffixes for concatenated sources; nil if no textindex
	words       map[string]*LookupResult // maps words to hit lists
	alts        map[string]*AltWords     // maps canonical(words) to lists of alternative spellings
	snippets    []*Snippet               // all snippets, indexed by snippet index
	stats       Statistics
	importCount map[string]int                 // package path ("net/http") => count
	packagePath map[string]map[string]bool     // "template" => "text/template" => true
	exports     map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
	idents      map[SpotKind]map[string][]Ident
	opts        indexOptions
}
// canonical returns the canonical (all lowercase) spelling of w.
func canonical(w string) string { return strings.Map(unicode.ToLower, w) }
// Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
// consuming file descriptors, where some systems have low 256 or 512 limits.
// Go should have a built-in way to cap fd usage under the ulimit.
const (
	maxOpenFiles = 200 // bound on concurrently open files during indexing
	maxOpenDirs  = 50  // bound on concurrently read directories
)
func (c *Corpus) throttle() float64 {
|
|
if c.IndexThrottle <= 0 {
|
|
return 0.9
|
|
}
|
|
if c.IndexThrottle > 1.0 {
|
|
return 1.0
|
|
}
|
|
return c.IndexThrottle
|
|
}
|
|
|
|
// NewIndex creates a new index for the .go files provided by the corpus.
// It fans out over the corpus' directories with bounded concurrency,
// indexes each file, then reduces the collected spot lists into the
// final lookup tables and (optionally) a full-text suffix array.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	dirGate := make(chan bool, maxOpenDirs) // bounds concurrent ReadDir calls
	for dirname := range c.fsDirnames() {
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// sort idents by the number of imports of their respective packages
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byImportCount{ir, x.importCount})
		}
	}

	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}
// ErrFileIndexVersion is returned by (*Index).ReadFrom when the
// serialized index carries a Version different from fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")

// fileIndexVersion identifies the on-disk format written by
// (*Index).WriteTo; bump it whenever the fileIndex encoding changes.
const fileIndexVersion = 3
|
|
|
|
// fileIndex is the subset of Index that's gob-encoded for use by
// Index.Write and Index.Read.
type fileIndex struct {
	Version     int  // must equal fileIndexVersion; checked by Index.ReadFrom
	Words       map[string]*LookupResult
	Alts        map[string]*AltWords
	Snippets    []*Snippet
	Fulltext    bool // if set, a file set and suffix array follow the gob data
	Stats       Statistics
	ImportCount map[string]int
	PackagePath map[string]map[string]bool
	Exports     map[string]map[string]SpotKind
	Idents      map[SpotKind]map[string][]Ident
	Opts        indexOptions
}
|
|
|
|
func (x *fileIndex) Write(w io.Writer) error {
|
|
return gob.NewEncoder(w).Encode(x)
|
|
}
|
|
|
|
func (x *fileIndex) Read(r io.Reader) error {
|
|
return gob.NewDecoder(r).Decode(x)
|
|
}
|
|
|
|
// WriteTo writes the index x to w.
|
|
func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
|
|
w = countingWriter{&n, w}
|
|
fulltext := false
|
|
if x.suffixes != nil {
|
|
fulltext = true
|
|
}
|
|
fx := fileIndex{
|
|
Version: fileIndexVersion,
|
|
Words: x.words,
|
|
Alts: x.alts,
|
|
Snippets: x.snippets,
|
|
Fulltext: fulltext,
|
|
Stats: x.stats,
|
|
ImportCount: x.importCount,
|
|
PackagePath: x.packagePath,
|
|
Exports: x.exports,
|
|
Idents: x.idents,
|
|
Opts: x.opts,
|
|
}
|
|
if err := fx.Write(w); err != nil {
|
|
return 0, err
|
|
}
|
|
if fulltext {
|
|
encode := func(x interface{}) error {
|
|
return gob.NewEncoder(w).Encode(x)
|
|
}
|
|
if err := x.fset.Write(encode); err != nil {
|
|
return 0, err
|
|
}
|
|
if err := x.suffixes.Write(w); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// ReadFrom reads the index from r into x; x must not be nil.
|
|
// If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
|
|
// If the index is from an old version, the error is ErrFileIndexVersion.
|
|
func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
|
|
// We use the ability to read bytes as a plausible surrogate for buffering.
|
|
if _, ok := r.(io.ByteReader); !ok {
|
|
r = bufio.NewReader(r)
|
|
}
|
|
r = countingReader{&n, r.(byteReader)}
|
|
var fx fileIndex
|
|
if err := fx.Read(r); err != nil {
|
|
return n, err
|
|
}
|
|
if fx.Version != fileIndexVersion {
|
|
return 0, ErrFileIndexVersion
|
|
}
|
|
x.words = fx.Words
|
|
x.alts = fx.Alts
|
|
x.snippets = fx.Snippets
|
|
x.stats = fx.Stats
|
|
x.importCount = fx.ImportCount
|
|
x.packagePath = fx.PackagePath
|
|
x.exports = fx.Exports
|
|
x.idents = fx.Idents
|
|
x.opts = fx.Opts
|
|
if fx.Fulltext {
|
|
x.fset = token.NewFileSet()
|
|
decode := func(x interface{}) error {
|
|
return gob.NewDecoder(r).Decode(x)
|
|
}
|
|
if err := x.fset.Read(decode); err != nil {
|
|
return n, err
|
|
}
|
|
x.suffixes = new(suffixarray.Index)
|
|
if err := x.suffixes.Read(r); err != nil {
|
|
return n, err
|
|
}
|
|
}
|
|
return n, nil
|
|
}
|
|
|
|
// Stats returns the statistics collected while the index was built.
func (x *Index) Stats() Statistics {
	return x.stats
}
|
|
|
|
// ImportCount returns a map from import paths to how many times they were seen.
// The returned map is the index's internal data; callers must not modify it.
func (x *Index) ImportCount() map[string]int {
	return x.importCount
}
|
|
|
|
// PackagePath returns a map from short package name to a set
// of full package path names that use that short package name.
// The returned map is the index's internal data; callers must not modify it.
func (x *Index) PackagePath() map[string]map[string]bool {
	return x.packagePath
}
|
|
|
|
// Exports returns a map from full package path to exported
// symbol name to its type.
// The returned map is the index's internal data; callers must not modify it.
func (x *Index) Exports() map[string]map[string]SpotKind {
	return x.exports
}
|
|
|
|
// Idents returns a map from identifier type to exported
// symbol name to the list of identifiers matching that name.
// The returned map is the index's internal data; callers must not modify it.
func (x *Index) Idents() map[SpotKind]map[string][]Ident {
	return x.idents
}
|
|
|
|
func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
|
|
match = x.words[w]
|
|
alt = x.alts[canonical(w)]
|
|
// remove current spelling from alternatives
|
|
// (if there is no match, the alternatives do
|
|
// not contain the current spelling)
|
|
if match != nil && alt != nil {
|
|
alt = alt.filter(w)
|
|
}
|
|
return
|
|
}
|
|
|
|
// isIdentifier reports whether s is a Go identifier:
// a non-empty string of letters, digits, and underscores
// that does not start with a digit.
func isIdentifier(s string) bool {
	if s == "" {
		return false
	}
	for i, ch := range s {
		switch {
		case unicode.IsLetter(ch) || ch == '_':
			// letters and underscores are allowed anywhere
		case i > 0 && unicode.IsDigit(ch):
			// digits are allowed except in the leading position
		default:
			return false
		}
	}
	return true
}
|
|
|
|
// For a given query, which is either a single identifier or a qualified
|
|
// identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
|
|
// list of alternative spellings, and identifiers, if any. Any and all results
|
|
// may be nil. If the query syntax is wrong, an error is reported.
|
|
func (x *Index) Lookup(query string) (*SearchResult, error) {
|
|
ss := strings.Split(query, ".")
|
|
|
|
// check query syntax
|
|
for _, s := range ss {
|
|
if !isIdentifier(s) {
|
|
return nil, errors.New("all query parts must be identifiers")
|
|
}
|
|
}
|
|
rslt := &SearchResult{
|
|
Query: query,
|
|
Idents: make(map[SpotKind][]Ident, 5),
|
|
}
|
|
// handle simple and qualified identifiers
|
|
switch len(ss) {
|
|
case 1:
|
|
ident := ss[0]
|
|
rslt.Hit, rslt.Alt = x.lookupWord(ident)
|
|
if rslt.Hit != nil {
|
|
// found a match - filter packages with same name
|
|
// for the list of packages called ident, if any
|
|
rslt.Pak = rslt.Hit.Others.filter(ident)
|
|
}
|
|
for k, v := range x.idents {
|
|
const rsltLimit = 50
|
|
ids := byImportCount{v[ident], x.importCount}
|
|
rslt.Idents[k] = ids.top(rsltLimit)
|
|
}
|
|
|
|
case 2:
|
|
pakname, ident := ss[0], ss[1]
|
|
rslt.Hit, rslt.Alt = x.lookupWord(ident)
|
|
if rslt.Hit != nil {
|
|
// found a match - filter by package name
|
|
// (no paks - package names are not qualified)
|
|
decls := rslt.Hit.Decls.filter(pakname)
|
|
others := rslt.Hit.Others.filter(pakname)
|
|
rslt.Hit = &LookupResult{decls, others}
|
|
}
|
|
for k, v := range x.idents {
|
|
ids := byImportCount{v[ident], x.importCount}
|
|
rslt.Idents[k] = ids.filter(pakname)
|
|
}
|
|
|
|
default:
|
|
return nil, errors.New("query is not a (qualified) identifier")
|
|
}
|
|
|
|
return rslt, nil
|
|
}
|
|
|
|
func (x *Index) Snippet(i int) *Snippet {
|
|
// handle illegal snippet indices gracefully
|
|
if 0 <= i && i < len(x.snippets) {
|
|
return x.snippets[i]
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// A positionList is a list of file/line match positions, sortable by
// filename so that matches belonging to the same file become adjacent
// (see LookupRegexp).
type positionList []struct {
	filename string
	line     int
}

// positionList implements sort.Interface, ordering entries by filename only.
func (list positionList) Len() int           { return len(list) }
func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
func (list positionList) Swap(i, j int)      { list[i], list[j] = list[j], list[i] }
|
|
|
|
// unique returns the list sorted and with duplicate entries removed.
// The input slice is sorted and compacted in place; the result aliases it.
func unique(list []int) []int {
	sort.Ints(list)
	n := 0 // number of unique elements kept so far
	for _, v := range list {
		// After sorting, duplicates are adjacent: keep v only when it
		// differs from the most recently kept element.
		if n > 0 && v == list[n-1] {
			continue
		}
		list[n] = v
		n++
	}
	return list[:n]
}
|
|
|
|
// A FileLines value specifies a file and line numbers within that file.
// As produced by LookupRegexp, Lines is sorted with duplicates removed.
type FileLines struct {
	Filename string
	Lines    []int
}
|
|
|
|
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	// No results when there is no full text index or none were asked for.
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and map offsets to file-local offsets
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	list = list[0:found]
	sort.Sort(list) // sort by filename so same-file matches are adjacent

	// collect matches belonging to the same file into one FileLines value
	var last string
	var lines []int
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines (unique also sorts them)
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	addLines() // flush the final file's lines

	return
}
|
|
|
|
// invalidateIndex should be called whenever any of the file systems
// under godoc's observation change so that the indexer is kicked off.
func (c *Corpus) invalidateIndex() {
	c.fsModified.Set(nil)
	c.refreshMetadata()
}
|
|
|
|
// feedDirnames feeds the directory names of all directories
|
|
// under the file system given by root to channel c.
|
|
//
|
|
func (c *Corpus) feedDirnames(ch chan<- string) {
|
|
if dir, _ := c.fsTree.Get(); dir != nil {
|
|
for d := range dir.(*Directory).iter(false) {
|
|
ch <- d.Path
|
|
}
|
|
}
|
|
}
|
|
|
|
// fsDirnames() returns a channel sending all directory names
|
|
// of all the file systems under godoc's observation.
|
|
//
|
|
func (c *Corpus) fsDirnames() <-chan string {
|
|
ch := make(chan string, 256) // buffered for fewer context switches
|
|
go func() {
|
|
c.feedDirnames(ch)
|
|
close(ch)
|
|
}()
|
|
return ch
|
|
}
|
|
|
|
// CompatibleWith reports whether the Index x is compatible with the corpus
|
|
// indexing options set in c.
|
|
func (x *Index) CompatibleWith(c *Corpus) bool {
|
|
return x.opts.Docs == c.IndexDocs &&
|
|
x.opts.GoCode == c.IndexGoCode &&
|
|
x.opts.FullText == c.IndexFullText &&
|
|
x.opts.MaxResults == c.MaxResults
|
|
}
|
|
|
|
func (c *Corpus) readIndex(filenames string) error {
|
|
matches, err := filepath.Glob(filenames)
|
|
if err != nil {
|
|
return err
|
|
} else if matches == nil {
|
|
return fmt.Errorf("no index files match %q", filenames)
|
|
}
|
|
sort.Strings(matches) // make sure files are in the right order
|
|
files := make([]io.Reader, 0, len(matches))
|
|
for _, filename := range matches {
|
|
f, err := os.Open(filename)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
files = append(files, f)
|
|
}
|
|
return c.ReadIndexFrom(io.MultiReader(files...))
|
|
}
|
|
|
|
// ReadIndexFrom sets the current index from the serialized version found in r.
|
|
func (c *Corpus) ReadIndexFrom(r io.Reader) error {
|
|
x := new(Index)
|
|
if _, err := x.ReadFrom(r); err != nil {
|
|
return err
|
|
}
|
|
if !x.CompatibleWith(c) {
|
|
return fmt.Errorf("index file options are incompatible: %v", x.opts)
|
|
}
|
|
c.searchIndex.Set(x)
|
|
return nil
|
|
}
|
|
|
|
func (c *Corpus) UpdateIndex() {
|
|
if c.Verbose {
|
|
log.Printf("updating index...")
|
|
}
|
|
start := time.Now()
|
|
index := c.NewIndex()
|
|
stop := time.Now()
|
|
c.searchIndex.Set(index)
|
|
if c.Verbose {
|
|
secs := stop.Sub(start).Seconds()
|
|
stats := index.Stats()
|
|
log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
|
|
secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
|
|
}
|
|
memstats := new(runtime.MemStats)
|
|
runtime.ReadMemStats(memstats)
|
|
if c.Verbose {
|
|
log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
|
|
}
|
|
runtime.GC()
|
|
runtime.ReadMemStats(memstats)
|
|
if c.Verbose {
|
|
log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
|
|
}
|
|
}
|
|
|
|
// RunIndexer runs forever, indexing.
|
|
func (c *Corpus) RunIndexer() {
|
|
// initialize the index from disk if possible
|
|
if c.IndexFiles != "" {
|
|
c.initFSTree()
|
|
if err := c.readIndex(c.IndexFiles); err != nil {
|
|
log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
|
|
}
|
|
return
|
|
}
|
|
|
|
// Repeatedly update the package directory tree and index.
|
|
// TODO(bgarcia): Use fsnotify to only update when notified of a filesystem change.
|
|
for {
|
|
c.initFSTree()
|
|
c.UpdateIndex()
|
|
if c.IndexInterval < 0 {
|
|
return
|
|
}
|
|
delay := 5 * time.Minute // by default, reindex every 5 minutes
|
|
if c.IndexInterval > 0 {
|
|
delay = c.IndexInterval
|
|
}
|
|
time.Sleep(delay)
|
|
}
|
|
}
|
|
|
|
// countingWriter wraps an io.Writer and accumulates into *n the total
// number of bytes written through it (used by Index.WriteTo).
type countingWriter struct {
	n *int64
	w io.Writer
}

// Write forwards p to the underlying writer and adds the number of
// bytes actually written to the shared counter.
func (c countingWriter) Write(p []byte) (int, error) {
	written, err := c.w.Write(p)
	*c.n += int64(written)
	return written, err
}
|
|
|
|
// byteReader is the combination of io.Reader and io.ByteReader
// required by countingReader (and, through it, by Index.ReadFrom).
type byteReader interface {
	io.Reader
	io.ByteReader
}

// countingReader wraps a byteReader and accumulates into *n the total
// number of bytes read through it.
type countingReader struct {
	n *int64
	r byteReader
}

// Read forwards to the underlying reader and adds the number of bytes
// actually read to the shared counter.
func (c countingReader) Read(p []byte) (int, error) {
	read, err := c.r.Read(p)
	*c.n += int64(read)
	return read, err
}

// ReadByte forwards to the underlying reader and counts one byte read.
func (c countingReader) ReadByte() (byte, error) {
	b, err := c.r.ReadByte()
	*c.n += 1
	return b, err
}
|