You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1581 lines
42 KiB

  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // This file contains the infrastructure to create an
  5. // identifier and full-text index for a set of Go files.
  6. //
  7. // Algorithm for identifier index:
  8. // - traverse all .go files of the file tree specified by root
  9. // - for each identifier (word) encountered, collect all occurrences (spots)
  10. // into a list; this produces a list of spots for each word
  11. // - reduce the lists: from a list of spots to a list of FileRuns,
  12. // and from a list of FileRuns into a list of PakRuns
  13. // - make a HitList from the PakRuns
  14. //
  15. // Details:
  16. // - keep two lists per word: one containing package-level declarations
  17. // that have snippets, and one containing all other spots
  18. // - keep the snippets in a separate table indexed by snippet index
  19. // and store the snippet index in place of the line number in a SpotInfo
  20. // (the line number for spots with snippets is stored in the snippet)
  21. // - at the end, create lists of alternative spellings for a given
  22. // word
  23. //
  24. // Algorithm for full text index:
  25. // - concatenate all source code in a byte buffer (in memory)
  26. // - add the files to a file set in lockstep as they are added to the byte
  27. // buffer such that a byte buffer offset corresponds to the Pos value for
  28. // that file location
  29. // - create a suffix array from the concatenated sources
  30. //
  31. // String lookup in full text index:
  32. // - use the suffix array to lookup a string's offsets - the offsets
  33. // correspond to the Pos values relative to the file set
  34. // - translate the Pos values back into file and line information and
  35. // sort the result
  36. package godoc
  37. import (
  38. "bufio"
  39. "bytes"
  40. "encoding/gob"
  41. "errors"
  42. "fmt"
  43. "go/ast"
  44. "go/doc"
  45. "go/parser"
  46. "go/token"
  47. "index/suffixarray"
  48. "io"
  49. "log"
  50. "os"
  51. pathpkg "path"
  52. "path/filepath"
  53. "regexp"
  54. "runtime"
  55. "sort"
  56. "strconv"
  57. "strings"
  58. "sync"
  59. "time"
  60. "unicode"
  61. "golang.org/x/tools/godoc/util"
  62. "golang.org/x/tools/godoc/vfs"
  63. )
  64. // ----------------------------------------------------------------------------
  65. // InterfaceSlice is a helper type for sorting interface
  66. // slices according to some slice-specific sort criteria.
  67. type comparer func(x, y interface{}) bool
  68. type interfaceSlice struct {
  69. slice []interface{}
  70. less comparer
  71. }
  72. // ----------------------------------------------------------------------------
  73. // RunList
  74. // A RunList is a list of entries that can be sorted according to some
  75. // criteria. A RunList may be compressed by grouping "runs" of entries
  76. // which are equal (according to the sort critera) into a new RunList of
  77. // runs. For instance, a RunList containing pairs (x, y) may be compressed
  78. // into a RunList containing pair runs (x, {y}) where each run consists of
  79. // a list of y's with the same x.
  80. type RunList []interface{}
  81. func (h RunList) sort(less comparer) {
  82. sort.Sort(&interfaceSlice{h, less})
  83. }
  84. func (p *interfaceSlice) Len() int { return len(p.slice) }
  85. func (p *interfaceSlice) Less(i, j int) bool { return p.less(p.slice[i], p.slice[j]) }
  86. func (p *interfaceSlice) Swap(i, j int) { p.slice[i], p.slice[j] = p.slice[j], p.slice[i] }
  87. // Compress entries which are the same according to a sort criteria
  88. // (specified by less) into "runs".
  89. func (h RunList) reduce(less comparer, newRun func(h RunList) interface{}) RunList {
  90. if len(h) == 0 {
  91. return nil
  92. }
  93. // len(h) > 0
  94. // create runs of entries with equal values
  95. h.sort(less)
  96. // for each run, make a new run object and collect them in a new RunList
  97. var hh RunList
  98. i, x := 0, h[0]
  99. for j, y := range h {
  100. if less(x, y) {
  101. hh = append(hh, newRun(h[i:j]))
  102. i, x = j, h[j] // start a new run
  103. }
  104. }
  105. // add final run, if any
  106. if i < len(h) {
  107. hh = append(hh, newRun(h[i:]))
  108. }
  109. return hh
  110. }
  111. // ----------------------------------------------------------------------------
  112. // KindRun
  113. // Debugging support. Disable to see multiple entries per line.
  114. const removeDuplicates = true
  115. // A KindRun is a run of SpotInfos of the same kind in a given file.
  116. // The kind (3 bits) is stored in each SpotInfo element; to find the
  117. // kind of a KindRun, look at any of its elements.
  118. type KindRun []SpotInfo
  119. // KindRuns are sorted by line number or index. Since the isIndex bit
  120. // is always the same for all infos in one list we can compare lori's.
  121. func (k KindRun) Len() int { return len(k) }
  122. func (k KindRun) Less(i, j int) bool { return k[i].Lori() < k[j].Lori() }
  123. func (k KindRun) Swap(i, j int) { k[i], k[j] = k[j], k[i] }
  124. // FileRun contents are sorted by Kind for the reduction into KindRuns.
  125. func lessKind(x, y interface{}) bool { return x.(SpotInfo).Kind() < y.(SpotInfo).Kind() }
  126. // newKindRun allocates a new KindRun from the SpotInfo run h.
  127. func newKindRun(h RunList) interface{} {
  128. run := make(KindRun, len(h))
  129. for i, x := range h {
  130. run[i] = x.(SpotInfo)
  131. }
  132. // Spots were sorted by file and kind to create this run.
  133. // Within this run, sort them by line number or index.
  134. sort.Sort(run)
  135. if removeDuplicates {
  136. // Since both the lori and kind field must be
  137. // same for duplicates, and since the isIndex
  138. // bit is always the same for all infos in one
  139. // list we can simply compare the entire info.
  140. k := 0
  141. prev := SpotInfo(1<<32 - 1) // an unlikely value
  142. for _, x := range run {
  143. if x != prev {
  144. run[k] = x
  145. k++
  146. prev = x
  147. }
  148. }
  149. run = run[0:k]
  150. }
  151. return run
  152. }
  153. // ----------------------------------------------------------------------------
  154. // FileRun
  155. // A Pak describes a Go package.
  156. type Pak struct {
  157. Path string // path of directory containing the package
  158. Name string // package name as declared by package clause
  159. }
  160. // Paks are sorted by name (primary key) and by import path (secondary key).
  161. func (p *Pak) less(q *Pak) bool {
  162. return p.Name < q.Name || p.Name == q.Name && p.Path < q.Path
  163. }
  164. // A File describes a Go file.
  165. type File struct {
  166. Name string // directory-local file name
  167. Pak *Pak // the package to which the file belongs
  168. }
  169. // Path returns the file path of f.
  170. func (f *File) Path() string {
  171. return pathpkg.Join(f.Pak.Path, f.Name)
  172. }
  173. // A Spot describes a single occurrence of a word.
  174. type Spot struct {
  175. File *File
  176. Info SpotInfo
  177. }
  178. // A FileRun is a list of KindRuns belonging to the same file.
  179. type FileRun struct {
  180. File *File
  181. Groups []KindRun
  182. }
  183. // Spots are sorted by file path for the reduction into FileRuns.
  184. func lessSpot(x, y interface{}) bool {
  185. fx := x.(Spot).File
  186. fy := y.(Spot).File
  187. // same as "return fx.Path() < fy.Path()" but w/o computing the file path first
  188. px := fx.Pak.Path
  189. py := fy.Pak.Path
  190. return px < py || px == py && fx.Name < fy.Name
  191. }
  192. // newFileRun allocates a new FileRun from the Spot run h.
  193. func newFileRun(h RunList) interface{} {
  194. file := h[0].(Spot).File
  195. // reduce the list of Spots into a list of KindRuns
  196. h1 := make(RunList, len(h))
  197. for i, x := range h {
  198. h1[i] = x.(Spot).Info
  199. }
  200. h2 := h1.reduce(lessKind, newKindRun)
  201. // create the FileRun
  202. groups := make([]KindRun, len(h2))
  203. for i, x := range h2 {
  204. groups[i] = x.(KindRun)
  205. }
  206. return &FileRun{file, groups}
  207. }
  208. // ----------------------------------------------------------------------------
  209. // PakRun
  210. // A PakRun describes a run of *FileRuns of a package.
  211. type PakRun struct {
  212. Pak *Pak
  213. Files []*FileRun
  214. }
  215. // Sorting support for files within a PakRun.
  216. func (p *PakRun) Len() int { return len(p.Files) }
  217. func (p *PakRun) Less(i, j int) bool { return p.Files[i].File.Name < p.Files[j].File.Name }
  218. func (p *PakRun) Swap(i, j int) { p.Files[i], p.Files[j] = p.Files[j], p.Files[i] }
  219. // FileRuns are sorted by package for the reduction into PakRuns.
  220. func lessFileRun(x, y interface{}) bool {
  221. return x.(*FileRun).File.Pak.less(y.(*FileRun).File.Pak)
  222. }
  223. // newPakRun allocates a new PakRun from the *FileRun run h.
  224. func newPakRun(h RunList) interface{} {
  225. pak := h[0].(*FileRun).File.Pak
  226. files := make([]*FileRun, len(h))
  227. for i, x := range h {
  228. files[i] = x.(*FileRun)
  229. }
  230. run := &PakRun{pak, files}
  231. sort.Sort(run) // files were sorted by package; sort them by file now
  232. return run
  233. }
  234. // ----------------------------------------------------------------------------
  235. // HitList
  236. // A HitList describes a list of PakRuns.
  237. type HitList []*PakRun
  238. // PakRuns are sorted by package.
  239. func lessPakRun(x, y interface{}) bool { return x.(*PakRun).Pak.less(y.(*PakRun).Pak) }
  240. func reduce(h0 RunList) HitList {
  241. // reduce a list of Spots into a list of FileRuns
  242. h1 := h0.reduce(lessSpot, newFileRun)
  243. // reduce a list of FileRuns into a list of PakRuns
  244. h2 := h1.reduce(lessFileRun, newPakRun)
  245. // sort the list of PakRuns by package
  246. h2.sort(lessPakRun)
  247. // create a HitList
  248. h := make(HitList, len(h2))
  249. for i, p := range h2 {
  250. h[i] = p.(*PakRun)
  251. }
  252. return h
  253. }
  254. // filter returns a new HitList created by filtering
  255. // all PakRuns from h that have a matching pakname.
  256. func (h HitList) filter(pakname string) HitList {
  257. var hh HitList
  258. for _, p := range h {
  259. if p.Pak.Name == pakname {
  260. hh = append(hh, p)
  261. }
  262. }
  263. return hh
  264. }
  265. // ----------------------------------------------------------------------------
  266. // AltWords
  267. type wordPair struct {
  268. canon string // canonical word spelling (all lowercase)
  269. alt string // alternative spelling
  270. }
  271. // An AltWords describes a list of alternative spellings for a
  272. // canonical (all lowercase) spelling of a word.
  273. type AltWords struct {
  274. Canon string // canonical word spelling (all lowercase)
  275. Alts []string // alternative spelling for the same word
  276. }
  277. // wordPairs are sorted by their canonical spelling.
  278. func lessWordPair(x, y interface{}) bool { return x.(*wordPair).canon < y.(*wordPair).canon }
  279. // newAltWords allocates a new AltWords from the *wordPair run h.
  280. func newAltWords(h RunList) interface{} {
  281. canon := h[0].(*wordPair).canon
  282. alts := make([]string, len(h))
  283. for i, x := range h {
  284. alts[i] = x.(*wordPair).alt
  285. }
  286. return &AltWords{canon, alts}
  287. }
  288. func (a *AltWords) filter(s string) *AltWords {
  289. var alts []string
  290. for _, w := range a.Alts {
  291. if w != s {
  292. alts = append(alts, w)
  293. }
  294. }
  295. if len(alts) > 0 {
  296. return &AltWords{a.Canon, alts}
  297. }
  298. return nil
  299. }
  300. // Ident stores information about external identifiers in order to create
  301. // links to package documentation.
  302. type Ident struct {
  303. Path string // e.g. "net/http"
  304. Package string // e.g. "http"
  305. Name string // e.g. "NewRequest"
  306. Doc string // e.g. "NewRequest returns a new Request..."
  307. }
  308. // byImportCount sorts the given slice of Idents by the import
  309. // counts of the packages to which they belong.
  310. type byImportCount struct {
  311. Idents []Ident
  312. ImportCount map[string]int
  313. }
  314. func (ic byImportCount) Len() int {
  315. return len(ic.Idents)
  316. }
  317. func (ic byImportCount) Less(i, j int) bool {
  318. ri := ic.ImportCount[ic.Idents[i].Path]
  319. rj := ic.ImportCount[ic.Idents[j].Path]
  320. if ri == rj {
  321. return ic.Idents[i].Path < ic.Idents[j].Path
  322. }
  323. return ri > rj
  324. }
  325. func (ic byImportCount) Swap(i, j int) {
  326. ic.Idents[i], ic.Idents[j] = ic.Idents[j], ic.Idents[i]
  327. }
  328. func (ic byImportCount) String() string {
  329. buf := bytes.NewBuffer([]byte("["))
  330. for _, v := range ic.Idents {
  331. buf.WriteString(fmt.Sprintf("\n\t%s, %s (%d)", v.Path, v.Name, ic.ImportCount[v.Path]))
  332. }
  333. buf.WriteString("\n]")
  334. return buf.String()
  335. }
  336. // filter creates a new Ident list where the results match the given
  337. // package name.
  338. func (ic byImportCount) filter(pakname string) []Ident {
  339. if ic.Idents == nil {
  340. return nil
  341. }
  342. var res []Ident
  343. for _, i := range ic.Idents {
  344. if i.Package == pakname {
  345. res = append(res, i)
  346. }
  347. }
  348. return res
  349. }
  350. // top returns the top n identifiers.
  351. func (ic byImportCount) top(n int) []Ident {
  352. if len(ic.Idents) > n {
  353. return ic.Idents[:n]
  354. }
  355. return ic.Idents
  356. }
  357. // ----------------------------------------------------------------------------
  358. // Indexer
  359. type IndexResult struct {
  360. Decls RunList // package-level declarations (with snippets)
  361. Others RunList // all other occurrences
  362. }
  363. // Statistics provides statistics information for an index.
  364. type Statistics struct {
  365. Bytes int // total size of indexed source files
  366. Files int // number of indexed source files
  367. Lines int // number of lines (all files)
  368. Words int // number of different identifiers
  369. Spots int // number of identifier occurrences
  370. }
  371. // An Indexer maintains the data structures and provides the machinery
  372. // for indexing .go files under a file tree. It implements the path.Visitor
  373. // interface for walking file trees, and the ast.Visitor interface for
  374. // walking Go ASTs.
  375. type Indexer struct {
  376. c *Corpus
  377. fset *token.FileSet // file set for all indexed files
  378. fsOpenGate chan bool // send pre fs.Open; receive on close
  379. mu sync.Mutex // guards all the following
  380. sources bytes.Buffer // concatenated sources
  381. strings map[string]string // interned string
  382. packages map[Pak]*Pak // interned *Paks
  383. words map[string]*IndexResult // RunLists of Spots
  384. snippets []*Snippet // indices are stored in SpotInfos
  385. current *token.File // last file added to file set
  386. file *File // AST for current file
  387. decl ast.Decl // AST for current decl
  388. stats Statistics
  389. throttle *util.Throttle
  390. importCount map[string]int // package path ("net/http") => count
  391. packagePath map[string]map[string]bool // "template" => "text/template" => true
  392. exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
  393. curPkgExports map[string]SpotKind
  394. idents map[SpotKind]map[string][]Ident // kind => name => list of Idents
  395. }
  396. func (x *Indexer) intern(s string) string {
  397. if s, ok := x.strings[s]; ok {
  398. return s
  399. }
  400. x.strings[s] = s
  401. return s
  402. }
  403. func (x *Indexer) lookupPackage(path, name string) *Pak {
  404. // In the source directory tree, more than one package may
  405. // live in the same directory. For the packages map, construct
  406. // a key that includes both the directory path and the package
  407. // name.
  408. key := Pak{Path: x.intern(path), Name: x.intern(name)}
  409. pak := x.packages[key]
  410. if pak == nil {
  411. pak = &key
  412. x.packages[key] = pak
  413. }
  414. return pak
  415. }
  416. func (x *Indexer) addSnippet(s *Snippet) int {
  417. index := len(x.snippets)
  418. x.snippets = append(x.snippets, s)
  419. return index
  420. }
  421. func (x *Indexer) visitIdent(kind SpotKind, id *ast.Ident) {
  422. if id == nil {
  423. return
  424. }
  425. name := x.intern(id.Name)
  426. switch kind {
  427. case TypeDecl, FuncDecl, ConstDecl, VarDecl:
  428. x.curPkgExports[name] = kind
  429. }
  430. lists, found := x.words[name]
  431. if !found {
  432. lists = new(IndexResult)
  433. x.words[name] = lists
  434. }
  435. if kind == Use || x.decl == nil {
  436. if x.c.IndexGoCode {
  437. // not a declaration or no snippet required
  438. info := makeSpotInfo(kind, x.current.Line(id.Pos()), false)
  439. lists.Others = append(lists.Others, Spot{x.file, info})
  440. }
  441. } else {
  442. // a declaration with snippet
  443. index := x.addSnippet(NewSnippet(x.fset, x.decl, id))
  444. info := makeSpotInfo(kind, index, true)
  445. lists.Decls = append(lists.Decls, Spot{x.file, info})
  446. }
  447. x.stats.Spots++
  448. }
  449. func (x *Indexer) visitFieldList(kind SpotKind, flist *ast.FieldList) {
  450. for _, f := range flist.List {
  451. x.decl = nil // no snippets for fields
  452. for _, name := range f.Names {
  453. x.visitIdent(kind, name)
  454. }
  455. ast.Walk(x, f.Type)
  456. // ignore tag - not indexed at the moment
  457. }
  458. }
  459. func (x *Indexer) visitSpec(kind SpotKind, spec ast.Spec) {
  460. switch n := spec.(type) {
  461. case *ast.ImportSpec:
  462. x.visitIdent(ImportDecl, n.Name)
  463. if n.Path != nil {
  464. if imp, err := strconv.Unquote(n.Path.Value); err == nil {
  465. x.importCount[x.intern(imp)]++
  466. }
  467. }
  468. case *ast.ValueSpec:
  469. for _, n := range n.Names {
  470. x.visitIdent(kind, n)
  471. }
  472. ast.Walk(x, n.Type)
  473. for _, v := range n.Values {
  474. ast.Walk(x, v)
  475. }
  476. case *ast.TypeSpec:
  477. x.visitIdent(TypeDecl, n.Name)
  478. ast.Walk(x, n.Type)
  479. }
  480. }
  481. func (x *Indexer) visitGenDecl(decl *ast.GenDecl) {
  482. kind := VarDecl
  483. if decl.Tok == token.CONST {
  484. kind = ConstDecl
  485. }
  486. x.decl = decl
  487. for _, s := range decl.Specs {
  488. x.visitSpec(kind, s)
  489. }
  490. }
  491. func (x *Indexer) Visit(node ast.Node) ast.Visitor {
  492. switch n := node.(type) {
  493. case nil:
  494. // nothing to do
  495. case *ast.Ident:
  496. x.visitIdent(Use, n)
  497. case *ast.FieldList:
  498. x.visitFieldList(VarDecl, n)
  499. case *ast.InterfaceType:
  500. x.visitFieldList(MethodDecl, n.Methods)
  501. case *ast.DeclStmt:
  502. // local declarations should only be *ast.GenDecls;
  503. // ignore incorrect ASTs
  504. if decl, ok := n.Decl.(*ast.GenDecl); ok {
  505. x.decl = nil // no snippets for local declarations
  506. x.visitGenDecl(decl)
  507. }
  508. case *ast.GenDecl:
  509. x.decl = n
  510. x.visitGenDecl(n)
  511. case *ast.FuncDecl:
  512. kind := FuncDecl
  513. if n.Recv != nil {
  514. kind = MethodDecl
  515. ast.Walk(x, n.Recv)
  516. }
  517. x.decl = n
  518. x.visitIdent(kind, n.Name)
  519. ast.Walk(x, n.Type)
  520. if n.Body != nil {
  521. ast.Walk(x, n.Body)
  522. }
  523. case *ast.File:
  524. x.decl = nil
  525. x.visitIdent(PackageClause, n.Name)
  526. for _, d := range n.Decls {
  527. ast.Walk(x, d)
  528. }
  529. default:
  530. return x
  531. }
  532. return nil
  533. }
  534. // addFile adds a file to the index if possible and returns the file set file
  535. // and the file's AST if it was successfully parsed as a Go file. If addFile
  536. // failed (that is, if the file was not added), it returns file == nil.
  537. func (x *Indexer) addFile(f vfs.ReadSeekCloser, filename string, goFile bool) (file *token.File, ast *ast.File) {
  538. defer f.Close()
  539. // The file set's base offset and x.sources size must be in lock-step;
  540. // this permits the direct mapping of suffix array lookup results to
  541. // to corresponding Pos values.
  542. //
  543. // When a file is added to the file set, its offset base increases by
  544. // the size of the file + 1; and the initial base offset is 1. Add an
  545. // extra byte to the sources here.
  546. x.sources.WriteByte(0)
  547. // If the sources length doesn't match the file set base at this point
  548. // the file set implementation changed or we have another error.
  549. base := x.fset.Base()
  550. if x.sources.Len() != base {
  551. panic("internal error: file base incorrect")
  552. }
  553. // append file contents (src) to x.sources
  554. if _, err := x.sources.ReadFrom(f); err == nil {
  555. src := x.sources.Bytes()[base:]
  556. if goFile {
  557. // parse the file and in the process add it to the file set
  558. if ast, err = parser.ParseFile(x.fset, filename, src, parser.ParseComments); err == nil {
  559. file = x.fset.File(ast.Pos()) // ast.Pos() is inside the file
  560. return
  561. }
  562. // file has parse errors, and the AST may be incorrect -
  563. // set lines information explicitly and index as ordinary
  564. // text file (cannot fall through to the text case below
  565. // because the file has already been added to the file set
  566. // by the parser)
  567. file = x.fset.File(token.Pos(base)) // token.Pos(base) is inside the file
  568. file.SetLinesForContent(src)
  569. ast = nil
  570. return
  571. }
  572. if util.IsText(src) {
  573. // only add the file to the file set (for the full text index)
  574. file = x.fset.AddFile(filename, x.fset.Base(), len(src))
  575. file.SetLinesForContent(src)
  576. return
  577. }
  578. }
  579. // discard possibly added data
  580. x.sources.Truncate(base - 1) // -1 to remove added byte 0 since no file was added
  581. return
  582. }
  583. // Design note: Using an explicit white list of permitted files for indexing
  584. // makes sure that the important files are included and massively reduces the
  585. // number of files to index. The advantage over a blacklist is that unexpected
  586. // (non-blacklisted) files won't suddenly explode the index.
  587. // Files are whitelisted if they have a file name or extension
  588. // present as key in whitelisted.
  589. var whitelisted = map[string]bool{
  590. ".bash": true,
  591. ".c": true,
  592. ".cc": true,
  593. ".cpp": true,
  594. ".cxx": true,
  595. ".css": true,
  596. ".go": true,
  597. ".goc": true,
  598. ".h": true,
  599. ".hh": true,
  600. ".hpp": true,
  601. ".hxx": true,
  602. ".html": true,
  603. ".js": true,
  604. ".out": true,
  605. ".py": true,
  606. ".s": true,
  607. ".sh": true,
  608. ".txt": true,
  609. ".xml": true,
  610. "AUTHORS": true,
  611. "CONTRIBUTORS": true,
  612. "LICENSE": true,
  613. "Makefile": true,
  614. "PATENTS": true,
  615. "README": true,
  616. }
  617. // isWhitelisted returns true if a file is on the list
  618. // of "permitted" files for indexing. The filename must
  619. // be the directory-local name of the file.
  620. func isWhitelisted(filename string) bool {
  621. key := pathpkg.Ext(filename)
  622. if key == "" {
  623. // file has no extension - use entire filename
  624. key = filename
  625. }
  626. return whitelisted[key]
  627. }
  628. func (x *Indexer) indexDocs(dirname string, filename string, astFile *ast.File) {
  629. pkgName := x.intern(astFile.Name.Name)
  630. if pkgName == "main" {
  631. return
  632. }
  633. pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
  634. astPkg := ast.Package{
  635. Name: pkgName,
  636. Files: map[string]*ast.File{
  637. filename: astFile,
  638. },
  639. }
  640. var m doc.Mode
  641. docPkg := doc.New(&astPkg, dirname, m)
  642. addIdent := func(sk SpotKind, name string, docstr string) {
  643. if x.idents[sk] == nil {
  644. x.idents[sk] = make(map[string][]Ident)
  645. }
  646. name = x.intern(name)
  647. x.idents[sk][name] = append(x.idents[sk][name], Ident{
  648. Path: pkgPath,
  649. Package: pkgName,
  650. Name: name,
  651. Doc: doc.Synopsis(docstr),
  652. })
  653. }
  654. if x.idents[PackageClause] == nil {
  655. x.idents[PackageClause] = make(map[string][]Ident)
  656. }
  657. // List of words under which the package identifier will be stored.
  658. // This includes the package name and the components of the directory
  659. // in which it resides.
  660. words := strings.Split(pathpkg.Dir(pkgPath), "/")
  661. if words[0] == "." {
  662. words = []string{}
  663. }
  664. name := x.intern(docPkg.Name)
  665. synopsis := doc.Synopsis(docPkg.Doc)
  666. words = append(words, name)
  667. pkgIdent := Ident{
  668. Path: pkgPath,
  669. Package: pkgName,
  670. Name: name,
  671. Doc: synopsis,
  672. }
  673. for _, word := range words {
  674. word = x.intern(word)
  675. found := false
  676. pkgs := x.idents[PackageClause][word]
  677. for i, p := range pkgs {
  678. if p.Path == pkgPath {
  679. if docPkg.Doc != "" {
  680. p.Doc = synopsis
  681. pkgs[i] = p
  682. }
  683. found = true
  684. break
  685. }
  686. }
  687. if !found {
  688. x.idents[PackageClause][word] = append(x.idents[PackageClause][word], pkgIdent)
  689. }
  690. }
  691. for _, c := range docPkg.Consts {
  692. for _, name := range c.Names {
  693. addIdent(ConstDecl, name, c.Doc)
  694. }
  695. }
  696. for _, t := range docPkg.Types {
  697. addIdent(TypeDecl, t.Name, t.Doc)
  698. for _, c := range t.Consts {
  699. for _, name := range c.Names {
  700. addIdent(ConstDecl, name, c.Doc)
  701. }
  702. }
  703. for _, v := range t.Vars {
  704. for _, name := range v.Names {
  705. addIdent(VarDecl, name, v.Doc)
  706. }
  707. }
  708. for _, f := range t.Funcs {
  709. addIdent(FuncDecl, f.Name, f.Doc)
  710. }
  711. for _, f := range t.Methods {
  712. addIdent(MethodDecl, f.Name, f.Doc)
  713. // Change the name of methods to be "<typename>.<methodname>".
  714. // They will still be indexed as <methodname>.
  715. idents := x.idents[MethodDecl][f.Name]
  716. idents[len(idents)-1].Name = x.intern(t.Name + "." + f.Name)
  717. }
  718. }
  719. for _, v := range docPkg.Vars {
  720. for _, name := range v.Names {
  721. addIdent(VarDecl, name, v.Doc)
  722. }
  723. }
  724. for _, f := range docPkg.Funcs {
  725. addIdent(FuncDecl, f.Name, f.Doc)
  726. }
  727. }
  728. func (x *Indexer) indexGoFile(dirname string, filename string, file *token.File, astFile *ast.File) {
  729. pkgName := astFile.Name.Name
  730. if x.c.IndexGoCode {
  731. x.current = file
  732. pak := x.lookupPackage(dirname, pkgName)
  733. x.file = &File{filename, pak}
  734. ast.Walk(x, astFile)
  735. }
  736. if x.c.IndexDocs {
  737. // Test files are already filtered out in visitFile if IndexGoCode and
  738. // IndexFullText are false. Otherwise, check here.
  739. isTestFile := (x.c.IndexGoCode || x.c.IndexFullText) &&
  740. (strings.HasSuffix(filename, "_test.go") || strings.HasPrefix(dirname, "/test/"))
  741. if !isTestFile {
  742. x.indexDocs(dirname, filename, astFile)
  743. }
  744. }
  745. ppKey := x.intern(pkgName)
  746. if _, ok := x.packagePath[ppKey]; !ok {
  747. x.packagePath[ppKey] = make(map[string]bool)
  748. }
  749. pkgPath := x.intern(strings.TrimPrefix(strings.TrimPrefix(dirname, "/src/"), "pkg/"))
  750. x.packagePath[ppKey][pkgPath] = true
  751. // Merge in exported symbols found walking this file into
  752. // the map for that package.
  753. if len(x.curPkgExports) > 0 {
  754. dest, ok := x.exports[pkgPath]
  755. if !ok {
  756. dest = make(map[string]SpotKind)
  757. x.exports[pkgPath] = dest
  758. }
  759. for k, v := range x.curPkgExports {
  760. dest[k] = v
  761. }
  762. }
  763. }
  764. func (x *Indexer) visitFile(dirname string, fi os.FileInfo) {
  765. if fi.IsDir() || !x.c.IndexEnabled {
  766. return
  767. }
  768. filename := pathpkg.Join(dirname, fi.Name())
  769. goFile := isGoFile(fi)
  770. switch {
  771. case x.c.IndexFullText:
  772. if !isWhitelisted(fi.Name()) {
  773. return
  774. }
  775. case x.c.IndexGoCode:
  776. if !goFile {
  777. return
  778. }
  779. case x.c.IndexDocs:
  780. if !goFile ||
  781. strings.HasSuffix(fi.Name(), "_test.go") ||
  782. strings.HasPrefix(dirname, "/test/") {
  783. return
  784. }
  785. default:
  786. // No indexing turned on.
  787. return
  788. }
  789. x.fsOpenGate <- true
  790. defer func() { <-x.fsOpenGate }()
  791. // open file
  792. f, err := x.c.fs.Open(filename)
  793. if err != nil {
  794. return
  795. }
  796. x.mu.Lock()
  797. defer x.mu.Unlock()
  798. x.throttle.Throttle()
  799. x.curPkgExports = make(map[string]SpotKind)
  800. file, fast := x.addFile(f, filename, goFile)
  801. if file == nil {
  802. return // addFile failed
  803. }
  804. if fast != nil {
  805. x.indexGoFile(dirname, fi.Name(), file, fast)
  806. }
  807. // update statistics
  808. x.stats.Bytes += file.Size()
  809. x.stats.Files++
  810. x.stats.Lines += file.LineCount()
  811. }
  812. // indexOptions contains information that affects the contents of an index.
  813. type indexOptions struct {
  814. // Docs provides documentation search results.
  815. // It is only consulted if IndexEnabled is true.
  816. // The default values is true.
  817. Docs bool
  818. // GoCode provides Go source code search results.
  819. // It is only consulted if IndexEnabled is true.
  820. // The default values is true.
  821. GoCode bool
  822. // FullText provides search results from all files.
  823. // It is only consulted if IndexEnabled is true.
  824. // The default values is true.
  825. FullText bool
  826. // MaxResults optionally specifies the maximum results for indexing.
  827. // The default is 1000.
  828. MaxResults int
  829. }
  830. // ----------------------------------------------------------------------------
  831. // Index
  832. type LookupResult struct {
  833. Decls HitList // package-level declarations (with snippets)
  834. Others HitList // all other occurrences
  835. }
  836. type Index struct {
  837. fset *token.FileSet // file set used during indexing; nil if no textindex
  838. suffixes *suffixarray.Index // suffixes for concatenated sources; nil if no textindex
  839. words map[string]*LookupResult // maps words to hit lists
  840. alts map[string]*AltWords // maps canonical(words) to lists of alternative spellings
  841. snippets []*Snippet // all snippets, indexed by snippet index
  842. stats Statistics
  843. importCount map[string]int // package path ("net/http") => count
  844. packagePath map[string]map[string]bool // "template" => "text/template" => true
  845. exports map[string]map[string]SpotKind // "net/http" => "ListenAndServe" => FuncDecl
  846. idents map[SpotKind]map[string][]Ident
  847. opts indexOptions
  848. }
  849. func canonical(w string) string { return strings.ToLower(w) }
  850. // Somewhat arbitrary, but I figure low enough to not hurt disk-based filesystems
  851. // consuming file descriptors, where some systems have low 256 or 512 limits.
  852. // Go should have a built-in way to cap fd usage under the ulimit.
  853. const (
  854. maxOpenFiles = 200
  855. maxOpenDirs = 50
  856. )
  857. func (c *Corpus) throttle() float64 {
  858. if c.IndexThrottle <= 0 {
  859. return 0.9
  860. }
  861. if c.IndexThrottle > 1.0 {
  862. return 1.0
  863. }
  864. return c.IndexThrottle
  865. }
// NewIndex creates a new index for the .go files provided by the corpus.
//
// It reads every directory delivered by c.fsDirnames (except those rejected
// by c.IndexDirectory, if set) and hands each directory entry to
// Indexer.visitFile, running directories and entries concurrently. Once all
// visits are done, it reduces the collected per-word lists into the lookup
// tables of the returned Index. A suffix array over the collected sources is
// built only if c.IndexFullText is set.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		// honor the optional per-directory filter
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		// blocks while maxOpenDirs directory goroutines are still running
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				// The directory goroutine has not called wg.Done yet, so
				// the counter cannot drop to zero before this Add.
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	// wait for all ReadDir and visitFile goroutines to finish
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	// (suffixes stays nil when no full-text index is requested)
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// sort idents by the number of imports of their respective packages
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byImportCount{ir, x.importCount})
		}
	}

	// record the options used, so that CompatibleWith can compare them
	// against a corpus later
	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}
// ErrFileIndexVersion is returned by Index.ReadFrom when the version stored
// in the serialized index differs from fileIndexVersion.
var ErrFileIndexVersion = errors.New("file index version out of date")

// fileIndexVersion is the version number that Index.WriteTo stores in the
// serialized index and that Index.ReadFrom requires.
const fileIndexVersion = 3
// fileIndex is the subset of Index that's gob-encoded for use by
// Index.Write and Index.Read.
// The file set and the suffix array of a full-text index are not part of
// fileIndex; WriteTo and ReadFrom handle them separately after the fileIndex,
// and only if Fulltext is set.
type fileIndex struct {
	Version     int // fileIndexVersion at the time of writing
	Words       map[string]*LookupResult
	Alts        map[string]*AltWords
	Snippets    []*Snippet
	Fulltext    bool // whether a file set and a suffix array follow
	Stats       Statistics
	ImportCount map[string]int
	PackagePath map[string]map[string]bool
	Exports     map[string]map[string]SpotKind
	Idents      map[SpotKind]map[string][]Ident
	Opts        indexOptions
}
  990. func (x *fileIndex) Write(w io.Writer) error {
  991. return gob.NewEncoder(w).Encode(x)
  992. }
  993. func (x *fileIndex) Read(r io.Reader) error {
  994. return gob.NewDecoder(r).Decode(x)
  995. }
  996. // WriteTo writes the index x to w.
  997. func (x *Index) WriteTo(w io.Writer) (n int64, err error) {
  998. w = countingWriter{&n, w}
  999. fulltext := false
  1000. if x.suffixes != nil {
  1001. fulltext = true
  1002. }
  1003. fx := fileIndex{
  1004. Version: fileIndexVersion,
  1005. Words: x.words,
  1006. Alts: x.alts,
  1007. Snippets: x.snippets,
  1008. Fulltext: fulltext,
  1009. Stats: x.stats,
  1010. ImportCount: x.importCount,
  1011. PackagePath: x.packagePath,
  1012. Exports: x.exports,
  1013. Idents: x.idents,
  1014. Opts: x.opts,
  1015. }
  1016. if err := fx.Write(w); err != nil {
  1017. return 0, err
  1018. }
  1019. if fulltext {
  1020. encode := func(x interface{}) error {
  1021. return gob.NewEncoder(w).Encode(x)
  1022. }
  1023. if err := x.fset.Write(encode); err != nil {
  1024. return 0, err
  1025. }
  1026. if err := x.suffixes.Write(w); err != nil {
  1027. return 0, err
  1028. }
  1029. }
  1030. return n, nil
  1031. }
  1032. // ReadFrom reads the index from r into x; x must not be nil.
  1033. // If r does not also implement io.ByteReader, it will be wrapped in a bufio.Reader.
  1034. // If the index is from an old version, the error is ErrFileIndexVersion.
  1035. func (x *Index) ReadFrom(r io.Reader) (n int64, err error) {
  1036. // We use the ability to read bytes as a plausible surrogate for buffering.
  1037. if _, ok := r.(io.ByteReader); !ok {
  1038. r = bufio.NewReader(r)
  1039. }
  1040. r = countingReader{&n, r.(byteReader)}
  1041. var fx fileIndex
  1042. if err := fx.Read(r); err != nil {
  1043. return n, err
  1044. }
  1045. if fx.Version != fileIndexVersion {
  1046. return 0, ErrFileIndexVersion
  1047. }
  1048. x.words = fx.Words
  1049. x.alts = fx.Alts
  1050. x.snippets = fx.Snippets
  1051. x.stats = fx.Stats
  1052. x.importCount = fx.ImportCount
  1053. x.packagePath = fx.PackagePath
  1054. x.exports = fx.Exports
  1055. x.idents = fx.Idents
  1056. x.opts = fx.Opts
  1057. if fx.Fulltext {
  1058. x.fset = token.NewFileSet()
  1059. decode := func(x interface{}) error {
  1060. return gob.NewDecoder(r).Decode(x)
  1061. }
  1062. if err := x.fset.Read(decode); err != nil {
  1063. return n, err
  1064. }
  1065. x.suffixes = new(suffixarray.Index)
  1066. if err := x.suffixes.Read(r); err != nil {
  1067. return n, err
  1068. }
  1069. }
  1070. return n, nil
  1071. }
// Stats returns index statistics.
// The result is a copy of the statistics stored in the index.
func (x *Index) Stats() Statistics {
	return x.stats
}

// ImportCount returns a map from import paths to how many times they were seen.
// The map is the index's own map, not a copy.
func (x *Index) ImportCount() map[string]int {
	return x.importCount
}

// PackagePath returns a map from short package name to a set
// of full package path names that use that short package name.
// The map is the index's own map, not a copy.
func (x *Index) PackagePath() map[string]map[string]bool {
	return x.packagePath
}

// Exports returns a map from full package path to exported
// symbol name to its type.
// The map is the index's own map, not a copy.
func (x *Index) Exports() map[string]map[string]SpotKind {
	return x.exports
}

// Idents returns a map from identifier type to exported
// symbol name to the list of identifiers matching that name.
// The map is the index's own map, not a copy.
func (x *Index) Idents() map[SpotKind]map[string][]Ident {
	return x.idents
}
  1095. func (x *Index) lookupWord(w string) (match *LookupResult, alt *AltWords) {
  1096. match = x.words[w]
  1097. alt = x.alts[canonical(w)]
  1098. // remove current spelling from alternatives
  1099. // (if there is no match, the alternatives do
  1100. // not contain the current spelling)
  1101. if match != nil && alt != nil {
  1102. alt = alt.filter(w)
  1103. }
  1104. return
  1105. }
  1106. // isIdentifier reports whether s is a Go identifier.
  1107. func isIdentifier(s string) bool {
  1108. for i, ch := range s {
  1109. if unicode.IsLetter(ch) || ch == '_' || i > 0 && unicode.IsDigit(ch) {
  1110. continue
  1111. }
  1112. return false
  1113. }
  1114. return len(s) > 0
  1115. }
  1116. // For a given query, which is either a single identifier or a qualified
  1117. // identifier, Lookup returns a SearchResult containing packages, a LookupResult, a
  1118. // list of alternative spellings, and identifiers, if any. Any and all results
  1119. // may be nil. If the query syntax is wrong, an error is reported.
  1120. func (x *Index) Lookup(query string) (*SearchResult, error) {
  1121. ss := strings.Split(query, ".")
  1122. // check query syntax
  1123. for _, s := range ss {
  1124. if !isIdentifier(s) {
  1125. return nil, errors.New("all query parts must be identifiers")
  1126. }
  1127. }
  1128. rslt := &SearchResult{
  1129. Query: query,
  1130. Idents: make(map[SpotKind][]Ident, 5),
  1131. }
  1132. // handle simple and qualified identifiers
  1133. switch len(ss) {
  1134. case 1:
  1135. ident := ss[0]
  1136. rslt.Hit, rslt.Alt = x.lookupWord(ident)
  1137. if rslt.Hit != nil {
  1138. // found a match - filter packages with same name
  1139. // for the list of packages called ident, if any
  1140. rslt.Pak = rslt.Hit.Others.filter(ident)
  1141. }
  1142. for k, v := range x.idents {
  1143. const rsltLimit = 50
  1144. ids := byImportCount{v[ident], x.importCount}
  1145. rslt.Idents[k] = ids.top(rsltLimit)
  1146. }
  1147. case 2:
  1148. pakname, ident := ss[0], ss[1]
  1149. rslt.Hit, rslt.Alt = x.lookupWord(ident)
  1150. if rslt.Hit != nil {
  1151. // found a match - filter by package name
  1152. // (no paks - package names are not qualified)
  1153. decls := rslt.Hit.Decls.filter(pakname)
  1154. others := rslt.Hit.Others.filter(pakname)
  1155. rslt.Hit = &LookupResult{decls, others}
  1156. }
  1157. for k, v := range x.idents {
  1158. ids := byImportCount{v[ident], x.importCount}
  1159. rslt.Idents[k] = ids.filter(pakname)
  1160. }
  1161. default:
  1162. return nil, errors.New("query is not a (qualified) identifier")
  1163. }
  1164. return rslt, nil
  1165. }
  1166. func (x *Index) Snippet(i int) *Snippet {
  1167. // handle illegal snippet indices gracefully
  1168. if 0 <= i && i < len(x.snippets) {
  1169. return x.snippets[i]
  1170. }
  1171. return nil
  1172. }
  1173. type positionList []struct {
  1174. filename string
  1175. line int
  1176. }
  1177. func (list positionList) Len() int { return len(list) }
  1178. func (list positionList) Less(i, j int) bool { return list[i].filename < list[j].filename }
  1179. func (list positionList) Swap(i, j int) { list[i], list[j] = list[j], list[i] }
  1180. // unique returns the list sorted and with duplicate entries removed
  1181. func unique(list []int) []int {
  1182. sort.Ints(list)
  1183. var last int
  1184. i := 0
  1185. for _, x := range list {
  1186. if i == 0 || x != last {
  1187. last = x
  1188. list[i] = x
  1189. i++
  1190. }
  1191. }
  1192. return list[0:i]
  1193. }
// A FileLines value specifies a file and line numbers within that file.
// LookupRegexp returns one FileLines per file containing matches.
type FileLines struct {
	Filename string // name of the file
	Lines    []int  // line numbers within the file; sorted and free of duplicates when produced by LookupRegexp
}
// LookupRegexp returns the number of matches and the matches where a regular
// expression r is found in the full text index. At most n matches are
// returned (thus found <= n).
//
// The matches are grouped by file: result holds one FileLines entry per file,
// in filename order, listing the matching lines of that file. found counts
// matches before duplicate lines are removed. If the index has no full text
// index or if n <= 0, LookupRegexp returns 0 and a nil result.
//
// NOTE(review): a retry searches with n1 > n, so found <= n relies on the
// retried search dropping at least as many matches as the previous one did -
// confirm.
func (x *Index) LookupRegexp(r *regexp.Regexp, n int) (found int, result []FileLines) {
	if x.suffixes == nil || n <= 0 {
		return
	}
	// n > 0

	var list positionList
	// FindAllIndex may return matches that span across file boundaries.
	// Such matches are unlikely, but after eliminating them we may end up
	// with fewer than n matches. If we don't have enough at the end, redo
	// the search with an increased value n1, but only if FindAllIndex
	// returned all the requested matches in the first place (if it
	// returned fewer than that there cannot be more).
	for n1 := n; found < n; n1 += n - found {
		found = 0
		matches := x.suffixes.FindAllIndex(r, n1)
		// compute files, exclude matches that span file boundaries,
		// and map offsets to file-local offsets
		list = make(positionList, len(matches))
		for _, m := range matches {
			// by construction, an offset corresponds to the Pos value
			// for the file set - use it to get the file and line
			p := token.Pos(m[0])
			if file := x.fset.File(p); file != nil {
				if base := file.Base(); base <= m[1] && m[1] <= base+file.Size() {
					// match [m[0], m[1]) is within the file boundaries
					list[found].filename = file.Name()
					list[found].line = file.Line(p)
					found++
				}
			}
		}
		if found == n || len(matches) < n1 {
			// found all matches or there's no chance to find more
			break
		}
	}
	// only the first found entries of list have been filled in
	list = list[0:found]
	sort.Sort(list) // sort by filename

	// collect matches belonging to the same file
	var last string
	var lines []int
	// addLines appends the lines collected for the file last to result
	// and starts a new collection.
	addLines := func() {
		if len(lines) > 0 {
			// remove duplicate lines
			result = append(result, FileLines{last, unique(lines)})
			lines = nil
		}
	}
	for _, m := range list {
		if m.filename != last {
			addLines()
			last = m.filename
		}
		lines = append(lines, m.line)
	}
	// flush the lines of the final file
	addLines()

	return
}
// invalidateIndex should be called whenever any of the file systems
// under godoc's observation change so that the indexer is kicked on.
// It calls c.fsModified.Set(nil) and then c.refreshMetadata().
func (c *Corpus) invalidateIndex() {
	c.fsModified.Set(nil)
	c.refreshMetadata()
}
  1267. // feedDirnames feeds the directory names of all directories
  1268. // under the file system given by root to channel c.
  1269. //
  1270. func (c *Corpus) feedDirnames(ch chan<- string) {
  1271. if dir, _ := c.fsTree.Get(); dir != nil {
  1272. for d := range dir.(*Directory).iter(false) {
  1273. ch <- d.Path
  1274. }
  1275. }
  1276. }
  1277. // fsDirnames() returns a channel sending all directory names
  1278. // of all the file systems under godoc's observation.
  1279. //
  1280. func (c *Corpus) fsDirnames() <-chan string {
  1281. ch := make(chan string, 256) // buffered for fewer context switches
  1282. go func() {
  1283. c.feedDirnames(ch)
  1284. close(ch)
  1285. }()
  1286. return ch
  1287. }
  1288. // CompatibleWith reports whether the Index x is compatible with the corpus
  1289. // indexing options set in c.
  1290. func (x *Index) CompatibleWith(c *Corpus) bool {
  1291. return x.opts.Docs == c.IndexDocs &&
  1292. x.opts.GoCode == c.IndexGoCode &&
  1293. x.opts.FullText == c.IndexFullText &&
  1294. x.opts.MaxResults == c.MaxResults
  1295. }
  1296. func (c *Corpus) readIndex(filenames string) error {
  1297. matches, err := filepath.Glob(filenames)
  1298. if err != nil {
  1299. return err
  1300. } else if matches == nil {
  1301. return fmt.Errorf("no index files match %q", filenames)
  1302. }
  1303. sort.Strings(matches) // make sure files are in the right order
  1304. files := make([]io.Reader, 0, len(matches))
  1305. for _, filename := range matches {
  1306. f, err := os.Open(filename)
  1307. if err != nil {
  1308. return err
  1309. }
  1310. defer f.Close()
  1311. files = append(files, f)
  1312. }
  1313. return c.ReadIndexFrom(io.MultiReader(files...))
  1314. }
  1315. // ReadIndexFrom sets the current index from the serialized version found in r.
  1316. func (c *Corpus) ReadIndexFrom(r io.Reader) error {
  1317. x := new(Index)
  1318. if _, err := x.ReadFrom(r); err != nil {
  1319. return err
  1320. }
  1321. if !x.CompatibleWith(c) {
  1322. return fmt.Errorf("index file options are incompatible: %v", x.opts)
  1323. }
  1324. c.searchIndex.Set(x)
  1325. return nil
  1326. }
  1327. func (c *Corpus) UpdateIndex() {
  1328. if c.Verbose {
  1329. log.Printf("updating index...")
  1330. }
  1331. start := time.Now()
  1332. index := c.NewIndex()
  1333. stop := time.Now()
  1334. c.searchIndex.Set(index)
  1335. if c.Verbose {
  1336. secs := stop.Sub(start).Seconds()
  1337. stats := index.Stats()
  1338. log.Printf("index updated (%gs, %d bytes of source, %d files, %d lines, %d unique words, %d spots)",
  1339. secs, stats.Bytes, stats.Files, stats.Lines, stats.Words, stats.Spots)
  1340. }
  1341. memstats := new(runtime.MemStats)
  1342. runtime.ReadMemStats(memstats)
  1343. if c.Verbose {
  1344. log.Printf("before GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
  1345. }
  1346. runtime.GC()
  1347. runtime.ReadMemStats(memstats)
  1348. if c.Verbose {
  1349. log.Printf("after GC: bytes = %d footprint = %d", memstats.HeapAlloc, memstats.Sys)
  1350. }
  1351. }
  1352. // RunIndexer runs forever, indexing.
  1353. func (c *Corpus) RunIndexer() {
  1354. // initialize the index from disk if possible
  1355. if c.IndexFiles != "" {
  1356. c.initFSTree()
  1357. if err := c.readIndex(c.IndexFiles); err != nil {
  1358. log.Printf("error reading index from file %s: %v", c.IndexFiles, err)
  1359. }
  1360. return
  1361. }
  1362. // Repeatedly update the package directory tree and index.
  1363. // TODO(bgarcia): Use fsnotify to only update when notified of a filesystem change.
  1364. for {
  1365. c.initFSTree()
  1366. c.UpdateIndex()
  1367. if c.IndexInterval < 0 {
  1368. return
  1369. }
  1370. delay := 5 * time.Minute // by default, reindex every 5 minutes
  1371. if c.IndexInterval > 0 {
  1372. delay = c.IndexInterval
  1373. }
  1374. time.Sleep(delay)
  1375. }
  1376. }
  1377. type countingWriter struct {
  1378. n *int64
  1379. w io.Writer
  1380. }
  1381. func (c countingWriter) Write(p []byte) (n int, err error) {
  1382. n, err = c.w.Write(p)
  1383. *c.n += int64(n)
  1384. return
  1385. }
  1386. type byteReader interface {
  1387. io.Reader
  1388. io.ByteReader
  1389. }
  1390. type countingReader struct {
  1391. n *int64
  1392. r byteReader
  1393. }
  1394. func (c countingReader) Read(p []byte) (n int, err error) {
  1395. n, err = c.r.Read(p)
  1396. *c.n += int64(n)
  1397. return
  1398. }
  1399. func (c countingReader) ReadByte() (b byte, err error) {
  1400. b, err = c.r.ReadByte()
  1401. *c.n += 1
  1402. return
  1403. }