You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

371 lines
10 KiB

  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // This file implements FormatSelections and FormatText.
  5. // FormatText is used to HTML-format Go and non-Go source
  6. // text with line numbers and highlighted sections. It is
  7. // built on top of FormatSelections, a generic formatter
  8. // for "selected" text.
  9. package godoc
  10. import (
  11. "fmt"
  12. "go/scanner"
  13. "go/token"
  14. "io"
  15. "regexp"
  16. "strconv"
  17. "text/template"
  18. )
  19. // ----------------------------------------------------------------------------
  20. // Implementation of FormatSelections
  21. // A Segment describes a text segment [start, end).
  22. // The zero value of a Segment is a ready-to-use empty segment.
  23. //
  24. type Segment struct {
  25. start, end int
  26. }
  27. func (seg *Segment) isEmpty() bool { return seg.start >= seg.end }
  28. // A Selection is an "iterator" function returning a text segment.
  29. // Repeated calls to a selection return consecutive, non-overlapping,
  30. // non-empty segments, followed by an infinite sequence of empty
  31. // segments. The first empty segment marks the end of the selection.
  32. //
  33. type Selection func() Segment
  34. // A LinkWriter writes some start or end "tag" to w for the text offset offs.
  35. // It is called by FormatSelections at the start or end of each link segment.
  36. //
  37. type LinkWriter func(w io.Writer, offs int, start bool)
  38. // A SegmentWriter formats a text according to selections and writes it to w.
  39. // The selections parameter is a bit set indicating which selections provided
  40. // to FormatSelections overlap with the text segment: If the n'th bit is set
  41. // in selections, the n'th selection provided to FormatSelections is overlapping
  42. // with the text.
  43. //
  44. type SegmentWriter func(w io.Writer, text []byte, selections int)
  45. // FormatSelections takes a text and writes it to w using link and segment
  46. // writers lw and sw as follows: lw is invoked for consecutive segment starts
  47. // and ends as specified through the links selection, and sw is invoked for
  48. // consecutive segments of text overlapped by the same selections as specified
  49. // by selections. The link writer lw may be nil, in which case the links
  50. // Selection is ignored.
  51. //
  52. func FormatSelections(w io.Writer, text []byte, lw LinkWriter, links Selection, sw SegmentWriter, selections ...Selection) {
  53. // If we have a link writer, make the links
  54. // selection the last entry in selections
  55. if lw != nil {
  56. selections = append(selections, links)
  57. }
  58. // compute the sequence of consecutive segment changes
  59. changes := newMerger(selections)
  60. // The i'th bit in bitset indicates that the text
  61. // at the current offset is covered by selections[i].
  62. bitset := 0
  63. lastOffs := 0
  64. // Text segments are written in a delayed fashion
  65. // such that consecutive segments belonging to the
  66. // same selection can be combined (peephole optimization).
  67. // last describes the last segment which has not yet been written.
  68. var last struct {
  69. begin, end int // valid if begin < end
  70. bitset int
  71. }
  72. // flush writes the last delayed text segment
  73. flush := func() {
  74. if last.begin < last.end {
  75. sw(w, text[last.begin:last.end], last.bitset)
  76. }
  77. last.begin = last.end // invalidate last
  78. }
  79. // segment runs the segment [lastOffs, end) with the selection
  80. // indicated by bitset through the segment peephole optimizer.
  81. segment := func(end int) {
  82. if lastOffs < end { // ignore empty segments
  83. if last.end != lastOffs || last.bitset != bitset {
  84. // the last segment is not adjacent to or
  85. // differs from the new one
  86. flush()
  87. // start a new segment
  88. last.begin = lastOffs
  89. }
  90. last.end = end
  91. last.bitset = bitset
  92. }
  93. }
  94. for {
  95. // get the next segment change
  96. index, offs, start := changes.next()
  97. if index < 0 || offs > len(text) {
  98. // no more segment changes or the next change
  99. // is past the end of the text - we're done
  100. break
  101. }
  102. // determine the kind of segment change
  103. if lw != nil && index == len(selections)-1 {
  104. // we have a link segment change (see start of this function):
  105. // format the previous selection segment, write the
  106. // link tag and start a new selection segment
  107. segment(offs)
  108. flush()
  109. lastOffs = offs
  110. lw(w, offs, start)
  111. } else {
  112. // we have a selection change:
  113. // format the previous selection segment, determine
  114. // the new selection bitset and start a new segment
  115. segment(offs)
  116. lastOffs = offs
  117. mask := 1 << uint(index)
  118. if start {
  119. bitset |= mask
  120. } else {
  121. bitset &^= mask
  122. }
  123. }
  124. }
  125. segment(len(text))
  126. flush()
  127. }
  128. // A merger merges a slice of Selections and produces a sequence of
  129. // consecutive segment change events through repeated next() calls.
  130. //
  131. type merger struct {
  132. selections []Selection
  133. segments []Segment // segments[i] is the next segment of selections[i]
  134. }
  135. const infinity int = 2e9
  136. func newMerger(selections []Selection) *merger {
  137. segments := make([]Segment, len(selections))
  138. for i, sel := range selections {
  139. segments[i] = Segment{infinity, infinity}
  140. if sel != nil {
  141. if seg := sel(); !seg.isEmpty() {
  142. segments[i] = seg
  143. }
  144. }
  145. }
  146. return &merger{selections, segments}
  147. }
  148. // next returns the next segment change: index specifies the Selection
  149. // to which the segment belongs, offs is the segment start or end offset
  150. // as determined by the start value. If there are no more segment changes,
  151. // next returns an index value < 0.
  152. //
  153. func (m *merger) next() (index, offs int, start bool) {
  154. // find the next smallest offset where a segment starts or ends
  155. offs = infinity
  156. index = -1
  157. for i, seg := range m.segments {
  158. switch {
  159. case seg.start < offs:
  160. offs = seg.start
  161. index = i
  162. start = true
  163. case seg.end < offs:
  164. offs = seg.end
  165. index = i
  166. start = false
  167. }
  168. }
  169. if index < 0 {
  170. // no offset found => all selections merged
  171. return
  172. }
  173. // offset found - it's either the start or end offset but
  174. // either way it is ok to consume the start offset: set it
  175. // to infinity so it won't be considered in the following
  176. // next call
  177. m.segments[index].start = infinity
  178. if start {
  179. return
  180. }
  181. // end offset found - consume it
  182. m.segments[index].end = infinity
  183. // advance to the next segment for that selection
  184. seg := m.selections[index]()
  185. if !seg.isEmpty() {
  186. m.segments[index] = seg
  187. }
  188. return
  189. }
  190. // ----------------------------------------------------------------------------
  191. // Implementation of FormatText
  192. // lineSelection returns the line segments for text as a Selection.
  193. func lineSelection(text []byte) Selection {
  194. i, j := 0, 0
  195. return func() (seg Segment) {
  196. // find next newline, if any
  197. for j < len(text) {
  198. j++
  199. if text[j-1] == '\n' {
  200. break
  201. }
  202. }
  203. if i < j {
  204. // text[i:j] constitutes a line
  205. seg = Segment{i, j}
  206. i = j
  207. }
  208. return
  209. }
  210. }
  211. // tokenSelection returns, as a selection, the sequence of
  212. // consecutive occurrences of token sel in the Go src text.
  213. //
  214. func tokenSelection(src []byte, sel token.Token) Selection {
  215. var s scanner.Scanner
  216. fset := token.NewFileSet()
  217. file := fset.AddFile("", fset.Base(), len(src))
  218. s.Init(file, src, nil, scanner.ScanComments)
  219. return func() (seg Segment) {
  220. for {
  221. pos, tok, lit := s.Scan()
  222. if tok == token.EOF {
  223. break
  224. }
  225. offs := file.Offset(pos)
  226. if tok == sel {
  227. seg = Segment{offs, offs + len(lit)}
  228. break
  229. }
  230. }
  231. return
  232. }
  233. }
  234. // makeSelection is a helper function to make a Selection from a slice of pairs.
  235. // Pairs describing empty segments are ignored.
  236. //
  237. func makeSelection(matches [][]int) Selection {
  238. i := 0
  239. return func() Segment {
  240. for i < len(matches) {
  241. m := matches[i]
  242. i++
  243. if m[0] < m[1] {
  244. // non-empty segment
  245. return Segment{m[0], m[1]}
  246. }
  247. }
  248. return Segment{}
  249. }
  250. }
  251. // regexpSelection computes the Selection for the regular expression expr in text.
  252. func regexpSelection(text []byte, expr string) Selection {
  253. var matches [][]int
  254. if rx, err := regexp.Compile(expr); err == nil {
  255. matches = rx.FindAllIndex(text, -1)
  256. }
  257. return makeSelection(matches)
  258. }
  259. var selRx = regexp.MustCompile(`^([0-9]+):([0-9]+)`)
  260. // RangeSelection computes the Selection for a text range described
  261. // by the argument str; the range description must match the selRx
  262. // regular expression.
  263. func RangeSelection(str string) Selection {
  264. m := selRx.FindStringSubmatch(str)
  265. if len(m) >= 2 {
  266. from, _ := strconv.Atoi(m[1])
  267. to, _ := strconv.Atoi(m[2])
  268. if from < to {
  269. return makeSelection([][]int{{from, to}})
  270. }
  271. }
  272. return nil
  273. }
  274. // Span tags for all the possible selection combinations that may
  275. // be generated by FormatText. Selections are indicated by a bitset,
  276. // and the value of the bitset specifies the tag to be used.
  277. //
  278. // bit 0: comments
  279. // bit 1: highlights
  280. // bit 2: selections
  281. //
  282. var startTags = [][]byte{
  283. /* 000 */ []byte(``),
  284. /* 001 */ []byte(`<span class="comment">`),
  285. /* 010 */ []byte(`<span class="highlight">`),
  286. /* 011 */ []byte(`<span class="highlight-comment">`),
  287. /* 100 */ []byte(`<span class="selection">`),
  288. /* 101 */ []byte(`<span class="selection-comment">`),
  289. /* 110 */ []byte(`<span class="selection-highlight">`),
  290. /* 111 */ []byte(`<span class="selection-highlight-comment">`),
  291. }
  292. var endTag = []byte(`</span>`)
  293. func selectionTag(w io.Writer, text []byte, selections int) {
  294. if selections < len(startTags) {
  295. if tag := startTags[selections]; len(tag) > 0 {
  296. w.Write(tag)
  297. template.HTMLEscape(w, text)
  298. w.Write(endTag)
  299. return
  300. }
  301. }
  302. template.HTMLEscape(w, text)
  303. }
  304. // FormatText HTML-escapes text and writes it to w.
  305. // Consecutive text segments are wrapped in HTML spans (with tags as
  306. // defined by startTags and endTag) as follows:
  307. //
  308. // - if line >= 0, line number (ln) spans are inserted before each line,
  309. // starting with the value of line
  310. // - if the text is Go source, comments get the "comment" span class
  311. // - each occurrence of the regular expression pattern gets the "highlight"
  312. // span class
  313. // - text segments covered by selection get the "selection" span class
  314. //
  315. // Comments, highlights, and selections may overlap arbitrarily; the respective
  316. // HTML span classes are specified in the startTags variable.
  317. //
  318. func FormatText(w io.Writer, text []byte, line int, goSource bool, pattern string, selection Selection) {
  319. var comments, highlights Selection
  320. if goSource {
  321. comments = tokenSelection(text, token.COMMENT)
  322. }
  323. if pattern != "" {
  324. highlights = regexpSelection(text, pattern)
  325. }
  326. if line >= 0 || comments != nil || highlights != nil || selection != nil {
  327. var lineTag LinkWriter
  328. if line >= 0 {
  329. lineTag = func(w io.Writer, _ int, start bool) {
  330. if start {
  331. fmt.Fprintf(w, "<span id=\"L%d\" class=\"ln\">%6d</span>\t", line, line)
  332. line++
  333. }
  334. }
  335. }
  336. FormatSelections(w, text, lineTag, lineSelection(text), selectionTag, comments, highlights, selection)
  337. } else {
  338. template.HTMLEscape(w, text)
  339. }
  340. }