You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

229 lines
4.9 KiB

  1. // Copyright 2012 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package present
  5. import (
  6. "errors"
  7. "regexp"
  8. "strconv"
  9. "unicode/utf8"
  10. )
  11. // This file is stolen from go/src/cmd/godoc/codewalk.go.
  12. // It's an evaluator for the file address syntax implemented by acme and sam,
  13. // but using Go-native regular expressions.
  14. // To keep things reasonably close, this version uses (?m:re) for all user-provided
  15. // regular expressions. That is the only change to the code from codewalk.go.
  16. // See http://plan9.bell-labs.com/sys/doc/sam/sam.html Table II
  17. // for details on the syntax.
  18. // addrToByte evaluates the given address starting at offset start in data.
  19. // It returns the lo and hi byte offset of the matched region within data.
  20. func addrToByteRange(addr string, start int, data []byte) (lo, hi int, err error) {
  21. if addr == "" {
  22. lo, hi = start, len(data)
  23. return
  24. }
  25. var (
  26. dir byte
  27. prevc byte
  28. charOffset bool
  29. )
  30. lo = start
  31. hi = start
  32. for addr != "" && err == nil {
  33. c := addr[0]
  34. switch c {
  35. default:
  36. err = errors.New("invalid address syntax near " + string(c))
  37. case ',':
  38. if len(addr) == 1 {
  39. hi = len(data)
  40. } else {
  41. _, hi, err = addrToByteRange(addr[1:], hi, data)
  42. }
  43. return
  44. case '+', '-':
  45. if prevc == '+' || prevc == '-' {
  46. lo, hi, err = addrNumber(data, lo, hi, prevc, 1, charOffset)
  47. }
  48. dir = c
  49. case '$':
  50. lo = len(data)
  51. hi = len(data)
  52. if len(addr) > 1 {
  53. dir = '+'
  54. }
  55. case '#':
  56. charOffset = true
  57. case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
  58. var i int
  59. for i = 1; i < len(addr); i++ {
  60. if addr[i] < '0' || addr[i] > '9' {
  61. break
  62. }
  63. }
  64. var n int
  65. n, err = strconv.Atoi(addr[0:i])
  66. if err != nil {
  67. break
  68. }
  69. lo, hi, err = addrNumber(data, lo, hi, dir, n, charOffset)
  70. dir = 0
  71. charOffset = false
  72. prevc = c
  73. addr = addr[i:]
  74. continue
  75. case '/':
  76. var i, j int
  77. Regexp:
  78. for i = 1; i < len(addr); i++ {
  79. switch addr[i] {
  80. case '\\':
  81. i++
  82. case '/':
  83. j = i + 1
  84. break Regexp
  85. }
  86. }
  87. if j == 0 {
  88. j = i
  89. }
  90. pattern := addr[1:i]
  91. lo, hi, err = addrRegexp(data, lo, hi, dir, pattern)
  92. prevc = c
  93. addr = addr[j:]
  94. continue
  95. }
  96. prevc = c
  97. addr = addr[1:]
  98. }
  99. if err == nil && dir != 0 {
  100. lo, hi, err = addrNumber(data, lo, hi, dir, 1, charOffset)
  101. }
  102. if err != nil {
  103. return 0, 0, err
  104. }
  105. return lo, hi, nil
  106. }
  // addrNumber applies the given dir, n, and charOffset to the address lo, hi.
  // dir is '+' or '-' (or 0 for an absolute address, which is evaluated as a
  // forward motion from offset 0), n is the count, and charOffset is true if
  // the syntax used was #n. Applying +n (or +#n) means to advance n lines
  // (or characters) after hi. Applying -n (or -#n) means to back up n lines
  // (or characters) before lo.
  //
  // Characters are UTF-8 encoded runes in both directions; a byte that does
  // not decode as part of a valid rune counts as one character.
  // Forward line motion only counts lines terminated by '\n', so a final line
  // without a trailing newline cannot be selected.
  //
  // The return value is the new lo, hi. If data does not contain enough lines
  // or characters in the requested direction, the error "address out of range"
  // is returned with both offsets zero.
  func addrNumber(data []byte, lo, hi int, dir byte, n int, charOffset bool) (int, int, error) {
  	switch dir {
  	case 0:
  		// Absolute address: measure from the beginning of data.
  		lo = 0
  		hi = 0
  		fallthrough

  	case '+':
  		if charOffset {
  			pos := hi
  			for ; n > 0 && pos < len(data); n-- {
  				_, size := utf8.DecodeRune(data[pos:])
  				pos += size
  			}
  			if n == 0 {
  				return pos, pos, nil
  			}
  			break // ran off the end of data: out of range
  		}
  		// find next beginning of line
  		if hi > 0 {
  			for hi < len(data) && data[hi-1] != '\n' {
  				hi++
  			}
  		}
  		lo = hi
  		if n == 0 {
  			return lo, hi, nil
  		}
  		for ; hi < len(data); hi++ {
  			if data[hi] != '\n' {
  				continue
  			}
  			switch n--; n {
  			case 1:
  				// The next newline ends the target line; it starts here.
  				lo = hi + 1
  			case 0:
  				return lo, hi + 1, nil
  			}
  		}

  	case '-':
  		if charOffset {
  			// Step backward one whole rune at a time. Decoding the rune that
  			// ends at pos (instead of inspecting data[pos]) keeps the scan in
  			// bounds when lo == len(data) and always stops on a rune boundary.
  			pos := lo
  			for ; n > 0 && pos > 0; n-- {
  				_, size := utf8.DecodeLastRune(data[:pos])
  				pos -= size
  			}
  			if n == 0 {
  				return pos, pos, nil
  			}
  			break // ran off the start of data: out of range
  		}
  		// find earlier beginning of line
  		for lo > 0 && data[lo-1] != '\n' {
  			lo--
  		}
  		hi = lo
  		if n == 0 {
  			return lo, hi, nil
  		}
  		for ; lo >= 0; lo-- {
  			// Only positions at the start of a line (or of data) count.
  			if lo > 0 && data[lo-1] != '\n' {
  				continue
  			}
  			switch n--; n {
  			case 1:
  				hi = lo
  			case 0:
  				return lo, hi, nil
  			}
  		}
  	}

  	return 0, 0, errors.New("address out of range")
  }
  // addrRegexp looks for pattern in data and reports the byte offsets of the
  // first match. A dir of '-' requests a backward search from lo, which is not
  // supported and yields an error. Any other dir searches forward from hi and,
  // if nothing is found there and hi is not already 0, wraps around to search
  // from the beginning of data.
  func addrRegexp(data []byte, lo, hi int, dir byte, pattern string) (int, int, error) {
  	// Multi-line mode makes ^ and $ anchor at line boundaries, as in sam/acme.
  	re, err := regexp.Compile("(?m:" + pattern + ")")
  	if err != nil {
  		return 0, 0, err
  	}

  	if dir == '-' {
  		// A reverse search could be built from repeated forward searches,
  		// but that has not been judged worth the complexity.
  		return 0, 0, errors.New("reverse search not implemented")
  	}

  	// Search the tail first; offsets are relative to hi and must be shifted.
  	if loc := re.FindIndex(data[hi:]); loc != nil {
  		return loc[0] + hi, loc[1] + hi, nil
  	}

  	// Nothing after hi: wrap to the beginning of data.
  	if hi > 0 {
  		if loc := re.FindIndex(data); loc != nil {
  			return loc[0], loc[1], nil
  		}
  	}

  	return 0, 0, errors.New("no match for " + pattern)
  }