You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

273 lines
7.0 KiB

  1. // Copyright 2011 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package html
  5. import (
  6. "bufio"
  7. "errors"
  8. "fmt"
  9. "io"
  10. "strings"
  11. )
  12. type writer interface {
  13. io.Writer
  14. io.ByteWriter
  15. WriteString(string) (int, error)
  16. }
  17. // Render renders the parse tree n to the given writer.
  18. //
  19. // Rendering is done on a 'best effort' basis: calling Parse on the output of
  20. // Render will always result in something similar to the original tree, but it
  21. // is not necessarily an exact clone unless the original tree was 'well-formed'.
  22. // 'Well-formed' is not easily specified; the HTML5 specification is
  23. // complicated.
  24. //
  25. // Calling Parse on arbitrary input typically results in a 'well-formed' parse
  26. // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
  27. // For example, in a 'well-formed' parse tree, no <a> element is a child of
  28. // another <a> element: parsing "<a><a>" results in two sibling elements.
  29. // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
  30. // <table> element: parsing "<p><table><a>" results in a <p> with two sibling
  31. // children; the <a> is reparented to the <table>'s parent. However, calling
  32. // Parse on "<a><table><a>" does not return an error, but the result has an <a>
  33. // element with an <a> child, and is therefore not 'well-formed'.
  34. //
  35. // Programmatically constructed trees are typically also 'well-formed', but it
  36. // is possible to construct a tree that looks innocuous but, when rendered and
  37. // re-parsed, results in a different tree. A simple example is that a solitary
  38. // text node would become a tree containing <html>, <head> and <body> elements.
  39. // Another example is that the programmatic equivalent of "a<head>b</head>c"
  40. // becomes "<html><head><head/><body>abc</body></html>".
  41. func Render(w io.Writer, n *Node) error {
  42. if x, ok := w.(writer); ok {
  43. return render(x, n)
  44. }
  45. buf := bufio.NewWriter(w)
  46. if err := render(buf, n); err != nil {
  47. return err
  48. }
  49. return buf.Flush()
  50. }
  51. // plaintextAbort is returned from render1 when a <plaintext> element
  52. // has been rendered. No more end tags should be rendered after that.
  53. var plaintextAbort = errors.New("html: internal error (plaintext abort)")
  54. func render(w writer, n *Node) error {
  55. err := render1(w, n)
  56. if err == plaintextAbort {
  57. err = nil
  58. }
  59. return err
  60. }
  61. func render1(w writer, n *Node) error {
  62. // Render non-element nodes; these are the easy cases.
  63. switch n.Type {
  64. case ErrorNode:
  65. return errors.New("html: cannot render an ErrorNode node")
  66. case TextNode:
  67. return escape(w, n.Data)
  68. case DocumentNode:
  69. for c := n.FirstChild; c != nil; c = c.NextSibling {
  70. if err := render1(w, c); err != nil {
  71. return err
  72. }
  73. }
  74. return nil
  75. case ElementNode:
  76. // No-op.
  77. case CommentNode:
  78. if _, err := w.WriteString("<!--"); err != nil {
  79. return err
  80. }
  81. if _, err := w.WriteString(n.Data); err != nil {
  82. return err
  83. }
  84. if _, err := w.WriteString("-->"); err != nil {
  85. return err
  86. }
  87. return nil
  88. case DoctypeNode:
  89. if _, err := w.WriteString("<!DOCTYPE "); err != nil {
  90. return err
  91. }
  92. if _, err := w.WriteString(n.Data); err != nil {
  93. return err
  94. }
  95. if n.Attr != nil {
  96. var p, s string
  97. for _, a := range n.Attr {
  98. switch a.Key {
  99. case "public":
  100. p = a.Val
  101. case "system":
  102. s = a.Val
  103. }
  104. }
  105. if p != "" {
  106. if _, err := w.WriteString(" PUBLIC "); err != nil {
  107. return err
  108. }
  109. if err := writeQuoted(w, p); err != nil {
  110. return err
  111. }
  112. if s != "" {
  113. if err := w.WriteByte(' '); err != nil {
  114. return err
  115. }
  116. if err := writeQuoted(w, s); err != nil {
  117. return err
  118. }
  119. }
  120. } else if s != "" {
  121. if _, err := w.WriteString(" SYSTEM "); err != nil {
  122. return err
  123. }
  124. if err := writeQuoted(w, s); err != nil {
  125. return err
  126. }
  127. }
  128. }
  129. return w.WriteByte('>')
  130. case RawNode:
  131. _, err := w.WriteString(n.Data)
  132. return err
  133. default:
  134. return errors.New("html: unknown node type")
  135. }
  136. // Render the <xxx> opening tag.
  137. if err := w.WriteByte('<'); err != nil {
  138. return err
  139. }
  140. if _, err := w.WriteString(n.Data); err != nil {
  141. return err
  142. }
  143. for _, a := range n.Attr {
  144. if err := w.WriteByte(' '); err != nil {
  145. return err
  146. }
  147. if a.Namespace != "" {
  148. if _, err := w.WriteString(a.Namespace); err != nil {
  149. return err
  150. }
  151. if err := w.WriteByte(':'); err != nil {
  152. return err
  153. }
  154. }
  155. if _, err := w.WriteString(a.Key); err != nil {
  156. return err
  157. }
  158. if _, err := w.WriteString(`="`); err != nil {
  159. return err
  160. }
  161. if err := escape(w, a.Val); err != nil {
  162. return err
  163. }
  164. if err := w.WriteByte('"'); err != nil {
  165. return err
  166. }
  167. }
  168. if voidElements[n.Data] {
  169. if n.FirstChild != nil {
  170. return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
  171. }
  172. _, err := w.WriteString("/>")
  173. return err
  174. }
  175. if err := w.WriteByte('>'); err != nil {
  176. return err
  177. }
  178. // Add initial newline where there is danger of a newline beging ignored.
  179. if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
  180. switch n.Data {
  181. case "pre", "listing", "textarea":
  182. if err := w.WriteByte('\n'); err != nil {
  183. return err
  184. }
  185. }
  186. }
  187. // Render any child nodes.
  188. switch n.Data {
  189. case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
  190. for c := n.FirstChild; c != nil; c = c.NextSibling {
  191. if c.Type == TextNode {
  192. if _, err := w.WriteString(c.Data); err != nil {
  193. return err
  194. }
  195. } else {
  196. if err := render1(w, c); err != nil {
  197. return err
  198. }
  199. }
  200. }
  201. if n.Data == "plaintext" {
  202. // Don't render anything else. <plaintext> must be the
  203. // last element in the file, with no closing tag.
  204. return plaintextAbort
  205. }
  206. default:
  207. for c := n.FirstChild; c != nil; c = c.NextSibling {
  208. if err := render1(w, c); err != nil {
  209. return err
  210. }
  211. }
  212. }
  213. // Render the </xxx> closing tag.
  214. if _, err := w.WriteString("</"); err != nil {
  215. return err
  216. }
  217. if _, err := w.WriteString(n.Data); err != nil {
  218. return err
  219. }
  220. return w.WriteByte('>')
  221. }
  222. // writeQuoted writes s to w surrounded by quotes. Normally it will use double
  223. // quotes, but if s contains a double quote, it will use single quotes.
  224. // It is used for writing the identifiers in a doctype declaration.
  225. // In valid HTML, they can't contain both types of quotes.
  226. func writeQuoted(w writer, s string) error {
  227. var q byte = '"'
  228. if strings.Contains(s, `"`) {
  229. q = '\''
  230. }
  231. if err := w.WriteByte(q); err != nil {
  232. return err
  233. }
  234. if _, err := w.WriteString(s); err != nil {
  235. return err
  236. }
  237. if err := w.WriteByte(q); err != nil {
  238. return err
  239. }
  240. return nil
  241. }
  242. // Section 12.1.2, "Elements", gives this list of void elements. Void elements
  243. // are those that can't have any contents.
  244. var voidElements = map[string]bool{
  245. "area": true,
  246. "base": true,
  247. "br": true,
  248. "col": true,
  249. "embed": true,
  250. "hr": true,
  251. "img": true,
  252. "input": true,
  253. "keygen": true, // "keygen" has been removed from the spec, but are kept here for backwards compatibility.
  254. "link": true,
  255. "meta": true,
  256. "param": true,
  257. "source": true,
  258. "track": true,
  259. "wbr": true,
  260. }