|
|
// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package html
import ( "bufio" "errors" "fmt" "io" "strings" )
// writer is the set of output methods the renderer relies on: whole byte
// slices (io.Writer), single bytes (io.ByteWriter) and strings (WriteString).
// Render uses a caller-supplied io.Writer directly when it already satisfies
// this interface, and wraps it in a *bufio.Writer otherwise.
type writer interface {
	WriteString(string) (int, error)
	io.ByteWriter
	io.Writer
}
// Render renders the parse tree n to the given writer.
//
// Rendering is done on a 'best effort' basis: calling Parse on the output of
// Render will always result in something similar to the original tree, but it
// is not necessarily an exact clone unless the original tree was 'well-formed'.
// 'Well-formed' is not easily specified; the HTML5 specification is
// complicated.
//
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
// For example, in a 'well-formed' parse tree, no <a> element is a child of
// another <a> element: parsing "<a><a>" results in two sibling elements.
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
// children; the <a> is reparented to the <table>'s parent. However, calling
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
// element with an <a> child, and is therefore not 'well-formed'.
//
// Programmatically constructed trees are typically also 'well-formed', but it
// is possible to construct a tree that looks innocuous but, when rendered and
// re-parsed, results in a different tree. A simple example is that a solitary
// text node would become a tree containing <html>, <head> and <body> elements.
// Another example is that the programmatic equivalent of "a<head>b</head>c"
// becomes "<html><head><head/><body>abc</body></html>".
func Render(w io.Writer, n *Node) error { if x, ok := w.(writer); ok { return render(x, n) } buf := bufio.NewWriter(w) if err := render(buf, n); err != nil { return err } return buf.Flush() }
// plaintextAbort is returned from render1 when a <plaintext> element
// has been rendered. No more end tags should be rendered after that.
//
// It is a control-flow signal rather than a real failure: render1 passes it
// up through every enclosing element unchanged, and render converts it to a
// nil error before returning.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
func render(w writer, n *Node) error { err := render1(w, n) if err == plaintextAbort { err = nil } return err }
func render1(w writer, n *Node) error { // Render non-element nodes; these are the easy cases.
switch n.Type { case ErrorNode: return errors.New("html: cannot render an ErrorNode node") case TextNode: return escape(w, n.Data) case DocumentNode: for c := n.FirstChild; c != nil; c = c.NextSibling { if err := render1(w, c); err != nil { return err } } return nil case ElementNode: // No-op.
case CommentNode: if _, err := w.WriteString("<!--"); err != nil { return err } if _, err := w.WriteString(n.Data); err != nil { return err } if _, err := w.WriteString("-->"); err != nil { return err } return nil case DoctypeNode: if _, err := w.WriteString("<!DOCTYPE "); err != nil { return err } if _, err := w.WriteString(n.Data); err != nil { return err } if n.Attr != nil { var p, s string for _, a := range n.Attr { switch a.Key { case "public": p = a.Val case "system": s = a.Val } } if p != "" { if _, err := w.WriteString(" PUBLIC "); err != nil { return err } if err := writeQuoted(w, p); err != nil { return err } if s != "" { if err := w.WriteByte(' '); err != nil { return err } if err := writeQuoted(w, s); err != nil { return err } } } else if s != "" { if _, err := w.WriteString(" SYSTEM "); err != nil { return err } if err := writeQuoted(w, s); err != nil { return err } } } return w.WriteByte('>') default: return errors.New("html: unknown node type") }
// Render the <xxx> opening tag.
if err := w.WriteByte('<'); err != nil { return err } if _, err := w.WriteString(n.Data); err != nil { return err } for _, a := range n.Attr { if err := w.WriteByte(' '); err != nil { return err } if a.Namespace != "" { if _, err := w.WriteString(a.Namespace); err != nil { return err } if err := w.WriteByte(':'); err != nil { return err } } if _, err := w.WriteString(a.Key); err != nil { return err } if _, err := w.WriteString(`="`); err != nil { return err } if err := escape(w, a.Val); err != nil { return err } if err := w.WriteByte('"'); err != nil { return err } } if voidElements[n.Data] { if n.FirstChild != nil { return fmt.Errorf("html: void element <%s> has child nodes", n.Data) } _, err := w.WriteString("/>") return err } if err := w.WriteByte('>'); err != nil { return err }
// Add initial newline where there is danger of a newline beging ignored.
if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { switch n.Data { case "pre", "listing", "textarea": if err := w.WriteByte('\n'); err != nil { return err } } }
// Render any child nodes.
switch n.Data { case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": for c := n.FirstChild; c != nil; c = c.NextSibling { if c.Type == TextNode { if _, err := w.WriteString(c.Data); err != nil { return err } } else { if err := render1(w, c); err != nil { return err } } } if n.Data == "plaintext" { // Don't render anything else. <plaintext> must be the
// last element in the file, with no closing tag.
return plaintextAbort } default: for c := n.FirstChild; c != nil; c = c.NextSibling { if err := render1(w, c); err != nil { return err } } }
// Render the </xxx> closing tag.
if _, err := w.WriteString("</"); err != nil { return err } if _, err := w.WriteString(n.Data); err != nil { return err } return w.WriteByte('>') }
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
// quotes, but if s contains a double quote, it will use single quotes.
// It is used for writing the identifiers in a doctype declaration.
// In valid HTML, they can't contain both types of quotes.
func writeQuoted(w writer, s string) error { var q byte = '"' if strings.Contains(s, `"`) { q = '\'' } if err := w.WriteByte(q); err != nil { return err } if _, err := w.WriteString(s); err != nil { return err } if err := w.WriteByte(q); err != nil { return err } return nil }
// Section 12.1.2, "Elements", gives this list of void elements. Void elements
// are those that can't have any contents.
// (The section number presumably refers to the HTML5 specification mentioned
// in the Render documentation; confirm against the current spec.)
//
// The map is used as a set keyed by tag name. render1 writes a listed element
// in self-closing form ("/>") with no end tag, and reports an error if such an
// element has child nodes.
//
// NOTE(review): "command" and "keygen" appear to have been dropped from later
// revisions of the HTML standard; presumably they are kept for backward
// compatibility. Verify before removing either entry.
var voidElements = map[string]bool{
	"area":    true,
	"base":    true,
	"br":      true,
	"col":     true,
	"command": true,
	"embed":   true,
	"hr":      true,
	"img":     true,
	"input":   true,
	"keygen":  true,
	"link":    true,
	"meta":    true,
	"param":   true,
	"source":  true,
	"track":   true,
	"wbr":     true,
}
|