]>
Commit | Line | Data |
---|---|---|
c680a8e1 RS |
1 | // Copyright 2011 The Go Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style | |
3 | // license that can be found in the LICENSE file. | |
4 | ||
5 | package html | |
6 | ||
7 | import ( | |
8 | "bufio" | |
9 | "errors" | |
10 | "fmt" | |
11 | "io" | |
12 | "strings" | |
13 | ) | |
14 | ||
// writer is the set of output methods the renderer relies on: bulk writes,
// single-byte writes and string writes. Render wraps any io.Writer that lacks
// the extra methods in a bufio.Writer, which provides all three.
type writer interface {
	io.ByteWriter
	io.Writer
	WriteString(string) (int, error)
}
20 | ||
21 | // Render renders the parse tree n to the given writer. | |
22 | // | |
23 | // Rendering is done on a 'best effort' basis: calling Parse on the output of | |
24 | // Render will always result in something similar to the original tree, but it | |
25 | // is not necessarily an exact clone unless the original tree was 'well-formed'. | |
26 | // 'Well-formed' is not easily specified; the HTML5 specification is | |
27 | // complicated. | |
28 | // | |
29 | // Calling Parse on arbitrary input typically results in a 'well-formed' parse | |
30 | // tree. However, it is possible for Parse to yield a 'badly-formed' parse tree. | |
31 | // For example, in a 'well-formed' parse tree, no <a> element is a child of | |
32 | // another <a> element: parsing "<a><a>" results in two sibling elements. | |
33 | // Similarly, in a 'well-formed' parse tree, no <a> element is a child of a | |
34 | // <table> element: parsing "<p><table><a>" results in a <p> with two sibling | |
35 | // children; the <a> is reparented to the <table>'s parent. However, calling | |
36 | // Parse on "<a><table><a>" does not return an error, but the result has an <a> | |
37 | // element with an <a> child, and is therefore not 'well-formed'. | |
38 | // | |
39 | // Programmatically constructed trees are typically also 'well-formed', but it | |
40 | // is possible to construct a tree that looks innocuous but, when rendered and | |
41 | // re-parsed, results in a different tree. A simple example is that a solitary | |
42 | // text node would become a tree containing <html>, <head> and <body> elements. | |
43 | // Another example is that the programmatic equivalent of "a<head>b</head>c" | |
44 | // becomes "<html><head><head/><body>abc</body></html>". | |
45 | func Render(w io.Writer, n *Node) error { | |
46 | if x, ok := w.(writer); ok { | |
47 | return render(x, n) | |
48 | } | |
49 | buf := bufio.NewWriter(w) | |
50 | if err := render(buf, n); err != nil { | |
51 | return err | |
52 | } | |
53 | return buf.Flush() | |
54 | } | |
55 | ||
// plaintextAbort is the sentinel render1 returns once a <plaintext> element
// and its contents have been written. Nothing may follow a <plaintext>
// element, so callers stop emitting end tags when they see it; render turns
// it back into a nil error before it can reach the user.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
59 | ||
60 | func render(w writer, n *Node) error { | |
61 | err := render1(w, n) | |
62 | if err == plaintextAbort { | |
63 | err = nil | |
64 | } | |
65 | return err | |
66 | } | |
67 | ||
68 | func render1(w writer, n *Node) error { | |
69 | // Render non-element nodes; these are the easy cases. | |
70 | switch n.Type { | |
71 | case ErrorNode: | |
72 | return errors.New("html: cannot render an ErrorNode node") | |
73 | case TextNode: | |
74 | return escape(w, n.Data) | |
75 | case DocumentNode: | |
76 | for c := n.FirstChild; c != nil; c = c.NextSibling { | |
77 | if err := render1(w, c); err != nil { | |
78 | return err | |
79 | } | |
80 | } | |
81 | return nil | |
82 | case ElementNode: | |
83 | // No-op. | |
84 | case CommentNode: | |
85 | if _, err := w.WriteString("<!--"); err != nil { | |
86 | return err | |
87 | } | |
88 | if _, err := w.WriteString(n.Data); err != nil { | |
89 | return err | |
90 | } | |
91 | if _, err := w.WriteString("-->"); err != nil { | |
92 | return err | |
93 | } | |
94 | return nil | |
95 | case DoctypeNode: | |
96 | if _, err := w.WriteString("<!DOCTYPE "); err != nil { | |
97 | return err | |
98 | } | |
99 | if _, err := w.WriteString(n.Data); err != nil { | |
100 | return err | |
101 | } | |
102 | if n.Attr != nil { | |
103 | var p, s string | |
104 | for _, a := range n.Attr { | |
105 | switch a.Key { | |
106 | case "public": | |
107 | p = a.Val | |
108 | case "system": | |
109 | s = a.Val | |
110 | } | |
111 | } | |
112 | if p != "" { | |
113 | if _, err := w.WriteString(" PUBLIC "); err != nil { | |
114 | return err | |
115 | } | |
116 | if err := writeQuoted(w, p); err != nil { | |
117 | return err | |
118 | } | |
119 | if s != "" { | |
120 | if err := w.WriteByte(' '); err != nil { | |
121 | return err | |
122 | } | |
123 | if err := writeQuoted(w, s); err != nil { | |
124 | return err | |
125 | } | |
126 | } | |
127 | } else if s != "" { | |
128 | if _, err := w.WriteString(" SYSTEM "); err != nil { | |
129 | return err | |
130 | } | |
131 | if err := writeQuoted(w, s); err != nil { | |
132 | return err | |
133 | } | |
134 | } | |
135 | } | |
136 | return w.WriteByte('>') | |
137 | default: | |
138 | return errors.New("html: unknown node type") | |
139 | } | |
140 | ||
141 | // Render the <xxx> opening tag. | |
142 | if err := w.WriteByte('<'); err != nil { | |
143 | return err | |
144 | } | |
145 | if _, err := w.WriteString(n.Data); err != nil { | |
146 | return err | |
147 | } | |
148 | for _, a := range n.Attr { | |
149 | if err := w.WriteByte(' '); err != nil { | |
150 | return err | |
151 | } | |
152 | if a.Namespace != "" { | |
153 | if _, err := w.WriteString(a.Namespace); err != nil { | |
154 | return err | |
155 | } | |
156 | if err := w.WriteByte(':'); err != nil { | |
157 | return err | |
158 | } | |
159 | } | |
160 | if _, err := w.WriteString(a.Key); err != nil { | |
161 | return err | |
162 | } | |
163 | if _, err := w.WriteString(`="`); err != nil { | |
164 | return err | |
165 | } | |
166 | if err := escape(w, a.Val); err != nil { | |
167 | return err | |
168 | } | |
169 | if err := w.WriteByte('"'); err != nil { | |
170 | return err | |
171 | } | |
172 | } | |
173 | if voidElements[n.Data] { | |
174 | if n.FirstChild != nil { | |
175 | return fmt.Errorf("html: void element <%s> has child nodes", n.Data) | |
176 | } | |
177 | _, err := w.WriteString("/>") | |
178 | return err | |
179 | } | |
180 | if err := w.WriteByte('>'); err != nil { | |
181 | return err | |
182 | } | |
183 | ||
184 | // Add initial newline where there is danger of a newline beging ignored. | |
185 | if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") { | |
186 | switch n.Data { | |
187 | case "pre", "listing", "textarea": | |
188 | if err := w.WriteByte('\n'); err != nil { | |
189 | return err | |
190 | } | |
191 | } | |
192 | } | |
193 | ||
194 | // Render any child nodes. | |
195 | switch n.Data { | |
196 | case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp": | |
197 | for c := n.FirstChild; c != nil; c = c.NextSibling { | |
198 | if c.Type == TextNode { | |
199 | if _, err := w.WriteString(c.Data); err != nil { | |
200 | return err | |
201 | } | |
202 | } else { | |
203 | if err := render1(w, c); err != nil { | |
204 | return err | |
205 | } | |
206 | } | |
207 | } | |
208 | if n.Data == "plaintext" { | |
209 | // Don't render anything else. <plaintext> must be the | |
210 | // last element in the file, with no closing tag. | |
211 | return plaintextAbort | |
212 | } | |
213 | default: | |
214 | for c := n.FirstChild; c != nil; c = c.NextSibling { | |
215 | if err := render1(w, c); err != nil { | |
216 | return err | |
217 | } | |
218 | } | |
219 | } | |
220 | ||
221 | // Render the </xxx> closing tag. | |
222 | if _, err := w.WriteString("</"); err != nil { | |
223 | return err | |
224 | } | |
225 | if _, err := w.WriteString(n.Data); err != nil { | |
226 | return err | |
227 | } | |
228 | return w.WriteByte('>') | |
229 | } | |
230 | ||
231 | // writeQuoted writes s to w surrounded by quotes. Normally it will use double | |
232 | // quotes, but if s contains a double quote, it will use single quotes. | |
233 | // It is used for writing the identifiers in a doctype declaration. | |
234 | // In valid HTML, they can't contain both types of quotes. | |
235 | func writeQuoted(w writer, s string) error { | |
236 | var q byte = '"' | |
237 | if strings.Contains(s, `"`) { | |
238 | q = '\'' | |
239 | } | |
240 | if err := w.WriteByte(q); err != nil { | |
241 | return err | |
242 | } | |
243 | if _, err := w.WriteString(s); err != nil { | |
244 | return err | |
245 | } | |
246 | if err := w.WriteByte(q); err != nil { | |
247 | return err | |
248 | } | |
249 | return nil | |
250 | } | |
251 | ||
// voidElements is the set of element names that can never have contents, as
// listed in section 12.1.2, "Elements", of the HTML specification. The
// renderer writes these in the self-closing "<name/>" form and refuses to
// render one that has child nodes.
var voidElements = map[string]bool{
	"area":    true,
	"base":    true,
	"br":      true,
	"col":     true,
	"command": true,
	"embed":   true,
	"hr":      true,
	"img":     true,
	"input":   true,
	"keygen":  true,
	"link":    true,
	"meta":    true,
	"param":   true,
	"source":  true,
	"track":   true,
	"wbr":     true,
}