diff options
author | Radek Simko <radek.simko@gmail.com> | 2017-08-10 14:38:14 +0200 |
---|---|---|
committer | Radek Simko <radek.simko@gmail.com> | 2017-08-10 14:38:14 +0200 |
commit | c680a8e1622ed0f18751d9d167c836ee24f5e897 (patch) | |
tree | 864f925049d422033dd25a73bafce32b361c8827 /vendor/golang.org/x/net/html/parse.go | |
parent | 38f8880ac81bfabc6d7f82e4dc89661f20fc559e (diff) | |
download | terraform-provider-statuscake-c680a8e1622ed0f18751d9d167c836ee24f5e897.tar.gz terraform-provider-statuscake-c680a8e1622ed0f18751d9d167c836ee24f5e897.tar.zst terraform-provider-statuscake-c680a8e1622ed0f18751d9d167c836ee24f5e897.zip |
vendor: github.com/hashicorp/terraform/...@v0.10.0
Diffstat (limited to 'vendor/golang.org/x/net/html/parse.go')
-rw-r--r-- | vendor/golang.org/x/net/html/parse.go | 2094 |
1 files changed, 2094 insertions, 0 deletions
diff --git a/vendor/golang.org/x/net/html/parse.go b/vendor/golang.org/x/net/html/parse.go new file mode 100644 index 0000000..be4b2bf --- /dev/null +++ b/vendor/golang.org/x/net/html/parse.go | |||
@@ -0,0 +1,2094 @@ | |||
1 | // Copyright 2010 The Go Authors. All rights reserved. | ||
2 | // Use of this source code is governed by a BSD-style | ||
3 | // license that can be found in the LICENSE file. | ||
4 | |||
5 | package html | ||
6 | |||
7 | import ( | ||
8 | "errors" | ||
9 | "fmt" | ||
10 | "io" | ||
11 | "strings" | ||
12 | |||
13 | a "golang.org/x/net/html/atom" | ||
14 | ) | ||
15 | |||
16 | // A parser implements the HTML5 parsing algorithm: | ||
17 | // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction | ||
18 | type parser struct { | ||
19 | // tokenizer provides the tokens for the parser. | ||
20 | tokenizer *Tokenizer | ||
21 | // tok is the most recently read token. | ||
22 | tok Token | ||
23 | // Self-closing tags like <hr/> are treated as start tags, except that | ||
24 | // hasSelfClosingToken is set while they are being processed. | ||
25 | hasSelfClosingToken bool | ||
26 | // doc is the document root element. | ||
27 | doc *Node | ||
28 | // The stack of open elements (section 12.2.3.2) and active formatting | ||
29 | // elements (section 12.2.3.3). | ||
30 | oe, afe nodeStack | ||
31 | // Element pointers (section 12.2.3.4). | ||
32 | head, form *Node | ||
33 | // Other parsing state flags (section 12.2.3.5). | ||
34 | scripting, framesetOK bool | ||
35 | // im is the current insertion mode. | ||
36 | im insertionMode | ||
37 | // originalIM is the insertion mode to go back to after completing a text | ||
38 | // or inTableText insertion mode. | ||
39 | originalIM insertionMode | ||
40 | // fosterParenting is whether new elements should be inserted according to | ||
41 | // the foster parenting rules (section 12.2.5.3). | ||
42 | fosterParenting bool | ||
43 | // quirks is whether the parser is operating in "quirks mode." | ||
44 | quirks bool | ||
45 | // fragment is whether the parser is parsing an HTML fragment. | ||
46 | fragment bool | ||
47 | // context is the context element when parsing an HTML fragment | ||
48 | // (section 12.4). | ||
49 | context *Node | ||
50 | } | ||
51 | |||
52 | func (p *parser) top() *Node { | ||
53 | if n := p.oe.top(); n != nil { | ||
54 | return n | ||
55 | } | ||
56 | return p.doc | ||
57 | } | ||
58 | |||
59 | // Stop tags for use in popUntil. These come from section 12.2.3.2. | ||
60 | var ( | ||
61 | defaultScopeStopTags = map[string][]a.Atom{ | ||
62 | "": {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template}, | ||
63 | "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext}, | ||
64 | "svg": {a.Desc, a.ForeignObject, a.Title}, | ||
65 | } | ||
66 | ) | ||
67 | |||
// scope selects which set of stop tags bounds a search of, or a clearing of,
// the stack of open elements.
type scope int

const (
	// defaultScope stops only at the tags in defaultScopeStopTags.
	defaultScope scope = iota
	// listItemScope additionally stops at <ol> and <ul>.
	listItemScope
	// buttonScope additionally stops at <button>.
	buttonScope
	// tableScope stops at <html> and <table>.
	tableScope
	// tableRowScope is accepted by clearStackToContext, where it stops at
	// <html> and <tr>.
	tableRowScope
	// tableBodyScope is accepted by clearStackToContext, where it stops at
	// <html>, <tbody>, <tfoot> and <thead>.
	tableBodyScope
	// selectScope stops at every HTML element other than <optgroup> and
	// <option>.
	selectScope
)
79 | |||
80 | // popUntil pops the stack of open elements at the highest element whose tag | ||
81 | // is in matchTags, provided there is no higher element in the scope's stop | ||
82 | // tags (as defined in section 12.2.3.2). It returns whether or not there was | ||
83 | // such an element. If there was not, popUntil leaves the stack unchanged. | ||
84 | // | ||
85 | // For example, the set of stop tags for table scope is: "html", "table". If | ||
86 | // the stack was: | ||
87 | // ["html", "body", "font", "table", "b", "i", "u"] | ||
88 | // then popUntil(tableScope, "font") would return false, but | ||
89 | // popUntil(tableScope, "i") would return true and the stack would become: | ||
90 | // ["html", "body", "font", "table", "b"] | ||
91 | // | ||
92 | // If an element's tag is in both the stop tags and matchTags, then the stack | ||
93 | // will be popped and the function returns true (provided, of course, there was | ||
94 | // no higher element in the stack that was also in the stop tags). For example, | ||
95 | // popUntil(tableScope, "table") returns true and leaves: | ||
96 | // ["html", "body", "font"] | ||
97 | func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool { | ||
98 | if i := p.indexOfElementInScope(s, matchTags...); i != -1 { | ||
99 | p.oe = p.oe[:i] | ||
100 | return true | ||
101 | } | ||
102 | return false | ||
103 | } | ||
104 | |||
105 | // indexOfElementInScope returns the index in p.oe of the highest element whose | ||
106 | // tag is in matchTags that is in scope. If no matching element is in scope, it | ||
107 | // returns -1. | ||
108 | func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int { | ||
109 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
110 | tagAtom := p.oe[i].DataAtom | ||
111 | if p.oe[i].Namespace == "" { | ||
112 | for _, t := range matchTags { | ||
113 | if t == tagAtom { | ||
114 | return i | ||
115 | } | ||
116 | } | ||
117 | switch s { | ||
118 | case defaultScope: | ||
119 | // No-op. | ||
120 | case listItemScope: | ||
121 | if tagAtom == a.Ol || tagAtom == a.Ul { | ||
122 | return -1 | ||
123 | } | ||
124 | case buttonScope: | ||
125 | if tagAtom == a.Button { | ||
126 | return -1 | ||
127 | } | ||
128 | case tableScope: | ||
129 | if tagAtom == a.Html || tagAtom == a.Table { | ||
130 | return -1 | ||
131 | } | ||
132 | case selectScope: | ||
133 | if tagAtom != a.Optgroup && tagAtom != a.Option { | ||
134 | return -1 | ||
135 | } | ||
136 | default: | ||
137 | panic("unreachable") | ||
138 | } | ||
139 | } | ||
140 | switch s { | ||
141 | case defaultScope, listItemScope, buttonScope: | ||
142 | for _, t := range defaultScopeStopTags[p.oe[i].Namespace] { | ||
143 | if t == tagAtom { | ||
144 | return -1 | ||
145 | } | ||
146 | } | ||
147 | } | ||
148 | } | ||
149 | return -1 | ||
150 | } | ||
151 | |||
152 | // elementInScope is like popUntil, except that it doesn't modify the stack of | ||
153 | // open elements. | ||
154 | func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool { | ||
155 | return p.indexOfElementInScope(s, matchTags...) != -1 | ||
156 | } | ||
157 | |||
158 | // clearStackToContext pops elements off the stack of open elements until a | ||
159 | // scope-defined element is found. | ||
160 | func (p *parser) clearStackToContext(s scope) { | ||
161 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
162 | tagAtom := p.oe[i].DataAtom | ||
163 | switch s { | ||
164 | case tableScope: | ||
165 | if tagAtom == a.Html || tagAtom == a.Table { | ||
166 | p.oe = p.oe[:i+1] | ||
167 | return | ||
168 | } | ||
169 | case tableRowScope: | ||
170 | if tagAtom == a.Html || tagAtom == a.Tr { | ||
171 | p.oe = p.oe[:i+1] | ||
172 | return | ||
173 | } | ||
174 | case tableBodyScope: | ||
175 | if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead { | ||
176 | p.oe = p.oe[:i+1] | ||
177 | return | ||
178 | } | ||
179 | default: | ||
180 | panic("unreachable") | ||
181 | } | ||
182 | } | ||
183 | } | ||
184 | |||
185 | // generateImpliedEndTags pops nodes off the stack of open elements as long as | ||
186 | // the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt. | ||
187 | // If exceptions are specified, nodes with that name will not be popped off. | ||
188 | func (p *parser) generateImpliedEndTags(exceptions ...string) { | ||
189 | var i int | ||
190 | loop: | ||
191 | for i = len(p.oe) - 1; i >= 0; i-- { | ||
192 | n := p.oe[i] | ||
193 | if n.Type == ElementNode { | ||
194 | switch n.DataAtom { | ||
195 | case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt: | ||
196 | for _, except := range exceptions { | ||
197 | if n.Data == except { | ||
198 | break loop | ||
199 | } | ||
200 | } | ||
201 | continue | ||
202 | } | ||
203 | } | ||
204 | break | ||
205 | } | ||
206 | |||
207 | p.oe = p.oe[:i+1] | ||
208 | } | ||
209 | |||
210 | // addChild adds a child node n to the top element, and pushes n onto the stack | ||
211 | // of open elements if it is an element node. | ||
212 | func (p *parser) addChild(n *Node) { | ||
213 | if p.shouldFosterParent() { | ||
214 | p.fosterParent(n) | ||
215 | } else { | ||
216 | p.top().AppendChild(n) | ||
217 | } | ||
218 | |||
219 | if n.Type == ElementNode { | ||
220 | p.oe = append(p.oe, n) | ||
221 | } | ||
222 | } | ||
223 | |||
224 | // shouldFosterParent returns whether the next node to be added should be | ||
225 | // foster parented. | ||
226 | func (p *parser) shouldFosterParent() bool { | ||
227 | if p.fosterParenting { | ||
228 | switch p.top().DataAtom { | ||
229 | case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: | ||
230 | return true | ||
231 | } | ||
232 | } | ||
233 | return false | ||
234 | } | ||
235 | |||
236 | // fosterParent adds a child node according to the foster parenting rules. | ||
237 | // Section 12.2.5.3, "foster parenting". | ||
238 | func (p *parser) fosterParent(n *Node) { | ||
239 | var table, parent, prev *Node | ||
240 | var i int | ||
241 | for i = len(p.oe) - 1; i >= 0; i-- { | ||
242 | if p.oe[i].DataAtom == a.Table { | ||
243 | table = p.oe[i] | ||
244 | break | ||
245 | } | ||
246 | } | ||
247 | |||
248 | if table == nil { | ||
249 | // The foster parent is the html element. | ||
250 | parent = p.oe[0] | ||
251 | } else { | ||
252 | parent = table.Parent | ||
253 | } | ||
254 | if parent == nil { | ||
255 | parent = p.oe[i-1] | ||
256 | } | ||
257 | |||
258 | if table != nil { | ||
259 | prev = table.PrevSibling | ||
260 | } else { | ||
261 | prev = parent.LastChild | ||
262 | } | ||
263 | if prev != nil && prev.Type == TextNode && n.Type == TextNode { | ||
264 | prev.Data += n.Data | ||
265 | return | ||
266 | } | ||
267 | |||
268 | parent.InsertBefore(n, table) | ||
269 | } | ||
270 | |||
271 | // addText adds text to the preceding node if it is a text node, or else it | ||
272 | // calls addChild with a new text node. | ||
273 | func (p *parser) addText(text string) { | ||
274 | if text == "" { | ||
275 | return | ||
276 | } | ||
277 | |||
278 | if p.shouldFosterParent() { | ||
279 | p.fosterParent(&Node{ | ||
280 | Type: TextNode, | ||
281 | Data: text, | ||
282 | }) | ||
283 | return | ||
284 | } | ||
285 | |||
286 | t := p.top() | ||
287 | if n := t.LastChild; n != nil && n.Type == TextNode { | ||
288 | n.Data += text | ||
289 | return | ||
290 | } | ||
291 | p.addChild(&Node{ | ||
292 | Type: TextNode, | ||
293 | Data: text, | ||
294 | }) | ||
295 | } | ||
296 | |||
297 | // addElement adds a child element based on the current token. | ||
298 | func (p *parser) addElement() { | ||
299 | p.addChild(&Node{ | ||
300 | Type: ElementNode, | ||
301 | DataAtom: p.tok.DataAtom, | ||
302 | Data: p.tok.Data, | ||
303 | Attr: p.tok.Attr, | ||
304 | }) | ||
305 | } | ||
306 | |||
307 | // Section 12.2.3.3. | ||
308 | func (p *parser) addFormattingElement() { | ||
309 | tagAtom, attr := p.tok.DataAtom, p.tok.Attr | ||
310 | p.addElement() | ||
311 | |||
312 | // Implement the Noah's Ark clause, but with three per family instead of two. | ||
313 | identicalElements := 0 | ||
314 | findIdenticalElements: | ||
315 | for i := len(p.afe) - 1; i >= 0; i-- { | ||
316 | n := p.afe[i] | ||
317 | if n.Type == scopeMarkerNode { | ||
318 | break | ||
319 | } | ||
320 | if n.Type != ElementNode { | ||
321 | continue | ||
322 | } | ||
323 | if n.Namespace != "" { | ||
324 | continue | ||
325 | } | ||
326 | if n.DataAtom != tagAtom { | ||
327 | continue | ||
328 | } | ||
329 | if len(n.Attr) != len(attr) { | ||
330 | continue | ||
331 | } | ||
332 | compareAttributes: | ||
333 | for _, t0 := range n.Attr { | ||
334 | for _, t1 := range attr { | ||
335 | if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val { | ||
336 | // Found a match for this attribute, continue with the next attribute. | ||
337 | continue compareAttributes | ||
338 | } | ||
339 | } | ||
340 | // If we get here, there is no attribute that matches a. | ||
341 | // Therefore the element is not identical to the new one. | ||
342 | continue findIdenticalElements | ||
343 | } | ||
344 | |||
345 | identicalElements++ | ||
346 | if identicalElements >= 3 { | ||
347 | p.afe.remove(n) | ||
348 | } | ||
349 | } | ||
350 | |||
351 | p.afe = append(p.afe, p.top()) | ||
352 | } | ||
353 | |||
354 | // Section 12.2.3.3. | ||
355 | func (p *parser) clearActiveFormattingElements() { | ||
356 | for { | ||
357 | n := p.afe.pop() | ||
358 | if len(p.afe) == 0 || n.Type == scopeMarkerNode { | ||
359 | return | ||
360 | } | ||
361 | } | ||
362 | } | ||
363 | |||
364 | // Section 12.2.3.3. | ||
365 | func (p *parser) reconstructActiveFormattingElements() { | ||
366 | n := p.afe.top() | ||
367 | if n == nil { | ||
368 | return | ||
369 | } | ||
370 | if n.Type == scopeMarkerNode || p.oe.index(n) != -1 { | ||
371 | return | ||
372 | } | ||
373 | i := len(p.afe) - 1 | ||
374 | for n.Type != scopeMarkerNode && p.oe.index(n) == -1 { | ||
375 | if i == 0 { | ||
376 | i = -1 | ||
377 | break | ||
378 | } | ||
379 | i-- | ||
380 | n = p.afe[i] | ||
381 | } | ||
382 | for { | ||
383 | i++ | ||
384 | clone := p.afe[i].clone() | ||
385 | p.addChild(clone) | ||
386 | p.afe[i] = clone | ||
387 | if i == len(p.afe)-1 { | ||
388 | break | ||
389 | } | ||
390 | } | ||
391 | } | ||
392 | |||
// acknowledgeSelfClosingTag clears hasSelfClosingToken, recording that the
// self-closing flag of the token being processed has been handled.
// Section 12.2.4.
func (p *parser) acknowledgeSelfClosingTag() {
	p.hasSelfClosingToken = false
}
397 | |||
// An insertion mode (section 12.2.3.1) is the state transition function from
// a particular state in the HTML5 parser's state machine. It updates the
// parser's fields depending on parser.tok (where ErrorToken means EOF).
//
// It returns whether the token was consumed. The insertion modes in this file
// return false after changing parser state in a way that requires the same
// token to be processed again.
type insertionMode func(*parser) bool
403 | |||
404 | // setOriginalIM sets the insertion mode to return to after completing a text or | ||
405 | // inTableText insertion mode. | ||
406 | // Section 12.2.3.1, "using the rules for". | ||
407 | func (p *parser) setOriginalIM() { | ||
408 | if p.originalIM != nil { | ||
409 | panic("html: bad parser state: originalIM was set twice") | ||
410 | } | ||
411 | p.originalIM = p.im | ||
412 | } | ||
413 | |||
414 | // Section 12.2.3.1, "reset the insertion mode". | ||
415 | func (p *parser) resetInsertionMode() { | ||
416 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
417 | n := p.oe[i] | ||
418 | if i == 0 && p.context != nil { | ||
419 | n = p.context | ||
420 | } | ||
421 | |||
422 | switch n.DataAtom { | ||
423 | case a.Select: | ||
424 | p.im = inSelectIM | ||
425 | case a.Td, a.Th: | ||
426 | p.im = inCellIM | ||
427 | case a.Tr: | ||
428 | p.im = inRowIM | ||
429 | case a.Tbody, a.Thead, a.Tfoot: | ||
430 | p.im = inTableBodyIM | ||
431 | case a.Caption: | ||
432 | p.im = inCaptionIM | ||
433 | case a.Colgroup: | ||
434 | p.im = inColumnGroupIM | ||
435 | case a.Table: | ||
436 | p.im = inTableIM | ||
437 | case a.Head: | ||
438 | p.im = inBodyIM | ||
439 | case a.Body: | ||
440 | p.im = inBodyIM | ||
441 | case a.Frameset: | ||
442 | p.im = inFramesetIM | ||
443 | case a.Html: | ||
444 | p.im = beforeHeadIM | ||
445 | default: | ||
446 | continue | ||
447 | } | ||
448 | return | ||
449 | } | ||
450 | p.im = inBodyIM | ||
451 | } | ||
452 | |||
// whitespace is the cutset (space, tab, CR, LF, form feed) passed to
// strings.TrimLeft when the insertion modes strip leading whitespace from
// text tokens.
const whitespace = " \t\r\n\f"
454 | |||
455 | // Section 12.2.5.4.1. | ||
456 | func initialIM(p *parser) bool { | ||
457 | switch p.tok.Type { | ||
458 | case TextToken: | ||
459 | p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) | ||
460 | if len(p.tok.Data) == 0 { | ||
461 | // It was all whitespace, so ignore it. | ||
462 | return true | ||
463 | } | ||
464 | case CommentToken: | ||
465 | p.doc.AppendChild(&Node{ | ||
466 | Type: CommentNode, | ||
467 | Data: p.tok.Data, | ||
468 | }) | ||
469 | return true | ||
470 | case DoctypeToken: | ||
471 | n, quirks := parseDoctype(p.tok.Data) | ||
472 | p.doc.AppendChild(n) | ||
473 | p.quirks = quirks | ||
474 | p.im = beforeHTMLIM | ||
475 | return true | ||
476 | } | ||
477 | p.quirks = true | ||
478 | p.im = beforeHTMLIM | ||
479 | return false | ||
480 | } | ||
481 | |||
482 | // Section 12.2.5.4.2. | ||
483 | func beforeHTMLIM(p *parser) bool { | ||
484 | switch p.tok.Type { | ||
485 | case DoctypeToken: | ||
486 | // Ignore the token. | ||
487 | return true | ||
488 | case TextToken: | ||
489 | p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) | ||
490 | if len(p.tok.Data) == 0 { | ||
491 | // It was all whitespace, so ignore it. | ||
492 | return true | ||
493 | } | ||
494 | case StartTagToken: | ||
495 | if p.tok.DataAtom == a.Html { | ||
496 | p.addElement() | ||
497 | p.im = beforeHeadIM | ||
498 | return true | ||
499 | } | ||
500 | case EndTagToken: | ||
501 | switch p.tok.DataAtom { | ||
502 | case a.Head, a.Body, a.Html, a.Br: | ||
503 | p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) | ||
504 | return false | ||
505 | default: | ||
506 | // Ignore the token. | ||
507 | return true | ||
508 | } | ||
509 | case CommentToken: | ||
510 | p.doc.AppendChild(&Node{ | ||
511 | Type: CommentNode, | ||
512 | Data: p.tok.Data, | ||
513 | }) | ||
514 | return true | ||
515 | } | ||
516 | p.parseImpliedToken(StartTagToken, a.Html, a.Html.String()) | ||
517 | return false | ||
518 | } | ||
519 | |||
520 | // Section 12.2.5.4.3. | ||
521 | func beforeHeadIM(p *parser) bool { | ||
522 | switch p.tok.Type { | ||
523 | case TextToken: | ||
524 | p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace) | ||
525 | if len(p.tok.Data) == 0 { | ||
526 | // It was all whitespace, so ignore it. | ||
527 | return true | ||
528 | } | ||
529 | case StartTagToken: | ||
530 | switch p.tok.DataAtom { | ||
531 | case a.Head: | ||
532 | p.addElement() | ||
533 | p.head = p.top() | ||
534 | p.im = inHeadIM | ||
535 | return true | ||
536 | case a.Html: | ||
537 | return inBodyIM(p) | ||
538 | } | ||
539 | case EndTagToken: | ||
540 | switch p.tok.DataAtom { | ||
541 | case a.Head, a.Body, a.Html, a.Br: | ||
542 | p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) | ||
543 | return false | ||
544 | default: | ||
545 | // Ignore the token. | ||
546 | return true | ||
547 | } | ||
548 | case CommentToken: | ||
549 | p.addChild(&Node{ | ||
550 | Type: CommentNode, | ||
551 | Data: p.tok.Data, | ||
552 | }) | ||
553 | return true | ||
554 | case DoctypeToken: | ||
555 | // Ignore the token. | ||
556 | return true | ||
557 | } | ||
558 | |||
559 | p.parseImpliedToken(StartTagToken, a.Head, a.Head.String()) | ||
560 | return false | ||
561 | } | ||
562 | |||
563 | // Section 12.2.5.4.4. | ||
564 | func inHeadIM(p *parser) bool { | ||
565 | switch p.tok.Type { | ||
566 | case TextToken: | ||
567 | s := strings.TrimLeft(p.tok.Data, whitespace) | ||
568 | if len(s) < len(p.tok.Data) { | ||
569 | // Add the initial whitespace to the current node. | ||
570 | p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) | ||
571 | if s == "" { | ||
572 | return true | ||
573 | } | ||
574 | p.tok.Data = s | ||
575 | } | ||
576 | case StartTagToken: | ||
577 | switch p.tok.DataAtom { | ||
578 | case a.Html: | ||
579 | return inBodyIM(p) | ||
580 | case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta: | ||
581 | p.addElement() | ||
582 | p.oe.pop() | ||
583 | p.acknowledgeSelfClosingTag() | ||
584 | return true | ||
585 | case a.Script, a.Title, a.Noscript, a.Noframes, a.Style: | ||
586 | p.addElement() | ||
587 | p.setOriginalIM() | ||
588 | p.im = textIM | ||
589 | return true | ||
590 | case a.Head: | ||
591 | // Ignore the token. | ||
592 | return true | ||
593 | } | ||
594 | case EndTagToken: | ||
595 | switch p.tok.DataAtom { | ||
596 | case a.Head: | ||
597 | n := p.oe.pop() | ||
598 | if n.DataAtom != a.Head { | ||
599 | panic("html: bad parser state: <head> element not found, in the in-head insertion mode") | ||
600 | } | ||
601 | p.im = afterHeadIM | ||
602 | return true | ||
603 | case a.Body, a.Html, a.Br: | ||
604 | p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) | ||
605 | return false | ||
606 | default: | ||
607 | // Ignore the token. | ||
608 | return true | ||
609 | } | ||
610 | case CommentToken: | ||
611 | p.addChild(&Node{ | ||
612 | Type: CommentNode, | ||
613 | Data: p.tok.Data, | ||
614 | }) | ||
615 | return true | ||
616 | case DoctypeToken: | ||
617 | // Ignore the token. | ||
618 | return true | ||
619 | } | ||
620 | |||
621 | p.parseImpliedToken(EndTagToken, a.Head, a.Head.String()) | ||
622 | return false | ||
623 | } | ||
624 | |||
625 | // Section 12.2.5.4.6. | ||
626 | func afterHeadIM(p *parser) bool { | ||
627 | switch p.tok.Type { | ||
628 | case TextToken: | ||
629 | s := strings.TrimLeft(p.tok.Data, whitespace) | ||
630 | if len(s) < len(p.tok.Data) { | ||
631 | // Add the initial whitespace to the current node. | ||
632 | p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) | ||
633 | if s == "" { | ||
634 | return true | ||
635 | } | ||
636 | p.tok.Data = s | ||
637 | } | ||
638 | case StartTagToken: | ||
639 | switch p.tok.DataAtom { | ||
640 | case a.Html: | ||
641 | return inBodyIM(p) | ||
642 | case a.Body: | ||
643 | p.addElement() | ||
644 | p.framesetOK = false | ||
645 | p.im = inBodyIM | ||
646 | return true | ||
647 | case a.Frameset: | ||
648 | p.addElement() | ||
649 | p.im = inFramesetIM | ||
650 | return true | ||
651 | case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title: | ||
652 | p.oe = append(p.oe, p.head) | ||
653 | defer p.oe.remove(p.head) | ||
654 | return inHeadIM(p) | ||
655 | case a.Head: | ||
656 | // Ignore the token. | ||
657 | return true | ||
658 | } | ||
659 | case EndTagToken: | ||
660 | switch p.tok.DataAtom { | ||
661 | case a.Body, a.Html, a.Br: | ||
662 | // Drop down to creating an implied <body> tag. | ||
663 | default: | ||
664 | // Ignore the token. | ||
665 | return true | ||
666 | } | ||
667 | case CommentToken: | ||
668 | p.addChild(&Node{ | ||
669 | Type: CommentNode, | ||
670 | Data: p.tok.Data, | ||
671 | }) | ||
672 | return true | ||
673 | case DoctypeToken: | ||
674 | // Ignore the token. | ||
675 | return true | ||
676 | } | ||
677 | |||
678 | p.parseImpliedToken(StartTagToken, a.Body, a.Body.String()) | ||
679 | p.framesetOK = true | ||
680 | return false | ||
681 | } | ||
682 | |||
683 | // copyAttributes copies attributes of src not found on dst to dst. | ||
684 | func copyAttributes(dst *Node, src Token) { | ||
685 | if len(src.Attr) == 0 { | ||
686 | return | ||
687 | } | ||
688 | attr := map[string]string{} | ||
689 | for _, t := range dst.Attr { | ||
690 | attr[t.Key] = t.Val | ||
691 | } | ||
692 | for _, t := range src.Attr { | ||
693 | if _, ok := attr[t.Key]; !ok { | ||
694 | dst.Attr = append(dst.Attr, t) | ||
695 | attr[t.Key] = t.Val | ||
696 | } | ||
697 | } | ||
698 | } | ||
699 | |||
700 | // Section 12.2.5.4.7. | ||
701 | func inBodyIM(p *parser) bool { | ||
702 | switch p.tok.Type { | ||
703 | case TextToken: | ||
704 | d := p.tok.Data | ||
705 | switch n := p.oe.top(); n.DataAtom { | ||
706 | case a.Pre, a.Listing: | ||
707 | if n.FirstChild == nil { | ||
708 | // Ignore a newline at the start of a <pre> block. | ||
709 | if d != "" && d[0] == '\r' { | ||
710 | d = d[1:] | ||
711 | } | ||
712 | if d != "" && d[0] == '\n' { | ||
713 | d = d[1:] | ||
714 | } | ||
715 | } | ||
716 | } | ||
717 | d = strings.Replace(d, "\x00", "", -1) | ||
718 | if d == "" { | ||
719 | return true | ||
720 | } | ||
721 | p.reconstructActiveFormattingElements() | ||
722 | p.addText(d) | ||
723 | if p.framesetOK && strings.TrimLeft(d, whitespace) != "" { | ||
724 | // There were non-whitespace characters inserted. | ||
725 | p.framesetOK = false | ||
726 | } | ||
727 | case StartTagToken: | ||
728 | switch p.tok.DataAtom { | ||
729 | case a.Html: | ||
730 | copyAttributes(p.oe[0], p.tok) | ||
731 | case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title: | ||
732 | return inHeadIM(p) | ||
733 | case a.Body: | ||
734 | if len(p.oe) >= 2 { | ||
735 | body := p.oe[1] | ||
736 | if body.Type == ElementNode && body.DataAtom == a.Body { | ||
737 | p.framesetOK = false | ||
738 | copyAttributes(body, p.tok) | ||
739 | } | ||
740 | } | ||
741 | case a.Frameset: | ||
742 | if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body { | ||
743 | // Ignore the token. | ||
744 | return true | ||
745 | } | ||
746 | body := p.oe[1] | ||
747 | if body.Parent != nil { | ||
748 | body.Parent.RemoveChild(body) | ||
749 | } | ||
750 | p.oe = p.oe[:1] | ||
751 | p.addElement() | ||
752 | p.im = inFramesetIM | ||
753 | return true | ||
754 | case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul: | ||
755 | p.popUntil(buttonScope, a.P) | ||
756 | p.addElement() | ||
757 | case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: | ||
758 | p.popUntil(buttonScope, a.P) | ||
759 | switch n := p.top(); n.DataAtom { | ||
760 | case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: | ||
761 | p.oe.pop() | ||
762 | } | ||
763 | p.addElement() | ||
764 | case a.Pre, a.Listing: | ||
765 | p.popUntil(buttonScope, a.P) | ||
766 | p.addElement() | ||
767 | // The newline, if any, will be dealt with by the TextToken case. | ||
768 | p.framesetOK = false | ||
769 | case a.Form: | ||
770 | if p.form == nil { | ||
771 | p.popUntil(buttonScope, a.P) | ||
772 | p.addElement() | ||
773 | p.form = p.top() | ||
774 | } | ||
775 | case a.Li: | ||
776 | p.framesetOK = false | ||
777 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
778 | node := p.oe[i] | ||
779 | switch node.DataAtom { | ||
780 | case a.Li: | ||
781 | p.oe = p.oe[:i] | ||
782 | case a.Address, a.Div, a.P: | ||
783 | continue | ||
784 | default: | ||
785 | if !isSpecialElement(node) { | ||
786 | continue | ||
787 | } | ||
788 | } | ||
789 | break | ||
790 | } | ||
791 | p.popUntil(buttonScope, a.P) | ||
792 | p.addElement() | ||
793 | case a.Dd, a.Dt: | ||
794 | p.framesetOK = false | ||
795 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
796 | node := p.oe[i] | ||
797 | switch node.DataAtom { | ||
798 | case a.Dd, a.Dt: | ||
799 | p.oe = p.oe[:i] | ||
800 | case a.Address, a.Div, a.P: | ||
801 | continue | ||
802 | default: | ||
803 | if !isSpecialElement(node) { | ||
804 | continue | ||
805 | } | ||
806 | } | ||
807 | break | ||
808 | } | ||
809 | p.popUntil(buttonScope, a.P) | ||
810 | p.addElement() | ||
811 | case a.Plaintext: | ||
812 | p.popUntil(buttonScope, a.P) | ||
813 | p.addElement() | ||
814 | case a.Button: | ||
815 | p.popUntil(defaultScope, a.Button) | ||
816 | p.reconstructActiveFormattingElements() | ||
817 | p.addElement() | ||
818 | p.framesetOK = false | ||
819 | case a.A: | ||
820 | for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- { | ||
821 | if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A { | ||
822 | p.inBodyEndTagFormatting(a.A) | ||
823 | p.oe.remove(n) | ||
824 | p.afe.remove(n) | ||
825 | break | ||
826 | } | ||
827 | } | ||
828 | p.reconstructActiveFormattingElements() | ||
829 | p.addFormattingElement() | ||
830 | case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: | ||
831 | p.reconstructActiveFormattingElements() | ||
832 | p.addFormattingElement() | ||
833 | case a.Nobr: | ||
834 | p.reconstructActiveFormattingElements() | ||
835 | if p.elementInScope(defaultScope, a.Nobr) { | ||
836 | p.inBodyEndTagFormatting(a.Nobr) | ||
837 | p.reconstructActiveFormattingElements() | ||
838 | } | ||
839 | p.addFormattingElement() | ||
840 | case a.Applet, a.Marquee, a.Object: | ||
841 | p.reconstructActiveFormattingElements() | ||
842 | p.addElement() | ||
843 | p.afe = append(p.afe, &scopeMarker) | ||
844 | p.framesetOK = false | ||
845 | case a.Table: | ||
846 | if !p.quirks { | ||
847 | p.popUntil(buttonScope, a.P) | ||
848 | } | ||
849 | p.addElement() | ||
850 | p.framesetOK = false | ||
851 | p.im = inTableIM | ||
852 | return true | ||
853 | case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr: | ||
854 | p.reconstructActiveFormattingElements() | ||
855 | p.addElement() | ||
856 | p.oe.pop() | ||
857 | p.acknowledgeSelfClosingTag() | ||
858 | if p.tok.DataAtom == a.Input { | ||
859 | for _, t := range p.tok.Attr { | ||
860 | if t.Key == "type" { | ||
861 | if strings.ToLower(t.Val) == "hidden" { | ||
862 | // Skip setting framesetOK = false | ||
863 | return true | ||
864 | } | ||
865 | } | ||
866 | } | ||
867 | } | ||
868 | p.framesetOK = false | ||
869 | case a.Param, a.Source, a.Track: | ||
870 | p.addElement() | ||
871 | p.oe.pop() | ||
872 | p.acknowledgeSelfClosingTag() | ||
873 | case a.Hr: | ||
874 | p.popUntil(buttonScope, a.P) | ||
875 | p.addElement() | ||
876 | p.oe.pop() | ||
877 | p.acknowledgeSelfClosingTag() | ||
878 | p.framesetOK = false | ||
879 | case a.Image: | ||
880 | p.tok.DataAtom = a.Img | ||
881 | p.tok.Data = a.Img.String() | ||
882 | return false | ||
883 | case a.Isindex: | ||
884 | if p.form != nil { | ||
885 | // Ignore the token. | ||
886 | return true | ||
887 | } | ||
888 | action := "" | ||
889 | prompt := "This is a searchable index. Enter search keywords: " | ||
890 | attr := []Attribute{{Key: "name", Val: "isindex"}} | ||
891 | for _, t := range p.tok.Attr { | ||
892 | switch t.Key { | ||
893 | case "action": | ||
894 | action = t.Val | ||
895 | case "name": | ||
896 | // Ignore the attribute. | ||
897 | case "prompt": | ||
898 | prompt = t.Val | ||
899 | default: | ||
900 | attr = append(attr, t) | ||
901 | } | ||
902 | } | ||
903 | p.acknowledgeSelfClosingTag() | ||
904 | p.popUntil(buttonScope, a.P) | ||
905 | p.parseImpliedToken(StartTagToken, a.Form, a.Form.String()) | ||
906 | if action != "" { | ||
907 | p.form.Attr = []Attribute{{Key: "action", Val: action}} | ||
908 | } | ||
909 | p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) | ||
910 | p.parseImpliedToken(StartTagToken, a.Label, a.Label.String()) | ||
911 | p.addText(prompt) | ||
912 | p.addChild(&Node{ | ||
913 | Type: ElementNode, | ||
914 | DataAtom: a.Input, | ||
915 | Data: a.Input.String(), | ||
916 | Attr: attr, | ||
917 | }) | ||
918 | p.oe.pop() | ||
919 | p.parseImpliedToken(EndTagToken, a.Label, a.Label.String()) | ||
920 | p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String()) | ||
921 | p.parseImpliedToken(EndTagToken, a.Form, a.Form.String()) | ||
922 | case a.Textarea: | ||
923 | p.addElement() | ||
924 | p.setOriginalIM() | ||
925 | p.framesetOK = false | ||
926 | p.im = textIM | ||
927 | case a.Xmp: | ||
928 | p.popUntil(buttonScope, a.P) | ||
929 | p.reconstructActiveFormattingElements() | ||
930 | p.framesetOK = false | ||
931 | p.addElement() | ||
932 | p.setOriginalIM() | ||
933 | p.im = textIM | ||
934 | case a.Iframe: | ||
935 | p.framesetOK = false | ||
936 | p.addElement() | ||
937 | p.setOriginalIM() | ||
938 | p.im = textIM | ||
939 | case a.Noembed, a.Noscript: | ||
940 | p.addElement() | ||
941 | p.setOriginalIM() | ||
942 | p.im = textIM | ||
943 | case a.Select: | ||
944 | p.reconstructActiveFormattingElements() | ||
945 | p.addElement() | ||
946 | p.framesetOK = false | ||
947 | p.im = inSelectIM | ||
948 | return true | ||
949 | case a.Optgroup, a.Option: | ||
950 | if p.top().DataAtom == a.Option { | ||
951 | p.oe.pop() | ||
952 | } | ||
953 | p.reconstructActiveFormattingElements() | ||
954 | p.addElement() | ||
955 | case a.Rp, a.Rt: | ||
956 | if p.elementInScope(defaultScope, a.Ruby) { | ||
957 | p.generateImpliedEndTags() | ||
958 | } | ||
959 | p.addElement() | ||
960 | case a.Math, a.Svg: | ||
961 | p.reconstructActiveFormattingElements() | ||
962 | if p.tok.DataAtom == a.Math { | ||
963 | adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) | ||
964 | } else { | ||
965 | adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments) | ||
966 | } | ||
967 | adjustForeignAttributes(p.tok.Attr) | ||
968 | p.addElement() | ||
969 | p.top().Namespace = p.tok.Data | ||
970 | if p.hasSelfClosingToken { | ||
971 | p.oe.pop() | ||
972 | p.acknowledgeSelfClosingTag() | ||
973 | } | ||
974 | return true | ||
975 | case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: | ||
976 | // Ignore the token. | ||
977 | default: | ||
978 | p.reconstructActiveFormattingElements() | ||
979 | p.addElement() | ||
980 | } | ||
981 | case EndTagToken: | ||
982 | switch p.tok.DataAtom { | ||
983 | case a.Body: | ||
984 | if p.elementInScope(defaultScope, a.Body) { | ||
985 | p.im = afterBodyIM | ||
986 | } | ||
987 | case a.Html: | ||
988 | if p.elementInScope(defaultScope, a.Body) { | ||
989 | p.parseImpliedToken(EndTagToken, a.Body, a.Body.String()) | ||
990 | return false | ||
991 | } | ||
992 | return true | ||
993 | case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul: | ||
994 | p.popUntil(defaultScope, p.tok.DataAtom) | ||
995 | case a.Form: | ||
996 | node := p.form | ||
997 | p.form = nil | ||
998 | i := p.indexOfElementInScope(defaultScope, a.Form) | ||
999 | if node == nil || i == -1 || p.oe[i] != node { | ||
1000 | // Ignore the token. | ||
1001 | return true | ||
1002 | } | ||
1003 | p.generateImpliedEndTags() | ||
1004 | p.oe.remove(node) | ||
1005 | case a.P: | ||
1006 | if !p.elementInScope(buttonScope, a.P) { | ||
1007 | p.parseImpliedToken(StartTagToken, a.P, a.P.String()) | ||
1008 | } | ||
1009 | p.popUntil(buttonScope, a.P) | ||
1010 | case a.Li: | ||
1011 | p.popUntil(listItemScope, a.Li) | ||
1012 | case a.Dd, a.Dt: | ||
1013 | p.popUntil(defaultScope, p.tok.DataAtom) | ||
1014 | case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6: | ||
1015 | p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6) | ||
1016 | case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U: | ||
1017 | p.inBodyEndTagFormatting(p.tok.DataAtom) | ||
1018 | case a.Applet, a.Marquee, a.Object: | ||
1019 | if p.popUntil(defaultScope, p.tok.DataAtom) { | ||
1020 | p.clearActiveFormattingElements() | ||
1021 | } | ||
1022 | case a.Br: | ||
1023 | p.tok.Type = StartTagToken | ||
1024 | return false | ||
1025 | default: | ||
1026 | p.inBodyEndTagOther(p.tok.DataAtom) | ||
1027 | } | ||
1028 | case CommentToken: | ||
1029 | p.addChild(&Node{ | ||
1030 | Type: CommentNode, | ||
1031 | Data: p.tok.Data, | ||
1032 | }) | ||
1033 | } | ||
1034 | |||
1035 | return true | ||
1036 | } | ||
1037 | |||
// inBodyEndTagFormatting handles an end tag whose atom is tagAtom by
// restructuring the stack of open elements (p.oe) and the list of active
// formatting elements (p.afe). When no matching formatting element is found it
// defers to inBodyEndTagOther.
func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
	// This is the "adoption agency" algorithm, described at
	// https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency

	// TODO: this is a fairly literal line-by-line translation of that algorithm.
	// Once the code successfully parses the comprehensive test suite, we should
	// refactor this code to be more idiomatic.

	// Steps 1-4. The outer loop. It runs at most eight times; every early
	// return below ends the whole algorithm.
	for i := 0; i < 8; i++ {
		// Step 5. Find the formatting element: the most recent entry in p.afe
		// with the requested atom, searching no further back than the last
		// scope marker.
		var formattingElement *Node
		for j := len(p.afe) - 1; j >= 0; j-- {
			if p.afe[j].Type == scopeMarkerNode {
				break
			}
			if p.afe[j].DataAtom == tagAtom {
				formattingElement = p.afe[j]
				break
			}
		}
		if formattingElement == nil {
			// No such formatting element: treat it as any other end tag.
			p.inBodyEndTagOther(tagAtom)
			return
		}
		feIndex := p.oe.index(formattingElement)
		if feIndex == -1 {
			// The element is no longer open; just forget about it.
			p.afe.remove(formattingElement)
			return
		}
		if !p.elementInScope(defaultScope, tagAtom) {
			// Ignore the tag.
			return
		}

		// Steps 9-10. Find the furthest block: the first special element at or
		// above the formatting element's position in the stack.
		var furthestBlock *Node
		for _, e := range p.oe[feIndex:] {
			if isSpecialElement(e) {
				furthestBlock = e
				break
			}
		}
		if furthestBlock == nil {
			// No furthest block: pop everything up to and including the
			// formatting element and drop it from p.afe.
			e := p.oe.pop()
			for e != formattingElement {
				e = p.oe.pop()
			}
			p.afe.remove(e)
			return
		}

		// Steps 11-12. Find the common ancestor and bookmark node.
		// NOTE(review): this indexes p.oe[feIndex-1], so it presumes the
		// formatting element is never at the bottom of the stack - confirm.
		commonAncestor := p.oe[feIndex-1]
		bookmark := p.afe.index(formattingElement)

		// Step 13. The inner loop. Find the lastNode to reparent.
		lastNode := furthestBlock
		node := furthestBlock
		x := p.oe.index(node)
		// Steps 13.1-13.2
		// NOTE(review): the loop body runs at most three times, so it can end
		// before node reaches formattingElement - confirm against the spec.
		for j := 0; j < 3; j++ {
			// Step 13.3. Move one entry down the stack of open elements.
			x--
			node = p.oe[x]
			// Step 13.4 - 13.5. Nodes that are not active formatting elements
			// are simply removed from the stack.
			if p.afe.index(node) == -1 {
				p.oe.remove(node)
				continue
			}
			// Step 13.6.
			if node == formattingElement {
				break
			}
			// Step 13.7. Replace node with a clone in both p.afe and p.oe.
			clone := node.clone()
			p.afe[p.afe.index(node)] = clone
			p.oe[p.oe.index(node)] = clone
			node = clone
			// Step 13.8. On the first reparenting, move the bookmark to just
			// after the clone in p.afe.
			if lastNode == furthestBlock {
				bookmark = p.afe.index(node) + 1
			}
			// Step 13.9. Detach lastNode from its parent and hang it under the
			// clone.
			if lastNode.Parent != nil {
				lastNode.Parent.RemoveChild(lastNode)
			}
			node.AppendChild(lastNode)
			// Step 13.10.
			lastNode = node
		}

		// Step 14. Reparent lastNode to the common ancestor,
		// or for misnested table nodes, to the foster parent.
		if lastNode.Parent != nil {
			lastNode.Parent.RemoveChild(lastNode)
		}
		switch commonAncestor.DataAtom {
		case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
			p.fosterParent(lastNode)
		default:
			commonAncestor.AppendChild(lastNode)
		}

		// Steps 15-17. Reparent nodes from the furthest block's children
		// to a clone of the formatting element.
		clone := formattingElement.clone()
		reparentChildren(clone, furthestBlock)
		furthestBlock.AppendChild(clone)

		// Step 18. Fix up the list of active formatting elements.
		if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
			// Move the bookmark with the rest of the list.
			bookmark--
		}
		p.afe.remove(formattingElement)
		p.afe.insert(bookmark, clone)

		// Step 19. Fix up the stack of open elements: the clone goes directly
		// after the furthest block.
		p.oe.remove(formattingElement)
		p.oe.insert(p.oe.index(furthestBlock)+1, clone)
	}
}
1161 | |||
1162 | // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM. | ||
1163 | // "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content | ||
1164 | // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign | ||
1165 | func (p *parser) inBodyEndTagOther(tagAtom a.Atom) { | ||
1166 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
1167 | if p.oe[i].DataAtom == tagAtom { | ||
1168 | p.oe = p.oe[:i] | ||
1169 | break | ||
1170 | } | ||
1171 | if isSpecialElement(p.oe[i]) { | ||
1172 | break | ||
1173 | } | ||
1174 | } | ||
1175 | } | ||
1176 | |||
1177 | // Section 12.2.5.4.8. | ||
1178 | func textIM(p *parser) bool { | ||
1179 | switch p.tok.Type { | ||
1180 | case ErrorToken: | ||
1181 | p.oe.pop() | ||
1182 | case TextToken: | ||
1183 | d := p.tok.Data | ||
1184 | if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil { | ||
1185 | // Ignore a newline at the start of a <textarea> block. | ||
1186 | if d != "" && d[0] == '\r' { | ||
1187 | d = d[1:] | ||
1188 | } | ||
1189 | if d != "" && d[0] == '\n' { | ||
1190 | d = d[1:] | ||
1191 | } | ||
1192 | } | ||
1193 | if d == "" { | ||
1194 | return true | ||
1195 | } | ||
1196 | p.addText(d) | ||
1197 | return true | ||
1198 | case EndTagToken: | ||
1199 | p.oe.pop() | ||
1200 | } | ||
1201 | p.im = p.originalIM | ||
1202 | p.originalIM = nil | ||
1203 | return p.tok.Type == EndTagToken | ||
1204 | } | ||
1205 | |||
1206 | // Section 12.2.5.4.9. | ||
1207 | func inTableIM(p *parser) bool { | ||
1208 | switch p.tok.Type { | ||
1209 | case ErrorToken: | ||
1210 | // Stop parsing. | ||
1211 | return true | ||
1212 | case TextToken: | ||
1213 | p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1) | ||
1214 | switch p.oe.top().DataAtom { | ||
1215 | case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: | ||
1216 | if strings.Trim(p.tok.Data, whitespace) == "" { | ||
1217 | p.addText(p.tok.Data) | ||
1218 | return true | ||
1219 | } | ||
1220 | } | ||
1221 | case StartTagToken: | ||
1222 | switch p.tok.DataAtom { | ||
1223 | case a.Caption: | ||
1224 | p.clearStackToContext(tableScope) | ||
1225 | p.afe = append(p.afe, &scopeMarker) | ||
1226 | p.addElement() | ||
1227 | p.im = inCaptionIM | ||
1228 | return true | ||
1229 | case a.Colgroup: | ||
1230 | p.clearStackToContext(tableScope) | ||
1231 | p.addElement() | ||
1232 | p.im = inColumnGroupIM | ||
1233 | return true | ||
1234 | case a.Col: | ||
1235 | p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String()) | ||
1236 | return false | ||
1237 | case a.Tbody, a.Tfoot, a.Thead: | ||
1238 | p.clearStackToContext(tableScope) | ||
1239 | p.addElement() | ||
1240 | p.im = inTableBodyIM | ||
1241 | return true | ||
1242 | case a.Td, a.Th, a.Tr: | ||
1243 | p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String()) | ||
1244 | return false | ||
1245 | case a.Table: | ||
1246 | if p.popUntil(tableScope, a.Table) { | ||
1247 | p.resetInsertionMode() | ||
1248 | return false | ||
1249 | } | ||
1250 | // Ignore the token. | ||
1251 | return true | ||
1252 | case a.Style, a.Script: | ||
1253 | return inHeadIM(p) | ||
1254 | case a.Input: | ||
1255 | for _, t := range p.tok.Attr { | ||
1256 | if t.Key == "type" && strings.ToLower(t.Val) == "hidden" { | ||
1257 | p.addElement() | ||
1258 | p.oe.pop() | ||
1259 | return true | ||
1260 | } | ||
1261 | } | ||
1262 | // Otherwise drop down to the default action. | ||
1263 | case a.Form: | ||
1264 | if p.form != nil { | ||
1265 | // Ignore the token. | ||
1266 | return true | ||
1267 | } | ||
1268 | p.addElement() | ||
1269 | p.form = p.oe.pop() | ||
1270 | case a.Select: | ||
1271 | p.reconstructActiveFormattingElements() | ||
1272 | switch p.top().DataAtom { | ||
1273 | case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: | ||
1274 | p.fosterParenting = true | ||
1275 | } | ||
1276 | p.addElement() | ||
1277 | p.fosterParenting = false | ||
1278 | p.framesetOK = false | ||
1279 | p.im = inSelectInTableIM | ||
1280 | return true | ||
1281 | } | ||
1282 | case EndTagToken: | ||
1283 | switch p.tok.DataAtom { | ||
1284 | case a.Table: | ||
1285 | if p.popUntil(tableScope, a.Table) { | ||
1286 | p.resetInsertionMode() | ||
1287 | return true | ||
1288 | } | ||
1289 | // Ignore the token. | ||
1290 | return true | ||
1291 | case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: | ||
1292 | // Ignore the token. | ||
1293 | return true | ||
1294 | } | ||
1295 | case CommentToken: | ||
1296 | p.addChild(&Node{ | ||
1297 | Type: CommentNode, | ||
1298 | Data: p.tok.Data, | ||
1299 | }) | ||
1300 | return true | ||
1301 | case DoctypeToken: | ||
1302 | // Ignore the token. | ||
1303 | return true | ||
1304 | } | ||
1305 | |||
1306 | p.fosterParenting = true | ||
1307 | defer func() { p.fosterParenting = false }() | ||
1308 | |||
1309 | return inBodyIM(p) | ||
1310 | } | ||
1311 | |||
1312 | // Section 12.2.5.4.11. | ||
1313 | func inCaptionIM(p *parser) bool { | ||
1314 | switch p.tok.Type { | ||
1315 | case StartTagToken: | ||
1316 | switch p.tok.DataAtom { | ||
1317 | case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr: | ||
1318 | if p.popUntil(tableScope, a.Caption) { | ||
1319 | p.clearActiveFormattingElements() | ||
1320 | p.im = inTableIM | ||
1321 | return false | ||
1322 | } else { | ||
1323 | // Ignore the token. | ||
1324 | return true | ||
1325 | } | ||
1326 | case a.Select: | ||
1327 | p.reconstructActiveFormattingElements() | ||
1328 | p.addElement() | ||
1329 | p.framesetOK = false | ||
1330 | p.im = inSelectInTableIM | ||
1331 | return true | ||
1332 | } | ||
1333 | case EndTagToken: | ||
1334 | switch p.tok.DataAtom { | ||
1335 | case a.Caption: | ||
1336 | if p.popUntil(tableScope, a.Caption) { | ||
1337 | p.clearActiveFormattingElements() | ||
1338 | p.im = inTableIM | ||
1339 | } | ||
1340 | return true | ||
1341 | case a.Table: | ||
1342 | if p.popUntil(tableScope, a.Caption) { | ||
1343 | p.clearActiveFormattingElements() | ||
1344 | p.im = inTableIM | ||
1345 | return false | ||
1346 | } else { | ||
1347 | // Ignore the token. | ||
1348 | return true | ||
1349 | } | ||
1350 | case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: | ||
1351 | // Ignore the token. | ||
1352 | return true | ||
1353 | } | ||
1354 | } | ||
1355 | return inBodyIM(p) | ||
1356 | } | ||
1357 | |||
1358 | // Section 12.2.5.4.12. | ||
1359 | func inColumnGroupIM(p *parser) bool { | ||
1360 | switch p.tok.Type { | ||
1361 | case TextToken: | ||
1362 | s := strings.TrimLeft(p.tok.Data, whitespace) | ||
1363 | if len(s) < len(p.tok.Data) { | ||
1364 | // Add the initial whitespace to the current node. | ||
1365 | p.addText(p.tok.Data[:len(p.tok.Data)-len(s)]) | ||
1366 | if s == "" { | ||
1367 | return true | ||
1368 | } | ||
1369 | p.tok.Data = s | ||
1370 | } | ||
1371 | case CommentToken: | ||
1372 | p.addChild(&Node{ | ||
1373 | Type: CommentNode, | ||
1374 | Data: p.tok.Data, | ||
1375 | }) | ||
1376 | return true | ||
1377 | case DoctypeToken: | ||
1378 | // Ignore the token. | ||
1379 | return true | ||
1380 | case StartTagToken: | ||
1381 | switch p.tok.DataAtom { | ||
1382 | case a.Html: | ||
1383 | return inBodyIM(p) | ||
1384 | case a.Col: | ||
1385 | p.addElement() | ||
1386 | p.oe.pop() | ||
1387 | p.acknowledgeSelfClosingTag() | ||
1388 | return true | ||
1389 | } | ||
1390 | case EndTagToken: | ||
1391 | switch p.tok.DataAtom { | ||
1392 | case a.Colgroup: | ||
1393 | if p.oe.top().DataAtom != a.Html { | ||
1394 | p.oe.pop() | ||
1395 | p.im = inTableIM | ||
1396 | } | ||
1397 | return true | ||
1398 | case a.Col: | ||
1399 | // Ignore the token. | ||
1400 | return true | ||
1401 | } | ||
1402 | } | ||
1403 | if p.oe.top().DataAtom != a.Html { | ||
1404 | p.oe.pop() | ||
1405 | p.im = inTableIM | ||
1406 | return false | ||
1407 | } | ||
1408 | return true | ||
1409 | } | ||
1410 | |||
1411 | // Section 12.2.5.4.13. | ||
1412 | func inTableBodyIM(p *parser) bool { | ||
1413 | switch p.tok.Type { | ||
1414 | case StartTagToken: | ||
1415 | switch p.tok.DataAtom { | ||
1416 | case a.Tr: | ||
1417 | p.clearStackToContext(tableBodyScope) | ||
1418 | p.addElement() | ||
1419 | p.im = inRowIM | ||
1420 | return true | ||
1421 | case a.Td, a.Th: | ||
1422 | p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String()) | ||
1423 | return false | ||
1424 | case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead: | ||
1425 | if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { | ||
1426 | p.im = inTableIM | ||
1427 | return false | ||
1428 | } | ||
1429 | // Ignore the token. | ||
1430 | return true | ||
1431 | } | ||
1432 | case EndTagToken: | ||
1433 | switch p.tok.DataAtom { | ||
1434 | case a.Tbody, a.Tfoot, a.Thead: | ||
1435 | if p.elementInScope(tableScope, p.tok.DataAtom) { | ||
1436 | p.clearStackToContext(tableBodyScope) | ||
1437 | p.oe.pop() | ||
1438 | p.im = inTableIM | ||
1439 | } | ||
1440 | return true | ||
1441 | case a.Table: | ||
1442 | if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) { | ||
1443 | p.im = inTableIM | ||
1444 | return false | ||
1445 | } | ||
1446 | // Ignore the token. | ||
1447 | return true | ||
1448 | case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr: | ||
1449 | // Ignore the token. | ||
1450 | return true | ||
1451 | } | ||
1452 | case CommentToken: | ||
1453 | p.addChild(&Node{ | ||
1454 | Type: CommentNode, | ||
1455 | Data: p.tok.Data, | ||
1456 | }) | ||
1457 | return true | ||
1458 | } | ||
1459 | |||
1460 | return inTableIM(p) | ||
1461 | } | ||
1462 | |||
1463 | // Section 12.2.5.4.14. | ||
1464 | func inRowIM(p *parser) bool { | ||
1465 | switch p.tok.Type { | ||
1466 | case StartTagToken: | ||
1467 | switch p.tok.DataAtom { | ||
1468 | case a.Td, a.Th: | ||
1469 | p.clearStackToContext(tableRowScope) | ||
1470 | p.addElement() | ||
1471 | p.afe = append(p.afe, &scopeMarker) | ||
1472 | p.im = inCellIM | ||
1473 | return true | ||
1474 | case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr: | ||
1475 | if p.popUntil(tableScope, a.Tr) { | ||
1476 | p.im = inTableBodyIM | ||
1477 | return false | ||
1478 | } | ||
1479 | // Ignore the token. | ||
1480 | return true | ||
1481 | } | ||
1482 | case EndTagToken: | ||
1483 | switch p.tok.DataAtom { | ||
1484 | case a.Tr: | ||
1485 | if p.popUntil(tableScope, a.Tr) { | ||
1486 | p.im = inTableBodyIM | ||
1487 | return true | ||
1488 | } | ||
1489 | // Ignore the token. | ||
1490 | return true | ||
1491 | case a.Table: | ||
1492 | if p.popUntil(tableScope, a.Tr) { | ||
1493 | p.im = inTableBodyIM | ||
1494 | return false | ||
1495 | } | ||
1496 | // Ignore the token. | ||
1497 | return true | ||
1498 | case a.Tbody, a.Tfoot, a.Thead: | ||
1499 | if p.elementInScope(tableScope, p.tok.DataAtom) { | ||
1500 | p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String()) | ||
1501 | return false | ||
1502 | } | ||
1503 | // Ignore the token. | ||
1504 | return true | ||
1505 | case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th: | ||
1506 | // Ignore the token. | ||
1507 | return true | ||
1508 | } | ||
1509 | } | ||
1510 | |||
1511 | return inTableIM(p) | ||
1512 | } | ||
1513 | |||
1514 | // Section 12.2.5.4.15. | ||
1515 | func inCellIM(p *parser) bool { | ||
1516 | switch p.tok.Type { | ||
1517 | case StartTagToken: | ||
1518 | switch p.tok.DataAtom { | ||
1519 | case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr: | ||
1520 | if p.popUntil(tableScope, a.Td, a.Th) { | ||
1521 | // Close the cell and reprocess. | ||
1522 | p.clearActiveFormattingElements() | ||
1523 | p.im = inRowIM | ||
1524 | return false | ||
1525 | } | ||
1526 | // Ignore the token. | ||
1527 | return true | ||
1528 | case a.Select: | ||
1529 | p.reconstructActiveFormattingElements() | ||
1530 | p.addElement() | ||
1531 | p.framesetOK = false | ||
1532 | p.im = inSelectInTableIM | ||
1533 | return true | ||
1534 | } | ||
1535 | case EndTagToken: | ||
1536 | switch p.tok.DataAtom { | ||
1537 | case a.Td, a.Th: | ||
1538 | if !p.popUntil(tableScope, p.tok.DataAtom) { | ||
1539 | // Ignore the token. | ||
1540 | return true | ||
1541 | } | ||
1542 | p.clearActiveFormattingElements() | ||
1543 | p.im = inRowIM | ||
1544 | return true | ||
1545 | case a.Body, a.Caption, a.Col, a.Colgroup, a.Html: | ||
1546 | // Ignore the token. | ||
1547 | return true | ||
1548 | case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr: | ||
1549 | if !p.elementInScope(tableScope, p.tok.DataAtom) { | ||
1550 | // Ignore the token. | ||
1551 | return true | ||
1552 | } | ||
1553 | // Close the cell and reprocess. | ||
1554 | p.popUntil(tableScope, a.Td, a.Th) | ||
1555 | p.clearActiveFormattingElements() | ||
1556 | p.im = inRowIM | ||
1557 | return false | ||
1558 | } | ||
1559 | } | ||
1560 | return inBodyIM(p) | ||
1561 | } | ||
1562 | |||
1563 | // Section 12.2.5.4.16. | ||
1564 | func inSelectIM(p *parser) bool { | ||
1565 | switch p.tok.Type { | ||
1566 | case ErrorToken: | ||
1567 | // Stop parsing. | ||
1568 | return true | ||
1569 | case TextToken: | ||
1570 | p.addText(strings.Replace(p.tok.Data, "\x00", "", -1)) | ||
1571 | case StartTagToken: | ||
1572 | switch p.tok.DataAtom { | ||
1573 | case a.Html: | ||
1574 | return inBodyIM(p) | ||
1575 | case a.Option: | ||
1576 | if p.top().DataAtom == a.Option { | ||
1577 | p.oe.pop() | ||
1578 | } | ||
1579 | p.addElement() | ||
1580 | case a.Optgroup: | ||
1581 | if p.top().DataAtom == a.Option { | ||
1582 | p.oe.pop() | ||
1583 | } | ||
1584 | if p.top().DataAtom == a.Optgroup { | ||
1585 | p.oe.pop() | ||
1586 | } | ||
1587 | p.addElement() | ||
1588 | case a.Select: | ||
1589 | p.tok.Type = EndTagToken | ||
1590 | return false | ||
1591 | case a.Input, a.Keygen, a.Textarea: | ||
1592 | if p.elementInScope(selectScope, a.Select) { | ||
1593 | p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) | ||
1594 | return false | ||
1595 | } | ||
1596 | // In order to properly ignore <textarea>, we need to change the tokenizer mode. | ||
1597 | p.tokenizer.NextIsNotRawText() | ||
1598 | // Ignore the token. | ||
1599 | return true | ||
1600 | case a.Script: | ||
1601 | return inHeadIM(p) | ||
1602 | } | ||
1603 | case EndTagToken: | ||
1604 | switch p.tok.DataAtom { | ||
1605 | case a.Option: | ||
1606 | if p.top().DataAtom == a.Option { | ||
1607 | p.oe.pop() | ||
1608 | } | ||
1609 | case a.Optgroup: | ||
1610 | i := len(p.oe) - 1 | ||
1611 | if p.oe[i].DataAtom == a.Option { | ||
1612 | i-- | ||
1613 | } | ||
1614 | if p.oe[i].DataAtom == a.Optgroup { | ||
1615 | p.oe = p.oe[:i] | ||
1616 | } | ||
1617 | case a.Select: | ||
1618 | if p.popUntil(selectScope, a.Select) { | ||
1619 | p.resetInsertionMode() | ||
1620 | } | ||
1621 | } | ||
1622 | case CommentToken: | ||
1623 | p.addChild(&Node{ | ||
1624 | Type: CommentNode, | ||
1625 | Data: p.tok.Data, | ||
1626 | }) | ||
1627 | case DoctypeToken: | ||
1628 | // Ignore the token. | ||
1629 | return true | ||
1630 | } | ||
1631 | |||
1632 | return true | ||
1633 | } | ||
1634 | |||
1635 | // Section 12.2.5.4.17. | ||
1636 | func inSelectInTableIM(p *parser) bool { | ||
1637 | switch p.tok.Type { | ||
1638 | case StartTagToken, EndTagToken: | ||
1639 | switch p.tok.DataAtom { | ||
1640 | case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th: | ||
1641 | if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) { | ||
1642 | p.parseImpliedToken(EndTagToken, a.Select, a.Select.String()) | ||
1643 | return false | ||
1644 | } else { | ||
1645 | // Ignore the token. | ||
1646 | return true | ||
1647 | } | ||
1648 | } | ||
1649 | } | ||
1650 | return inSelectIM(p) | ||
1651 | } | ||
1652 | |||
1653 | // Section 12.2.5.4.18. | ||
1654 | func afterBodyIM(p *parser) bool { | ||
1655 | switch p.tok.Type { | ||
1656 | case ErrorToken: | ||
1657 | // Stop parsing. | ||
1658 | return true | ||
1659 | case TextToken: | ||
1660 | s := strings.TrimLeft(p.tok.Data, whitespace) | ||
1661 | if len(s) == 0 { | ||
1662 | // It was all whitespace. | ||
1663 | return inBodyIM(p) | ||
1664 | } | ||
1665 | case StartTagToken: | ||
1666 | if p.tok.DataAtom == a.Html { | ||
1667 | return inBodyIM(p) | ||
1668 | } | ||
1669 | case EndTagToken: | ||
1670 | if p.tok.DataAtom == a.Html { | ||
1671 | if !p.fragment { | ||
1672 | p.im = afterAfterBodyIM | ||
1673 | } | ||
1674 | return true | ||
1675 | } | ||
1676 | case CommentToken: | ||
1677 | // The comment is attached to the <html> element. | ||
1678 | if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html { | ||
1679 | panic("html: bad parser state: <html> element not found, in the after-body insertion mode") | ||
1680 | } | ||
1681 | p.oe[0].AppendChild(&Node{ | ||
1682 | Type: CommentNode, | ||
1683 | Data: p.tok.Data, | ||
1684 | }) | ||
1685 | return true | ||
1686 | } | ||
1687 | p.im = inBodyIM | ||
1688 | return false | ||
1689 | } | ||
1690 | |||
1691 | // Section 12.2.5.4.19. | ||
1692 | func inFramesetIM(p *parser) bool { | ||
1693 | switch p.tok.Type { | ||
1694 | case CommentToken: | ||
1695 | p.addChild(&Node{ | ||
1696 | Type: CommentNode, | ||
1697 | Data: p.tok.Data, | ||
1698 | }) | ||
1699 | case TextToken: | ||
1700 | // Ignore all text but whitespace. | ||
1701 | s := strings.Map(func(c rune) rune { | ||
1702 | switch c { | ||
1703 | case ' ', '\t', '\n', '\f', '\r': | ||
1704 | return c | ||
1705 | } | ||
1706 | return -1 | ||
1707 | }, p.tok.Data) | ||
1708 | if s != "" { | ||
1709 | p.addText(s) | ||
1710 | } | ||
1711 | case StartTagToken: | ||
1712 | switch p.tok.DataAtom { | ||
1713 | case a.Html: | ||
1714 | return inBodyIM(p) | ||
1715 | case a.Frameset: | ||
1716 | p.addElement() | ||
1717 | case a.Frame: | ||
1718 | p.addElement() | ||
1719 | p.oe.pop() | ||
1720 | p.acknowledgeSelfClosingTag() | ||
1721 | case a.Noframes: | ||
1722 | return inHeadIM(p) | ||
1723 | } | ||
1724 | case EndTagToken: | ||
1725 | switch p.tok.DataAtom { | ||
1726 | case a.Frameset: | ||
1727 | if p.oe.top().DataAtom != a.Html { | ||
1728 | p.oe.pop() | ||
1729 | if p.oe.top().DataAtom != a.Frameset { | ||
1730 | p.im = afterFramesetIM | ||
1731 | return true | ||
1732 | } | ||
1733 | } | ||
1734 | } | ||
1735 | default: | ||
1736 | // Ignore the token. | ||
1737 | } | ||
1738 | return true | ||
1739 | } | ||
1740 | |||
1741 | // Section 12.2.5.4.20. | ||
1742 | func afterFramesetIM(p *parser) bool { | ||
1743 | switch p.tok.Type { | ||
1744 | case CommentToken: | ||
1745 | p.addChild(&Node{ | ||
1746 | Type: CommentNode, | ||
1747 | Data: p.tok.Data, | ||
1748 | }) | ||
1749 | case TextToken: | ||
1750 | // Ignore all text but whitespace. | ||
1751 | s := strings.Map(func(c rune) rune { | ||
1752 | switch c { | ||
1753 | case ' ', '\t', '\n', '\f', '\r': | ||
1754 | return c | ||
1755 | } | ||
1756 | return -1 | ||
1757 | }, p.tok.Data) | ||
1758 | if s != "" { | ||
1759 | p.addText(s) | ||
1760 | } | ||
1761 | case StartTagToken: | ||
1762 | switch p.tok.DataAtom { | ||
1763 | case a.Html: | ||
1764 | return inBodyIM(p) | ||
1765 | case a.Noframes: | ||
1766 | return inHeadIM(p) | ||
1767 | } | ||
1768 | case EndTagToken: | ||
1769 | switch p.tok.DataAtom { | ||
1770 | case a.Html: | ||
1771 | p.im = afterAfterFramesetIM | ||
1772 | return true | ||
1773 | } | ||
1774 | default: | ||
1775 | // Ignore the token. | ||
1776 | } | ||
1777 | return true | ||
1778 | } | ||
1779 | |||
1780 | // Section 12.2.5.4.21. | ||
1781 | func afterAfterBodyIM(p *parser) bool { | ||
1782 | switch p.tok.Type { | ||
1783 | case ErrorToken: | ||
1784 | // Stop parsing. | ||
1785 | return true | ||
1786 | case TextToken: | ||
1787 | s := strings.TrimLeft(p.tok.Data, whitespace) | ||
1788 | if len(s) == 0 { | ||
1789 | // It was all whitespace. | ||
1790 | return inBodyIM(p) | ||
1791 | } | ||
1792 | case StartTagToken: | ||
1793 | if p.tok.DataAtom == a.Html { | ||
1794 | return inBodyIM(p) | ||
1795 | } | ||
1796 | case CommentToken: | ||
1797 | p.doc.AppendChild(&Node{ | ||
1798 | Type: CommentNode, | ||
1799 | Data: p.tok.Data, | ||
1800 | }) | ||
1801 | return true | ||
1802 | case DoctypeToken: | ||
1803 | return inBodyIM(p) | ||
1804 | } | ||
1805 | p.im = inBodyIM | ||
1806 | return false | ||
1807 | } | ||
1808 | |||
1809 | // Section 12.2.5.4.22. | ||
1810 | func afterAfterFramesetIM(p *parser) bool { | ||
1811 | switch p.tok.Type { | ||
1812 | case CommentToken: | ||
1813 | p.doc.AppendChild(&Node{ | ||
1814 | Type: CommentNode, | ||
1815 | Data: p.tok.Data, | ||
1816 | }) | ||
1817 | case TextToken: | ||
1818 | // Ignore all text but whitespace. | ||
1819 | s := strings.Map(func(c rune) rune { | ||
1820 | switch c { | ||
1821 | case ' ', '\t', '\n', '\f', '\r': | ||
1822 | return c | ||
1823 | } | ||
1824 | return -1 | ||
1825 | }, p.tok.Data) | ||
1826 | if s != "" { | ||
1827 | p.tok.Data = s | ||
1828 | return inBodyIM(p) | ||
1829 | } | ||
1830 | case StartTagToken: | ||
1831 | switch p.tok.DataAtom { | ||
1832 | case a.Html: | ||
1833 | return inBodyIM(p) | ||
1834 | case a.Noframes: | ||
1835 | return inHeadIM(p) | ||
1836 | } | ||
1837 | case DoctypeToken: | ||
1838 | return inBodyIM(p) | ||
1839 | default: | ||
1840 | // Ignore the token. | ||
1841 | } | ||
1842 | return true | ||
1843 | } | ||
1844 | |||
// whitespaceOrNUL is the whitespace cutset extended with the NUL character.
// parseForeignContent uses it to decide whether a text token leaves
// framesetOK set.
const whitespaceOrNUL = whitespace + "\x00"
1846 | |||
1847 | // Section 12.2.5.5. | ||
1848 | func parseForeignContent(p *parser) bool { | ||
1849 | switch p.tok.Type { | ||
1850 | case TextToken: | ||
1851 | if p.framesetOK { | ||
1852 | p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == "" | ||
1853 | } | ||
1854 | p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1) | ||
1855 | p.addText(p.tok.Data) | ||
1856 | case CommentToken: | ||
1857 | p.addChild(&Node{ | ||
1858 | Type: CommentNode, | ||
1859 | Data: p.tok.Data, | ||
1860 | }) | ||
1861 | case StartTagToken: | ||
1862 | b := breakout[p.tok.Data] | ||
1863 | if p.tok.DataAtom == a.Font { | ||
1864 | loop: | ||
1865 | for _, attr := range p.tok.Attr { | ||
1866 | switch attr.Key { | ||
1867 | case "color", "face", "size": | ||
1868 | b = true | ||
1869 | break loop | ||
1870 | } | ||
1871 | } | ||
1872 | } | ||
1873 | if b { | ||
1874 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
1875 | n := p.oe[i] | ||
1876 | if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) { | ||
1877 | p.oe = p.oe[:i+1] | ||
1878 | break | ||
1879 | } | ||
1880 | } | ||
1881 | return false | ||
1882 | } | ||
1883 | switch p.top().Namespace { | ||
1884 | case "math": | ||
1885 | adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments) | ||
1886 | case "svg": | ||
1887 | // Adjust SVG tag names. The tokenizer lower-cases tag names, but | ||
1888 | // SVG wants e.g. "foreignObject" with a capital second "O". | ||
1889 | if x := svgTagNameAdjustments[p.tok.Data]; x != "" { | ||
1890 | p.tok.DataAtom = a.Lookup([]byte(x)) | ||
1891 | p.tok.Data = x | ||
1892 | } | ||
1893 | adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments) | ||
1894 | default: | ||
1895 | panic("html: bad parser state: unexpected namespace") | ||
1896 | } | ||
1897 | adjustForeignAttributes(p.tok.Attr) | ||
1898 | namespace := p.top().Namespace | ||
1899 | p.addElement() | ||
1900 | p.top().Namespace = namespace | ||
1901 | if namespace != "" { | ||
1902 | // Don't let the tokenizer go into raw text mode in foreign content | ||
1903 | // (e.g. in an SVG <title> tag). | ||
1904 | p.tokenizer.NextIsNotRawText() | ||
1905 | } | ||
1906 | if p.hasSelfClosingToken { | ||
1907 | p.oe.pop() | ||
1908 | p.acknowledgeSelfClosingTag() | ||
1909 | } | ||
1910 | case EndTagToken: | ||
1911 | for i := len(p.oe) - 1; i >= 0; i-- { | ||
1912 | if p.oe[i].Namespace == "" { | ||
1913 | return p.im(p) | ||
1914 | } | ||
1915 | if strings.EqualFold(p.oe[i].Data, p.tok.Data) { | ||
1916 | p.oe = p.oe[:i] | ||
1917 | break | ||
1918 | } | ||
1919 | } | ||
1920 | return true | ||
1921 | default: | ||
1922 | // Ignore the token. | ||
1923 | } | ||
1924 | return true | ||
1925 | } | ||
1926 | |||
1927 | // Section 12.2.5. | ||
1928 | func (p *parser) inForeignContent() bool { | ||
1929 | if len(p.oe) == 0 { | ||
1930 | return false | ||
1931 | } | ||
1932 | n := p.oe[len(p.oe)-1] | ||
1933 | if n.Namespace == "" { | ||
1934 | return false | ||
1935 | } | ||
1936 | if mathMLTextIntegrationPoint(n) { | ||
1937 | if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark { | ||
1938 | return false | ||
1939 | } | ||
1940 | if p.tok.Type == TextToken { | ||
1941 | return false | ||
1942 | } | ||
1943 | } | ||
1944 | if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg { | ||
1945 | return false | ||
1946 | } | ||
1947 | if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) { | ||
1948 | return false | ||
1949 | } | ||
1950 | if p.tok.Type == ErrorToken { | ||
1951 | return false | ||
1952 | } | ||
1953 | return true | ||
1954 | } | ||
1955 | |||
1956 | // parseImpliedToken parses a token as though it had appeared in the parser's | ||
1957 | // input. | ||
1958 | func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) { | ||
1959 | realToken, selfClosing := p.tok, p.hasSelfClosingToken | ||
1960 | p.tok = Token{ | ||
1961 | Type: t, | ||
1962 | DataAtom: dataAtom, | ||
1963 | Data: data, | ||
1964 | } | ||
1965 | p.hasSelfClosingToken = false | ||
1966 | p.parseCurrentToken() | ||
1967 | p.tok, p.hasSelfClosingToken = realToken, selfClosing | ||
1968 | } | ||
1969 | |||
1970 | // parseCurrentToken runs the current token through the parsing routines | ||
1971 | // until it is consumed. | ||
1972 | func (p *parser) parseCurrentToken() { | ||
1973 | if p.tok.Type == SelfClosingTagToken { | ||
1974 | p.hasSelfClosingToken = true | ||
1975 | p.tok.Type = StartTagToken | ||
1976 | } | ||
1977 | |||
1978 | consumed := false | ||
1979 | for !consumed { | ||
1980 | if p.inForeignContent() { | ||
1981 | consumed = parseForeignContent(p) | ||
1982 | } else { | ||
1983 | consumed = p.im(p) | ||
1984 | } | ||
1985 | } | ||
1986 | |||
1987 | if p.hasSelfClosingToken { | ||
1988 | // This is a parse error, but ignore it. | ||
1989 | p.hasSelfClosingToken = false | ||
1990 | } | ||
1991 | } | ||
1992 | |||
1993 | func (p *parser) parse() error { | ||
1994 | // Iterate until EOF. Any other error will cause an early return. | ||
1995 | var err error | ||
1996 | for err != io.EOF { | ||
1997 | // CDATA sections are allowed only in foreign content. | ||
1998 | n := p.oe.top() | ||
1999 | p.tokenizer.AllowCDATA(n != nil && n.Namespace != "") | ||
2000 | // Read and parse the next token. | ||
2001 | p.tokenizer.Next() | ||
2002 | p.tok = p.tokenizer.Token() | ||
2003 | if p.tok.Type == ErrorToken { | ||
2004 | err = p.tokenizer.Err() | ||
2005 | if err != nil && err != io.EOF { | ||
2006 | return err | ||
2007 | } | ||
2008 | } | ||
2009 | p.parseCurrentToken() | ||
2010 | } | ||
2011 | return nil | ||
2012 | } | ||
2013 | |||
2014 | // Parse returns the parse tree for the HTML from the given Reader. | ||
2015 | // The input is assumed to be UTF-8 encoded. | ||
2016 | func Parse(r io.Reader) (*Node, error) { | ||
2017 | p := &parser{ | ||
2018 | tokenizer: NewTokenizer(r), | ||
2019 | doc: &Node{ | ||
2020 | Type: DocumentNode, | ||
2021 | }, | ||
2022 | scripting: true, | ||
2023 | framesetOK: true, | ||
2024 | im: initialIM, | ||
2025 | } | ||
2026 | err := p.parse() | ||
2027 | if err != nil { | ||
2028 | return nil, err | ||
2029 | } | ||
2030 | return p.doc, nil | ||
2031 | } | ||
2032 | |||
2033 | // ParseFragment parses a fragment of HTML and returns the nodes that were | ||
2034 | // found. If the fragment is the InnerHTML for an existing element, pass that | ||
2035 | // element in context. | ||
2036 | func ParseFragment(r io.Reader, context *Node) ([]*Node, error) { | ||
2037 | contextTag := "" | ||
2038 | if context != nil { | ||
2039 | if context.Type != ElementNode { | ||
2040 | return nil, errors.New("html: ParseFragment of non-element Node") | ||
2041 | } | ||
2042 | // The next check isn't just context.DataAtom.String() == context.Data because | ||
2043 | // it is valid to pass an element whose tag isn't a known atom. For example, | ||
2044 | // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent. | ||
2045 | if context.DataAtom != a.Lookup([]byte(context.Data)) { | ||
2046 | return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data) | ||
2047 | } | ||
2048 | contextTag = context.DataAtom.String() | ||
2049 | } | ||
2050 | p := &parser{ | ||
2051 | tokenizer: NewTokenizerFragment(r, contextTag), | ||
2052 | doc: &Node{ | ||
2053 | Type: DocumentNode, | ||
2054 | }, | ||
2055 | scripting: true, | ||
2056 | fragment: true, | ||
2057 | context: context, | ||
2058 | } | ||
2059 | |||
2060 | root := &Node{ | ||
2061 | Type: ElementNode, | ||
2062 | DataAtom: a.Html, | ||
2063 | Data: a.Html.String(), | ||
2064 | } | ||
2065 | p.doc.AppendChild(root) | ||
2066 | p.oe = nodeStack{root} | ||
2067 | p.resetInsertionMode() | ||
2068 | |||
2069 | for n := context; n != nil; n = n.Parent { | ||
2070 | if n.Type == ElementNode && n.DataAtom == a.Form { | ||
2071 | p.form = n | ||
2072 | break | ||
2073 | } | ||
2074 | } | ||
2075 | |||
2076 | err := p.parse() | ||
2077 | if err != nil { | ||
2078 | return nil, err | ||
2079 | } | ||
2080 | |||
2081 | parent := p.doc | ||
2082 | if context != nil { | ||
2083 | parent = root | ||
2084 | } | ||
2085 | |||
2086 | var result []*Node | ||
2087 | for c := parent.FirstChild; c != nil; { | ||
2088 | next := c.NextSibling | ||
2089 | parent.RemoveChild(c) | ||
2090 | result = append(result, c) | ||
2091 | c = next | ||
2092 | } | ||
2093 | return result, nil | ||
2094 | } | ||