vendor/golang.org/x/net/html/parse.go

   1 // Copyright 2010 The Go Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style
   3 // license that can be found in the LICENSE file.
   4
   5 package html
   6
   7 import (
   8         "errors"
   9         "fmt"
  10         "io"
  11         "strings"
  12
  13         a "golang.org/x/net/html/atom"
  14 )
  15
  16 // A parser implements the HTML5 parsing algorithm:
  17 // https://html.spec.whatwg.org/multipage/syntax.html#tree-construction
  18 type parser struct {
  19         // tokenizer provides the tokens for the parser.
  20         tokenizer *Tokenizer
  21         // tok is the most recently read token.
  22         tok Token
  23         // Self-closing tags like <hr/> are treated as start tags, except that
  24         // hasSelfClosingToken is set while they are being processed.
  25         hasSelfClosingToken bool
  26         // doc is the document root element.
  27         doc *Node
  28         // The stack of open elements (section 12.2.3.2) and active formatting
  29         // elements (section 12.2.3.3).
  30         oe, afe nodeStack
  31         // Element pointers (section 12.2.3.4).
  32         head, form *Node
  33         // Other parsing state flags (section 12.2.3.5).
  34         scripting, framesetOK bool
  35         // im is the current insertion mode.
  36         im insertionMode
  37         // originalIM is the insertion mode to go back to after completing a text
  38         // or inTableText insertion mode.
  39         originalIM insertionMode
  40         // fosterParenting is whether new elements should be inserted according to
  41         // the foster parenting rules (section 12.2.5.3).
  42         fosterParenting bool
  43         // quirks is whether the parser is operating in "quirks mode."
  44         quirks bool
  45         // fragment is whether the parser is parsing an HTML fragment.
  46         fragment bool
  47         // context is the context element when parsing an HTML fragment
  48         // (section 12.4).
  49         context *Node
  50 }
  51
  52 func (p *parser) top() *Node {
  53         if n := p.oe.top(); n != nil {
  54                 return n
  55         }
  56         return p.doc
  57 }
  58
  59 // Stop tags for use in popUntil. These come from section 12.2.3.2.
  60 var (
  61         defaultScopeStopTags = map[string][]a.Atom{
  62                 "":     {a.Applet, a.Caption, a.Html, a.Table, a.Td, a.Th, a.Marquee, a.Object, a.Template},
  63                 "math": {a.AnnotationXml, a.Mi, a.Mn, a.Mo, a.Ms, a.Mtext},
  64                 "svg":  {a.Desc, a.ForeignObject, a.Title},
  65         }
  66 )
  67
  68 type scope int
  69
  70 const (
  71         defaultScope scope = iota
  72         listItemScope
  73         buttonScope
  74         tableScope
  75         tableRowScope
  76         tableBodyScope
  77         selectScope
  78 )
  79
  80 // popUntil pops the stack of open elements at the highest element whose tag
  81 // is in matchTags, provided there is no higher element in the scope's stop
  82 // tags (as defined in section 12.2.3.2). It returns whether or not there was
  83 // such an element. If there was not, popUntil leaves the stack unchanged.
  84 //
  85 // For example, the set of stop tags for table scope is: "html", "table". If
  86 // the stack was:
  87 // ["html", "body", "font", "table", "b", "i", "u"]
  88 // then popUntil(tableScope, "font") would return false, but
  89 // popUntil(tableScope, "i") would return true and the stack would become:
  90 // ["html", "body", "font", "table", "b"]
  91 //
  92 // If an element's tag is in both the stop tags and matchTags, then the stack
  93 // will be popped and the function returns true (provided, of course, there was
  94 // no higher element in the stack that was also in the stop tags). For example,
  95 // popUntil(tableScope, "table") returns true and leaves:
  96 // ["html", "body", "font"]
  97 func (p *parser) popUntil(s scope, matchTags ...a.Atom) bool {
  98         if i := p.indexOfElementInScope(s, matchTags...); i != -1 {
  99                 p.oe = p.oe[:i]
 100                 return true
 101         }
 102         return false
 103 }
 104
 105 // indexOfElementInScope returns the index in p.oe of the highest element whose
 106 // tag is in matchTags that is in scope. If no matching element is in scope, it
 107 // returns -1.
 108 func (p *parser) indexOfElementInScope(s scope, matchTags ...a.Atom) int {
 109         for i := len(p.oe) - 1; i >= 0; i-- {
 110                 tagAtom := p.oe[i].DataAtom
 111                 if p.oe[i].Namespace == "" {
 112                         for _, t := range matchTags {
 113                                 if t == tagAtom {
 114                                         return i
 115                                 }
 116                         }
 117                         switch s {
 118                         case defaultScope:
 119                                 // No-op.
 120                         case listItemScope:
 121                                 if tagAtom == a.Ol || tagAtom == a.Ul {
 122                                         return -1
 123                                 }
 124                         case buttonScope:
 125                                 if tagAtom == a.Button {
 126                                         return -1
 127                                 }
 128                         case tableScope:
 129                                 if tagAtom == a.Html || tagAtom == a.Table {
 130                                         return -1
 131                                 }
 132                         case selectScope:
 133                                 if tagAtom != a.Optgroup && tagAtom != a.Option {
 134                                         return -1
 135                                 }
 136                         default:
 137                                 panic("unreachable")
 138                         }
 139                 }
 140                 switch s {
 141                 case defaultScope, listItemScope, buttonScope:
 142                         for _, t := range defaultScopeStopTags[p.oe[i].Namespace] {
 143                                 if t == tagAtom {
 144                                         return -1
 145                                 }
 146                         }
 147                 }
 148         }
 149         return -1
 150 }
 151
 152 // elementInScope is like popUntil, except that it doesn't modify the stack of
 153 // open elements.
 154 func (p *parser) elementInScope(s scope, matchTags ...a.Atom) bool {
 155         return p.indexOfElementInScope(s, matchTags...) != -1
 156 }
 157
 158 // clearStackToContext pops elements off the stack of open elements until a
 159 // scope-defined element is found.
 160 func (p *parser) clearStackToContext(s scope) {
 161         for i := len(p.oe) - 1; i >= 0; i-- {
 162                 tagAtom := p.oe[i].DataAtom
 163                 switch s {
 164                 case tableScope:
 165                         if tagAtom == a.Html || tagAtom == a.Table {
 166                                 p.oe = p.oe[:i+1]
 167                                 return
 168                         }
 169                 case tableRowScope:
 170                         if tagAtom == a.Html || tagAtom == a.Tr {
 171                                 p.oe = p.oe[:i+1]
 172                                 return
 173                         }
 174                 case tableBodyScope:
 175                         if tagAtom == a.Html || tagAtom == a.Tbody || tagAtom == a.Tfoot || tagAtom == a.Thead {
 176                                 p.oe = p.oe[:i+1]
 177                                 return
 178                         }
 179                 default:
 180                         panic("unreachable")
 181                 }
 182         }
 183 }
 184
 185 // generateImpliedEndTags pops nodes off the stack of open elements as long as
 186 // the top node has a tag name of dd, dt, li, option, optgroup, p, rp, or rt.
 187 // If exceptions are specified, nodes with that name will not be popped off.
 188 func (p *parser) generateImpliedEndTags(exceptions ...string) {
 189         var i int
 190 loop:
 191         for i = len(p.oe) - 1; i >= 0; i-- {
 192                 n := p.oe[i]
 193                 if n.Type == ElementNode {
 194                         switch n.DataAtom {
 195                         case a.Dd, a.Dt, a.Li, a.Option, a.Optgroup, a.P, a.Rp, a.Rt:
 196                                 for _, except := range exceptions {
 197                                         if n.Data == except {
 198                                                 break loop
 199                                         }
 200                                 }
 201                                 continue
 202                         }
 203                 }
 204                 break
 205         }
 206
 207         p.oe = p.oe[:i+1]
 208 }
 209
 210 // addChild adds a child node n to the top element, and pushes n onto the stack
 211 // of open elements if it is an element node.
 212 func (p *parser) addChild(n *Node) {
 213         if p.shouldFosterParent() {
 214                 p.fosterParent(n)
 215         } else {
 216                 p.top().AppendChild(n)
 217         }
 218
 219         if n.Type == ElementNode {
 220                 p.oe = append(p.oe, n)
 221         }
 222 }
 223
 224 // shouldFosterParent returns whether the next node to be added should be
 225 // foster parented.
 226 func (p *parser) shouldFosterParent() bool {
 227         if p.fosterParenting {
 228                 switch p.top().DataAtom {
 229                 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
 230                         return true
 231                 }
 232         }
 233         return false
 234 }
 235
 236 // fosterParent adds a child node according to the foster parenting rules.
 237 // Section 12.2.5.3, "foster parenting".
 238 func (p *parser) fosterParent(n *Node) {
 239         var table, parent, prev *Node
 240         var i int
 241         for i = len(p.oe) - 1; i >= 0; i-- {
 242                 if p.oe[i].DataAtom == a.Table {
 243                         table = p.oe[i]
 244                         break
 245                 }
 246         }
 247
 248         if table == nil {
 249                 // The foster parent is the html element.
 250                 parent = p.oe[0]
 251         } else {
 252                 parent = table.Parent
 253         }
 254         if parent == nil {
 255                 parent = p.oe[i-1]
 256         }
 257
 258         if table != nil {
 259                 prev = table.PrevSibling
 260         } else {
 261                 prev = parent.LastChild
 262         }
 263         if prev != nil && prev.Type == TextNode && n.Type == TextNode {
 264                 prev.Data += n.Data
 265                 return
 266         }
 267
 268         parent.InsertBefore(n, table)
 269 }
 270
 271 // addText adds text to the preceding node if it is a text node, or else it
 272 // calls addChild with a new text node.
 273 func (p *parser) addText(text string) {
 274         if text == "" {
 275                 return
 276         }
 277
 278         if p.shouldFosterParent() {
 279                 p.fosterParent(&Node{
 280                         Type: TextNode,
 281                         Data: text,
 282                 })
 283                 return
 284         }
 285
 286         t := p.top()
 287         if n := t.LastChild; n != nil && n.Type == TextNode {
 288                 n.Data += text
 289                 return
 290         }
 291         p.addChild(&Node{
 292                 Type: TextNode,
 293                 Data: text,
 294         })
 295 }
 296
 297 // addElement adds a child element based on the current token.
 298 func (p *parser) addElement() {
 299         p.addChild(&Node{
 300                 Type:     ElementNode,
 301                 DataAtom: p.tok.DataAtom,
 302                 Data:     p.tok.Data,
 303                 Attr:     p.tok.Attr,
 304         })
 305 }
 306
 307 // Section 12.2.3.3.
 308 func (p *parser) addFormattingElement() {
 309         tagAtom, attr := p.tok.DataAtom, p.tok.Attr
 310         p.addElement()
 311
 312         // Implement the Noah's Ark clause, but with three per family instead of two.
 313         identicalElements := 0
 314 findIdenticalElements:
 315         for i := len(p.afe) - 1; i >= 0; i-- {
 316                 n := p.afe[i]
 317                 if n.Type == scopeMarkerNode {
 318                         break
 319                 }
 320                 if n.Type != ElementNode {
 321                         continue
 322                 }
 323                 if n.Namespace != "" {
 324                         continue
 325                 }
 326                 if n.DataAtom != tagAtom {
 327                         continue
 328                 }
 329                 if len(n.Attr) != len(attr) {
 330                         continue
 331                 }
 332         compareAttributes:
 333                 for _, t0 := range n.Attr {
 334                         for _, t1 := range attr {
 335                                 if t0.Key == t1.Key && t0.Namespace == t1.Namespace && t0.Val == t1.Val {
 336                                         // Found a match for this attribute, continue with the next attribute.
 337                                         continue compareAttributes
 338                                 }
 339                         }
 340                         // If we get here, there is no attribute that matches a.
 341                         // Therefore the element is not identical to the new one.
 342                         continue findIdenticalElements
 343                 }
 344
 345                 identicalElements++
 346                 if identicalElements >= 3 {
 347                         p.afe.remove(n)
 348                 }
 349         }
 350
 351         p.afe = append(p.afe, p.top())
 352 }
 353
 354 // Section 12.2.3.3.
 355 func (p *parser) clearActiveFormattingElements() {
 356         for {
 357                 n := p.afe.pop()
 358                 if len(p.afe) == 0 || n.Type == scopeMarkerNode {
 359                         return
 360                 }
 361         }
 362 }
 363
 364 // Section 12.2.3.3.
 365 func (p *parser) reconstructActiveFormattingElements() {
 366         n := p.afe.top()
 367         if n == nil {
 368                 return
 369         }
 370         if n.Type == scopeMarkerNode || p.oe.index(n) != -1 {
 371                 return
 372         }
 373         i := len(p.afe) - 1
 374         for n.Type != scopeMarkerNode && p.oe.index(n) == -1 {
 375                 if i == 0 {
 376                         i = -1
 377                         break
 378                 }
 379                 i--
 380                 n = p.afe[i]
 381         }
 382         for {
 383                 i++
 384                 clone := p.afe[i].clone()
 385                 p.addChild(clone)
 386                 p.afe[i] = clone
 387                 if i == len(p.afe)-1 {
 388                         break
 389                 }
 390         }
 391 }
 392
 393 // Section 12.2.4.
 394 func (p *parser) acknowledgeSelfClosingTag() {
 395         p.hasSelfClosingToken = false
 396 }
 397
 398 // An insertion mode (section 12.2.3.1) is the state transition function from
 399 // a particular state in the HTML5 parser's state machine. It updates the
 400 // parser's fields depending on parser.tok (where ErrorToken means EOF).
 401 // It returns whether the token was consumed.
 402 type insertionMode func(*parser) bool
 403
 404 // setOriginalIM sets the insertion mode to return to after completing a text or
 405 // inTableText insertion mode.
 406 // Section 12.2.3.1, "using the rules for".
 407 func (p *parser) setOriginalIM() {
 408         if p.originalIM != nil {
 409                 panic("html: bad parser state: originalIM was set twice")
 410         }
 411         p.originalIM = p.im
 412 }
 413
 414 // Section 12.2.3.1, "reset the insertion mode".
 415 func (p *parser) resetInsertionMode() {
 416         for i := len(p.oe) - 1; i >= 0; i-- {
 417                 n := p.oe[i]
 418                 if i == 0 && p.context != nil {
 419                         n = p.context
 420                 }
 421
 422                 switch n.DataAtom {
 423                 case a.Select:
 424                         p.im = inSelectIM
 425                 case a.Td, a.Th:
 426                         p.im = inCellIM
 427                 case a.Tr:
 428                         p.im = inRowIM
 429                 case a.Tbody, a.Thead, a.Tfoot:
 430                         p.im = inTableBodyIM
 431                 case a.Caption:
 432                         p.im = inCaptionIM
 433                 case a.Colgroup:
 434                         p.im = inColumnGroupIM
 435                 case a.Table:
 436                         p.im = inTableIM
 437                 case a.Head:
 438                         p.im = inBodyIM
 439                 case a.Body:
 440                         p.im = inBodyIM
 441                 case a.Frameset:
 442                         p.im = inFramesetIM
 443                 case a.Html:
 444                         p.im = beforeHeadIM
 445                 default:
 446                         continue
 447                 }
 448                 return
 449         }
 450         p.im = inBodyIM
 451 }
 452
 453 const whitespace = " \t\r\n\f"
 454
 455 // Section 12.2.5.4.1.
 456 func initialIM(p *parser) bool {
 457         switch p.tok.Type {
 458         case TextToken:
 459                 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 460                 if len(p.tok.Data) == 0 {
 461                         // It was all whitespace, so ignore it.
 462                         return true
 463                 }
 464         case CommentToken:
 465                 p.doc.AppendChild(&Node{
 466                         Type: CommentNode,
 467                         Data: p.tok.Data,
 468                 })
 469                 return true
 470         case DoctypeToken:
 471                 n, quirks := parseDoctype(p.tok.Data)
 472                 p.doc.AppendChild(n)
 473                 p.quirks = quirks
 474                 p.im = beforeHTMLIM
 475                 return true
 476         }
 477         p.quirks = true
 478         p.im = beforeHTMLIM
 479         return false
 480 }
 481
 482 // Section 12.2.5.4.2.
 483 func beforeHTMLIM(p *parser) bool {
 484         switch p.tok.Type {
 485         case DoctypeToken:
 486                 // Ignore the token.
 487                 return true
 488         case TextToken:
 489                 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 490                 if len(p.tok.Data) == 0 {
 491                         // It was all whitespace, so ignore it.
 492                         return true
 493                 }
 494         case StartTagToken:
 495                 if p.tok.DataAtom == a.Html {
 496                         p.addElement()
 497                         p.im = beforeHeadIM
 498                         return true
 499                 }
 500         case EndTagToken:
 501                 switch p.tok.DataAtom {
 502                 case a.Head, a.Body, a.Html, a.Br:
 503                         p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 504                         return false
 505                 default:
 506                         // Ignore the token.
 507                         return true
 508                 }
 509         case CommentToken:
 510                 p.doc.AppendChild(&Node{
 511                         Type: CommentNode,
 512                         Data: p.tok.Data,
 513                 })
 514                 return true
 515         }
 516         p.parseImpliedToken(StartTagToken, a.Html, a.Html.String())
 517         return false
 518 }
 519
 520 // Section 12.2.5.4.3.
 521 func beforeHeadIM(p *parser) bool {
 522         switch p.tok.Type {
 523         case TextToken:
 524                 p.tok.Data = strings.TrimLeft(p.tok.Data, whitespace)
 525                 if len(p.tok.Data) == 0 {
 526                         // It was all whitespace, so ignore it.
 527                         return true
 528                 }
 529         case StartTagToken:
 530                 switch p.tok.DataAtom {
 531                 case a.Head:
 532                         p.addElement()
 533                         p.head = p.top()
 534                         p.im = inHeadIM
 535                         return true
 536                 case a.Html:
 537                         return inBodyIM(p)
 538                 }
 539         case EndTagToken:
 540                 switch p.tok.DataAtom {
 541                 case a.Head, a.Body, a.Html, a.Br:
 542                         p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 543                         return false
 544                 default:
 545                         // Ignore the token.
 546                         return true
 547                 }
 548         case CommentToken:
 549                 p.addChild(&Node{
 550                         Type: CommentNode,
 551                         Data: p.tok.Data,
 552                 })
 553                 return true
 554         case DoctypeToken:
 555                 // Ignore the token.
 556                 return true
 557         }
 558
 559         p.parseImpliedToken(StartTagToken, a.Head, a.Head.String())
 560         return false
 561 }
 562
 563 // Section 12.2.5.4.4.
 564 func inHeadIM(p *parser) bool {
 565         switch p.tok.Type {
 566         case TextToken:
 567                 s := strings.TrimLeft(p.tok.Data, whitespace)
 568                 if len(s) < len(p.tok.Data) {
 569                         // Add the initial whitespace to the current node.
 570                         p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 571                         if s == "" {
 572                                 return true
 573                         }
 574                         p.tok.Data = s
 575                 }
 576         case StartTagToken:
 577                 switch p.tok.DataAtom {
 578                 case a.Html:
 579                         return inBodyIM(p)
 580                 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta:
 581                         p.addElement()
 582                         p.oe.pop()
 583                         p.acknowledgeSelfClosingTag()
 584                         return true
 585                 case a.Script, a.Title, a.Noscript, a.Noframes, a.Style:
 586                         p.addElement()
 587                         p.setOriginalIM()
 588                         p.im = textIM
 589                         return true
 590                 case a.Head:
 591                         // Ignore the token.
 592                         return true
 593                 }
 594         case EndTagToken:
 595                 switch p.tok.DataAtom {
 596                 case a.Head:
 597                         n := p.oe.pop()
 598                         if n.DataAtom != a.Head {
 599                                 panic("html: bad parser state: <head> element not found, in the in-head insertion mode")
 600                         }
 601                         p.im = afterHeadIM
 602                         return true
 603                 case a.Body, a.Html, a.Br:
 604                         p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 605                         return false
 606                 default:
 607                         // Ignore the token.
 608                         return true
 609                 }
 610         case CommentToken:
 611                 p.addChild(&Node{
 612                         Type: CommentNode,
 613                         Data: p.tok.Data,
 614                 })
 615                 return true
 616         case DoctypeToken:
 617                 // Ignore the token.
 618                 return true
 619         }
 620
 621         p.parseImpliedToken(EndTagToken, a.Head, a.Head.String())
 622         return false
 623 }
 624
 625 // Section 12.2.5.4.6.
 626 func afterHeadIM(p *parser) bool {
 627         switch p.tok.Type {
 628         case TextToken:
 629                 s := strings.TrimLeft(p.tok.Data, whitespace)
 630                 if len(s) < len(p.tok.Data) {
 631                         // Add the initial whitespace to the current node.
 632                         p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
 633                         if s == "" {
 634                                 return true
 635                         }
 636                         p.tok.Data = s
 637                 }
 638         case StartTagToken:
 639                 switch p.tok.DataAtom {
 640                 case a.Html:
 641                         return inBodyIM(p)
 642                 case a.Body:
 643                         p.addElement()
 644                         p.framesetOK = false
 645                         p.im = inBodyIM
 646                         return true
 647                 case a.Frameset:
 648                         p.addElement()
 649                         p.im = inFramesetIM
 650                         return true
 651                 case a.Base, a.Basefont, a.Bgsound, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
 652                         p.oe = append(p.oe, p.head)
 653                         defer p.oe.remove(p.head)
 654                         return inHeadIM(p)
 655                 case a.Head:
 656                         // Ignore the token.
 657                         return true
 658                 }
 659         case EndTagToken:
 660                 switch p.tok.DataAtom {
 661                 case a.Body, a.Html, a.Br:
 662                         // Drop down to creating an implied <body> tag.
 663                 default:
 664                         // Ignore the token.
 665                         return true
 666                 }
 667         case CommentToken:
 668                 p.addChild(&Node{
 669                         Type: CommentNode,
 670                         Data: p.tok.Data,
 671                 })
 672                 return true
 673         case DoctypeToken:
 674                 // Ignore the token.
 675                 return true
 676         }
 677
 678         p.parseImpliedToken(StartTagToken, a.Body, a.Body.String())
 679         p.framesetOK = true
 680         return false
 681 }
 682
 683 // copyAttributes copies attributes of src not found on dst to dst.
 684 func copyAttributes(dst *Node, src Token) {
 685         if len(src.Attr) == 0 {
 686                 return
 687         }
 688         attr := map[string]string{}
 689         for _, t := range dst.Attr {
 690                 attr[t.Key] = t.Val
 691         }
 692         for _, t := range src.Attr {
 693                 if _, ok := attr[t.Key]; !ok {
 694                         dst.Attr = append(dst.Attr, t)
 695                         attr[t.Key] = t.Val
 696                 }
 697         }
 698 }
 699
 700 // Section 12.2.5.4.7.
 701 func inBodyIM(p *parser) bool {
 702         switch p.tok.Type {
 703         case TextToken:
 704                 d := p.tok.Data
 705                 switch n := p.oe.top(); n.DataAtom {
 706                 case a.Pre, a.Listing:
 707                         if n.FirstChild == nil {
 708                                 // Ignore a newline at the start of a <pre> block.
 709                                 if d != "" && d[0] == '\r' {
 710                                         d = d[1:]
 711                                 }
 712                                 if d != "" && d[0] == '\n' {
 713                                         d = d[1:]
 714                                 }
 715                         }
 716                 }
 717                 d = strings.Replace(d, "\x00", "", -1)
 718                 if d == "" {
 719                         return true
 720                 }
 721                 p.reconstructActiveFormattingElements()
 722                 p.addText(d)
 723                 if p.framesetOK && strings.TrimLeft(d, whitespace) != "" {
 724                         // There were non-whitespace characters inserted.
 725                         p.framesetOK = false
 726                 }
 727         case StartTagToken:
 728                 switch p.tok.DataAtom {
 729                 case a.Html:
 730                         copyAttributes(p.oe[0], p.tok)
 731                 case a.Base, a.Basefont, a.Bgsound, a.Command, a.Link, a.Meta, a.Noframes, a.Script, a.Style, a.Title:
 732                         return inHeadIM(p)
 733                 case a.Body:
 734                         if len(p.oe) >= 2 {
 735                                 body := p.oe[1]
 736                                 if body.Type == ElementNode && body.DataAtom == a.Body {
 737                                         p.framesetOK = false
 738                                         copyAttributes(body, p.tok)
 739                                 }
 740                         }
 741                 case a.Frameset:
 742                         if !p.framesetOK || len(p.oe) < 2 || p.oe[1].DataAtom != a.Body {
 743                                 // Ignore the token.
 744                                 return true
 745                         }
 746                         body := p.oe[1]
 747                         if body.Parent != nil {
 748                                 body.Parent.RemoveChild(body)
 749                         }
 750                         p.oe = p.oe[:1]
 751                         p.addElement()
 752                         p.im = inFramesetIM
 753                         return true
 754                 case a.Address, a.Article, a.Aside, a.Blockquote, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Menu, a.Nav, a.Ol, a.P, a.Section, a.Summary, a.Ul:
 755                         p.popUntil(buttonScope, a.P)
 756                         p.addElement()
 757                 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 758                         p.popUntil(buttonScope, a.P)
 759                         switch n := p.top(); n.DataAtom {
 760                         case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
 761                                 p.oe.pop()
 762                         }
 763                         p.addElement()
 764                 case a.Pre, a.Listing:
 765                         p.popUntil(buttonScope, a.P)
 766                         p.addElement()
 767                         // The newline, if any, will be dealt with by the TextToken case.
 768                         p.framesetOK = false
 769                 case a.Form:
 770                         if p.form == nil {
 771                                 p.popUntil(buttonScope, a.P)
 772                                 p.addElement()
 773                                 p.form = p.top()
 774                         }
 775                 case a.Li:
 776                         p.framesetOK = false
 777                         for i := len(p.oe) - 1; i >= 0; i-- {
 778                                 node := p.oe[i]
 779                                 switch node.DataAtom {
 780                                 case a.Li:
 781                                         p.oe = p.oe[:i]
 782                                 case a.Address, a.Div, a.P:
 783                                         continue
 784                                 default:
 785                                         if !isSpecialElement(node) {
 786                                                 continue
 787                                         }
 788                                 }
 789                                 break
 790                         }
 791                         p.popUntil(buttonScope, a.P)
 792                         p.addElement()
 793                 case a.Dd, a.Dt:
 794                         p.framesetOK = false
 795                         for i := len(p.oe) - 1; i >= 0; i-- {
 796                                 node := p.oe[i]
 797                                 switch node.DataAtom {
 798                                 case a.Dd, a.Dt:
 799                                         p.oe = p.oe[:i]
 800                                 case a.Address, a.Div, a.P:
 801                                         continue
 802                                 default:
 803                                         if !isSpecialElement(node) {
 804                                                 continue
 805                                         }
 806                                 }
 807                                 break
 808                         }
 809                         p.popUntil(buttonScope, a.P)
 810                         p.addElement()
 811                 case a.Plaintext:
 812                         p.popUntil(buttonScope, a.P)
 813                         p.addElement()
 814                 case a.Button:
 815                         p.popUntil(defaultScope, a.Button)
 816                         p.reconstructActiveFormattingElements()
 817                         p.addElement()
 818                         p.framesetOK = false
 819                 case a.A:
 820                         for i := len(p.afe) - 1; i >= 0 && p.afe[i].Type != scopeMarkerNode; i-- {
 821                                 if n := p.afe[i]; n.Type == ElementNode && n.DataAtom == a.A {
 822                                         p.inBodyEndTagFormatting(a.A)
 823                                         p.oe.remove(n)
 824                                         p.afe.remove(n)
 825                                         break
 826                                 }
 827                         }
 828                         p.reconstructActiveFormattingElements()
 829                         p.addFormattingElement()
 830                 case a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
 831                         p.reconstructActiveFormattingElements()
 832                         p.addFormattingElement()
 833                 case a.Nobr:
 834                         p.reconstructActiveFormattingElements()
 835                         if p.elementInScope(defaultScope, a.Nobr) {
 836                                 p.inBodyEndTagFormatting(a.Nobr)
 837                                 p.reconstructActiveFormattingElements()
 838                         }
 839                         p.addFormattingElement()
 840                 case a.Applet, a.Marquee, a.Object:
 841                         p.reconstructActiveFormattingElements()
 842                         p.addElement()
 843                         p.afe = append(p.afe, &scopeMarker)
 844                         p.framesetOK = false
 845                 case a.Table:
 846                         if !p.quirks {
 847                                 p.popUntil(buttonScope, a.P)
 848                         }
 849                         p.addElement()
 850                         p.framesetOK = false
 851                         p.im = inTableIM
 852                         return true
 853                 case a.Area, a.Br, a.Embed, a.Img, a.Input, a.Keygen, a.Wbr:
 854                         p.reconstructActiveFormattingElements()
 855                         p.addElement()
 856                         p.oe.pop()
 857                         p.acknowledgeSelfClosingTag()
 858                         if p.tok.DataAtom == a.Input {
 859                                 for _, t := range p.tok.Attr {
 860                                         if t.Key == "type" {
 861                                                 if strings.ToLower(t.Val) == "hidden" {
 862                                                         // Skip setting framesetOK = false
 863                                                         return true
 864                                                 }
 865                                         }
 866                                 }
 867                         }
 868                         p.framesetOK = false
 869                 case a.Param, a.Source, a.Track:
 870                         p.addElement()
 871                         p.oe.pop()
 872                         p.acknowledgeSelfClosingTag()
 873                 case a.Hr:
 874                         p.popUntil(buttonScope, a.P)
 875                         p.addElement()
 876                         p.oe.pop()
 877                         p.acknowledgeSelfClosingTag()
 878                         p.framesetOK = false
 879                 case a.Image:
 880                         p.tok.DataAtom = a.Img
 881                         p.tok.Data = a.Img.String()
 882                         return false
 883                 case a.Isindex:
 884                         if p.form != nil {
 885                                 // Ignore the token.
 886                                 return true
 887                         }
 888                         action := ""
 889                         prompt := "This is a searchable index. Enter search keywords: "
 890                         attr := []Attribute{{Key: "name", Val: "isindex"}}
 891                         for _, t := range p.tok.Attr {
 892                                 switch t.Key {
 893                                 case "action":
 894                                         action = t.Val
 895                                 case "name":
 896                                         // Ignore the attribute.
 897                                 case "prompt":
 898                                         prompt = t.Val
 899                                 default:
 900                                         attr = append(attr, t)
 901                                 }
 902                         }
 903                         p.acknowledgeSelfClosingTag()
 904                         p.popUntil(buttonScope, a.P)
 905                         p.parseImpliedToken(StartTagToken, a.Form, a.Form.String())
 906                         if action != "" {
 907                                 p.form.Attr = []Attribute{{Key: "action", Val: action}}
 908                         }
 909                         p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
 910                         p.parseImpliedToken(StartTagToken, a.Label, a.Label.String())
 911                         p.addText(prompt)
 912                         p.addChild(&Node{
 913                                 Type:     ElementNode,
 914                                 DataAtom: a.Input,
 915                                 Data:     a.Input.String(),
 916                                 Attr:     attr,
 917                         })
 918                         p.oe.pop()
 919                         p.parseImpliedToken(EndTagToken, a.Label, a.Label.String())
 920                         p.parseImpliedToken(StartTagToken, a.Hr, a.Hr.String())
 921                         p.parseImpliedToken(EndTagToken, a.Form, a.Form.String())
 922                 case a.Textarea:
 923                         p.addElement()
 924                         p.setOriginalIM()
 925                         p.framesetOK = false
 926                         p.im = textIM
 927                 case a.Xmp:
 928                         p.popUntil(buttonScope, a.P)
 929                         p.reconstructActiveFormattingElements()
 930                         p.framesetOK = false
 931                         p.addElement()
 932                         p.setOriginalIM()
 933                         p.im = textIM
 934                 case a.Iframe:
 935                         p.framesetOK = false
 936                         p.addElement()
 937                         p.setOriginalIM()
 938                         p.im = textIM
 939                 case a.Noembed, a.Noscript:
 940                         p.addElement()
 941                         p.setOriginalIM()
 942                         p.im = textIM
 943                 case a.Select:
 944                         p.reconstructActiveFormattingElements()
 945                         p.addElement()
 946                         p.framesetOK = false
 947                         p.im = inSelectIM
 948                         return true
 949                 case a.Optgroup, a.Option:
 950                         if p.top().DataAtom == a.Option {
 951                                 p.oe.pop()
 952                         }
 953                         p.reconstructActiveFormattingElements()
 954                         p.addElement()
 955                 case a.Rp, a.Rt:
 956                         if p.elementInScope(defaultScope, a.Ruby) {
 957                                 p.generateImpliedEndTags()
 958                         }
 959                         p.addElement()
 960                 case a.Math, a.Svg:
 961                         p.reconstructActiveFormattingElements()
 962                         if p.tok.DataAtom == a.Math {
 963                                 adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
 964                         } else {
 965                                 adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
 966                         }
 967                         adjustForeignAttributes(p.tok.Attr)
 968                         p.addElement()
 969                         p.top().Namespace = p.tok.Data
 970                         if p.hasSelfClosingToken {
 971                                 p.oe.pop()
 972                                 p.acknowledgeSelfClosingTag()
 973                         }
 974                         return true
 975                 case a.Caption, a.Col, a.Colgroup, a.Frame, a.Head, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
 976                         // Ignore the token.
 977                 default:
 978                         p.reconstructActiveFormattingElements()
 979                         p.addElement()
 980                 }
 981         case EndTagToken:
 982                 switch p.tok.DataAtom {
 983                 case a.Body:
 984                         if p.elementInScope(defaultScope, a.Body) {
 985                                 p.im = afterBodyIM
 986                         }
 987                 case a.Html:
 988                         if p.elementInScope(defaultScope, a.Body) {
 989                                 p.parseImpliedToken(EndTagToken, a.Body, a.Body.String())
 990                                 return false
 991                         }
 992                         return true
 993                 case a.Address, a.Article, a.Aside, a.Blockquote, a.Button, a.Center, a.Details, a.Dir, a.Div, a.Dl, a.Fieldset, a.Figcaption, a.Figure, a.Footer, a.Header, a.Hgroup, a.Listing, a.Menu, a.Nav, a.Ol, a.Pre, a.Section, a.Summary, a.Ul:
 994                         p.popUntil(defaultScope, p.tok.DataAtom)
 995                 case a.Form:
 996                         node := p.form
 997                         p.form = nil
 998                         i := p.indexOfElementInScope(defaultScope, a.Form)
 999                         if node == nil || i == -1 || p.oe[i] != node {
1000                                 // Ignore the token.
1001                                 return true
1002                         }
1003                         p.generateImpliedEndTags()
1004                         p.oe.remove(node)
1005                 case a.P:
1006                         if !p.elementInScope(buttonScope, a.P) {
1007                                 p.parseImpliedToken(StartTagToken, a.P, a.P.String())
1008                         }
1009                         p.popUntil(buttonScope, a.P)
1010                 case a.Li:
1011                         p.popUntil(listItemScope, a.Li)
1012                 case a.Dd, a.Dt:
1013                         p.popUntil(defaultScope, p.tok.DataAtom)
1014                 case a.H1, a.H2, a.H3, a.H4, a.H5, a.H6:
1015                         p.popUntil(defaultScope, a.H1, a.H2, a.H3, a.H4, a.H5, a.H6)
1016                 case a.A, a.B, a.Big, a.Code, a.Em, a.Font, a.I, a.Nobr, a.S, a.Small, a.Strike, a.Strong, a.Tt, a.U:
1017                         p.inBodyEndTagFormatting(p.tok.DataAtom)
1018                 case a.Applet, a.Marquee, a.Object:
1019                         if p.popUntil(defaultScope, p.tok.DataAtom) {
1020                                 p.clearActiveFormattingElements()
1021                         }
1022                 case a.Br:
1023                         p.tok.Type = StartTagToken
1024                         return false
1025                 default:
1026                         p.inBodyEndTagOther(p.tok.DataAtom)
1027                 }
1028         case CommentToken:
1029                 p.addChild(&Node{
1030                         Type: CommentNode,
1031                         Data: p.tok.Data,
1032                 })
1033         }
1034
1035         return true
1036 }
1037
1038 func (p *parser) inBodyEndTagFormatting(tagAtom a.Atom) {
1039         // This is the "adoption agency" algorithm, described at
1040         // https://html.spec.whatwg.org/multipage/syntax.html#adoptionAgency
             //
             // tagAtom names the formatting element to close. The function returns
             // nothing: its whole effect is to rewrite the stack of open elements
             // (p.oe), the list of active formatting elements (p.afe) and the
             // parent/child links of the nodes involved.
1041
1042         // TODO: this is a fairly literal line-by-line translation of that algorithm.
1043         // Once the code successfully parses the comprehensive test suite, we should
1044         // refactor this code to be more idiomatic.
1045
1046         // Steps 1-4. The outer loop.
             // The loop body runs at most eight times; most paths leave it early
             // through one of the return statements below.
1047         for i := 0; i < 8; i++ {
1048                 // Step 5. Find the formatting element.
                     // Search p.afe from its newest entry backwards, giving up at the
                     // nearest scope marker, for an entry whose DataAtom is tagAtom.
1049                 var formattingElement *Node
1050                 for j := len(p.afe) - 1; j >= 0; j-- {
1051                         if p.afe[j].Type == scopeMarkerNode {
1052                                 break
1053                         }
1054                         if p.afe[j].DataAtom == tagAtom {
1055                                 formattingElement = p.afe[j]
1056                                 break
1057                         }
1058                 }
                     // No such entry: hand the tag to the generic end-tag handling.
1059                 if formattingElement == nil {
1060                         p.inBodyEndTagOther(tagAtom)
1061                         return
1062                 }
                     // The element is listed in p.afe but is not in p.oe: only the
                     // p.afe entry is removed and nothing else is done for this tag.
1063                 feIndex := p.oe.index(formattingElement)
1064                 if feIndex == -1 {
1065                         p.afe.remove(formattingElement)
1066                         return
1067                 }
1068                 if !p.elementInScope(defaultScope, tagAtom) {
1069                         // Ignore the tag.
1070                         return
1071                 }
1072
1073                 // Steps 9-10. Find the furthest block.
                     // That is the first entry of p.oe[feIndex:] for which
                     // isSpecialElement reports true.
1074                 var furthestBlock *Node
1075                 for _, e := range p.oe[feIndex:] {
1076                         if isSpecialElement(e) {
1077                                 furthestBlock = e
1078                                 break
1079                         }
1080                 }
                     // Without a furthest block the simple case applies: pop p.oe until
                     // the formatting element itself has been popped, then drop it from
                     // p.afe as well.
1081                 if furthestBlock == nil {
1082                         e := p.oe.pop()
1083                         for e != formattingElement {
1084                                 e = p.oe.pop()
1085                         }
1086                         p.afe.remove(e)
1087                         return
1088                 }
1089
1090                 // Steps 11-12. Find the common ancestor and bookmark node.
                     // commonAncestor is the p.oe entry directly before the formatting
                     // element; bookmark is the p.afe index at which the replacement
                     // clone is inserted in step 18.
1091                 commonAncestor := p.oe[feIndex-1]
1092                 bookmark := p.afe.index(formattingElement)
1093
1094                 // Step 13. The inner loop. Find the lastNode to reparent.
1095                 lastNode := furthestBlock
1096                 node := furthestBlock
1097                 x := p.oe.index(node)
1098                 // Steps 13.1-13.2
                     // The loop body runs at most three times, and an iteration that ends
                     // in the continue below still counts toward that limit.
1099                 for j := 0; j < 3; j++ {
1100                         // Step 13.3.
                             // Step one entry toward the bottom of p.oe.
1101                         x--
1102                         node = p.oe[x]
1103                         // Step 13.4 - 13.5.
                             // A node that is not in p.afe is removed from p.oe and skipped.
1104                         if p.afe.index(node) == -1 {
1105                                 p.oe.remove(node)
1106                                 continue
1107                         }
1108                         // Step 13.6.
1109                         if node == formattingElement {
1110                                 break
1111                         }
1112                         // Step 13.7.
                             // Put a clone of node in node's slot in both p.afe and p.oe and
                             // continue working with the clone.
1113                         clone := node.clone()
1114                         p.afe[p.afe.index(node)] = clone
1115                         p.oe[p.oe.index(node)] = clone
1116                         node = clone
1117                         // Step 13.8.
                             // Only while lastNode is still the furthest block (no clone has
                             // become lastNode yet) is the bookmark moved to just after the
                             // clone in p.afe.
1118                         if lastNode == furthestBlock {
1119                                 bookmark = p.afe.index(node) + 1
1120                         }
1121                         // Step 13.9.
                             // Detach lastNode from its current parent, if any, and make it
                             // the last child of the clone.
1122                         if lastNode.Parent != nil {
1123                                 lastNode.Parent.RemoveChild(lastNode)
1124                         }
1125                         node.AppendChild(lastNode)
1126                         // Step 13.10.
1127                         lastNode = node
1128                 }
1129
1130                 // Step 14. Reparent lastNode to the common ancestor,
1131                 // or for misnested table nodes, to the foster parent.
1132                 if lastNode.Parent != nil {
1133                         lastNode.Parent.RemoveChild(lastNode)
1134                 }
1135                 switch commonAncestor.DataAtom {
1136                 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1137                         p.fosterParent(lastNode)
1138                 default:
1139                         commonAncestor.AppendChild(lastNode)
1140                 }
1141
1142                 // Steps 15-17. Reparent nodes from the furthest block's children
1143                 // to a clone of the formatting element.
                     // The clone then becomes the last child of the furthest block.
1144                 clone := formattingElement.clone()
1145                 reparentChildren(clone, furthestBlock)
1146                 furthestBlock.AppendChild(clone)
1147
1148                 // Step 18. Fix up the list of active formatting elements.
                     // The formatting element is replaced in p.afe by its clone at the
                     // bookmark. If the old entry sits before the bookmark, removing it
                     // shifts the bookmark down by one, hence the adjustment first.
1149                 if oldLoc := p.afe.index(formattingElement); oldLoc != -1 && oldLoc < bookmark {
1150                         // Move the bookmark with the rest of the list.
1151                         bookmark--
1152                 }
1153                 p.afe.remove(formattingElement)
1154                 p.afe.insert(bookmark, clone)
1155
1156                 // Step 19. Fix up the stack of open elements.
                     // The formatting element leaves p.oe and its clone is inserted
                     // immediately after the furthest block.
1157                 p.oe.remove(formattingElement)
1158                 p.oe.insert(p.oe.index(furthestBlock)+1, clone)
1159         }
1160 }
1161
1162 // inBodyEndTagOther performs the "any other end tag" algorithm for inBodyIM.
1163 // "Any other end tag" handling from 12.2.5.5 The rules for parsing tokens in foreign content
1164 // https://html.spec.whatwg.org/multipage/syntax.html#parsing-main-inforeign
1165 func (p *parser) inBodyEndTagOther(tagAtom a.Atom) {
1166         for i := len(p.oe) - 1; i >= 0; i-- {
1167                 if p.oe[i].DataAtom == tagAtom {
1168                         p.oe = p.oe[:i]
1169                         break
1170                 }
1171                 if isSpecialElement(p.oe[i]) {
1172                         break
1173                 }
1174         }
1175 }
1176
1177 // Section 12.2.5.4.8.
1178 func textIM(p *parser) bool {
1179         switch p.tok.Type {
1180         case ErrorToken:
1181                 p.oe.pop()
1182         case TextToken:
1183                 d := p.tok.Data
1184                 if n := p.oe.top(); n.DataAtom == a.Textarea && n.FirstChild == nil {
1185                         // Ignore a newline at the start of a <textarea> block.
1186                         if d != "" && d[0] == '\r' {
1187                                 d = d[1:]
1188                         }
1189                         if d != "" && d[0] == '\n' {
1190                                 d = d[1:]
1191                         }
1192                 }
1193                 if d == "" {
1194                         return true
1195                 }
1196                 p.addText(d)
1197                 return true
1198         case EndTagToken:
1199                 p.oe.pop()
1200         }
1201         p.im = p.originalIM
1202         p.originalIM = nil
1203         return p.tok.Type == EndTagToken
1204 }
1205
1206 // Section 12.2.5.4.9.
1207 func inTableIM(p *parser) bool {
1208         switch p.tok.Type {
1209         case ErrorToken:
1210                 // Stop parsing.
1211                 return true
1212         case TextToken:
1213                 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "", -1)
1214                 switch p.oe.top().DataAtom {
1215                 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1216                         if strings.Trim(p.tok.Data, whitespace) == "" {
1217                                 p.addText(p.tok.Data)
1218                                 return true
1219                         }
1220                 }
1221         case StartTagToken:
1222                 switch p.tok.DataAtom {
1223                 case a.Caption:
1224                         p.clearStackToContext(tableScope)
1225                         p.afe = append(p.afe, &scopeMarker)
1226                         p.addElement()
1227                         p.im = inCaptionIM
1228                         return true
1229                 case a.Colgroup:
1230                         p.clearStackToContext(tableScope)
1231                         p.addElement()
1232                         p.im = inColumnGroupIM
1233                         return true
1234                 case a.Col:
1235                         p.parseImpliedToken(StartTagToken, a.Colgroup, a.Colgroup.String())
1236                         return false
1237                 case a.Tbody, a.Tfoot, a.Thead:
1238                         p.clearStackToContext(tableScope)
1239                         p.addElement()
1240                         p.im = inTableBodyIM
1241                         return true
1242                 case a.Td, a.Th, a.Tr:
1243                         p.parseImpliedToken(StartTagToken, a.Tbody, a.Tbody.String())
1244                         return false
1245                 case a.Table:
1246                         if p.popUntil(tableScope, a.Table) {
1247                                 p.resetInsertionMode()
1248                                 return false
1249                         }
1250                         // Ignore the token.
1251                         return true
1252                 case a.Style, a.Script:
1253                         return inHeadIM(p)
1254                 case a.Input:
1255                         for _, t := range p.tok.Attr {
1256                                 if t.Key == "type" && strings.ToLower(t.Val) == "hidden" {
1257                                         p.addElement()
1258                                         p.oe.pop()
1259                                         return true
1260                                 }
1261                         }
1262                         // Otherwise drop down to the default action.
1263                 case a.Form:
1264                         if p.form != nil {
1265                                 // Ignore the token.
1266                                 return true
1267                         }
1268                         p.addElement()
1269                         p.form = p.oe.pop()
1270                 case a.Select:
1271                         p.reconstructActiveFormattingElements()
1272                         switch p.top().DataAtom {
1273                         case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1274                                 p.fosterParenting = true
1275                         }
1276                         p.addElement()
1277                         p.fosterParenting = false
1278                         p.framesetOK = false
1279                         p.im = inSelectInTableIM
1280                         return true
1281                 }
1282         case EndTagToken:
1283                 switch p.tok.DataAtom {
1284                 case a.Table:
1285                         if p.popUntil(tableScope, a.Table) {
1286                                 p.resetInsertionMode()
1287                                 return true
1288                         }
1289                         // Ignore the token.
1290                         return true
1291                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1292                         // Ignore the token.
1293                         return true
1294                 }
1295         case CommentToken:
1296                 p.addChild(&Node{
1297                         Type: CommentNode,
1298                         Data: p.tok.Data,
1299                 })
1300                 return true
1301         case DoctypeToken:
1302                 // Ignore the token.
1303                 return true
1304         }
1305
1306         p.fosterParenting = true
1307         defer func() { p.fosterParenting = false }()
1308
1309         return inBodyIM(p)
1310 }
1311
1312 // Section 12.2.5.4.11.
1313 func inCaptionIM(p *parser) bool {
1314         switch p.tok.Type {
1315         case StartTagToken:
1316                 switch p.tok.DataAtom {
1317                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Thead, a.Tr:
1318                         if p.popUntil(tableScope, a.Caption) {
1319                                 p.clearActiveFormattingElements()
1320                                 p.im = inTableIM
1321                                 return false
1322                         } else {
1323                                 // Ignore the token.
1324                                 return true
1325                         }
1326                 case a.Select:
1327                         p.reconstructActiveFormattingElements()
1328                         p.addElement()
1329                         p.framesetOK = false
1330                         p.im = inSelectInTableIM
1331                         return true
1332                 }
1333         case EndTagToken:
1334                 switch p.tok.DataAtom {
1335                 case a.Caption:
1336                         if p.popUntil(tableScope, a.Caption) {
1337                                 p.clearActiveFormattingElements()
1338                                 p.im = inTableIM
1339                         }
1340                         return true
1341                 case a.Table:
1342                         if p.popUntil(tableScope, a.Caption) {
1343                                 p.clearActiveFormattingElements()
1344                                 p.im = inTableIM
1345                                 return false
1346                         } else {
1347                                 // Ignore the token.
1348                                 return true
1349                         }
1350                 case a.Body, a.Col, a.Colgroup, a.Html, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1351                         // Ignore the token.
1352                         return true
1353                 }
1354         }
1355         return inBodyIM(p)
1356 }
1357
1358 // Section 12.2.5.4.12.
1359 func inColumnGroupIM(p *parser) bool {
1360         switch p.tok.Type {
1361         case TextToken:
1362                 s := strings.TrimLeft(p.tok.Data, whitespace)
1363                 if len(s) < len(p.tok.Data) {
1364                         // Add the initial whitespace to the current node.
1365                         p.addText(p.tok.Data[:len(p.tok.Data)-len(s)])
1366                         if s == "" {
1367                                 return true
1368                         }
1369                         p.tok.Data = s
1370                 }
1371         case CommentToken:
1372                 p.addChild(&Node{
1373                         Type: CommentNode,
1374                         Data: p.tok.Data,
1375                 })
1376                 return true
1377         case DoctypeToken:
1378                 // Ignore the token.
1379                 return true
1380         case StartTagToken:
1381                 switch p.tok.DataAtom {
1382                 case a.Html:
1383                         return inBodyIM(p)
1384                 case a.Col:
1385                         p.addElement()
1386                         p.oe.pop()
1387                         p.acknowledgeSelfClosingTag()
1388                         return true
1389                 }
1390         case EndTagToken:
1391                 switch p.tok.DataAtom {
1392                 case a.Colgroup:
1393                         if p.oe.top().DataAtom != a.Html {
1394                                 p.oe.pop()
1395                                 p.im = inTableIM
1396                         }
1397                         return true
1398                 case a.Col:
1399                         // Ignore the token.
1400                         return true
1401                 }
1402         }
1403         if p.oe.top().DataAtom != a.Html {
1404                 p.oe.pop()
1405                 p.im = inTableIM
1406                 return false
1407         }
1408         return true
1409 }
1410
1411 // Section 12.2.5.4.13.
1412 func inTableBodyIM(p *parser) bool {
1413         switch p.tok.Type {
1414         case StartTagToken:
1415                 switch p.tok.DataAtom {
1416                 case a.Tr:
1417                         p.clearStackToContext(tableBodyScope)
1418                         p.addElement()
1419                         p.im = inRowIM
1420                         return true
1421                 case a.Td, a.Th:
1422                         p.parseImpliedToken(StartTagToken, a.Tr, a.Tr.String())
1423                         return false
1424                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead:
1425                         if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1426                                 p.im = inTableIM
1427                                 return false
1428                         }
1429                         // Ignore the token.
1430                         return true
1431                 }
1432         case EndTagToken:
1433                 switch p.tok.DataAtom {
1434                 case a.Tbody, a.Tfoot, a.Thead:
1435                         if p.elementInScope(tableScope, p.tok.DataAtom) {
1436                                 p.clearStackToContext(tableBodyScope)
1437                                 p.oe.pop()
1438                                 p.im = inTableIM
1439                         }
1440                         return true
1441                 case a.Table:
1442                         if p.popUntil(tableScope, a.Tbody, a.Thead, a.Tfoot) {
1443                                 p.im = inTableIM
1444                                 return false
1445                         }
1446                         // Ignore the token.
1447                         return true
1448                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th, a.Tr:
1449                         // Ignore the token.
1450                         return true
1451                 }
1452         case CommentToken:
1453                 p.addChild(&Node{
1454                         Type: CommentNode,
1455                         Data: p.tok.Data,
1456                 })
1457                 return true
1458         }
1459
1460         return inTableIM(p)
1461 }
1462
1463 // Section 12.2.5.4.14.
1464 func inRowIM(p *parser) bool {
1465         switch p.tok.Type {
1466         case StartTagToken:
1467                 switch p.tok.DataAtom {
1468                 case a.Td, a.Th:
1469                         p.clearStackToContext(tableRowScope)
1470                         p.addElement()
1471                         p.afe = append(p.afe, &scopeMarker)
1472                         p.im = inCellIM
1473                         return true
1474                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1475                         if p.popUntil(tableScope, a.Tr) {
1476                                 p.im = inTableBodyIM
1477                                 return false
1478                         }
1479                         // Ignore the token.
1480                         return true
1481                 }
1482         case EndTagToken:
1483                 switch p.tok.DataAtom {
1484                 case a.Tr:
1485                         if p.popUntil(tableScope, a.Tr) {
1486                                 p.im = inTableBodyIM
1487                                 return true
1488                         }
1489                         // Ignore the token.
1490                         return true
1491                 case a.Table:
1492                         if p.popUntil(tableScope, a.Tr) {
1493                                 p.im = inTableBodyIM
1494                                 return false
1495                         }
1496                         // Ignore the token.
1497                         return true
1498                 case a.Tbody, a.Tfoot, a.Thead:
1499                         if p.elementInScope(tableScope, p.tok.DataAtom) {
1500                                 p.parseImpliedToken(EndTagToken, a.Tr, a.Tr.String())
1501                                 return false
1502                         }
1503                         // Ignore the token.
1504                         return true
1505                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html, a.Td, a.Th:
1506                         // Ignore the token.
1507                         return true
1508                 }
1509         }
1510
1511         return inTableIM(p)
1512 }
1513
1514 // Section 12.2.5.4.15.
1515 func inCellIM(p *parser) bool {
1516         switch p.tok.Type {
1517         case StartTagToken:
1518                 switch p.tok.DataAtom {
1519                 case a.Caption, a.Col, a.Colgroup, a.Tbody, a.Td, a.Tfoot, a.Th, a.Thead, a.Tr:
1520                         if p.popUntil(tableScope, a.Td, a.Th) {
1521                                 // Close the cell and reprocess.
1522                                 p.clearActiveFormattingElements()
1523                                 p.im = inRowIM
1524                                 return false
1525                         }
1526                         // Ignore the token.
1527                         return true
1528                 case a.Select:
1529                         p.reconstructActiveFormattingElements()
1530                         p.addElement()
1531                         p.framesetOK = false
1532                         p.im = inSelectInTableIM
1533                         return true
1534                 }
1535         case EndTagToken:
1536                 switch p.tok.DataAtom {
1537                 case a.Td, a.Th:
1538                         if !p.popUntil(tableScope, p.tok.DataAtom) {
1539                                 // Ignore the token.
1540                                 return true
1541                         }
1542                         p.clearActiveFormattingElements()
1543                         p.im = inRowIM
1544                         return true
1545                 case a.Body, a.Caption, a.Col, a.Colgroup, a.Html:
1546                         // Ignore the token.
1547                         return true
1548                 case a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr:
1549                         if !p.elementInScope(tableScope, p.tok.DataAtom) {
1550                                 // Ignore the token.
1551                                 return true
1552                         }
1553                         // Close the cell and reprocess.
1554                         p.popUntil(tableScope, a.Td, a.Th)
1555                         p.clearActiveFormattingElements()
1556                         p.im = inRowIM
1557                         return false
1558                 }
1559         }
1560         return inBodyIM(p)
1561 }
1562
1563 // Section 12.2.5.4.16.
1564 func inSelectIM(p *parser) bool {
1565         switch p.tok.Type {
1566         case ErrorToken:
1567                 // Stop parsing.
1568                 return true
1569         case TextToken:
1570                 p.addText(strings.Replace(p.tok.Data, "\x00", "", -1))
1571         case StartTagToken:
1572                 switch p.tok.DataAtom {
1573                 case a.Html:
1574                         return inBodyIM(p)
1575                 case a.Option:
1576                         if p.top().DataAtom == a.Option {
1577                                 p.oe.pop()
1578                         }
1579                         p.addElement()
1580                 case a.Optgroup:
1581                         if p.top().DataAtom == a.Option {
1582                                 p.oe.pop()
1583                         }
1584                         if p.top().DataAtom == a.Optgroup {
1585                                 p.oe.pop()
1586                         }
1587                         p.addElement()
1588                 case a.Select:
1589                         p.tok.Type = EndTagToken
1590                         return false
1591                 case a.Input, a.Keygen, a.Textarea:
1592                         if p.elementInScope(selectScope, a.Select) {
1593                                 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1594                                 return false
1595                         }
1596                         // In order to properly ignore <textarea>, we need to change the tokenizer mode.
1597                         p.tokenizer.NextIsNotRawText()
1598                         // Ignore the token.
1599                         return true
1600                 case a.Script:
1601                         return inHeadIM(p)
1602                 }
1603         case EndTagToken:
1604                 switch p.tok.DataAtom {
1605                 case a.Option:
1606                         if p.top().DataAtom == a.Option {
1607                                 p.oe.pop()
1608                         }
1609                 case a.Optgroup:
1610                         i := len(p.oe) - 1
1611                         if p.oe[i].DataAtom == a.Option {
1612                                 i--
1613                         }
1614                         if p.oe[i].DataAtom == a.Optgroup {
1615                                 p.oe = p.oe[:i]
1616                         }
1617                 case a.Select:
1618                         if p.popUntil(selectScope, a.Select) {
1619                                 p.resetInsertionMode()
1620                         }
1621                 }
1622         case CommentToken:
1623                 p.addChild(&Node{
1624                         Type: CommentNode,
1625                         Data: p.tok.Data,
1626                 })
1627         case DoctypeToken:
1628                 // Ignore the token.
1629                 return true
1630         }
1631
1632         return true
1633 }
1634
1635 // Section 12.2.5.4.17.
1636 func inSelectInTableIM(p *parser) bool {
1637         switch p.tok.Type {
1638         case StartTagToken, EndTagToken:
1639                 switch p.tok.DataAtom {
1640                 case a.Caption, a.Table, a.Tbody, a.Tfoot, a.Thead, a.Tr, a.Td, a.Th:
1641                         if p.tok.Type == StartTagToken || p.elementInScope(tableScope, p.tok.DataAtom) {
1642                                 p.parseImpliedToken(EndTagToken, a.Select, a.Select.String())
1643                                 return false
1644                         } else {
1645                                 // Ignore the token.
1646                                 return true
1647                         }
1648                 }
1649         }
1650         return inSelectIM(p)
1651 }
1652
1653 // Section 12.2.5.4.18.
1654 func afterBodyIM(p *parser) bool {
1655         switch p.tok.Type {
1656         case ErrorToken:
1657                 // Stop parsing.
1658                 return true
1659         case TextToken:
1660                 s := strings.TrimLeft(p.tok.Data, whitespace)
1661                 if len(s) == 0 {
1662                         // It was all whitespace.
1663                         return inBodyIM(p)
1664                 }
1665         case StartTagToken:
1666                 if p.tok.DataAtom == a.Html {
1667                         return inBodyIM(p)
1668                 }
1669         case EndTagToken:
1670                 if p.tok.DataAtom == a.Html {
1671                         if !p.fragment {
1672                                 p.im = afterAfterBodyIM
1673                         }
1674                         return true
1675                 }
1676         case CommentToken:
1677                 // The comment is attached to the <html> element.
1678                 if len(p.oe) < 1 || p.oe[0].DataAtom != a.Html {
1679                         panic("html: bad parser state: <html> element not found, in the after-body insertion mode")
1680                 }
1681                 p.oe[0].AppendChild(&Node{
1682                         Type: CommentNode,
1683                         Data: p.tok.Data,
1684                 })
1685                 return true
1686         }
1687         p.im = inBodyIM
1688         return false
1689 }
1690
1691 // Section 12.2.5.4.19.
1692 func inFramesetIM(p *parser) bool {
1693         switch p.tok.Type {
1694         case CommentToken:
1695                 p.addChild(&Node{
1696                         Type: CommentNode,
1697                         Data: p.tok.Data,
1698                 })
1699         case TextToken:
1700                 // Ignore all text but whitespace.
1701                 s := strings.Map(func(c rune) rune {
1702                         switch c {
1703                         case ' ', '\t', '\n', '\f', '\r':
1704                                 return c
1705                         }
1706                         return -1
1707                 }, p.tok.Data)
1708                 if s != "" {
1709                         p.addText(s)
1710                 }
1711         case StartTagToken:
1712                 switch p.tok.DataAtom {
1713                 case a.Html:
1714                         return inBodyIM(p)
1715                 case a.Frameset:
1716                         p.addElement()
1717                 case a.Frame:
1718                         p.addElement()
1719                         p.oe.pop()
1720                         p.acknowledgeSelfClosingTag()
1721                 case a.Noframes:
1722                         return inHeadIM(p)
1723                 }
1724         case EndTagToken:
1725                 switch p.tok.DataAtom {
1726                 case a.Frameset:
1727                         if p.oe.top().DataAtom != a.Html {
1728                                 p.oe.pop()
1729                                 if p.oe.top().DataAtom != a.Frameset {
1730                                         p.im = afterFramesetIM
1731                                         return true
1732                                 }
1733                         }
1734                 }
1735         default:
1736                 // Ignore the token.
1737         }
1738         return true
1739 }
1740
1741 // Section 12.2.5.4.20.
1742 func afterFramesetIM(p *parser) bool {
1743         switch p.tok.Type {
1744         case CommentToken:
1745                 p.addChild(&Node{
1746                         Type: CommentNode,
1747                         Data: p.tok.Data,
1748                 })
1749         case TextToken:
1750                 // Ignore all text but whitespace.
1751                 s := strings.Map(func(c rune) rune {
1752                         switch c {
1753                         case ' ', '\t', '\n', '\f', '\r':
1754                                 return c
1755                         }
1756                         return -1
1757                 }, p.tok.Data)
1758                 if s != "" {
1759                         p.addText(s)
1760                 }
1761         case StartTagToken:
1762                 switch p.tok.DataAtom {
1763                 case a.Html:
1764                         return inBodyIM(p)
1765                 case a.Noframes:
1766                         return inHeadIM(p)
1767                 }
1768         case EndTagToken:
1769                 switch p.tok.DataAtom {
1770                 case a.Html:
1771                         p.im = afterAfterFramesetIM
1772                         return true
1773                 }
1774         default:
1775                 // Ignore the token.
1776         }
1777         return true
1778 }
1779
1780 // Section 12.2.5.4.21.
1781 func afterAfterBodyIM(p *parser) bool {
1782         switch p.tok.Type {
1783         case ErrorToken:
1784                 // Stop parsing.
1785                 return true
1786         case TextToken:
1787                 s := strings.TrimLeft(p.tok.Data, whitespace)
1788                 if len(s) == 0 {
1789                         // It was all whitespace.
1790                         return inBodyIM(p)
1791                 }
1792         case StartTagToken:
1793                 if p.tok.DataAtom == a.Html {
1794                         return inBodyIM(p)
1795                 }
1796         case CommentToken:
1797                 p.doc.AppendChild(&Node{
1798                         Type: CommentNode,
1799                         Data: p.tok.Data,
1800                 })
1801                 return true
1802         case DoctypeToken:
1803                 return inBodyIM(p)
1804         }
1805         p.im = inBodyIM
1806         return false
1807 }
1808
1809 // Section 12.2.5.4.22.
1810 func afterAfterFramesetIM(p *parser) bool {
1811         switch p.tok.Type {
1812         case CommentToken:
1813                 p.doc.AppendChild(&Node{
1814                         Type: CommentNode,
1815                         Data: p.tok.Data,
1816                 })
1817         case TextToken:
1818                 // Ignore all text but whitespace.
1819                 s := strings.Map(func(c rune) rune {
1820                         switch c {
1821                         case ' ', '\t', '\n', '\f', '\r':
1822                                 return c
1823                         }
1824                         return -1
1825                 }, p.tok.Data)
1826                 if s != "" {
1827                         p.tok.Data = s
1828                         return inBodyIM(p)
1829                 }
1830         case StartTagToken:
1831                 switch p.tok.DataAtom {
1832                 case a.Html:
1833                         return inBodyIM(p)
1834                 case a.Noframes:
1835                         return inHeadIM(p)
1836                 }
1837         case DoctypeToken:
1838                 return inBodyIM(p)
1839         default:
1840                 // Ignore the token.
1841         }
1842         return true
1843 }
1844
1845 const whitespaceOrNUL = whitespace + "\x00"
1846
1847 // Section 12.2.5.5.
1848 func parseForeignContent(p *parser) bool {
1849         switch p.tok.Type {
1850         case TextToken:
1851                 if p.framesetOK {
1852                         p.framesetOK = strings.TrimLeft(p.tok.Data, whitespaceOrNUL) == ""
1853                 }
1854                 p.tok.Data = strings.Replace(p.tok.Data, "\x00", "\ufffd", -1)
1855                 p.addText(p.tok.Data)
1856         case CommentToken:
1857                 p.addChild(&Node{
1858                         Type: CommentNode,
1859                         Data: p.tok.Data,
1860                 })
1861         case StartTagToken:
1862                 b := breakout[p.tok.Data]
1863                 if p.tok.DataAtom == a.Font {
1864                 loop:
1865                         for _, attr := range p.tok.Attr {
1866                                 switch attr.Key {
1867                                 case "color", "face", "size":
1868                                         b = true
1869                                         break loop
1870                                 }
1871                         }
1872                 }
1873                 if b {
1874                         for i := len(p.oe) - 1; i >= 0; i-- {
1875                                 n := p.oe[i]
1876                                 if n.Namespace == "" || htmlIntegrationPoint(n) || mathMLTextIntegrationPoint(n) {
1877                                         p.oe = p.oe[:i+1]
1878                                         break
1879                                 }
1880                         }
1881                         return false
1882                 }
1883                 switch p.top().Namespace {
1884                 case "math":
1885                         adjustAttributeNames(p.tok.Attr, mathMLAttributeAdjustments)
1886                 case "svg":
1887                         // Adjust SVG tag names. The tokenizer lower-cases tag names, but
1888                         // SVG wants e.g. "foreignObject" with a capital second "O".
1889                         if x := svgTagNameAdjustments[p.tok.Data]; x != "" {
1890                                 p.tok.DataAtom = a.Lookup([]byte(x))
1891                                 p.tok.Data = x
1892                         }
1893                         adjustAttributeNames(p.tok.Attr, svgAttributeAdjustments)
1894                 default:
1895                         panic("html: bad parser state: unexpected namespace")
1896                 }
1897                 adjustForeignAttributes(p.tok.Attr)
1898                 namespace := p.top().Namespace
1899                 p.addElement()
1900                 p.top().Namespace = namespace
1901                 if namespace != "" {
1902                         // Don't let the tokenizer go into raw text mode in foreign content
1903                         // (e.g. in an SVG <title> tag).
1904                         p.tokenizer.NextIsNotRawText()
1905                 }
1906                 if p.hasSelfClosingToken {
1907                         p.oe.pop()
1908                         p.acknowledgeSelfClosingTag()
1909                 }
1910         case EndTagToken:
1911                 for i := len(p.oe) - 1; i >= 0; i-- {
1912                         if p.oe[i].Namespace == "" {
1913                                 return p.im(p)
1914                         }
1915                         if strings.EqualFold(p.oe[i].Data, p.tok.Data) {
1916                                 p.oe = p.oe[:i]
1917                                 break
1918                         }
1919                 }
1920                 return true
1921         default:
1922                 // Ignore the token.
1923         }
1924         return true
1925 }
1926
1927 // Section 12.2.5.
1928 func (p *parser) inForeignContent() bool {
1929         if len(p.oe) == 0 {
1930                 return false
1931         }
1932         n := p.oe[len(p.oe)-1]
1933         if n.Namespace == "" {
1934                 return false
1935         }
1936         if mathMLTextIntegrationPoint(n) {
1937                 if p.tok.Type == StartTagToken && p.tok.DataAtom != a.Mglyph && p.tok.DataAtom != a.Malignmark {
1938                         return false
1939                 }
1940                 if p.tok.Type == TextToken {
1941                         return false
1942                 }
1943         }
1944         if n.Namespace == "math" && n.DataAtom == a.AnnotationXml && p.tok.Type == StartTagToken && p.tok.DataAtom == a.Svg {
1945                 return false
1946         }
1947         if htmlIntegrationPoint(n) && (p.tok.Type == StartTagToken || p.tok.Type == TextToken) {
1948                 return false
1949         }
1950         if p.tok.Type == ErrorToken {
1951                 return false
1952         }
1953         return true
1954 }
1955
1956 // parseImpliedToken parses a token as though it had appeared in the parser's
1957 // input.
1958 func (p *parser) parseImpliedToken(t TokenType, dataAtom a.Atom, data string) {
1959         realToken, selfClosing := p.tok, p.hasSelfClosingToken
1960         p.tok = Token{
1961                 Type:     t,
1962                 DataAtom: dataAtom,
1963                 Data:     data,
1964         }
1965         p.hasSelfClosingToken = false
1966         p.parseCurrentToken()
1967         p.tok, p.hasSelfClosingToken = realToken, selfClosing
1968 }
1969
1970 // parseCurrentToken runs the current token through the parsing routines
1971 // until it is consumed.
1972 func (p *parser) parseCurrentToken() {
1973         if p.tok.Type == SelfClosingTagToken {
1974                 p.hasSelfClosingToken = true
1975                 p.tok.Type = StartTagToken
1976         }
1977
1978         consumed := false
1979         for !consumed {
1980                 if p.inForeignContent() {
1981                         consumed = parseForeignContent(p)
1982                 } else {
1983                         consumed = p.im(p)
1984                 }
1985         }
1986
1987         if p.hasSelfClosingToken {
1988                 // This is a parse error, but ignore it.
1989                 p.hasSelfClosingToken = false
1990         }
1991 }
1992
1993 func (p *parser) parse() error {
1994         // Iterate until EOF. Any other error will cause an early return.
1995         var err error
1996         for err != io.EOF {
1997                 // CDATA sections are allowed only in foreign content.
1998                 n := p.oe.top()
1999                 p.tokenizer.AllowCDATA(n != nil && n.Namespace != "")
2000                 // Read and parse the next token.
2001                 p.tokenizer.Next()
2002                 p.tok = p.tokenizer.Token()
2003                 if p.tok.Type == ErrorToken {
2004                         err = p.tokenizer.Err()
2005                         if err != nil && err != io.EOF {
2006                                 return err
2007                         }
2008                 }
2009                 p.parseCurrentToken()
2010         }
2011         return nil
2012 }
2013
2014 // Parse returns the parse tree for the HTML from the given Reader.
2015 // The input is assumed to be UTF-8 encoded.
2016 func Parse(r io.Reader) (*Node, error) {
2017         p := &parser{
2018                 tokenizer: NewTokenizer(r),
2019                 doc: &Node{
2020                         Type: DocumentNode,
2021                 },
2022                 scripting:  true,
2023                 framesetOK: true,
2024                 im:         initialIM,
2025         }
2026         err := p.parse()
2027         if err != nil {
2028                 return nil, err
2029         }
2030         return p.doc, nil
2031 }
2032
2033 // ParseFragment parses a fragment of HTML and returns the nodes that were
2034 // found. If the fragment is the InnerHTML for an existing element, pass that
2035 // element in context.
2036 func ParseFragment(r io.Reader, context *Node) ([]*Node, error) {
2037         contextTag := ""
2038         if context != nil {
2039                 if context.Type != ElementNode {
2040                         return nil, errors.New("html: ParseFragment of non-element Node")
2041                 }
2042                 // The next check isn't just context.DataAtom.String() == context.Data because
2043                 // it is valid to pass an element whose tag isn't a known atom. For example,
2044                 // DataAtom == 0 and Data = "tagfromthefuture" is perfectly consistent.
2045                 if context.DataAtom != a.Lookup([]byte(context.Data)) {
2046                         return nil, fmt.Errorf("html: inconsistent Node: DataAtom=%q, Data=%q", context.DataAtom, context.Data)
2047                 }
2048                 contextTag = context.DataAtom.String()
2049         }
2050         p := &parser{
2051                 tokenizer: NewTokenizerFragment(r, contextTag),
2052                 doc: &Node{
2053                         Type: DocumentNode,
2054                 },
2055                 scripting: true,
2056                 fragment:  true,
2057                 context:   context,
2058         }
2059
2060         root := &Node{
2061                 Type:     ElementNode,
2062                 DataAtom: a.Html,
2063                 Data:     a.Html.String(),
2064         }
2065         p.doc.AppendChild(root)
2066         p.oe = nodeStack{root}
2067         p.resetInsertionMode()
2068
2069         for n := context; n != nil; n = n.Parent {
2070                 if n.Type == ElementNode && n.DataAtom == a.Form {
2071                         p.form = n
2072                         break
2073                 }
2074         }
2075
2076         err := p.parse()
2077         if err != nil {
2078                 return nil, err
2079         }
2080
2081         parent := p.doc
2082         if context != nil {
2083                 parent = root
2084         }
2085
2086         var result []*Node
2087         for c := parent.FirstChild; c != nil; {
2088                 next := c.NextSibling
2089                 parent.RemoveChild(c)
2090                 result = append(result, c)
2091                 c = next
2092         }
2093         return result, nil
2094 }