5 Copyright 2007 Jeroen van der Meer <http://jero.net/>
6 Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
16 The above copyright notice and this permission notice shall be included
17 in all copies or substantial portions of the Software.
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
29 // Tags for FIX ME!!!: (in order of priority)
30 // XXX - should be fixed NAO!
31 // XERROR - with regards to parse errors
32 // XSCRIPT - with regards to scripting mode
33 // XENCODING - with regards to encoding (for reparsing tests)
34 // XDOM - DOM specific code (tagName is explicitly not marked).
35 // this is not (yet) in helper functions.
37 class HTML5_TreeBuilder
{
38 public $stack = array();
39 public $content_model;
42 private $original_mode;
43 private $secondary_mode;
45 // Whether or not normal insertion of nodes should actually foster
46 // parent (used in one case in spec)
47 private $foster_parent = false;
48 private $a_formatting = array();
50 private $head_pointer = null;
51 private $form_pointer = null;
53 private $flag_frameset_ok = true;
54 private $flag_force_quirks = false;
55 private $ignored = false;
56 private $quirks_mode = null;
57 // this gets to 2 when we want to ignore the next lf character, and
58 // is decremented at the beginning of each processed token (this way,
59 // code can check for (bool)$ignore_lf_token, but it phases out
61 private $ignore_lf_token = 0;
62 private $fragment = false;
65 private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
66 private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
67 // dl and ds are speculative
68 private $special = array('address','area','article','aside','base','basefont','bgsound',
69 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
70 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
71 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
72 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
73 'p','param','plaintext','pre','script','select','spacer','style',
74 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
76 private $pendingTableCharacters;
77 private $pendingTableCharactersDirty;
79 // Tree construction modes
81 const BEFORE_HTML
= 1;
82 const BEFORE_HEAD
= 2;
84 const IN_HEAD_NOSCRIPT
= 4;
87 const IN_CDATA_RCDATA
= 7;
89 const IN_TABLE_TEXT
= 9;
90 const IN_CAPTION
= 10;
91 const IN_COLUMN_GROUP
= 11;
92 const IN_TABLE_BODY
= 12;
96 const IN_SELECT_IN_TABLE
= 16;
97 const IN_FOREIGN_CONTENT
= 17;
98 const AFTER_BODY
= 18;
99 const IN_FRAMESET
= 19;
100 const AFTER_FRAMESET
= 20;
101 const AFTER_AFTER_BODY
= 21;
102 const AFTER_AFTER_FRAMESET
= 22;
105 * Converts a magic number to a readable name. Use for debugging.
// Debug helper: translates an integer class-constant value into the name of
// a constant holding that value, using reflection over HTML5_TreeBuilder.
// $number is the integer to look up; the value returned is whatever name was
// recorded for it in $lookup (the initialisation of $lookup is not visible in
// this excerpt).
107 private function strConst($number) {
// Enumerate every constant declared on the class as name => value.
111 $r = new ReflectionClass('HTML5_TreeBuilder');
112 $consts = $r->getConstants();
113 foreach ($consts as $const => $num) {
// Only integer-valued constants are indexed; string/null constants such as
// the NS_* namespace values are skipped.
114 if (!is_int($num)) continue;
// NOTE(review): several constant groups reuse the same integers (e.g.
// BEFORE_HTML = 1 and SCOPE_LISTITEM = 1, BEFORE_HEAD = 2 and SCOPE_TABLE = 2),
// so a later constant overwrites the earlier name stored for that value and
// the name returned for a mode may be a SCOPE_* name instead - confirm and
// consider filtering by constant group.
115 $lookup[$num] = $const;
// NOTE(review): no guard for a $number that has no entry in $lookup.
118 return $lookup[$number];
121 // The different types of elements.
124 const FORMATTING
= 102;
125 const PHRASING
= 103;
127 // Quirks modes in $quirks_mode
128 const NO_QUIRKS
= 200;
129 const QUIRKS_MODE
= 201;
130 const LIMITED_QUIRKS_MODE
= 202;
132 // Marker to be placed in $a_formatting
135 // Namespaces for foreign content
136 const NS_HTML
= null; // to prevent DOM from requiring NS on everything
137 const NS_XHTML
= 'http://www.w3.org/1999/xhtml';
138 const NS_MATHML
= 'http://www.w3.org/1998/Math/MathML';
139 const NS_SVG
= 'http://www.w3.org/2000/svg';
140 const NS_XLINK
= 'http://www.w3.org/1999/xlink';
141 const NS_XML
= 'http://www.w3.org/XML/1998/namespace';
142 const NS_XMLNS
= 'http://www.w3.org/2000/xmlns/';
144 // Different types of scopes to test for elements
146 const SCOPE_LISTITEM
= 1;
147 const SCOPE_TABLE
= 2;
// Sets up a new tree builder: selects the starting tree construction mode
// and creates and configures the DOMDocument that parsed nodes are appended to.
149 public function __construct() {
// Begin in the INITIAL tree construction mode.
150 $this->mode
= self
::INITIAL
;
// Fresh, empty document that will receive the doctype, comments and elements.
151 $this->dom
= new DOMDocument
;
// Document settings: UTF-8 encoding, whitespace preserved, entities
// substituted, and strict DOM error checking switched off.
153 $this->dom
->encoding
= 'UTF-8';
154 $this->dom
->preserveWhiteSpace
= true;
155 $this->dom
->substituteEntities
= true;
156 $this->dom
->strictErrorChecking
= false;
159 // Process tag tokens
160 public function emitToken($token, $mode = null) {
161 // XXX: ignore parse errors... why are we emitting them, again?
162 if ($token['type'] === HTML5_Tokenizer
::PARSEERROR
) return;
163 if ($mode === null) $mode = $this->mode
;
166 $backtrace = debug_backtrace();
167 if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
168 echo $this->strConst($mode);
169 if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
173 $this->printActiveFormattingElements();
174 if ($this->foster_parent) echo " -> this is a foster parent mode\n";
175 if ($this->flag_frameset_ok) echo " -> frameset ok\n";
178 if ($this->ignore_lf_token
) $this->ignore_lf_token
--;
179 $this->ignored
= false;
180 // indenting is a little wonky, this can be changed later on
185 /* A character token that is one of U+0009 CHARACTER TABULATION,
186 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
187 if ($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
188 /* Ignore the token. */
189 $this->ignored
= true;
190 } elseif ($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
192 $token['name'] !== 'html' || !empty($token['public']) ||
193 !empty($token['system']) || $token !== 'about:legacy-compat'
195 /* If the DOCTYPE token's name is not a case-sensitive match
196 * for the string "html", or if the token's public identifier
197 * is not missing, or if the token's system identifier is
198 * neither missing nor a case-sensitive match for the string
199 * "about:legacy-compat", then there is a parse error (this
200 * is the DOCTYPE parse error). */
201 // DOCTYPE parse error
203 /* Append a DocumentType node to the Document node, with the name
204 * attribute set to the name given in the DOCTYPE token, or the
205 * empty string if the name was missing; the publicId attribute
206 * set to the public identifier given in the DOCTYPE token, or
207 * the empty string if the public identifier was missing; the
208 * systemId attribute set to the system identifier given in the
209 * DOCTYPE token, or the empty string if the system identifier
210 * was missing; and the other attributes specific to
211 * DocumentType objects set to null and empty lists as
212 * appropriate. Associate the DocumentType node with the
213 * Document object so that it is returned as the value of the
214 * doctype attribute of the Document object. */
215 if (!isset($token['public'])) $token['public'] = null;
216 if (!isset($token['system'])) $token['system'] = null;
218 // Yes this is hacky. I'm kind of annoyed that I can't appendChild
219 // a doctype to DOMDocument. Maybe I haven't chanted the right
221 $impl = new DOMImplementation();
222 // This call can fail for particularly pathological cases (namely,
223 // the qualifiedName parameter ($token['name']) could be missing.
224 if ($token['name']) {
225 $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
226 $this->dom
->appendChild($doctype);
228 // It looks like libxml's not actually *able* to express this case.
230 $this->dom
->emptyDoctype
= true;
232 $public = is_null($token['public']) ? false : strtolower($token['public']);
233 $system = is_null($token['system']) ? false : strtolower($token['system']);
234 $publicStartsWithForQuirks = array(
235 "+//silmaril//dtd html pro v0r11 19970101//",
236 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
237 "-//as//dtd html 3.0 aswedit + extensions//",
238 "-//ietf//dtd html 2.0 level 1//",
239 "-//ietf//dtd html 2.0 level 2//",
240 "-//ietf//dtd html 2.0 strict level 1//",
241 "-//ietf//dtd html 2.0 strict level 2//",
242 "-//ietf//dtd html 2.0 strict//",
243 "-//ietf//dtd html 2.0//",
244 "-//ietf//dtd html 2.1e//",
245 "-//ietf//dtd html 3.0//",
246 "-//ietf//dtd html 3.2 final//",
247 "-//ietf//dtd html 3.2//",
248 "-//ietf//dtd html 3//",
249 "-//ietf//dtd html level 0//",
250 "-//ietf//dtd html level 1//",
251 "-//ietf//dtd html level 2//",
252 "-//ietf//dtd html level 3//",
253 "-//ietf//dtd html strict level 0//",
254 "-//ietf//dtd html strict level 1//",
255 "-//ietf//dtd html strict level 2//",
256 "-//ietf//dtd html strict level 3//",
257 "-//ietf//dtd html strict//",
258 "-//ietf//dtd html//",
259 "-//metrius//dtd metrius presentational//",
260 "-//microsoft//dtd internet explorer 2.0 html strict//",
261 "-//microsoft//dtd internet explorer 2.0 html//",
262 "-//microsoft//dtd internet explorer 2.0 tables//",
263 "-//microsoft//dtd internet explorer 3.0 html strict//",
264 "-//microsoft//dtd internet explorer 3.0 html//",
265 "-//microsoft//dtd internet explorer 3.0 tables//",
266 "-//netscape comm. corp.//dtd html//",
267 "-//netscape comm. corp.//dtd strict html//",
268 "-//o'reilly and associates//dtd html 2.0//",
269 "-//o'reilly and associates//dtd html extended 1.0//",
270 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
271 "-//spyglass//dtd html 2.0 extended//",
272 "-//sq//dtd html 2.0 hotmetal + extensions//",
273 "-//sun microsystems corp.//dtd hotjava html//",
274 "-//sun microsystems corp.//dtd hotjava strict html//",
275 "-//w3c//dtd html 3 1995-03-24//",
276 "-//w3c//dtd html 3.2 draft//",
277 "-//w3c//dtd html 3.2 final//",
278 "-//w3c//dtd html 3.2//",
279 "-//w3c//dtd html 3.2s draft//",
280 "-//w3c//dtd html 4.0 frameset//",
281 "-//w3c//dtd html 4.0 transitional//",
282 "-//w3c//dtd html experimental 19960712//",
283 "-//w3c//dtd html experimental 970421//",
284 "-//w3c//dtd w3 html//",
285 "-//w3o//dtd w3 html 3.0//",
286 "-//webtechs//dtd mozilla html 2.0//",
287 "-//webtechs//dtd mozilla html//",
289 $publicSetToForQuirks = array(
290 "-//w3o//dtd w3 html strict 3.0//",
291 "-/w3c/dtd html 4.0 transitional/en",
294 $publicStartsWithAndSystemForQuirks = array(
295 "-//w3c//dtd html 4.01 frameset//",
296 "-//w3c//dtd html 4.01 transitional//",
298 $publicStartsWithForLimitedQuirks = array(
299 "-//w3c//dtd xhtml 1.0 frameset//",
300 "-//w3c//dtd xhtml 1.0 transitional//",
302 $publicStartsWithAndSystemForLimitedQuirks = array(
303 "-//w3c//dtd html 4.01 frameset//",
304 "-//w3c//dtd html 4.01 transitional//",
306 // first, do easy checks
308 !empty($token['force-quirks']) ||
309 strtolower($token['name']) !== 'html'
311 $this->quirks_mode
= self
::QUIRKS_MODE
;
315 foreach ($publicStartsWithAndSystemForQuirks as $x) {
316 if (strncmp($public, $x, strlen($x)) === 0) {
317 $this->quirks_mode
= self
::QUIRKS_MODE
;
321 if (!is_null($this->quirks_mode
)) break;
322 foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
323 if (strncmp($public, $x, strlen($x)) === 0) {
324 $this->quirks_mode
= self
::LIMITED_QUIRKS_MODE
;
328 if (!is_null($this->quirks_mode
)) break;
330 foreach ($publicSetToForQuirks as $x) {
331 if ($public === $x) {
332 $this->quirks_mode
= self
::QUIRKS_MODE
;
336 if (!is_null($this->quirks_mode
)) break;
337 foreach ($publicStartsWithForLimitedQuirks as $x) {
338 if (strncmp($public, $x, strlen($x)) === 0) {
339 $this->quirks_mode
= self
::LIMITED_QUIRKS_MODE
;
342 if (!is_null($this->quirks_mode
)) break;
343 if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
344 $this->quirks_mode
= self
::QUIRKS_MODE
;
347 foreach ($publicStartsWithForQuirks as $x) {
348 if (strncmp($public, $x, strlen($x)) === 0) {
349 $this->quirks_mode
= self
::QUIRKS_MODE
;
353 if (is_null($this->quirks_mode
)) {
354 $this->quirks_mode
= self
::NO_QUIRKS
;
358 $this->mode
= self
::BEFORE_HTML
;
361 /* Switch the insertion mode to "before html", then reprocess the
363 $this->mode
= self
::BEFORE_HTML
;
364 $this->quirks_mode
= self
::QUIRKS_MODE
;
365 $this->emitToken($token);
369 case self
::BEFORE_HTML
:
371 /* A DOCTYPE token */
372 if($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
373 // Parse error. Ignore the token.
374 $this->ignored
= true;
376 /* A comment token */
377 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
378 /* Append a Comment node to the Document object with the data
379 attribute set to the data given in the comment token. */
381 $comment = $this->dom
->createComment($token['data']);
382 $this->dom
->appendChild($comment);
384 /* A character token that is one of U+0009 CHARACTER TABULATION,
385 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
387 } elseif($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
388 /* Ignore the token. */
389 $this->ignored
= true;
391 /* A start tag whose tag name is "html" */
392 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] == 'html') {
393 /* Create an element for the token in the HTML namespace. Append it
394 * to the Document object. Put this element in the stack of open
397 $html = $this->insertElement($token, false);
398 $this->dom
->appendChild($html);
399 $this->stack
[] = $html;
401 $this->mode
= self
::BEFORE_HEAD
;
404 /* Create an html element. Append it to the Document object. Put
405 * this element in the stack of open elements. */
407 $html = $this->dom
->createElementNS(self
::NS_HTML
, 'html');
408 $this->dom
->appendChild($html);
409 $this->stack
[] = $html;
411 /* Switch the insertion mode to "before head", then reprocess the
413 $this->mode
= self
::BEFORE_HEAD
;
414 $this->emitToken($token);
418 case self
::BEFORE_HEAD
:
420 /* A character token that is one of U+0009 CHARACTER TABULATION,
421 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
423 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
424 /* Ignore the token. */
425 $this->ignored
= true;
427 /* A comment token */
428 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
429 /* Append a Comment node to the current node with the data attribute
430 set to the data given in the comment token. */
431 $this->insertComment($token['data']);
433 /* A DOCTYPE token */
434 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
435 /* Parse error. Ignore the token */
436 $this->ignored
= true;
439 /* A start tag token with the tag name "html" */
440 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
441 /* Process the token using the rules for the "in body"
443 $this->processWithRulesFor($token, self
::IN_BODY
);
445 /* A start tag token with the tag name "head" */
446 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'head') {
447 /* Insert an HTML element for the token. */
448 $element = $this->insertElement($token);
450 /* Set the head element pointer to this new element node. */
451 $this->head_pointer
= $element;
453 /* Change the insertion mode to "in head". */
454 $this->mode
= self
::IN_HEAD
;
456 /* An end tag whose tag name is one of: "head", "body", "html", "br" */
458 $token['type'] === HTML5_Tokenizer
::ENDTAG
&& (
459 $token['name'] === 'head' || $token['name'] === 'body' ||
460 $token['name'] === 'html' || $token['name'] === 'br'
462 /* Act as if a start tag token with the tag name "head" and no
463 * attributes had been seen, then reprocess the current token. */
464 $this->emitToken(array(
466 'type' => HTML5_Tokenizer
::STARTTAG
,
469 $this->emitToken($token);
471 /* Any other end tag */
472 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
) {
473 /* Parse error. Ignore the token. */
474 $this->ignored
= true;
477 /* Act as if a start tag token with the tag name "head" and no
478 * attributes had been seen, then reprocess the current token.
479 * Note: This will result in an empty head element being
480 * generated, with the current token being reprocessed in the
481 * "after head" insertion mode. */
482 $this->emitToken(array(
484 'type' => HTML5_Tokenizer
::STARTTAG
,
487 $this->emitToken($token);
493 /* A character token that is one of U+0009 CHARACTER TABULATION,
494 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
496 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
497 /* Insert the character into the current node. */
498 $this->insertText($token['data']);
500 /* A comment token */
501 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
502 /* Append a Comment node to the current node with the data attribute
503 set to the data given in the comment token. */
504 $this->insertComment($token['data']);
506 /* A DOCTYPE token */
507 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
508 /* Parse error. Ignore the token. */
509 $this->ignored
= true;
512 /* A start tag whose tag name is "html" */
513 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
514 $token['name'] === 'html') {
515 $this->processWithRulesFor($token, self
::IN_BODY
);
517 /* A start tag whose tag name is one of: "base", "command", "link" */
518 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
519 ($token['name'] === 'base' || $token['name'] === 'command' ||
520 $token['name'] === 'link')) {
521 /* Insert an HTML element for the token. Immediately pop the
522 * current node off the stack of open elements. */
523 $this->insertElement($token);
524 array_pop($this->stack
);
526 // YYY: Acknowledge the token's self-closing flag, if it is set.
528 /* A start tag whose tag name is "meta" */
529 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'meta') {
530 /* Insert an HTML element for the token. Immediately pop the
531 * current node off the stack of open elements. */
532 $this->insertElement($token);
533 array_pop($this->stack
);
535 // XERROR: Acknowledge the token's self-closing flag, if it is set.
537 // XENCODING: If the element has a charset attribute, and its value is a
538 // supported encoding, and the confidence is currently tentative,
539 // then change the encoding to the encoding given by the value of
540 // the charset attribute.
542 // Otherwise, if the element has a content attribute, and applying
543 // the algorithm for extracting an encoding from a Content-Type to
544 // its value returns a supported encoding encoding, and the
545 // confidence is currently tentative, then change the encoding to
546 // the encoding encoding.
548 /* A start tag with the tag name "title" */
549 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'title') {
550 $this->insertRCDATAElement($token);
552 /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
553 * A start tag whose tag name is one of: "noframes", "style" */
554 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
555 ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
556 // XSCRIPT: Scripting flag not respected
557 $this->insertCDATAElement($token);
559 // XSCRIPT: Scripting flag disable not implemented
561 /* A start tag with the tag name "script" */
562 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'script') {
563 /* 1. Create an element for the token in the HTML namespace. */
564 $node = $this->insertElement($token, false);
566 /* 2. Mark the element as being "parser-inserted" */
569 /* 3. If the parser was originally created for the HTML
570 * fragment parsing algorithm, then mark the script element as
571 * "already executed". (fragment case) */
574 /* 4. Append the new element to the current node and push it onto
575 * the stack of open elements. */
576 end($this->stack
)->appendChild($node);
577 $this->stack
[] = $node;
578 // I guess we could squash these together
580 /* 6. Let the original insertion mode be the current insertion mode. */
581 $this->original_mode
= $this->mode
;
582 /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
583 $this->mode
= self
::IN_CDATA_RCDATA
;
584 /* 5. Switch the tokeniser's content model flag to the CDATA state. */
585 $this->content_model
= HTML5_Tokenizer
::CDATA
;
587 /* An end tag with the tag name "head" */
588 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'head') {
589 /* Pop the current node (which will be the head element) off the stack of open elements. */
590 array_pop($this->stack
);
592 /* Change the insertion mode to "after head". */
593 $this->mode
= self
::AFTER_HEAD
;
595 // Slight logic inversion here to minimize duplication
596 /* A start tag with the tag name "head". */
597 /* An end tag whose tag name is not one of: "body", "html", "br" */
598 } elseif(($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'head') ||
599 ($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] !== 'html' &&
600 $token['name'] !== 'body' && $token['name'] !== 'br')) {
601 // Parse error. Ignore the token.
602 $this->ignored
= true;
606 /* Act as if an end tag token with the tag name "head" had been
607 * seen, and reprocess the current token. */
608 $this->emitToken(array(
610 'type' => HTML5_Tokenizer
::ENDTAG
613 /* Then, reprocess the current token. */
614 $this->emitToken($token);
618 case self
::IN_HEAD_NOSCRIPT
:
619 if ($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
621 } elseif ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
622 $this->processWithRulesFor($token, self
::IN_BODY
);
623 } elseif ($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'noscript') {
624 /* Pop the current node (which will be a noscript element) from the
625 * stack of open elements; the new current node will be a head
627 array_pop($this->stack
);
628 $this->mode
= self
::IN_HEAD
;
630 ($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) ||
631 ($token['type'] === HTML5_Tokenizer
::COMMENT
) ||
632 ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& (
633 $token['name'] === 'link' || $token['name'] === 'meta' ||
634 $token['name'] === 'noframes' || $token['name'] === 'style'))) {
635 $this->processWithRulesFor($token, self
::IN_HEAD
);
638 ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& (
639 $token['name'] === 'head' || $token['name'] === 'noscript')) ||
640 ($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
641 $token['name'] !== 'br')) {
645 $this->emitToken(array(
646 'type' => HTML5_Tokenizer
::ENDTAG
,
647 'name' => 'noscript',
649 $this->emitToken($token);
653 case self
::AFTER_HEAD
:
654 /* Handle the token as follows: */
656 /* A character token that is one of U+0009 CHARACTER TABULATION,
657 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
659 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
660 /* Append the character to the current node. */
661 $this->insertText($token['data']);
663 /* A comment token */
664 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
665 /* Append a Comment node to the current node with the data attribute
666 set to the data given in the comment token. */
667 $this->insertComment($token['data']);
669 } elseif ($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
672 } elseif ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
673 $this->processWithRulesFor($token, self
::IN_BODY
);
675 /* A start tag token with the tag name "body" */
676 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'body') {
677 $this->insertElement($token);
679 /* Set the frameset-ok flag to "not ok". */
680 $this->flag_frameset_ok
= false;
682 /* Change the insertion mode to "in body". */
683 $this->mode
= self
::IN_BODY
;
685 /* A start tag token with the tag name "frameset" */
686 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'frameset') {
687 /* Insert a frameset element for the token. */
688 $this->insertElement($token);
690 /* Change the insertion mode to "in frameset". */
691 $this->mode
= self
::IN_FRAMESET
;
693 /* A start tag token whose tag name is one of: "base", "link", "meta",
694 "noframes", "script", "style", "title" */
695 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& in_array($token['name'],
696 array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
698 /* Push the node pointed to by the head element pointer onto the
699 * stack of open elements. */
700 $this->stack
[] = $this->head_pointer
;
701 $this->processWithRulesFor($token, self
::IN_HEAD
);
702 array_splice($this->stack
, array_search($this->head_pointer
, $this->stack
, true), 1);
704 // inversion of specification
706 ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'head') ||
707 ($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
708 $token['name'] !== 'body' && $token['name'] !== 'html' &&
709 $token['name'] !== 'br')) {
714 $this->emitToken(array(
716 'type' => HTML5_Tokenizer
::STARTTAG
,
719 $this->flag_frameset_ok
= true;
720 $this->emitToken($token);
725 /* Handle the token as follows: */
727 switch($token['type']) {
728 /* A character token */
729 case HTML5_Tokenizer
::CHARACTER
:
730 case HTML5_Tokenizer
::SPACECHARACTER
:
731 /* Reconstruct the active formatting elements, if any. */
732 $this->reconstructActiveFormattingElements();
734 /* Append the token's character to the current node. */
735 $this->insertText($token['data']);
737 /* If the token is not one of U+0009 CHARACTER TABULATION,
738 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
739 * SPACE, then set the frameset-ok flag to "not ok". */
740 // i.e., if any of the characters is not whitespace
741 if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer
::WHITESPACE
)) {
742 $this->flag_frameset_ok
= false;
746 /* A comment token */
747 case HTML5_Tokenizer
::COMMENT
:
748 /* Append a Comment node to the current node with the data
749 attribute set to the data given in the comment token. */
750 $this->insertComment($token['data']);
753 case HTML5_Tokenizer
::DOCTYPE
:
757 case HTML5_Tokenizer
::EOF
:
761 case HTML5_Tokenizer
::STARTTAG
:
762 switch($token['name']) {
765 /* For each attribute on the token, check to see if the
766 * attribute is already present on the top element of the
767 * stack of open elements. If it is not, add the attribute
768 * and its corresponding value to that element. */
769 foreach($token['attr'] as $attr) {
770 if(!$this->stack
[0]->hasAttribute($attr['name'])) {
771 $this->stack
[0]->setAttribute($attr['name'], $attr['value']);
776 case 'base': case 'command': case 'link': case 'meta': case 'noframes':
777 case 'script': case 'style': case 'title':
778 /* Process the token as if the insertion mode had been "in
780 $this->processWithRulesFor($token, self
::IN_HEAD
);
783 /* A start tag token with the tag name "body" */
785 /* Parse error. If the second element on the stack of open
786 elements is not a body element, or, if the stack of open
787 elements has only one node on it, then ignore the token.
789 if(count($this->stack
) === 1 || $this->stack
[1]->tagName
!== 'body') {
790 $this->ignored
= true;
793 /* Otherwise, for each attribute on the token, check to see
794 if the attribute is already present on the body element (the
795 second element) on the stack of open elements. If it is not,
796 add the attribute and its corresponding value to that
799 foreach($token['attr'] as $attr) {
800 if(!$this->stack
[1]->hasAttribute($attr['name'])) {
801 $this->stack
[1]->setAttribute($attr['name'], $attr['value']);
809 /* If the second element on the stack of open elements is
810 * not a body element, or, if the stack of open elements
811 * has only one node on it, then ignore the token.
813 if(count($this->stack
) === 1 || $this->stack
[1]->tagName
!== 'body') {
814 $this->ignored
= true;
816 } elseif (!$this->flag_frameset_ok
) {
817 $this->ignored
= true;
820 /* 1. Remove the second element on the stack of open
821 * elements from its parent node, if it has one. */
822 if($this->stack
[1]->parentNode
) {
823 $this->stack
[1]->parentNode
->removeChild($this->stack
[1]);
826 /* 2. Pop all the nodes from the bottom of the stack of
827 * open elements, from the current node up to the root
829 array_splice($this->stack
, 1);
831 $this->insertElement($token);
832 $this->mode
= self
::IN_FRAMESET
;
836 // in spec, there is a diversion here
838 case 'address': case 'article': case 'aside': case 'blockquote':
839 case 'center': case 'datagrid': case 'details': case 'dir':
840 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
841 case 'header': case 'hgroup': case 'menu': case 'nav':
842 case 'ol': case 'p': case 'section': case 'ul':
843 /* If the stack of open elements has a p element in scope,
844 then act as if an end tag with the tag name p had been
846 if($this->elementInScope('p')) {
847 $this->emitToken(array(
849 'type' => HTML5_Tokenizer
::ENDTAG
853 /* Insert an HTML element for the token. */
854 $this->insertElement($token);
857 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
859 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
860 /* If the stack of open elements has a p element in scope,
861 then act as if an end tag with the tag name p had been seen. */
862 if($this->elementInScope('p')) {
863 $this->emitToken(array(
865 'type' => HTML5_Tokenizer
::ENDTAG
869 /* If the current node is an element whose tag name is one
870 * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
871 * parse error; pop the current node off the stack of open
873 $peek = array_pop($this->stack
);
874 if (in_array($peek->tagName
, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
877 $this->stack
[] = $peek;
880 /* Insert an HTML element for the token. */
881 $this->insertElement($token);
884 case 'pre': case 'listing':
885 /* If the stack of open elements has a p element in scope,
886 then act as if an end tag with the tag name p had been seen. */
887 if($this->elementInScope('p')) {
888 $this->emitToken(array(
890 'type' => HTML5_Tokenizer
::ENDTAG
893 $this->insertElement($token);
894 /* If the next token is a U+000A LINE FEED (LF) character
895 * token, then ignore that token and move on to the next
896 * one. (Newlines at the start of pre blocks are ignored as
897 * an authoring convenience.) */
898 $this->ignore_lf_token
= 2;
899 $this->flag_frameset_ok
= false;
902 /* A start tag whose tag name is "form" */
904 /* If the form element pointer is not null, ignore the
905 token with a parse error. */
906 if($this->form_pointer
!== null) {
907 $this->ignored
= true;
912 /* If the stack of open elements has a p element in
913 scope, then act as if an end tag with the tag name p
915 if($this->elementInScope('p')) {
916 $this->emitToken(array(
918 'type' => HTML5_Tokenizer
::ENDTAG
922 /* Insert an HTML element for the token, and set the
923 form element pointer to point to the element created. */
924 $element = $this->insertElement($token);
925 $this->form_pointer
= $element;
929 // condensed specification
930 case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
931 /* 1. Set the frameset-ok flag to "not ok". */
932 $this->flag_frameset_ok
= false;
934 $stack_length = count($this->stack
) - 1;
935 for($n = $stack_length; 0 <= $n; $n--) {
936 /* 2. Initialise node to be the current node (the
937 bottommost node of the stack). */
939 $node = $this->stack
[$n];
940 $cat = $this->getElementCategory($node);
943 /* 3. If node is an li element, then act as if an end
944 * tag with the tag name "li" had been seen, then jump
945 * to the last step. */
946 // for case 'dc': case 'dd': case 'ds': case 'dt':
947 /* If node is a dc, dd, ds or dt element, then act as if an end
948 * tag with the same tag name as node had been seen, then
949 * jump to the last step. */
950 if(($token['name'] === 'li' && $node->tagName
=== 'li') ||
951 ($token['name'] !== 'li' && ($node->tagName
== 'dc' || $node->tagName
=== 'dd' || $node->tagName
== 'ds' || $node->tagName
=== 'dt'))) { // limited conditional
952 $this->emitToken(array(
953 'type' => HTML5_Tokenizer
::ENDTAG
,
954 'name' => $node->tagName
,
959 /* 4. If node is not in the formatting category, and is
960 not in the phrasing category, and is not an address,
961 div or p element, then stop this algorithm. */
962 if($cat !== self
::FORMATTING
&& $cat !== self
::PHRASING
&&
963 $node->tagName
!== 'address' && $node->tagName
!== 'div' &&
964 $node->tagName
!== 'p') {
968 /* 5. Otherwise, set node to the previous entry in the
969 * stack of open elements and return to step 2. */
972 /* 6. This is the last step. */
974 /* If the stack of open elements has a p element in scope,
975 then act as if an end tag with the tag name p had been
977 if($this->elementInScope('p')) {
978 $this->emitToken(array(
980 'type' => HTML5_Tokenizer
::ENDTAG
984 /* Finally, insert an HTML element with the same tag
985 name as the token's. */
986 $this->insertElement($token);
989 /* A start tag token whose tag name is "plaintext" */
991 /* If the stack of open elements has a p element in scope,
992 then act as if an end tag with the tag name p had been
994 if($this->elementInScope('p')) {
995 $this->emitToken(array(
997 'type' => HTML5_Tokenizer
::ENDTAG
1001 /* Insert an HTML element for the token. */
1002 $this->insertElement($token);
1004 $this->content_model
= HTML5_Tokenizer
::PLAINTEXT
;
1009 /* A start tag whose tag name is "a" */
1011 /* If the list of active formatting elements contains
1012 an element whose tag name is "a" between the end of the
1013 list and the last marker on the list (or the start of
1014 the list if there is no marker on the list), then this
1015 is a parse error; act as if an end tag with the tag name
1016 "a" had been seen, then remove that element from the list
1017 of active formatting elements and the stack of open
1018 elements if the end tag didn't already remove it (it
1019 might not have if the element is not in table scope). */
1020 $leng = count($this->a_formatting
);
1022 for($n = $leng - 1; $n >= 0; $n--) {
1023 if($this->a_formatting
[$n] === self
::MARKER
) {
1026 } elseif($this->a_formatting
[$n]->tagName
=== 'a') {
1027 $a = $this->a_formatting
[$n];
1028 $this->emitToken(array(
1030 'type' => HTML5_Tokenizer
::ENDTAG
1032 if (in_array($a, $this->a_formatting
)) {
1033 $a_i = array_search($a, $this->a_formatting
, true);
1034 if($a_i !== false) array_splice($this->a_formatting
, $a_i, 1);
1036 if (in_array($a, $this->stack
)) {
1037 $a_i = array_search($a, $this->stack
, true);
1038 if ($a_i !== false) array_splice($this->stack
, $a_i, 1);
1044 /* Reconstruct the active formatting elements, if any. */
1045 $this->reconstructActiveFormattingElements();
1047 /* Insert an HTML element for the token. */
1048 $el = $this->insertElement($token);
1050 /* Add that element to the list of active formatting
1052 $this->a_formatting
[] = $el;
1055 case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1056 case 's': case 'small': case 'strike':
1057 case 'strong': case 'tt': case 'u':
1058 /* Reconstruct the active formatting elements, if any. */
1059 $this->reconstructActiveFormattingElements();
1061 /* Insert an HTML element for the token. */
1062 $el = $this->insertElement($token);
1064 /* Add that element to the list of active formatting
1066 $this->a_formatting
[] = $el;
1070 /* Reconstruct the active formatting elements, if any. */
1071 $this->reconstructActiveFormattingElements();
1073 /* If the stack of open elements has a nobr element in
1074 * scope, then this is a parse error; act as if an end tag
1075 * with the tag name "nobr" had been seen, then once again
1076 * reconstruct the active formatting elements, if any. */
1077 if ($this->elementInScope('nobr')) {
1078 $this->emitToken(array(
1080 'type' => HTML5_Tokenizer
::ENDTAG
,
1082 $this->reconstructActiveFormattingElements();
1085 /* Insert an HTML element for the token. */
1086 $el = $this->insertElement($token);
1088 /* Add that element to the list of active formatting
1090 $this->a_formatting
[] = $el;
1093 // another diversion
1095 /* A start tag token whose tag name is "button" */
1097 /* If the stack of open elements has a button element in scope,
1098 then this is a parse error; act as if an end tag with the tag
1099 name "button" had been seen, then reprocess the token. (We don't
1100 do that. Unnecessary.) (I hope you're right! -- ezyang) */
1101 if($this->elementInScope('button')) {
1102 $this->emitToken(array(
1104 'type' => HTML5_Tokenizer
::ENDTAG
1108 /* Reconstruct the active formatting elements, if any. */
1109 $this->reconstructActiveFormattingElements();
1111 /* Insert an HTML element for the token. */
1112 $this->insertElement($token);
1114 /* Insert a marker at the end of the list of active
1115 formatting elements. */
1116 $this->a_formatting
[] = self
::MARKER
;
1118 $this->flag_frameset_ok
= false;
1121 case 'applet': case 'marquee': case 'object':
1122 /* Reconstruct the active formatting elements, if any. */
1123 $this->reconstructActiveFormattingElements();
1125 /* Insert an HTML element for the token. */
1126 $this->insertElement($token);
1128 /* Insert a marker at the end of the list of active
1129 formatting elements. */
1130 $this->a_formatting
[] = self
::MARKER
;
1132 $this->flag_frameset_ok
= false;
1137 /* A start tag whose tag name is "table" */
1139 /* If the Document is not set to quirks mode, and the
1140 * stack of open elements has a p element in scope, then
1141 * act as if an end tag with the tag name "p" had been
1143 if($this->quirks_mode
!== self
::QUIRKS_MODE
&&
1144 $this->elementInScope('p')) {
1145 $this->emitToken(array(
1147 'type' => HTML5_Tokenizer
::ENDTAG
1151 /* Insert an HTML element for the token. */
1152 $this->insertElement($token);
1154 $this->flag_frameset_ok
= false;
1156 /* Change the insertion mode to "in table". */
1157 $this->mode
= self
::IN_TABLE
;
1160 /* A start tag whose tag name is one of: "area", "basefont",
1161 "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1162 case 'area': case 'basefont': case 'bgsound': case 'br':
1163 case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1165 /* Reconstruct the active formatting elements, if any. */
1166 $this->reconstructActiveFormattingElements();
1168 /* Insert an HTML element for the token. */
1169 $this->insertElement($token);
1171 /* Immediately pop the current node off the stack of open elements. */
1172 array_pop($this->stack
);
1174 // YYY: Acknowledge the token's self-closing flag, if it is set.
1176 $this->flag_frameset_ok
= false;
1179 case 'param': case 'source':
1180 /* Insert an HTML element for the token. */
1181 $this->insertElement($token);
1183 /* Immediately pop the current node off the stack of open elements. */
1184 array_pop($this->stack
);
1186 // YYY: Acknowledge the token's self-closing flag, if it is set.
1189 /* A start tag whose tag name is "hr" */
1191 /* If the stack of open elements has a p element in scope,
1192 then act as if an end tag with the tag name p had been seen. */
1193 if($this->elementInScope('p')) {
1194 $this->emitToken(array(
1196 'type' => HTML5_Tokenizer
::ENDTAG
1200 /* Insert an HTML element for the token. */
1201 $this->insertElement($token);
1203 /* Immediately pop the current node off the stack of open elements. */
1204 array_pop($this->stack
);
1206 // YYY: Acknowledge the token's self-closing flag, if it is set.
1208 $this->flag_frameset_ok
= false;
1211 /* A start tag whose tag name is "image" */
1213 /* Parse error. Change the token's tag name to "img" and
1214 reprocess it. (Don't ask.) */
1215 $token['name'] = 'img';
1216 $this->emitToken($token);
1219 /* A start tag whose tag name is "isindex" */
1223 /* If the form element pointer is not null,
1224 then ignore the token. */
1225 if($this->form_pointer
=== null) {
1226 /* Act as if a start tag token with the tag name "form" had
1228 /* If the token has an attribute called "action", set
1229 * the action attribute on the resulting form
1230 * element to the value of the "action" attribute of
1233 $action = $this->getAttr($token, 'action');
1234 if ($action !== false) {
1235 $attr[] = array('name' => 'action', 'value' => $action);
1237 $this->emitToken(array(
1239 'type' => HTML5_Tokenizer
::STARTTAG
,
1243 /* Act as if a start tag token with the tag name "hr" had
1245 $this->emitToken(array(
1247 'type' => HTML5_Tokenizer
::STARTTAG
,
1251 /* Act as if a start tag token with the tag name "label"
1253 $this->emitToken(array(
1255 'type' => HTML5_Tokenizer
::STARTTAG
,
1259 /* Act as if a stream of character tokens had been seen. */
1260 $prompt = $this->getAttr($token, 'prompt');
1261 if ($prompt === false) {
1262 $prompt = 'This is a searchable index. '.
1263 'Insert your search keywords here: ';
1265 $this->emitToken(array(
1267 'type' => HTML5_Tokenizer
::CHARACTER
,
1270 /* Act as if a start tag token with the tag name "input"
1271 had been seen, with all the attributes from the "isindex"
1272 token, except with the "name" attribute set to the value
1273 "isindex" (ignoring any explicit "name" attribute). */
1275 foreach ($token['attr'] as $keypair) {
1276 if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1277 $keypair['name'] === 'prompt') continue;
1280 $attr[] = array('name' => 'name', 'value' => 'isindex');
1282 $this->emitToken(array(
1284 'type' => HTML5_Tokenizer
::STARTTAG
,
1288 /* Act as if an end tag token with the tag name "label"
1290 $this->emitToken(array(
1292 'type' => HTML5_Tokenizer
::ENDTAG
1295 /* Act as if a start tag token with the tag name "hr" had
1297 $this->emitToken(array(
1299 'type' => HTML5_Tokenizer
::STARTTAG
1302 /* Act as if an end tag token with the tag name "form" had
1304 $this->emitToken(array(
1306 'type' => HTML5_Tokenizer
::ENDTAG
1309 $this->ignored
= true;
1313 /* A start tag whose tag name is "textarea" */
1315 $this->insertElement($token);
1317 /* If the next token is a U+000A LINE FEED (LF)
1318 * character token, then ignore that token and move on to
1319 * the next one. (Newlines at the start of textarea
1320 * elements are ignored as an authoring convenience.)
1321 * need flag, see also <pre> */
1322 $this->ignore_lf_token
= 2;
1324 $this->original_mode
= $this->mode
;
1325 $this->flag_frameset_ok
= false;
1326 $this->mode
= self
::IN_CDATA_RCDATA
;
1328 /* Switch the tokeniser's content model flag to the
1330 $this->content_model
= HTML5_Tokenizer
::RCDATA
;
1333 /* A start tag token whose tag name is "xmp" */
1335 /* If the stack of open elements has a p element in
1336 scope, then act as if an end tag with the tag name
1337 "p" has been seen. */
1338 if ($this->elementInScope('p')) {
1339 $this->emitToken(array(
1341 'type' => HTML5_Tokenizer
::ENDTAG
1345 /* Reconstruct the active formatting elements, if any. */
1346 $this->reconstructActiveFormattingElements();
1348 $this->flag_frameset_ok
= false;
1350 $this->insertCDATAElement($token);
1354 $this->flag_frameset_ok
= false;
1355 $this->insertCDATAElement($token);
1358 case 'noembed': case 'noscript':
1359 // XSCRIPT: should check scripting flag
1360 $this->insertCDATAElement($token);
1363 /* A start tag whose tag name is "select" */
1365 /* Reconstruct the active formatting elements, if any. */
1366 $this->reconstructActiveFormattingElements();
1368 /* Insert an HTML element for the token. */
1369 $this->insertElement($token);
1371 $this->flag_frameset_ok
= false;
1373 /* If the insertion mode is one of "in table", "in caption",
1374 * "in column group", "in table body", "in row", or "in
1375 * cell", then switch the insertion mode to "in select in
1376 * table". Otherwise, switch the insertion mode to "in
1379 $this->mode
=== self
::IN_TABLE
|| $this->mode
=== self
::IN_CAPTION
||
1380 $this->mode
=== self
::IN_COLUMN_GROUP
|| $this->mode
==+self
::IN_TABLE_BODY
||
1381 $this->mode
=== self
::IN_ROW
|| $this->mode
=== self
::IN_CELL
1383 $this->mode
= self
::IN_SELECT_IN_TABLE
;
1385 $this->mode
= self
::IN_SELECT
;
1389 case 'option': case 'optgroup':
1390 if ($this->elementInScope('option')) {
1391 $this->emitToken(array(
1393 'type' => HTML5_Tokenizer
::ENDTAG
,
1396 $this->reconstructActiveFormattingElements();
1397 $this->insertElement($token);
1400 case 'rp': case 'rt':
1401 /* If the stack of open elements has a ruby element in scope, then generate
1402 * implied end tags. If the current node is not then a ruby element, this is
1403 * a parse error; pop all the nodes from the current node up to the node
1404 * immediately before the bottommost ruby element on the stack of open elements.
1406 if ($this->elementInScope('ruby')) {
1407 $this->generateImpliedEndTags();
1414 $peek = array_pop($this->stack
);
1415 } while ($peek->tagName
!== 'ruby');
1416 $this->stack
[] = $peek; // we popped one too many
1417 $this->insertElement($token);
1423 $this->reconstructActiveFormattingElements();
1424 $token = $this->adjustMathMLAttributes($token);
1425 $token = $this->adjustForeignAttributes($token);
1426 $this->insertForeignElement($token, self
::NS_MATHML
);
1427 if (isset($token['self-closing'])) {
1428 // XERROR: acknowledge the token's self-closing flag
1429 array_pop($this->stack
);
1431 if ($this->mode
!== self
::IN_FOREIGN_CONTENT
) {
1432 $this->secondary_mode
= $this->mode
;
1433 $this->mode
= self
::IN_FOREIGN_CONTENT
;
1438 $this->reconstructActiveFormattingElements();
1439 $token = $this->adjustSVGAttributes($token);
1440 $token = $this->adjustForeignAttributes($token);
1441 $this->insertForeignElement($token, self
::NS_SVG
);
1442 if (isset($token['self-closing'])) {
1443 // XERROR: acknowledge the token's self-closing flag
1444 array_pop($this->stack
);
1446 if ($this->mode
!== self
::IN_FOREIGN_CONTENT
) {
1447 $this->secondary_mode
= $this->mode
;
1448 $this->mode
= self
::IN_FOREIGN_CONTENT
;
1452 case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1453 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1457 /* A start tag token not covered by the previous entries */
1459 /* Reconstruct the active formatting elements, if any. */
1460 $this->reconstructActiveFormattingElements();
1462 $this->insertElement($token);
1463 /* This element will be a phrasing element. */
1468 case HTML5_Tokenizer
::ENDTAG
:
1469 switch($token['name']) {
1470 /* An end tag with the tag name "body" */
1472 /* If the stack of open elements does not have a body
1473 * element in scope, this is a parse error; ignore the
1475 if(!$this->elementInScope('body')) {
1476 $this->ignored
= true;
1478 /* Otherwise, if there is a node in the stack of open
1479 * elements that is not either a dc element, a dd element,
1480 * a ds element, a dt element, an li element, an optgroup
1481 * element, an option element, a p element, an rp element,
1482 * an rt element, a tbody element, a td element, a tfoot
1483 * element, a th element, a thead element, a tr element,
1484 * the body element, or the html element, then this is a
1488 // XERROR: implement this check for parse error
1491 /* Change the insertion mode to "after body". */
1492 $this->mode
= self
::AFTER_BODY
;
1495 /* An end tag with the tag name "html" */
1497 /* Act as if an end tag with tag name "body" had been seen,
1498 then, if that token wasn't ignored, reprocess the current
1500 $this->emitToken(array(
1502 'type' => HTML5_Tokenizer
::ENDTAG
1505 if (!$this->ignored
) $this->emitToken($token);
1508 case 'address': case 'article': case 'aside': case 'blockquote':
1509 case 'center': case 'datagrid': case 'details': case 'dir':
1510 case 'div': case 'dl': case 'fieldset': case 'footer':
1511 case 'header': case 'hgroup': case 'listing': case 'menu':
1512 case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
1513 /* If the stack of open elements has an element in scope
1514 with the same tag name as that of the token, then generate
1515 implied end tags. */
1516 if($this->elementInScope($token['name'])) {
1517 $this->generateImpliedEndTags();
1519 /* Now, if the current node is not an element with
1520 the same tag name as that of the token, then this
1521 is a parse error. */
1522 // XERROR: implement parse error logic
1524 /* If the stack of open elements has an element in
1525 scope with the same tag name as that of the token,
1526 then pop elements from this stack until an element
1527 with that tag name has been popped from the stack. */
1529 $node = array_pop($this->stack
);
1530 } while ($node->tagName
!== $token['name']);
1536 /* An end tag whose tag name is "form" */
1538 /* Let node be the element that the form element pointer is set to. */
1539 $node = $this->form_pointer
;
1540 /* Set the form element pointer to null. */
1541 $this->form_pointer
= null;
1542 /* If node is null or the stack of open elements does not
1543 * have node in scope, then this is a parse error; ignore the token. */
1544 if ($node === null || !in_array($node, $this->stack
)) {
1546 $this->ignored
= true;
1548 /* 1. Generate implied end tags. */
1549 $this->generateImpliedEndTags();
1550 /* 2. If the current node is not node, then this is a parse error. */
1551 if (end($this->stack
) !== $node) {
1554 /* 3. Remove node from the stack of open elements. */
1555 array_splice($this->stack
, array_search($node, $this->stack
, true), 1);
1560 /* An end tag whose tag name is "p" */
1562 /* If the stack of open elements has a p element in scope,
1563 then generate implied end tags, except for p elements. */
1564 if($this->elementInScope('p')) {
1565 /* Generate implied end tags, except for elements with
1566 * the same tag name as the token. */
1567 $this->generateImpliedEndTags(array('p'));
1569 /* If the current node is not a p element, then this is
1571 // XERROR: implement
1573 /* Pop elements from the stack of open elements until
1574 * an element with the same tag name as the token has
1575 * been popped from the stack. */
1577 $node = array_pop($this->stack
);
1578 } while ($node->tagName
!== 'p');
1582 $this->emitToken(array(
1584 'type' => HTML5_Tokenizer
::STARTTAG
,
1586 $this->emitToken($token);
1590 /* An end tag whose tag name is "li" */
1592 /* If the stack of open elements does not have an element
1593 * in list item scope with the same tag name as that of the
1594 * token, then this is a parse error; ignore the token. */
1595 if ($this->elementInScope($token['name'], self
::SCOPE_LISTITEM
)) {
1596 /* Generate implied end tags, except for elements with the
1597 * same tag name as the token. */
1598 $this->generateImpliedEndTags(array($token['name']));
1599 /* If the current node is not an element with the same tag
1600 * name as that of the token, then this is a parse error. */
1601 // XERROR: parse error
1602 /* Pop elements from the stack of open elements until an
1603 * element with the same tag name as the token has been
1604 * popped from the stack. */
1606 $node = array_pop($this->stack
);
1607 } while ($node->tagName
!== $token['name']);
1609 // XERROR: parse error
1613 /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
1614 case 'dc': case 'dd': case 'ds': case 'dt':
1615 if($this->elementInScope($token['name'])) {
1616 $this->generateImpliedEndTags(array($token['name']));
1618 /* If the current node is not an element with the same
1619 tag name as the token, then this is a parse error. */
1620 // XERROR: implement parse error
1622 /* Pop elements from the stack of open elements until
1623 * an element with the same tag name as the token has
1624 * been popped from the stack. */
1626 $node = array_pop($this->stack
);
1627 } while ($node->tagName
!== $token['name']);
1630 // XERROR: parse error
1634 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1636 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1637 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1639 /* If the stack of open elements has in scope an element whose
1640 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1641 generate implied end tags. */
1642 if($this->elementInScope($elements)) {
1643 $this->generateImpliedEndTags();
1645 /* Now, if the current node is not an element with the same
1646 tag name as that of the token, then this is a parse error. */
1647 // XERROR: implement parse error
1649 /* If the stack of open elements has in scope an element
1650 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1651 "h6", then pop elements from the stack until an element
1652 with one of those tag names has been popped from the stack. */
1654 $node = array_pop($this->stack
);
1655 } while (!in_array($node->tagName
, $elements));
1661 /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
1662 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1663 case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1664 case 'i': case 'nobr': case 's': case 'small': case 'strike':
1665 case 'strong': case 'tt': case 'u':
1666 // XERROR: generally speaking this needs parse error logic
1667 /* 1. Let the formatting element be the last element in
1668 the list of active formatting elements that:
1669 * is between the end of the list and the last scope
1670 marker in the list, if any, or the start of the list
1672 * has the same tag name as the token.
1675 for($a = count($this->a_formatting
) - 1; $a >= 0; $a--) {
1676 if($this->a_formatting
[$a] === self
::MARKER
) {
1679 } elseif($this->a_formatting
[$a]->tagName
=== $token['name']) {
1680 $formatting_element = $this->a_formatting
[$a];
1681 $in_stack = in_array($formatting_element, $this->stack
, true);
1687 /* If there is no such node, or, if that node is
1688 also in the stack of open elements but the element
1689 is not in scope, then this is a parse error. Abort
1690 these steps. The token is ignored. */
1691 if(!isset($formatting_element) || ($in_stack &&
1692 !$this->elementInScope($token['name']))) {
1693 $this->ignored
= true;
1696 /* Otherwise, if there is such a node, but that node
1697 is not in the stack of open elements, then this is a
1698 parse error; remove the element from the list, and
1699 abort these steps. */
1700 } elseif(isset($formatting_element) && !$in_stack) {
1701 unset($this->a_formatting
[$fe_af_pos]);
1702 $this->a_formatting
= array_merge($this->a_formatting
);
1706 /* Otherwise, there is a formatting element and that
1707 * element is in the stack and is in scope. If the
1708 * element is not the current node, this is a parse
1709 * error. In any case, proceed with the algorithm as
1710 * written in the following steps. */
1711 // XERROR: implement me
1713 /* 2. Let the furthest block be the topmost node in the
1714 stack of open elements that is lower in the stack
1715 than the formatting element, and is not an element in
1716 the phrasing or formatting categories. There might
1718 $fe_s_pos = array_search($formatting_element, $this->stack
, true);
1719 $length = count($this->stack
);
1721 for($s = $fe_s_pos +
1; $s < $length; $s++
) {
1722 $category = $this->getElementCategory($this->stack
[$s]);
1724 if($category !== self
::PHRASING
&& $category !== self
::FORMATTING
) {
1725 $furthest_block = $this->stack
[$s];
1730 /* 3. If there is no furthest block, then the UA must
1731 skip the subsequent steps and instead just pop all
1732 the nodes from the bottom of the stack of open
1733 elements, from the current node up to the formatting
1734 element, and remove the formatting element from the
1735 list of active formatting elements. */
1736 if(!isset($furthest_block)) {
1737 for($n = $length - 1; $n >= $fe_s_pos; $n--) {
1738 array_pop($this->stack
);
1741 unset($this->a_formatting
[$fe_af_pos]);
1742 $this->a_formatting
= array_merge($this->a_formatting
);
1746 /* 4. Let the common ancestor be the element
1747 immediately above the formatting element in the stack
1748 of open elements. */
1749 $common_ancestor = $this->stack
[$fe_s_pos - 1];
1751 /* 5. Let a bookmark note the position of the
1752 formatting element in the list of active formatting
1753 elements relative to the elements on either side
1754 of it in the list. */
1755 $bookmark = $fe_af_pos;
1757 /* 6. Let node and last node be the furthest block.
1758 Follow these steps: */
1759 $node = $furthest_block;
1760 $last_node = $furthest_block;
1763 for($n = array_search($node, $this->stack
, true) - 1; $n >= 0; $n--) {
1764 /* 6.1 Let node be the element immediately
1765 prior to node in the stack of open elements. */
1766 $node = $this->stack
[$n];
1768 /* 6.2 If node is not in the list of active
1769 formatting elements, then remove node from
1770 the stack of open elements and then go back
1772 if(!in_array($node, $this->a_formatting
, true)) {
1773 array_splice($this->stack
, $n, 1);
1780 /* 6.3 Otherwise, if node is the formatting
1781 element, then go to the next step in the overall
1783 if($node === $formatting_element) {
1786 /* 6.4 Otherwise, if last node is the furthest
1787 block, then move the aforementioned bookmark to
1788 be immediately after the node in the list of
1789 active formatting elements. */
1790 } elseif($last_node === $furthest_block) {
1791 $bookmark = array_search($node, $this->a_formatting
, true) +
1;
1794 /* 6.5 Create an element for the token for which
1795 * the element node was created, replace the entry
1796 * for node in the list of active formatting
1797 * elements with an entry for the new element,
1798 * replace the entry for node in the stack of open
1799 * elements with an entry for the new element, and
1800 * let node be the new element. */
1801 // we don't know what the token is anymore
1803 $clone = $node->cloneNode();
1804 $a_pos = array_search($node, $this->a_formatting
, true);
1805 $s_pos = array_search($node, $this->stack
, true);
1806 $this->a_formatting
[$a_pos] = $clone;
1807 $this->stack
[$s_pos] = $clone;
1810 /* 6.6 Insert last node into node, first removing
1811 it from its previous parent node if any. */
1813 if($last_node->parentNode
!== null) {
1814 $last_node->parentNode
->removeChild($last_node);
1818 $node->appendChild($last_node);
1820 /* 6.7 Let last node be node. */
1823 /* 6.8 Return to step 1 of this inner set of steps. */
1826 /* 7. If the common ancestor node is a table, tbody,
1827 * tfoot, thead, or tr element, then, foster parent
1828 * whatever last node ended up being in the previous
1829 * step, first removing it from its previous parent
1832 if ($last_node->parentNode
) { // common step
1833 $last_node->parentNode
->removeChild($last_node);
1835 if (in_array($common_ancestor->tagName
, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1836 $this->fosterParent($last_node);
1837 /* Otherwise, append whatever last node ended up being
1838 * in the previous step to the common ancestor node,
1839 * first removing it from its previous parent node if
1843 $common_ancestor->appendChild($last_node);
1846 /* 8. Create an element for the token for which the
1847 * formatting element was created. */
1849 $clone = $formatting_element->cloneNode();
1851 /* 9. Take all of the child nodes of the furthest
1852 block and append them to the element created in the
1855 while($furthest_block->hasChildNodes()) {
1856 $child = $furthest_block->firstChild
;
1857 $furthest_block->removeChild($child);
1858 $clone->appendChild($child);
1861 /* 10. Append that clone to the furthest block. */
1863 $furthest_block->appendChild($clone);
1865 /* 11. Remove the formatting element from the list
1866 of active formatting elements, and insert the new element
1867 into the list of active formatting elements at the
1868 position of the aforementioned bookmark. */
1869 $fe_af_pos = array_search($formatting_element, $this->a_formatting
, true);
1870 array_splice($this->a_formatting
, $fe_af_pos, 1);
1872 $af_part1 = array_slice($this->a_formatting
, 0, $bookmark - 1);
1873 $af_part2 = array_slice($this->a_formatting
, $bookmark);
1874 $this->a_formatting
= array_merge($af_part1, array($clone), $af_part2);
1876 /* 12. Remove the formatting element from the stack
1877 of open elements, and insert the new element into the stack
1878 of open elements immediately below the position of the
1879 furthest block in that stack. */
1880 $fe_s_pos = array_search($formatting_element, $this->stack
, true);
1881 array_splice($this->stack
, $fe_s_pos, 1);
1883 $fb_s_pos = array_search($furthest_block, $this->stack
, true);
1884 $s_part1 = array_slice($this->stack
, 0, $fb_s_pos +
1);
1885 $s_part2 = array_slice($this->stack
, $fb_s_pos +
1);
1886 $this->stack
= array_merge($s_part1, array($clone), $s_part2);
1888 /* 13. Jump back to step 1 in this series of steps. */
1889 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1893 case 'applet': case 'button': case 'marquee': case 'object':
1894 /* If the stack of open elements has an element in scope whose
1895 tag name matches the tag name of the token, then generate implied
1897 if($this->elementInScope($token['name'])) {
1898 $this->generateImpliedEndTags();
1900 /* Now, if the current node is not an element with the same
1901 tag name as the token, then this is a parse error. */
1902 // XERROR: implement logic
1904 /* Pop elements from the stack of open elements until
1905 * an element with the same tag name as the token has
1906 * been popped from the stack. */
1908 $node = array_pop($this->stack
);
1909 } while ($node->tagName
!== $token['name']);
1911 /* Clear the list of active formatting elements up to the
1913 $keys = array_keys($this->a_formatting
, self
::MARKER
, true);
1914 $marker = end($keys);
1916 for($n = count($this->a_formatting
) - 1; $n > $marker; $n--) {
1917 array_pop($this->a_formatting
);
1926 $this->emitToken(array(
1928 'type' => HTML5_Tokenizer
::STARTTAG
,
1932 /* An end tag token not covered by the previous entries */
1934 for($n = count($this->stack
) - 1; $n >= 0; $n--) {
1935 /* Initialise node to be the current node (the bottommost
1936 node of the stack). */
1937 $node = $this->stack
[$n];
1939 /* If node has the same tag name as the end tag token,
1941 if($token['name'] === $node->tagName
) {
1942 /* Generate implied end tags. */
1943 $this->generateImpliedEndTags();
1945 /* If the tag name of the end tag token does not
1946 match the tag name of the current node, this is a
1948 // XERROR: implement this
1950 /* Pop all the nodes from the current node up to
1951 node, including node, then stop these steps. */
1954 $pop = array_pop($this->stack
);
1955 } while ($pop !== $node);
1959 $category = $this->getElementCategory($node);
1961 if($category !== self
::FORMATTING
&& $category !== self
::PHRASING
) {
1962 /* Otherwise, if node is in neither the formatting
1963 category nor the phrasing category, then this is a
1964 parse error. Stop this algorithm. The end tag token
1966 $this->ignored
= true;
1971 /* Set node to the previous entry in the stack of open elements. Loop. */
1979 case self
::IN_CDATA_RCDATA
:
1981 $token['type'] === HTML5_Tokenizer
::CHARACTER
||
1982 $token['type'] === HTML5_Tokenizer
::SPACECHARACTER
1984 $this->insertText($token['data']);
1985 } elseif ($token['type'] === HTML5_Tokenizer
::EOF
) {
1987 /* If the current node is a script element, mark the script
1988 * element as "already executed". */
1989 // probably not necessary
1990 array_pop($this->stack
);
1991 $this->mode
= $this->original_mode
;
1992 $this->emitToken($token);
1993 } elseif ($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'script') {
1994 array_pop($this->stack
);
1995 $this->mode
= $this->original_mode
;
1996 // we're ignoring all of the execution stuff
1997 } elseif ($token['type'] === HTML5_Tokenizer
::ENDTAG
) {
1998 array_pop($this->stack
);
1999 $this->mode
= $this->original_mode
;
2003 case self
::IN_TABLE
:
2004 $clear = array('html', 'table');
2006 /* A character token */
2007 if ($token['type'] === HTML5_Tokenizer
::CHARACTER
||
2008 $token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
2009 /* Let the pending table character tokens
2010 * be an empty list of tokens. */
2011 $this->pendingTableCharacters
= "";
2012 $this->pendingTableCharactersDirty
= false;
2013 /* Let the original insertion mode be the current
2014 * insertion mode. */
2015 $this->original_mode
= $this->mode
;
2016 /* Switch the insertion mode to
2017 * "in table text" and
2018 * reprocess the token. */
2019 $this->mode
= self
::IN_TABLE_TEXT
;
2020 $this->emitToken($token);
2022 /* A comment token */
2023 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
2024 /* Append a Comment node to the current node with the data
2025 attribute set to the data given in the comment token. */
2026 $this->insertComment($token['data']);
2028 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
2031 /* A start tag whose tag name is "caption" */
2032 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2033 $token['name'] === 'caption') {
2034 /* Clear the stack back to a table context. */
2035 $this->clearStackToTableContext($clear);
2037 /* Insert a marker at the end of the list of active
2038 formatting elements. */
2039 $this->a_formatting
[] = self
::MARKER
;
2041 /* Insert an HTML element for the token, then switch the
2042 insertion mode to "in caption". */
2043 $this->insertElement($token);
2044 $this->mode
= self
::IN_CAPTION
;
2046 /* A start tag whose tag name is "colgroup" */
2047 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2048 $token['name'] === 'colgroup') {
2049 /* Clear the stack back to a table context. */
2050 $this->clearStackToTableContext($clear);
2052 /* Insert an HTML element for the token, then switch the
2053 insertion mode to "in column group". */
2054 $this->insertElement($token);
2055 $this->mode
= self
::IN_COLUMN_GROUP
;
2057 /* A start tag whose tag name is "col" */
2058 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2059 $token['name'] === 'col') {
2060 $this->emitToken(array(
2061 'name' => 'colgroup',
2062 'type' => HTML5_Tokenizer
::STARTTAG
,
2066 $this->emitToken($token);
2068 /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2069 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& in_array($token['name'],
2070 array('tbody', 'tfoot', 'thead'))) {
2071 /* Clear the stack back to a table context. */
2072 $this->clearStackToTableContext($clear);
2074 /* Insert an HTML element for the token, then switch the insertion
2075 mode to "in table body". */
2076 $this->insertElement($token);
2077 $this->mode
= self
::IN_TABLE_BODY
;
2079 /* A start tag whose tag name is one of: "td", "th", "tr" */
2080 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2081 in_array($token['name'], array('td', 'th', 'tr'))) {
2082 /* Act as if a start tag token with the tag name "tbody" had been
2083 seen, then reprocess the current token. */
2084 $this->emitToken(array(
2086 'type' => HTML5_Tokenizer
::STARTTAG
,
2090 $this->emitToken($token);
2092 /* A start tag whose tag name is "table" */
2093 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2094 $token['name'] === 'table') {
2095 /* Parse error. Act as if an end tag token with the tag name "table"
2096 had been seen, then, if that token wasn't ignored, reprocess the
2098 $this->emitToken(array(
2100 'type' => HTML5_Tokenizer
::ENDTAG
2103 if (!$this->ignored
) $this->emitToken($token);
2105 /* An end tag whose tag name is "table" */
2106 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2107 $token['name'] === 'table') {
2108 /* If the stack of open elements does not have an element in table
2109 scope with the same tag name as the token, this is a parse error.
2110 Ignore the token. (fragment case) */
2111 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2112 $this->ignored
= true;
2117 $node = array_pop($this->stack
);
2118 } while ($node->tagName
!== 'table');
2120 /* Reset the insertion mode appropriately. */
2121 $this->resetInsertionMode();
2124 /* An end tag whose tag name is one of: "body", "caption", "col",
2125 "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2126 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& in_array($token['name'],
2127 array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2128 'tfoot', 'th', 'thead', 'tr'))) {
2129 // Parse error. Ignore the token.
2131 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2132 ($token['name'] === 'style' || $token['name'] === 'script')) {
2133 $this->processWithRulesFor($token, self
::IN_HEAD
);
2135 } elseif ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'input' &&
2136 // assignment is intentional
2137 /* If the token does not have an attribute with the name "type", or
2138 * if it does, but that attribute's value is not an ASCII
2139 * case-insensitive match for the string "hidden", then: act as
2140 * described in the "anything else" entry below. */
2141 ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
2142 // I.e., if it's an input with the type attribute == 'hidden'
2145 $this->insertElement($token);
2146 array_pop($this->stack
);
2147 } elseif ($token['type'] === HTML5_Tokenizer
::EOF
) {
2148 /* If the current node is not the root html element, then this is a parse error. */
2149 if (end($this->stack
)->tagName
!== 'html') {
2150 // Note: It can only be the current node in the fragment case.
2156 /* Parse error. Process the token as if the insertion mode was "in
2157 body", with the following exception: */
2159 $old = $this->foster_parent
;
2160 $this->foster_parent
= true;
2161 $this->processWithRulesFor($token, self
::IN_BODY
);
2162 $this->foster_parent
= $old;
2166 case self
::IN_TABLE_TEXT
:
2167 /* A character token */
2168 if($token['type'] === HTML5_Tokenizer
::CHARACTER
) {
2169 /* Append the character token to the pending table
2170 * character tokens list. */
2171 $this->pendingTableCharacters
.= $token['data'];
2172 $this->pendingTableCharactersDirty
= true;
2173 } elseif ($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
2174 $this->pendingTableCharacters
.= $token['data'];
2177 if ($this->pendingTableCharacters
!== '' && is_string($this->pendingTableCharacters
)) {
2178 /* If any of the tokens in the pending table character tokens list
2179 * are character tokens that are not one of U+0009 CHARACTER
2180 * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
2181 * U+0020 SPACE, then reprocess those character tokens using the
2182 * rules given in the "anything else" entry in the "in table"
2184 if ($this->pendingTableCharactersDirty
) {
2185 /* Parse error. Process the token using the rules for the
2186 * "in body" insertion mode, except that if the current
2187 * node is a table, tbody, tfoot, thead, or tr element,
2188 * then, whenever a node would be inserted into the current
2189 * node, it must instead be foster parented. */
2191 $old = $this->foster_parent
;
2192 $this->foster_parent
= true;
2193 $text_token = array(
2194 'type' => HTML5_Tokenizer
::CHARACTER
,
2195 'data' => $this->pendingTableCharacters
,
2197 $this->processWithRulesFor($text_token, self
::IN_BODY
);
2198 $this->foster_parent
= $old;
2200 /* Otherwise, insert the characters given by the pending table
2201 * character tokens list into the current node. */
2203 $this->insertText($this->pendingTableCharacters
);
2205 $this->pendingTableCharacters
= null;
2206 $this->pendingTableCharactersNull
= null;
2209 /* Switch the insertion mode to the original insertion mode and
2210 * reprocess the token.
2212 $this->mode
= $this->original_mode
;
2213 $this->emitToken($token);
2217 case self
::IN_CAPTION
:
2218 /* An end tag whose tag name is "caption" */
2219 if($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'caption') {
2220 /* If the stack of open elements does not have an element in table
2221 scope with the same tag name as the token, this is a parse error.
2222 Ignore the token. (fragment case) */
2223 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2224 $this->ignored
= true;
2229 /* Generate implied end tags. */
2230 $this->generateImpliedEndTags();
2232 /* Now, if the current node is not a caption element, then this
2233 is a parse error. */
2234 // XERROR: implement
2236 /* Pop elements from this stack until a caption element has
2237 been popped from the stack. */
2239 $node = array_pop($this->stack
);
2240 } while ($node->tagName
!== 'caption');
2242 /* Clear the list of active formatting elements up to the last
2244 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2246 /* Switch the insertion mode to "in table". */
2247 $this->mode
= self
::IN_TABLE
;
2250 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2251 "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2253 } elseif(($token['type'] === HTML5_Tokenizer
::STARTTAG
&& in_array($token['name'],
2254 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2255 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2256 $token['name'] === 'table')) {
2257 /* Parse error. Act as if an end tag with the tag name "caption"
2258 had been seen, then, if that token wasn't ignored, reprocess the
2260 $this->emitToken(array(
2261 'name' => 'caption',
2262 'type' => HTML5_Tokenizer
::ENDTAG
2265 if (!$this->ignored
) $this->emitToken($token);
2267 /* An end tag whose tag name is one of: "body", "col", "colgroup",
2268 "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2269 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& in_array($token['name'],
2270 array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
2272 // Parse error. Ignore the token.
2273 $this->ignored
= true;
2277 /* Process the token as if the insertion mode was "in body". */
2278 $this->processWithRulesFor($token, self
::IN_BODY
);
2282 case self
::IN_COLUMN_GROUP
:
2283 /* A character token that is one of U+0009 CHARACTER TABULATION,
2284 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2286 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
2287 /* Append the character to the current node. */
2288 $this->insertText($token['data']);
2290 /* A comment token */
2291 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
2292 /* Append a Comment node to the current node with the data
2293 attribute set to the data given in the comment token. */
2294 $this->insertToken($token['data']);
2296 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
2299 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
2300 $this->processWithRulesFor($token, self
::IN_BODY
);
2302 /* A start tag whose tag name is "col" */
2303 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'col') {
2304 /* Insert a col element for the token. Immediately pop the current
2305 node off the stack of open elements. */
2306 $this->insertElement($token);
2307 array_pop($this->stack
);
2308 // XERROR: Acknowledge the token's self-closing flag, if it is set.
2310 /* An end tag whose tag name is "colgroup" */
2311 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2312 $token['name'] === 'colgroup') {
2313 /* If the current node is the root html element, then this is a
2314 parse error, ignore the token. (fragment case) */
2315 if(end($this->stack
)->tagName
=== 'html') {
2316 $this->ignored
= true;
2318 /* Otherwise, pop the current node (which will be a colgroup
2319 element) from the stack of open elements. Switch the insertion
2320 mode to "in table". */
2322 array_pop($this->stack
);
2323 $this->mode
= self
::IN_TABLE
;
2326 /* An end tag whose tag name is "col" */
2327 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'col') {
2328 /* Parse error. Ignore the token. */
2329 $this->ignored
= true;
2331 /* An end-of-file token */
2332 /* If the current node is the root html element */
2333 } elseif($token['type'] === HTML5_Tokenizer
::EOF
&& end($this->stack
)->tagName
=== 'html') {
2338 /* Act as if an end tag with the tag name "colgroup" had been seen,
2339 and then, if that token wasn't ignored, reprocess the current token. */
2340 $this->emitToken(array(
2341 'name' => 'colgroup',
2342 'type' => HTML5_Tokenizer
::ENDTAG
2345 if (!$this->ignored
) $this->emitToken($token);
2349 case self
::IN_TABLE_BODY
:
2350 $clear = array('tbody', 'tfoot', 'thead', 'html');
2352 /* A start tag whose tag name is "tr" */
2353 if($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'tr') {
2354 /* Clear the stack back to a table body context. */
2355 $this->clearStackToTableContext($clear);
2357 /* Insert a tr element for the token, then switch the insertion
2358 mode to "in row". */
2359 $this->insertElement($token);
2360 $this->mode
= self
::IN_ROW
;
2362 /* A start tag whose tag name is one of: "th", "td" */
2363 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2364 ($token['name'] === 'th' || $token['name'] === 'td')) {
2365 /* Parse error. Act as if a start tag with the tag name "tr" had
2366 been seen, then reprocess the current token. */
2367 $this->emitToken(array(
2369 'type' => HTML5_Tokenizer
::STARTTAG
,
2373 $this->emitToken($token);
2375 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2376 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2377 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2378 /* If the stack of open elements does not have an element in table
2379 scope with the same tag name as the token, this is a parse error.
2380 Ignore the token. */
2381 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2383 $this->ignored
= true;
2387 /* Clear the stack back to a table body context. */
2388 $this->clearStackToTableContext($clear);
2390 /* Pop the current node from the stack of open elements. Switch
2391 the insertion mode to "in table". */
2392 array_pop($this->stack
);
2393 $this->mode
= self
::IN_TABLE
;
2396 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2397 "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
2398 } elseif(($token['type'] === HTML5_Tokenizer
::STARTTAG
&& in_array($token['name'],
2399 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2400 ($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'table')) {
2401 /* If the stack of open elements does not have a tbody, thead, or
2402 tfoot element in table scope, this is a parse error. Ignore the
2403 token. (fragment case) */
2404 if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self
::SCOPE_TABLE
)) {
2406 $this->ignored
= true;
2410 /* Clear the stack back to a table body context. */
2411 $this->clearStackToTableContext($clear);
2413 /* Act as if an end tag with the same tag name as the current
2414 node ("tbody", "tfoot", or "thead") had been seen, then
2415 reprocess the current token. */
2416 $this->emitToken(array(
2417 'name' => end($this->stack
)->tagName
,
2418 'type' => HTML5_Tokenizer
::ENDTAG
2421 $this->emitToken($token);
2424 /* An end tag whose tag name is one of: "body", "caption", "col",
2425 "colgroup", "html", "td", "th", "tr" */
2426 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& in_array($token['name'],
2427 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
2428 /* Parse error. Ignore the token. */
2429 $this->ignored
= true;
2433 /* Process the token as if the insertion mode was "in table". */
2434 $this->processWithRulesFor($token, self
::IN_TABLE
);
2439 $clear = array('tr', 'html');
2441 /* A start tag whose tag name is one of: "th", "td" */
2442 if($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2443 ($token['name'] === 'th' || $token['name'] === 'td')) {
2444 /* Clear the stack back to a table row context. */
2445 $this->clearStackToTableContext($clear);
2447 /* Insert an HTML element for the token, then switch the insertion
2448 mode to "in cell". */
2449 $this->insertElement($token);
2450 $this->mode
= self
::IN_CELL
;
2452 /* Insert a marker at the end of the list of active formatting
2454 $this->a_formatting
[] = self
::MARKER
;
2456 /* An end tag whose tag name is "tr" */
2457 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'tr') {
2458 /* If the stack of open elements does not have an element in table
2459 scope with the same tag name as the token, this is a parse error.
2460 Ignore the token. (fragment case) */
2461 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2463 $this->ignored
= true;
2467 /* Clear the stack back to a table row context. */
2468 $this->clearStackToTableContext($clear);
2470 /* Pop the current node (which will be a tr element) from the
2471 stack of open elements. Switch the insertion mode to "in table
2473 array_pop($this->stack
);
2474 $this->mode
= self
::IN_TABLE_BODY
;
2477 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2478 "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
2479 } elseif(($token['type'] === HTML5_Tokenizer
::STARTTAG
&& in_array($token['name'],
2480 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
2481 ($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'table')) {
2482 /* Act as if an end tag with the tag name "tr" had been seen, then,
2483 if that token wasn't ignored, reprocess the current token. */
2484 $this->emitToken(array(
2486 'type' => HTML5_Tokenizer
::ENDTAG
2488 if (!$this->ignored
) $this->emitToken($token);
2490 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2491 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2492 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2493 /* If the stack of open elements does not have an element in table
2494 scope with the same tag name as the token, this is a parse error.
2495 Ignore the token. */
2496 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2497 $this->ignored
= true;
2501 /* Otherwise, act as if an end tag with the tag name "tr" had
2502 been seen, then reprocess the current token. */
2503 $this->emitToken(array(
2505 'type' => HTML5_Tokenizer
::ENDTAG
2508 $this->emitToken($token);
2511 /* An end tag whose tag name is one of: "body", "caption", "col",
2512 "colgroup", "html", "td", "th" */
2513 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& in_array($token['name'],
2514 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
2515 /* Parse error. Ignore the token. */
2516 $this->ignored
= true;
2520 /* Process the token as if the insertion mode was "in table". */
2521 $this->processWithRulesFor($token, self
::IN_TABLE
);
2526 /* An end tag whose tag name is one of: "td", "th" */
2527 if($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2528 ($token['name'] === 'td' || $token['name'] === 'th')) {
2529 /* If the stack of open elements does not have an element in table
2530 scope with the same tag name as that of the token, then this is a
2531 parse error and the token must be ignored. */
2532 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2533 $this->ignored
= true;
2537 /* Generate implied end tags, except for elements with the same
2538 tag name as the token. */
2539 $this->generateImpliedEndTags(array($token['name']));
2541 /* Now, if the current node is not an element with the same tag
2542 name as the token, then this is a parse error. */
2543 // XERROR: Implement parse error code
2545 /* Pop elements from this stack until an element with the same
2546 tag name as the token has been popped from the stack. */
2548 $node = array_pop($this->stack
);
2549 } while ($node->tagName
!== $token['name']);
2551 /* Clear the list of active formatting elements up to the last
2553 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2555 /* Switch the insertion mode to "in row". (The current node
2556 will be a tr element at this point.) */
2557 $this->mode
= self
::IN_ROW
;
2560 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2561 "tbody", "td", "tfoot", "th", "thead", "tr" */
2562 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& in_array($token['name'],
2563 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2565 /* If the stack of open elements does not have a td or th element
2566 in table scope, then this is a parse error; ignore the token.
2568 if(!$this->elementInScope(array('td', 'th'), self
::SCOPE_TABLE
)) {
2570 $this->ignored
= true;
2572 /* Otherwise, close the cell (see below) and reprocess the current
2576 $this->emitToken($token);
2579 /* An end tag whose tag name is one of: "body", "caption", "col",
2580 "colgroup", "html" */
2581 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& in_array($token['name'],
2582 array('body', 'caption', 'col', 'colgroup', 'html'))) {
2583 /* Parse error. Ignore the token. */
2584 $this->ignored
= true;
2586 /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
2588 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& in_array($token['name'],
2589 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2590 /* If the stack of open elements does not have a td or th element
2591 in table scope, then this is a parse error; ignore the token.
2593 if(!$this->elementInScope(array('td', 'th'), self
::SCOPE_TABLE
)) {
2595 $this->ignored
= true;
2597 /* Otherwise, close the cell (see below) and reprocess the current
2601 $this->emitToken($token);
2606 /* Process the token as if the insertion mode was "in body". */
2607 $this->processWithRulesFor($token, self
::IN_BODY
);
2611 case self
::IN_SELECT
:
2612 /* Handle the token as follows: */
2614 /* A character token */
2616 $token['type'] === HTML5_Tokenizer
::CHARACTER
||
2617 $token['type'] === HTML5_Tokenizer
::SPACECHARACTER
2619 /* Append the token's character to the current node. */
2620 $this->insertText($token['data']);
2622 /* A comment token */
2623 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
2624 /* Append a Comment node to the current node with the data
2625 attribute set to the data given in the comment token. */
2626 $this->insertComment($token['data']);
2628 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
2631 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
2632 $this->processWithRulesFor($token, self
::INBODY
);
2634 /* A start tag token whose tag name is "option" */
2635 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2636 $token['name'] === 'option') {
2637 /* If the current node is an option element, act as if an end tag
2638 with the tag name "option" had been seen. */
2639 if(end($this->stack
)->tagName
=== 'option') {
2640 $this->emitToken(array(
2642 'type' => HTML5_Tokenizer
::ENDTAG
2646 /* Insert an HTML element for the token. */
2647 $this->insertElement($token);
2649 /* A start tag token whose tag name is "optgroup" */
2650 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2651 $token['name'] === 'optgroup') {
2652 /* If the current node is an option element, act as if an end tag
2653 with the tag name "option" had been seen. */
2654 if(end($this->stack
)->tagName
=== 'option') {
2655 $this->emitToken(array(
2657 'type' => HTML5_Tokenizer
::ENDTAG
2661 /* If the current node is an optgroup element, act as if an end tag
2662 with the tag name "optgroup" had been seen. */
2663 if(end($this->stack
)->tagName
=== 'optgroup') {
2664 $this->emitToken(array(
2665 'name' => 'optgroup',
2666 'type' => HTML5_Tokenizer
::ENDTAG
2670 /* Insert an HTML element for the token. */
2671 $this->insertElement($token);
2673 /* An end tag token whose tag name is "optgroup" */
2674 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2675 $token['name'] === 'optgroup') {
2676 /* First, if the current node is an option element, and the node
2677 immediately before it in the stack of open elements is an optgroup
2678 element, then act as if an end tag with the tag name "option" had
2680 $elements_in_stack = count($this->stack
);
2682 if($this->stack
[$elements_in_stack - 1]->tagName
=== 'option' &&
2683 $this->stack
[$elements_in_stack - 2]->tagName
=== 'optgroup') {
2684 $this->emitToken(array(
2686 'type' => HTML5_Tokenizer
::ENDTAG
2690 /* If the current node is an optgroup element, then pop that node
2691 from the stack of open elements. Otherwise, this is a parse error,
2692 ignore the token. */
2693 if(end($this->stack
)->tagName
=== 'optgroup') {
2694 array_pop($this->stack
);
2697 $this->ignored
= true;
2700 /* An end tag token whose tag name is "option" */
2701 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2702 $token['name'] === 'option') {
2703 /* If the current node is an option element, then pop that node
2704 from the stack of open elements. Otherwise, this is a parse error,
2705 ignore the token. */
2706 if(end($this->stack
)->tagName
=== 'option') {
2707 array_pop($this->stack
);
2710 $this->ignored
= true;
2713 /* An end tag whose tag name is "select" */
2714 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2715 $token['name'] === 'select') {
2716 /* If the stack of open elements does not have an element in table
2717 scope with the same tag name as the token, this is a parse error.
2718 Ignore the token. (fragment case) */
2719 if(!$this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2720 $this->ignored
= true;
2725 /* Pop elements from the stack of open elements until a select
2726 element has been popped from the stack. */
2728 $node = array_pop($this->stack
);
2729 } while ($node->tagName
!== 'select');
2731 /* Reset the insertion mode appropriately. */
2732 $this->resetInsertionMode();
2735 /* A start tag whose tag name is "select" */
2736 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'select') {
2737 /* Parse error. Act as if the token had been an end tag with the
2738 tag name "select" instead. */
2739 $this->emitToken(array(
2741 'type' => HTML5_Tokenizer
::ENDTAG
2744 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2745 ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) {
2747 $this->emitToken(array(
2749 'type' => HTML5_Tokenizer
::ENDTAG
2751 $this->emitToken($token);
2753 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'script') {
2754 $this->processWithRulesFor($token, self
::IN_HEAD
);
2756 } elseif($token['type'] === HTML5_Tokenizer
::EOF
) {
2757 // XERROR: If the current node is not the root html element, then this is a parse error.
2762 /* Parse error. Ignore the token. */
2763 $this->ignored
= true;
2767 case self
::IN_SELECT_IN_TABLE
:
2769 if($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2770 in_array($token['name'], array('caption', 'table', 'tbody',
2771 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2773 $this->emitToken(array(
2775 'type' => HTML5_Tokenizer
::ENDTAG
,
2777 $this->emitToken($token);
2779 /* An end tag whose tag name is one of: "caption", "table", "tbody",
2780 "tfoot", "thead", "tr", "td", "th" */
2781 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2782 in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2786 /* If the stack of open elements has an element in table scope with
2787 the same tag name as that of the token, then act as if an end tag
2788 with the tag name "select" had been seen, and reprocess the token.
2789 Otherwise, ignore the token. */
2790 if($this->elementInScope($token['name'], self
::SCOPE_TABLE
)) {
2791 $this->emitToken(array(
2793 'type' => HTML5_Tokenizer
::ENDTAG
2796 $this->emitToken($token);
2798 $this->ignored
= true;
2801 $this->processWithRulesFor($token, self
::IN_SELECT
);
2805 case self
::IN_FOREIGN_CONTENT
:
2806 if ($token['type'] === HTML5_Tokenizer
::CHARACTER
||
2807 $token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
2808 $this->insertText($token['data']);
2809 } elseif ($token['type'] === HTML5_Tokenizer
::COMMENT
) {
2810 $this->insertComment($token['data']);
2811 } elseif ($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
2812 // XERROR: parse error
2813 } elseif ($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
2814 $token['name'] === 'script' && end($this->stack
)->tagName
=== 'script' &&
2816 end($this->stack
)->namespaceURI
=== self
::NS_SVG
) {
2817 array_pop($this->stack
);
2818 // a bunch of script running mumbo jumbo
2820 ($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2822 $token['name'] !== 'mglyph' &&
2823 $token['name'] !== 'malignmark' &&
2825 end($this->stack
)->namespaceURI
=== self
::NS_MATHML
&&
2826 in_array(end($this->stack
)->tagName
, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2829 $token['name'] === 'svg' &&
2831 end($this->stack
)->namespaceURI
=== self
::NS_MATHML
&&
2832 end($this->stack
)->tagName
=== 'annotation-xml'
2836 end($this->stack
)->namespaceURI
=== self
::NS_SVG
&&
2837 in_array(end($this->stack
)->tagName
, array('foreignObject', 'desc', 'title'))
2841 end($this->stack
)->namespaceURI
=== self
::NS_HTML
2843 ) || $token['type'] === HTML5_Tokenizer
::ENDTAG
2845 $this->processWithRulesFor($token, $this->secondary_mode
);
2846 /* If, after doing so, the insertion mode is still "in foreign
2847 * content", but there is no element in scope that has a namespace
2848 * other than the HTML namespace, switch the insertion mode to the
2849 * secondary insertion mode. */
2850 if ($this->mode
=== self
::IN_FOREIGN_CONTENT
) {
2852 // this basically duplicates elementInScope()
2853 for ($i = count($this->stack
) - 1; $i >= 0; $i--) {
2855 $node = $this->stack
[$i];
2856 if ($node->namespaceURI
!== self
::NS_HTML
) {
2859 } elseif (in_array($node->tagName
, array('table', 'html',
2860 'applet', 'caption', 'td', 'th', 'button', 'marquee',
2861 'object')) || ($node->tagName
=== 'foreignObject' &&
2862 $node->namespaceURI
=== self
::NS_SVG
)) {
2867 $this->mode
= $this->secondary_mode
;
2870 } elseif ($token['type'] === HTML5_Tokenizer
::EOF
|| (
2871 $token['type'] === HTML5_Tokenizer
::STARTTAG
&&
2872 (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
2873 "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
2874 "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
2875 "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
2876 "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
2877 "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2878 $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2879 // XERROR: parse error
2881 $node = array_pop($this->stack
);
2883 } while ($node->namespaceURI
!== self
::NS_HTML
);
2884 $this->stack
[] = $node;
2885 $this->mode
= $this->secondary_mode
;
2886 $this->emitToken($token);
2887 } elseif ($token['type'] === HTML5_Tokenizer
::STARTTAG
) {
2888 static $svg_lookup = array(
2889 'altglyph' => 'altGlyph',
2890 'altglyphdef' => 'altGlyphDef',
2891 'altglyphitem' => 'altGlyphItem',
2892 'animatecolor' => 'animateColor',
2893 'animatemotion' => 'animateMotion',
2894 'animatetransform' => 'animateTransform',
2895 'clippath' => 'clipPath',
2896 'feblend' => 'feBlend',
2897 'fecolormatrix' => 'feColorMatrix',
2898 'fecomponenttransfer' => 'feComponentTransfer',
2899 'fecomposite' => 'feComposite',
2900 'feconvolvematrix' => 'feConvolveMatrix',
2901 'fediffuselighting' => 'feDiffuseLighting',
2902 'fedisplacementmap' => 'feDisplacementMap',
2903 'fedistantlight' => 'feDistantLight',
2904 'feflood' => 'feFlood',
2905 'fefunca' => 'feFuncA',
2906 'fefuncb' => 'feFuncB',
2907 'fefuncg' => 'feFuncG',
2908 'fefuncr' => 'feFuncR',
2909 'fegaussianblur' => 'feGaussianBlur',
2910 'feimage' => 'feImage',
2911 'femerge' => 'feMerge',
2912 'femergenode' => 'feMergeNode',
2913 'femorphology' => 'feMorphology',
2914 'feoffset' => 'feOffset',
2915 'fepointlight' => 'fePointLight',
2916 'fespecularlighting' => 'feSpecularLighting',
2917 'fespotlight' => 'feSpotLight',
2918 'fetile' => 'feTile',
2919 'feturbulence' => 'feTurbulence',
2920 'foreignobject' => 'foreignObject',
2921 'glyphref' => 'glyphRef',
2922 'lineargradient' => 'linearGradient',
2923 'radialgradient' => 'radialGradient',
2924 'textpath' => 'textPath',
2927 $current = end($this->stack
);
2928 if ($current->namespaceURI
=== self
::NS_MATHML
) {
2929 $token = $this->adjustMathMLAttributes($token);
2931 if ($current->namespaceURI
=== self
::NS_SVG
&&
2932 isset($svg_lookup[$token['name']])) {
2933 $token['name'] = $svg_lookup[$token['name']];
2935 if ($current->namespaceURI
=== self
::NS_SVG
) {
2936 $token = $this->adjustSVGAttributes($token);
2938 $token = $this->adjustForeignAttributes($token);
2939 $this->insertForeignElement($token, $current->namespaceURI
);
2940 if (isset($token['self-closing'])) {
2941 array_pop($this->stack
);
2942 // XERROR: acknowledge self-closing flag
2947 case self
::AFTER_BODY
:
2948 /* Handle the token as follows: */
2950 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2951 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2953 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
2954 /* Process the token as it would be processed if the insertion mode
2956 $this->processWithRulesFor($token, self
::IN_BODY
);
2958 /* A comment token */
2959 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
2960 /* Append a Comment node to the first element in the stack of open
2961 elements (the html element), with the data attribute set to the
2962 data given in the comment token. */
2964 $comment = $this->dom
->createComment($token['data']);
2965 $this->stack
[0]->appendChild($comment);
2967 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
2970 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
2971 $this->processWithRulesFor($token, self
::IN_BODY
);
2973 /* An end tag with the tag name "html" */
2974 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&& $token['name'] === 'html') {
2975 /* If the parser was originally created as part of the HTML
2976 * fragment parsing algorithm, this is a parse error; ignore
2977 * the token. (fragment case) */
2978 $this->ignored
= true;
2979 // XERROR: implement this
2981 $this->mode
= self
::AFTER_AFTER_BODY
;
2983 } elseif($token['type'] === HTML5_Tokenizer
::EOF
) {
2988 /* Parse error. Set the insertion mode to "in body" and reprocess
2990 $this->mode
= self
::IN_BODY
;
2991 $this->emitToken($token);
2995 case self
::IN_FRAMESET
:
2996 /* Handle the token as follows: */
2998 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2999 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3000 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3001 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
3002 /* Append the character to the current node. */
3003 $this->insertText($token['data']);
3005 /* A comment token */
3006 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
3007 /* Append a Comment node to the current node with the data
3008 attribute set to the data given in the comment token. */
3009 $this->insertComment($token['data']);
3011 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
3014 /* A start tag with the tag name "frameset" */
3015 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
3016 $token['name'] === 'frameset') {
3017 $this->insertElement($token);
3019 /* An end tag with the tag name "frameset" */
3020 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
3021 $token['name'] === 'frameset') {
3022 /* If the current node is the root html element, then this is a
3023 parse error; ignore the token. (fragment case) */
3024 if(end($this->stack
)->tagName
=== 'html') {
3025 $this->ignored
= true;
3029 /* Otherwise, pop the current node from the stack of open
3031 array_pop($this->stack
);
3033 /* If the parser was not originally created as part of the HTML
3034 * fragment parsing algorithm (fragment case), and the current
3035 * node is no longer a frameset element, then switch the
3036 * insertion mode to "after frameset". */
3037 $this->mode
= self
::AFTER_FRAMESET
;
3040 /* A start tag with the tag name "frame" */
3041 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
3042 $token['name'] === 'frame') {
3043 /* Insert an HTML element for the token. */
3044 $this->insertElement($token);
3046 /* Immediately pop the current node off the stack of open elements. */
3047 array_pop($this->stack
);
3049 // XERROR: Acknowledge the token's self-closing flag, if it is set.
3051 /* A start tag with the tag name "noframes" */
3052 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
3053 $token['name'] === 'noframes') {
3054 /* Process the token using the rules for the "in head" insertion mode. */
3055 $this->processwithRulesFor($token, self
::IN_HEAD
);
3057 } elseif($token['type'] === HTML5_Tokenizer
::EOF
) {
3058 // XERROR: If the current node is not the root html element, then this is a parse error.
3062 /* Parse error. Ignore the token. */
3063 $this->ignored
= true;
3067 case self
::AFTER_FRAMESET
:
3068 /* Handle the token as follows: */
3070 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3071 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3072 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3073 if($token['type'] === HTML5_Tokenizer
::SPACECHARACTER
) {
3074 /* Append the character to the current node. */
3075 $this->insertText($token['data']);
3077 /* A comment token */
3078 } elseif($token['type'] === HTML5_Tokenizer
::COMMENT
) {
3079 /* Append a Comment node to the current node with the data
3080 attribute set to the data given in the comment token. */
3081 $this->insertComment($token['data']);
3083 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
) {
3086 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html') {
3087 $this->processWithRulesFor($token, self
::IN_BODY
);
3089 /* An end tag with the tag name "html" */
3090 } elseif($token['type'] === HTML5_Tokenizer
::ENDTAG
&&
3091 $token['name'] === 'html') {
3092 $this->mode
= self
::AFTER_AFTER_FRAMESET
;
3094 /* A start tag with the tag name "noframes" */
3095 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&&
3096 $token['name'] === 'noframes') {
3097 $this->processWithRulesFor($token, self
::IN_HEAD
);
3099 } elseif($token['type'] === HTML5_Tokenizer
::EOF
) {
3104 /* Parse error. Ignore the token. */
3105 $this->ignored
= true;
3109 case self
::AFTER_AFTER_BODY
:
3110 /* A comment token */
3111 if($token['type'] === HTML5_Tokenizer
::COMMENT
) {
3112 /* Append a Comment node to the Document object with the data
3113 attribute set to the data given in the comment token. */
3115 $comment = $this->dom
->createComment($token['data']);
3116 $this->dom
->appendChild($comment);
3118 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
||
3119 $token['type'] === HTML5_Tokenizer
::SPACECHARACTER
||
3120 ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html')) {
3121 $this->processWithRulesFor($token, self
::IN_BODY
);
3123 /* An end-of-file token */
3124 } elseif($token['type'] === HTML5_Tokenizer
::EOF
) {
3128 $this->mode
= self
::IN_BODY
;
3129 $this->emitToken($token);
3133 case self
::AFTER_AFTER_FRAMESET
:
3134 /* A comment token */
3135 if($token['type'] === HTML5_Tokenizer
::COMMENT
) {
3136 /* Append a Comment node to the Document object with the data
3137 attribute set to the data given in the comment token. */
3139 $comment = $this->dom
->createComment($token['data']);
3140 $this->dom
->appendChild($comment);
3142 } elseif($token['type'] === HTML5_Tokenizer
::DOCTYPE
||
3143 $token['type'] === HTML5_Tokenizer
::SPACECHARACTER
||
3144 ($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'html')) {
3145 $this->processWithRulesFor($token, self
::IN_BODY
);
3147 /* An end-of-file token */
3148 } elseif($token['type'] === HTML5_Tokenizer
::EOF
) {
3150 } elseif($token['type'] === HTML5_Tokenizer
::STARTTAG
&& $token['name'] === 'nofrmaes') {
3151 $this->processWithRulesFor($token, self
::IN_HEAD
);
3157 // end funky indenting
/**
 * Creates an element for a start tag token and, unless $append is
 * false, inserts it at the current insertion point and pushes it onto
 * the stack of open elements.
 *
 * @param array $token  Start tag token; 'name' is read, and 'attr' (a
 *                      list of name/value pairs) when it is non-empty.
 * @param bool  $append Whether to attach the element to the tree and
 *                      make it the current node.
 * @return mixed The element returned by createElementNS().
 */
private function insertElement($token, $append = true) {
    // A tag name containing a colon (anywhere but position 0, since
    // strpos() yields 0 there) is created in the XHTML namespace.
    $namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
    $el = $this->dom->createElementNS($namespaceURI, $token['name']);

    if (!empty($token['attr'])) {
        foreach ($token['attr'] as $attr) {
            // mike@macgirvin.com 2011-11-17, check attribute name for
            // validity (ignoring extenders and combiners) as illegal chars
            // in names causes everything to abort.
            // The tail of the name is optional ("*"), so that
            // one-character attribute names are accepted as well.
            $valid = preg_match('/^[a-zA-Z\_\:][\-a-zA-Z0-9\_\:\.]*$/', $attr['name']);

            // Only the first occurrence of an attribute is kept.
            if ($attr['name'] && !$el->hasAttribute($attr['name']) && $valid) {
                $el->setAttribute($attr['name'], $attr['value']);
            }
        }
    }

    if ($append) {
        $this->appendToRealParent($el);
        $this->stack[] = $el;
    }

    return $el;
}
/**
 * Inserts character data as a text node at the current insertion
 * point.
 *
 * Empty data is ignored. While $this->ignore_lf_token is truthy, one
 * leading line feed is stripped first; if nothing remains, no node is
 * created.
 *
 * @param string $data Character data to insert.
 */
private function insertText($data) {
    if ($data === '') return;

    if ($this->ignore_lf_token && $data[0] === "\n") {
        $data = substr($data, 1);
        // substr() reports "nothing left" as false before PHP 8.0 and
        // as '' from PHP 8.0 on; both mean there is no text to insert.
        if ($data === false || $data === '') return;
    }

    $text = $this->dom->createTextNode($data);
    $this->appendToRealParent($text);
}
/**
 * Creates a comment node holding $data and inserts it at the current
 * insertion point.
 *
 * @param string $data Comment text.
 */
private function insertComment($data) {
    $this->appendToRealParent($this->dom->createComment($data));
}
/**
 * Inserts a node at the current insertion point.
 *
 * Normally the node becomes the last child of the current node (the
 * last entry of the stack of open elements). When foster parenting is
 * switched on and the current node is a table, tbody, tfoot, thead or
 * tr element, the node is handed to fosterParent() instead.
 *
 * @param mixed $node DOM node to insert.
 */
private function appendToRealParent($node) {
    $current = end($this->stack);
    $table_parts = array('table', 'tbody', 'tfoot', 'thead', 'tr');

    // this is only for the foster_parent case
    if ($this->foster_parent && in_array($current->tagName, $table_parts)) {
        $this->fosterParent($node);
        return;
    }

    $current->appendChild($node);
}
/**
 * Tells whether the stack of open elements has an element with the
 * given tag name in the requested scope.
 *
 * The stack is walked from the current node downwards. The walk ends
 * in a match when the tag name is found, and in a failure as soon as a
 * scope boundary element is met first.
 *
 * @param string|array $el    Tag name, or a list of tag names of which
 *                            any one may match.
 * @param mixed        $scope One of self::SCOPE, self::SCOPE_TABLE or
 *                            self::SCOPE_LISTITEM.
 * @return bool True when a matching element is in scope.
 */
private function elementInScope($el, $scope = self::SCOPE) {
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $scope)) {
                return true;
            }
        }

        return false;
    }

    // Boundaries that apply to every scope except "table scope".
    $boundaries = array('applet', 'caption', 'td', 'th', 'button',
        'marquee', 'object');

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];
        $tag = $node->tagName;

        /* If node is the target node, terminate in a match state. */
        if ($tag === $el) {
            return true;
        }

        // these are the common states for all scopes
        if ($tag === 'table' || $tag === 'html') {
            return false;
        }

        // these are valid for "in scope" and "in list item scope"
        if ($scope !== self::SCOPE_TABLE &&
            (in_array($tag, $boundaries, true) ||
            ($tag === 'foreignObject' && $node->namespaceURI === self::NS_SVG))) {
            return false;
        }

        // these are valid for "in list item scope"
        if ($scope === self::SCOPE_LISTITEM && ($tag === 'ol' || $tag === 'ul')) {
            return false;
        }
    }

    // Reached only when the stack holds neither the target nor a
    // boundary (e.g. an empty stack): report "not in scope" explicitly
    // instead of falling off the end of the function.
    return false;
}
/**
 * Reconstructs the active formatting elements: every entry at the end
 * of the list of active formatting elements that is no longer on the
 * stack of open elements is cloned, inserted at the current insertion
 * point, pushed onto the stack and substituted for the old entry.
 *
 * The list is assumed to be indexed 0..n-1 without gaps, as the
 * index-based access of this algorithm has always required.
 *
 * @return false|null False when there was nothing to reconstruct.
 */
private function reconstructActiveFormattingElements() {
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $formatting_elements = count($this->a_formatting);

    if ($formatting_elements === 0) {
        return false;
    }

    /* 2./3. If the last (most recently added) entry is a marker, or an
    element that is in the stack of open elements, there is nothing to
    reconstruct. */
    $entry = end($this->a_formatting);

    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    /* 4.-6. Rewind: move towards the start of the list while the entry
    before the current one is neither a marker nor an open element.
    Afterwards $a is the index of the first entry that must be
    re-created; the marker or open element that stopped the rewind is
    itself never cloned. */
    $a = $formatting_elements - 1;
    while ($a > 0) {
        $earlier = $this->a_formatting[$a - 1];
        if ($earlier === self::MARKER || in_array($earlier, $this->stack, true)) {
            break;
        }
        $a--;
    }

    /* 7.-11. Advance: from that entry up to the last one, clone the
    element, append the clone to the current node, push it onto the
    stack of open elements and replace the list entry with the clone. */
    for (; $a < $formatting_elements; $a++) {
        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $this->a_formatting[$a]->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        $this->appendToRealParent($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;
    }
}
/**
 * Clears the list of active formatting elements up to the last
 * marker: entries are removed from the end of the list until a marker
 * has been removed.
 *
 * If the list holds no marker at all it is emptied completely and the
 * method returns, rather than spinning on an empty list.
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    while ($this->a_formatting) {
        /* 1./2. Take the last (most recently added) entry off the list
        of active formatting elements. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
/**
 * Generates implied end tags: pops the current node off the stack of
 * open elements for as long as its tag name is in the implied-end-tag
 * list and not excluded.
 *
 * NOTE(review): the list used here is dc, dd, ds, dt, li, p, td, th,
 * tr, whereas the spec excerpt this code was written against names
 * option, optgroup, rp and rt instead of td, th and tr. The list is
 * left as it was; confirm which set is intended.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 */
private function generateImpliedEndTags($exclude = array()) {
    $elements = array_diff(
        array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'),
        $exclude
    );

    // Stop on an empty stack instead of reading a property of false.
    while ($this->stack && in_array(end($this->stack)->tagName, $elements, true)) {
        array_pop($this->stack);
    }
}
/**
 * Classifies an element by tag name as special, scoping, formatting
 * or, when it is in none of those lists, phrasing.
 *
 * @param mixed $node Element whose tagName is looked up. A backtrace
 *                    is printed when this is not an object.
 * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING
 *               or self::PHRASING.
 */
private function getElementCategory($node) {
    if (!is_object($node)) {
        debug_print_backtrace();
    }

    $name = $node->tagName;

    // Checked in this order; the first list containing the name wins.
    if (in_array($name, $this->special)) {
        return self::SPECIAL;
    }
    if (in_array($name, $this->scoping)) {
        return self::SCOPING;
    }
    if (in_array($name, $this->formatting)) {
        return self::FORMATTING;
    }

    return self::PHRASING;
}
/**
 * Clears the stack of open elements back to a table context: pops
 * elements until the current node's tag name is one of $elements.
 *
 * Popping also stops when the stack runs empty, so a stack without any
 * of the requested elements cannot cause an endless loop.
 *
 * @param array $elements Tag names at which popping stops.
 */
private function clearStackToTableContext($elements) {
    while ($this->stack) {
        $name = end($this->stack)->tagName;

        if (in_array($name, $elements)) {
            break;
        }

        array_pop($this->stack);
    }
}
/**
 * Resets the insertion mode appropriately by walking the stack of
 * open elements from the current node down to the first entry and
 * picking the mode that matches the first recognised element.
 *
 * @param mixed $context Context element (fragment case). It stands in
 *                       for the first entry of the stack of open
 *                       elements; may be null when there is none.
 */
private function resetInsertionMode($context = null) {
    /* 1. Let last be false. */
    $last = false;

    // Steps 4-10: elements recognised by tag name alone.
    $table_modes = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TABLE_BODY,
        'thead'    => self::IN_TABLE_BODY,
        'tfoot'    => self::IN_TABLE_BODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_COLUMN_GROUP,
        'table'    => self::IN_TABLE,
    );

    // Steps 12-14, checked only after the namespace test of step 11.
    $body_modes = array(
        /* "in body"! not "in head"! */
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAMESET,
    );

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        /* 2. Let node be the last node in the stack of open elements. */
        $node = $this->stack[$n];

        /* 3. If node is the first node in the stack of open elements, then
         * set last to true and set node to the context element. (fragment
         * case) */
        if ($this->stack[0]->isSameNode($node)) {
            $last = true;
            $node = $context;
        }

        // Without a context element there is nothing left to inspect;
        // this is step 16 ("in body"), reached without touching null.
        if ($node === null) {
            $this->mode = self::IN_BODY;
            break;
        }

        $name = $node->tagName;

        if (array_key_exists($name, $table_modes)) {
            $this->mode = $table_modes[$name];
            break;
        }

        /* 11. If node is an element from the MathML namespace or the SVG
         * namespace, then switch the insertion mode to "in foreign
         * content", let the secondary insertion mode be "in body", and
         * abort these steps. */
        if ($node->namespaceURI === self::NS_SVG ||
            $node->namespaceURI === self::NS_MATHML) {
            $this->mode = self::IN_FOREIGN_CONTENT;
            $this->secondary_mode = self::IN_BODY;
            break;
        }

        if (array_key_exists($name, $body_modes)) {
            $this->mode = $body_modes[$name];
            break;
        }

        /* 15. If node is an html element, then: if the head element
        pointer is null, switch the insertion mode to "before head",
        otherwise, switch the insertion mode to "after head". In either
        case, abort these steps. (fragment case) */
        if ($name === 'html') {
            $this->mode = ($this->head_pointer === null)
                ? self::BEFORE_HEAD
                : self::AFTER_HEAD;
            break;
        }

        /* 16. If last is true, then set the insertion mode to "in body"
        and abort these steps. (fragment case) */
        if ($last) {
            $this->mode = self::IN_BODY;
            break;
        }
    }
}
/**
 * Closes the open table cell: if the stack of open elements has a td
 * or th element in table scope, an end tag token with that tag name is
 * emitted. td is tried first, and at most one token is emitted.
 */
private function closeCell() {
    foreach (array('td', 'th') as $cell) {
        if (!$this->elementInScope($cell, self::SCOPE_TABLE)) {
            continue;
        }

        $end_tag = array(
            'name' => $cell,
            'type' => HTML5_Tokenizer::ENDTAG
        );
        $this->emitToken($end_tag);

        return;
    }
}
/**
 * Processes a token "using the rules for" another insertion mode.
 *
 * The token is handed to emitToken() together with the mode whose
 * rules apply; the current insertion mode is not assigned here, so it
 * only changes if those rules themselves switch it.
 *
 * @param array $token Token to process.
 * @param mixed $mode  Insertion mode whose rules are to be used.
 * @return mixed Whatever emitToken() returns.
 */
private function processWithRulesFor($token, $mode) {
    $result = $this->emitToken($token, $mode);

    return $result;
}
/**
 * Inserts an element for the token and switches to CDATA handling:
 * the current insertion mode is saved as the original mode, the
 * insertion mode becomes IN_CDATA_RCDATA and the content model is set
 * to the tokenizer's CDATA.
 *
 * @param array $token Start tag token of the element.
 */
private function insertCDATAElement($token) {
    $this->insertElement($token);

    // Save the mode first; it is overwritten on the next line.
    $this->original_mode = $this->mode;
    $this->mode = self::IN_CDATA_RCDATA;

    $this->content_model = HTML5_Tokenizer::CDATA;
}
/**
 * Inserts an element for the token and switches to RCDATA handling:
 * the current insertion mode is saved as the original mode, the
 * insertion mode becomes IN_CDATA_RCDATA and the content model is set
 * to the tokenizer's RCDATA.
 *
 * @param array $token Start tag token of the element.
 */
private function insertRCDATAElement($token) {
    $this->insertElement($token);

    // Save the mode first; it is overwritten on the next line.
    $this->original_mode = $this->mode;
    $this->mode = self::IN_CDATA_RCDATA;

    $this->content_model = HTML5_Tokenizer::RCDATA;
}
/**
 * Looks up an attribute value on a token.
 *
 * @param array  $token Token whose 'attr' list is searched.
 * @param string $key   Attribute name to look for.
 * @return mixed The value of the last attribute with that name, or
 *               false when the token has no 'attr' entry or no
 *               attribute of that name.
 */
private function getAttr($token, $key) {
    $value = false;

    if (isset($token['attr'])) {
        foreach ($token['attr'] as $pair) {
            // No early exit: a later duplicate overrides an earlier one.
            if ($pair['name'] === $key) {
                $value = $pair['value'];
            }
        }
    }

    return $value;
}
/**
 * Returns the current table: the last table element in the stack of
 * open elements, or, if the stack holds no table element (fragment
 * case), the first element of the stack (the html element).
 *
 * @return mixed The element found.
 */
private function getCurrentTable() {
    // Newest entries first, so the first hit is the last table opened.
    foreach (array_reverse($this->stack) as $element) {
        if ($element->tagName === 'table') {
            return $element;
        }
    }

    return $this->stack[0];
}
/**
 * Determines the foster parent element.
 *
 * - No table element in the stack of open elements (innerHTML case):
 *   the first element of the stack (the html element).
 * - The last table element has a parent that is an element: that
 *   parent.
 * - The last table element has no parent, or its parent is not an
 *   element: the entry before that table in the stack of open
 *   elements.
 *
 * @return mixed The foster parent element.
 */
private function getFosterParent() {
    $table = null;

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        if ($this->stack[$n]->tagName === 'table') {
            $table = $this->stack[$n];
            break;
        }
    }

    if ($table === null) {
        return $this->stack[0];
    }

    // The parent only qualifies when it is an element; a non-null
    // parent of another node type must not be returned.
    $parent = $table->parentNode;
    if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
        return $parent;
    }

    // $n is still the table's index here. Fall back to the first entry
    // if the table is itself the first entry, so that no negative
    // index is read.
    return $this->stack[$n > 0 ? $n - 1 : 0];
}
/**
 * Foster parents a node.
 *
 * If the foster parent element is the parent of the last table element
 * in the stack of open elements, the node is inserted immediately
 * before that table; otherwise it is appended to the foster parent
 * element.
 *
 * @param mixed $node DOM node to insert.
 */
public function fosterParent($node) {
    $foster_parent = $this->getFosterParent();
    // almost equivalent to last table element, except it can be html
    $table = $this->getCurrentTable();

    // A table without a parent cannot have the foster parent as its
    // parent; checking for null first avoids calling isSameNode() on
    // null for such a detached table.
    $table_parent = ($table->tagName === 'table') ? $table->parentNode : null;

    if ($table_parent !== null && $table_parent->isSameNode($foster_parent)) {
        $foster_parent->insertBefore($node, $table);
    } else {
        $foster_parent->appendChild($node);
    }
}
/**
 * For debugging, prints the tag names on the stack of open elements,
 * first entry first, on a single line.
 */
private function printStack() {
    $names = array();

    foreach ($this->stack as $element) {
        array_push($names, $element->tagName);
    }

    $line = implode(', ', $names);
    echo " -> stack [" . $line . "]\n";
}
/**
 * For debugging, prints active formatting elements on a single line;
 * markers are shown as "MARKER". Prints nothing when the list is
 * empty.
 */
private function printActiveFormattingElements() {
    if (!$this->a_formatting) {
        return;
    }

    $names = array();

    foreach ($this->a_formatting as $node) {
        $names[] = ($node === self::MARKER) ? 'MARKER' : $node->tagName;
    }

    $line = implode(', ', $names);
    echo " -> active formatting [" . $line . "]\n";
}
/**
 * Tells whether the current table carries a non-empty `tainted`
 * property.
 *
 * @return bool True when the property is set and truthy.
 */
public function currentTableIsTainted() {
    $table = $this->getCurrentTable();

    return isset($table->tainted) && (bool) $table->tainted;
}
/**
 * Sets up the tree constructor for building a fragment.
 *
 * The parser is always flagged as a fragment parser. When a context is
 * given, a context element is created from it, the tokenizer content
 * model is chosen from its tag name, a fresh html root is installed as
 * the only entry of the stack of open elements, the insertion mode is
 * reset and the form element pointer is initialised.
 *
 * @param string|null $context Tag name of the context element, if any.
 */
public function setupContext($context = null) {
    $this->fragment = true;

    if (!$context) {
        return;
    }

    $context = $this->dom->createElementNS(self::NS_HTML, $context);

    /* 4.1. Set the HTML parser's tokenization stage's content model
     * flag according to the context element, as follows: */
    $tag = $context->tagName;
    $cdata_tags = array('style', 'script', 'xmp', 'iframe', 'noembed', 'noframes');

    if ($tag === 'title' || $tag === 'textarea') {
        $this->content_model = HTML5_Tokenizer::RCDATA;
    } elseif (in_array($tag, $cdata_tags, true)) {
        $this->content_model = HTML5_Tokenizer::CDATA;
    } elseif ($tag === 'noscript') {
        // XSCRIPT: assuming scripting is enabled
        $this->content_model = HTML5_Tokenizer::CDATA;
    } elseif ($tag === 'plaintext') {
        $this->content_model = HTML5_Tokenizer::PLAINTEXT;
    }

    /* 4.2. Let root be a new html element with no attributes. */
    $root = $this->dom->createElementNS(self::NS_HTML, 'html');
    $this->root = $root;

    /* 4.3 Append the element root to the Document node created above. */
    $this->dom->appendChild($root);

    /* 4.4 Set up the parser's stack of open elements so that it
     * contains just the single element root. */
    $this->stack = array($root);

    /* 4.5 Reset the parser's insertion mode appropriately. */
    $this->resetInsertionMode($context);

    /* 4.6 Set the parser's form element pointer to the nearest node
     * to the context element that is a form element (going straight up
     * the ancestor chain, and including the element itself, if it is a
     * form element), or, if there is no such form element, to null. */
    for ($node = $context; $node; $node = $node->parentNode) {
        if ($node->tagName === 'form') {
            $this->form_pointer = $node;
            break;
        }
    }
}
/**
 * Adjusts MathML attributes: the attribute name "definitionurl" is
 * rewritten to its mixed-case form "definitionURL".
 *
 * @param array $token Start tag token; its 'attr' list, when present,
 *                     holds name/value pairs.
 * @return array The token with adjusted attribute names.
 */
public function adjustMathMLAttributes($token) {
    // Like the sibling helpers, tolerate a token without attributes.
    if (empty($token['attr'])) {
        return $token;
    }

    foreach ($token['attr'] as &$kp) {
        if ($kp['name'] === 'definitionurl') {
            $kp['name'] = 'definitionURL';
        }
    }
    // Drop the reference so that $kp no longer aliases the last entry.
    unset($kp);

    return $token;
}
/**
 * Adjusts SVG attributes: attribute names arriving in lower case are
 * rewritten to their mixed-case SVG spelling.
 *
 * @param array $token Start tag token; its 'attr' list, when present,
 *                     holds name/value pairs.
 * @return array The token with adjusted attribute names.
 */
public function adjustSVGAttributes($token) {
    static $lookup = array(
        'attributename' => 'attributeName',
        'attributetype' => 'attributeType',
        'basefrequency' => 'baseFrequency',
        'baseprofile' => 'baseProfile',
        'calcmode' => 'calcMode',
        'clippathunits' => 'clipPathUnits',
        'contentscripttype' => 'contentScriptType',
        'contentstyletype' => 'contentStyleType',
        'diffuseconstant' => 'diffuseConstant',
        'edgemode' => 'edgeMode',
        'externalresourcesrequired' => 'externalResourcesRequired',
        'filterres' => 'filterRes',
        'filterunits' => 'filterUnits',
        'glyphref' => 'glyphRef',
        'gradienttransform' => 'gradientTransform',
        'gradientunits' => 'gradientUnits',
        'kernelmatrix' => 'kernelMatrix',
        'kernelunitlength' => 'kernelUnitLength',
        'keypoints' => 'keyPoints',
        'keysplines' => 'keySplines',
        'keytimes' => 'keyTimes',
        'lengthadjust' => 'lengthAdjust',
        'limitingconeangle' => 'limitingConeAngle',
        'markerheight' => 'markerHeight',
        'markerunits' => 'markerUnits',
        'markerwidth' => 'markerWidth',
        'maskcontentunits' => 'maskContentUnits',
        'maskunits' => 'maskUnits',
        'numoctaves' => 'numOctaves',
        'pathlength' => 'pathLength',
        'patterncontentunits' => 'patternContentUnits',
        'patterntransform' => 'patternTransform',
        'patternunits' => 'patternUnits',
        'pointsatx' => 'pointsAtX',
        'pointsaty' => 'pointsAtY',
        'pointsatz' => 'pointsAtZ',
        'preservealpha' => 'preserveAlpha',
        'preserveaspectratio' => 'preserveAspectRatio',
        'primitiveunits' => 'primitiveUnits',
        'refx' => 'refX',
        'refy' => 'refY',
        'repeatcount' => 'repeatCount',
        'repeatdur' => 'repeatDur',
        'requiredextensions' => 'requiredExtensions',
        'requiredfeatures' => 'requiredFeatures',
        'specularconstant' => 'specularConstant',
        'specularexponent' => 'specularExponent',
        'spreadmethod' => 'spreadMethod',
        'startoffset' => 'startOffset',
        'stddeviation' => 'stdDeviation',
        'stitchtiles' => 'stitchTiles',
        'surfacescale' => 'surfaceScale',
        'systemlanguage' => 'systemLanguage',
        'tablevalues' => 'tableValues',
        'targetx' => 'targetX',
        'targety' => 'targetY',
        'textlength' => 'textLength',
        'viewbox' => 'viewBox',
        'viewtarget' => 'viewTarget',
        'xchannelselector' => 'xChannelSelector',
        'ychannelselector' => 'yChannelSelector',
        'zoomandpan' => 'zoomAndPan',
    );

    // Like the sibling helpers, tolerate a token without attributes.
    if (empty($token['attr'])) {
        return $token;
    }

    foreach ($token['attr'] as &$kp) {
        if (isset($lookup[$kp['name']])) {
            $kp['name'] = $lookup[$kp['name']];
        }
    }
    // Drop the reference so that $kp no longer aliases the last entry.
    unset($kp);

    return $token;
}
/**
 * Adjusts foreign attributes: the name of each known xlink:, xml: and
 * xmlns attribute is replaced by an array of
 * (prefix, local name, namespace URI).
 *
 * Names that are not in the table stay plain strings.
 *
 * @param array $token Start tag token; its 'attr' list, when present,
 *                     holds name/value pairs.
 * @return array The token with adjusted attribute names.
 */
public function adjustForeignAttributes($token) {
    static $lookup = array(
        'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
        'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
        'xlink:href' => array('xlink', 'href', self::NS_XLINK),
        'xlink:role' => array('xlink', 'role', self::NS_XLINK),
        'xlink:show' => array('xlink', 'show', self::NS_XLINK),
        'xlink:title' => array('xlink', 'title', self::NS_XLINK),
        'xlink:type' => array('xlink', 'type', self::NS_XLINK),
        'xml:base' => array('xml', 'base', self::NS_XML),
        'xml:lang' => array('xml', 'lang', self::NS_XML),
        'xml:space' => array('xml', 'space', self::NS_XML),
        'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
        'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
    );

    // Like the sibling helpers, tolerate a token without attributes.
    if (empty($token['attr'])) {
        return $token;
    }

    foreach ($token['attr'] as &$kp) {
        if (isset($lookup[$kp['name']])) {
            $kp['name'] = $lookup[$kp['name']];
        }
    }
    // Drop the reference so that $kp no longer aliases the last entry.
    unset($kp);

    return $token;
}
/**
 * Inserts a foreign (non-HTML namespace) element for a start tag
 * token: the element is created in the given namespace, its
 * attributes are copied, and it is inserted at the current insertion
 * point and pushed onto the stack of open elements.
 *
 * Attribute names may be plain strings or the
 * (prefix, local name, namespace URI) arrays produced by
 * adjustForeignAttributes(). An attribute that DOM refuses (for
 * instance because of an illegal character in its name) is skipped,
 * in line with insertElement(), instead of aborting the whole parse.
 *
 * @param array  $token        Start tag token ('name', optional 'attr').
 * @param string $namespaceURI Namespace in which to create the element.
 */
public function insertForeignElement($token, $namespaceURI) {
    $el = $this->dom->createElementNS($namespaceURI, $token['name']);

    if (!empty($token['attr'])) {
        foreach ($token['attr'] as $kp) {
            $attr = $kp['name'];
            if (is_array($attr)) {
                $ns = $attr[2];
                $attr = $attr[1];
            } else {
                $ns = self::NS_HTML;
            }

            // Only the first occurrence of an attribute is kept.
            if ($el->hasAttributeNS($ns, $attr)) {
                continue;
            }

            try {
                // XSKETCHY: work around godawful libxml bug
                if ($ns === self::NS_XLINK) {
                    $el->setAttribute('xlink:'.$attr, $kp['value']);
                } elseif ($ns === self::NS_HTML) {
                    // Another godawful libxml bug
                    $el->setAttribute($attr, $kp['value']);
                } else {
                    $el->setAttributeNS($ns, $attr, $kp['value']);
                }
            } catch (DOMException $e) {
                // Illegal attribute name: drop this attribute only.
                continue;
            }
        }
    }

    $this->appendToRealParent($el);
    $this->stack[] = $el;
    // XERROR: see below
    /* If the newly created element has an xmlns attribute in the XMLNS
     * namespace whose value is not exactly the same as the element's
     * namespace, that is a parse error. Similarly, if the newly created
     * element has an xmlns:xlink attribute in the XMLNS namespace whose
     * value is not the XLink Namespace, that is a parse error. */
}
/**
 * Normalizes the document and returns the parse result.
 *
 * @return mixed The document itself when a full document was parsed;
 *               for a fragment, the child nodes of the fragment root
 *               if there is one, otherwise the child nodes of the
 *               document.
 */
public function save() {
    $this->dom->normalize();

    if (!$this->fragment) {
        return $this->dom;
    }

    $container = $this->root ? $this->root : $this->dom;

    return $container->childNodes;
}