]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/libraries/html5/TreeBuilder.php
htmlawed via composer
[github/wallabag/wallabag.git] / inc / 3rdparty / libraries / html5 / TreeBuilder.php
CommitLineData
42c80841
NL
1<?php
2
3/*
4
5Copyright 2007 Jeroen van der Meer <http://jero.net/>
6Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
7
8Permission is hereby granted, free of charge, to any person obtaining a
9copy of this software and associated documentation files (the
10"Software"), to deal in the Software without restriction, including
11without limitation the rights to use, copy, modify, merge, publish,
12distribute, sublicense, and/or sell copies of the Software, and to
13permit persons to whom the Software is furnished to do so, subject to
14the following conditions:
15
16The above copyright notice and this permission notice shall be included
17in all copies or substantial portions of the Software.
18
19THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27*/
28
29// Tags for FIX ME!!!: (in order of priority)
30// XXX - should be fixed NAO!
31// XERROR - with regards to parse errors
32// XSCRIPT - with regards to scripting mode
33// XENCODING - with regards to encoding (for reparsing tests)
34// XDOM - DOM specific code (tagName is explicitly not marked).
35// this is not (yet) in helper functions.
36
37class HTML5_TreeBuilder {
38 public $stack = array();
39 public $content_model;
40
41 private $mode;
42 private $original_mode;
43 private $secondary_mode;
44 private $dom;
45 // Whether or not normal insertion of nodes should actually foster
46 // parent (used in one case in spec)
47 private $foster_parent = false;
48 private $a_formatting = array();
49
50 private $head_pointer = null;
51 private $form_pointer = null;
52
53 private $flag_frameset_ok = true;
54 private $flag_force_quirks = false;
55 private $ignored = false;
56 private $quirks_mode = null;
57 // this gets to 2 when we want to ignore the next lf character, and
58 // is decremented at the beginning of each processed token (this way,
59 // code can check for (bool)$ignore_lf_token, but it phases out
60 // appropriately)
61 private $ignore_lf_token = 0;
62 private $fragment = false;
63 private $root;
64
65 private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
66 private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
67 // dl and ds are speculative
68 private $special = array('address','area','article','aside','base','basefont','bgsound',
69 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
70 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
71 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
72 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
73 'p','param','plaintext','pre','script','select','spacer','style',
74 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
75
76 private $pendingTableCharacters;
77 private $pendingTableCharactersDirty;
78
79 // Tree construction modes
80 const INITIAL = 0;
81 const BEFORE_HTML = 1;
82 const BEFORE_HEAD = 2;
83 const IN_HEAD = 3;
84 const IN_HEAD_NOSCRIPT = 4;
85 const AFTER_HEAD = 5;
86 const IN_BODY = 6;
87 const IN_CDATA_RCDATA = 7;
88 const IN_TABLE = 8;
89 const IN_TABLE_TEXT = 9;
90 const IN_CAPTION = 10;
91 const IN_COLUMN_GROUP = 11;
92 const IN_TABLE_BODY = 12;
93 const IN_ROW = 13;
94 const IN_CELL = 14;
95 const IN_SELECT = 15;
96 const IN_SELECT_IN_TABLE= 16;
97 const IN_FOREIGN_CONTENT= 17;
98 const AFTER_BODY = 18;
99 const IN_FRAMESET = 19;
100 const AFTER_FRAMESET = 20;
101 const AFTER_AFTER_BODY = 21;
102 const AFTER_AFTER_FRAMESET = 22;
103
/**
 * Converts a magic number to a readable constant name. Use for debugging.
 *
 * Several integer constants of this class share a value (INITIAL and
 * SCOPE are both 0, BEFORE_HTML and SCOPE_LISTITEM are both 1, ...).
 * The first constant reported by reflection for a given value is kept
 * (declaration order in practice), so insertion-mode numbers resolve to
 * their mode names instead of being overwritten by the SCOPE_* constants
 * declared later in the class.
 *
 * @param int $number Value of one of this class's integer constants.
 * @return string|null Name of the matching constant, or null when no
 *                     integer constant of the class has that value.
 */
private function strConst($number) {
    // Built once per process and reused on every later call.
    static $lookup = null;
    if ($lookup === null) {
        $lookup = array();
        $r = new ReflectionClass('HTML5_TreeBuilder');
        foreach ($r->getConstants() as $const => $num) {
            // Only integer constants are magic numbers; the NS_*
            // constants are strings or null and are skipped.
            if (!is_int($num)) continue;
            // Do not let a later constant with the same value replace
            // the name that was recorded first.
            if (!isset($lookup[$num])) {
                $lookup[$num] = $const;
            }
        }
    }
    // Unknown values yield null rather than an undefined-offset notice.
    return isset($lookup[$number]) ? $lookup[$number] : null;
}
120
121 // The different types of elements.
122 const SPECIAL = 100;
123 const SCOPING = 101;
124 const FORMATTING = 102;
125 const PHRASING = 103;
126
127 // Quirks modes in $quirks_mode
128 const NO_QUIRKS = 200;
129 const QUIRKS_MODE = 201;
130 const LIMITED_QUIRKS_MODE = 202;
131
132 // Marker to be placed in $a_formatting
133 const MARKER = 300;
134
135 // Namespaces for foreign content
136 const NS_HTML = null; // to prevent DOM from requiring NS on everything
3ec62cf9 137 const NS_XHTML = 'http://www.w3.org/1999/xhtml';
42c80841
NL
138 const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
139 const NS_SVG = 'http://www.w3.org/2000/svg';
140 const NS_XLINK = 'http://www.w3.org/1999/xlink';
141 const NS_XML = 'http://www.w3.org/XML/1998/namespace';
142 const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
143
144 // Different types of scopes to test for elements
145 const SCOPE = 0;
146 const SCOPE_LISTITEM = 1;
147 const SCOPE_TABLE = 2;
148
/**
 * Puts the tree builder in the INITIAL insertion mode and creates the
 * DOMDocument that parsed nodes are appended to.
 */
public function __construct() {
    // Configure the document first, then store it on the builder.
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->dom = $document;
    $this->mode = self::INITIAL;
}
158
159 // Process tag tokens
160 public function emitToken($token, $mode = null) {
161 // XXX: ignore parse errors... why are we emitting them, again?
162 if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
163 if ($mode === null) $mode = $this->mode;
164
165 /*
166 $backtrace = debug_backtrace();
167 if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
168 echo $this->strConst($mode);
169 if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
170 echo "\n ";
171 token_dump($token);
172 $this->printStack();
173 $this->printActiveFormattingElements();
174 if ($this->foster_parent) echo " -> this is a foster parent mode\n";
175 if ($this->flag_frameset_ok) echo " -> frameset ok\n";
176 */
177
178 if ($this->ignore_lf_token) $this->ignore_lf_token--;
179 $this->ignored = false;
180 // indenting is a little wonky, this can be changed later on
181 switch ($mode) {
182
183 case self::INITIAL:
184
185 /* A character token that is one of U+0009 CHARACTER TABULATION,
186 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
187 if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
188 /* Ignore the token. */
189 $this->ignored = true;
190 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
191 if (
192 $token['name'] !== 'html' || !empty($token['public']) ||
193 !empty($token['system']) || $token !== 'about:legacy-compat'
194 ) {
195 /* If the DOCTYPE token's name is not a case-sensitive match
196 * for the string "html", or if the token's public identifier
197 * is not missing, or if the token's system identifier is
198 * neither missing nor a case-sensitive match for the string
199 * "about:legacy-compat", then there is a parse error (this
200 * is the DOCTYPE parse error). */
201 // DOCTYPE parse error
202 }
203 /* Append a DocumentType node to the Document node, with the name
204 * attribute set to the name given in the DOCTYPE token, or the
205 * empty string if the name was missing; the publicId attribute
206 * set to the public identifier given in the DOCTYPE token, or
207 * the empty string if the public identifier was missing; the
208 * systemId attribute set to the system identifier given in the
209 * DOCTYPE token, or the empty string if the system identifier
210 * was missing; and the other attributes specific to
211 * DocumentType objects set to null and empty lists as
212 * appropriate. Associate the DocumentType node with the
213 * Document object so that it is returned as the value of the
214 * doctype attribute of the Document object. */
215 if (!isset($token['public'])) $token['public'] = null;
216 if (!isset($token['system'])) $token['system'] = null;
217 // XDOM
218 // Yes this is hacky. I'm kind of annoyed that I can't appendChild
219 // a doctype to DOMDocument. Maybe I haven't chanted the right
220 // syllables.
221 $impl = new DOMImplementation();
222 // This call can fail for particularly pathological cases (namely,
223 // the qualifiedName parameter ($token['name']) could be missing.
224 if ($token['name']) {
225 $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
226 $this->dom->appendChild($doctype);
227 } else {
228 // It looks like libxml's not actually *able* to express this case.
229 // So... don't.
230 $this->dom->emptyDoctype = true;
231 }
232 $public = is_null($token['public']) ? false : strtolower($token['public']);
233 $system = is_null($token['system']) ? false : strtolower($token['system']);
234 $publicStartsWithForQuirks = array(
235 "+//silmaril//dtd html pro v0r11 19970101//",
236 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
237 "-//as//dtd html 3.0 aswedit + extensions//",
238 "-//ietf//dtd html 2.0 level 1//",
239 "-//ietf//dtd html 2.0 level 2//",
240 "-//ietf//dtd html 2.0 strict level 1//",
241 "-//ietf//dtd html 2.0 strict level 2//",
242 "-//ietf//dtd html 2.0 strict//",
243 "-//ietf//dtd html 2.0//",
244 "-//ietf//dtd html 2.1e//",
245 "-//ietf//dtd html 3.0//",
246 "-//ietf//dtd html 3.2 final//",
247 "-//ietf//dtd html 3.2//",
248 "-//ietf//dtd html 3//",
249 "-//ietf//dtd html level 0//",
250 "-//ietf//dtd html level 1//",
251 "-//ietf//dtd html level 2//",
252 "-//ietf//dtd html level 3//",
253 "-//ietf//dtd html strict level 0//",
254 "-//ietf//dtd html strict level 1//",
255 "-//ietf//dtd html strict level 2//",
256 "-//ietf//dtd html strict level 3//",
257 "-//ietf//dtd html strict//",
258 "-//ietf//dtd html//",
259 "-//metrius//dtd metrius presentational//",
260 "-//microsoft//dtd internet explorer 2.0 html strict//",
261 "-//microsoft//dtd internet explorer 2.0 html//",
262 "-//microsoft//dtd internet explorer 2.0 tables//",
263 "-//microsoft//dtd internet explorer 3.0 html strict//",
264 "-//microsoft//dtd internet explorer 3.0 html//",
265 "-//microsoft//dtd internet explorer 3.0 tables//",
266 "-//netscape comm. corp.//dtd html//",
267 "-//netscape comm. corp.//dtd strict html//",
268 "-//o'reilly and associates//dtd html 2.0//",
269 "-//o'reilly and associates//dtd html extended 1.0//",
270 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
271 "-//spyglass//dtd html 2.0 extended//",
272 "-//sq//dtd html 2.0 hotmetal + extensions//",
273 "-//sun microsystems corp.//dtd hotjava html//",
274 "-//sun microsystems corp.//dtd hotjava strict html//",
275 "-//w3c//dtd html 3 1995-03-24//",
276 "-//w3c//dtd html 3.2 draft//",
277 "-//w3c//dtd html 3.2 final//",
278 "-//w3c//dtd html 3.2//",
279 "-//w3c//dtd html 3.2s draft//",
280 "-//w3c//dtd html 4.0 frameset//",
281 "-//w3c//dtd html 4.0 transitional//",
282 "-//w3c//dtd html experimental 19960712//",
283 "-//w3c//dtd html experimental 970421//",
284 "-//w3c//dtd w3 html//",
285 "-//w3o//dtd w3 html 3.0//",
286 "-//webtechs//dtd mozilla html 2.0//",
287 "-//webtechs//dtd mozilla html//",
288 );
289 $publicSetToForQuirks = array(
290 "-//w3o//dtd w3 html strict 3.0//",
291 "-/w3c/dtd html 4.0 transitional/en",
292 "html",
293 );
294 $publicStartsWithAndSystemForQuirks = array(
295 "-//w3c//dtd html 4.01 frameset//",
296 "-//w3c//dtd html 4.01 transitional//",
297 );
298 $publicStartsWithForLimitedQuirks = array(
299 "-//w3c//dtd xhtml 1.0 frameset//",
300 "-//w3c//dtd xhtml 1.0 transitional//",
301 );
302 $publicStartsWithAndSystemForLimitedQuirks = array(
303 "-//w3c//dtd html 4.01 frameset//",
304 "-//w3c//dtd html 4.01 transitional//",
305 );
306 // first, do easy checks
307 if (
308 !empty($token['force-quirks']) ||
309 strtolower($token['name']) !== 'html'
310 ) {
311 $this->quirks_mode = self::QUIRKS_MODE;
312 } else {
313 do {
314 if ($system) {
315 foreach ($publicStartsWithAndSystemForQuirks as $x) {
316 if (strncmp($public, $x, strlen($x)) === 0) {
317 $this->quirks_mode = self::QUIRKS_MODE;
318 break;
319 }
320 }
321 if (!is_null($this->quirks_mode)) break;
322 foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
323 if (strncmp($public, $x, strlen($x)) === 0) {
324 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
325 break;
326 }
327 }
328 if (!is_null($this->quirks_mode)) break;
329 }
330 foreach ($publicSetToForQuirks as $x) {
331 if ($public === $x) {
332 $this->quirks_mode = self::QUIRKS_MODE;
333 break;
334 }
335 }
336 if (!is_null($this->quirks_mode)) break;
337 foreach ($publicStartsWithForLimitedQuirks as $x) {
338 if (strncmp($public, $x, strlen($x)) === 0) {
339 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
340 }
341 }
342 if (!is_null($this->quirks_mode)) break;
343 if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
344 $this->quirks_mode = self::QUIRKS_MODE;
345 break;
346 }
347 foreach ($publicStartsWithForQuirks as $x) {
348 if (strncmp($public, $x, strlen($x)) === 0) {
349 $this->quirks_mode = self::QUIRKS_MODE;
350 break;
351 }
352 }
353 if (is_null($this->quirks_mode)) {
354 $this->quirks_mode = self::NO_QUIRKS;
355 }
356 } while (false);
357 }
358 $this->mode = self::BEFORE_HTML;
359 } else {
360 // parse error
361 /* Switch the insertion mode to "before html", then reprocess the
362 * current token. */
363 $this->mode = self::BEFORE_HTML;
364 $this->quirks_mode = self::QUIRKS_MODE;
365 $this->emitToken($token);
366 }
367 break;
368
369 case self::BEFORE_HTML:
370
371 /* A DOCTYPE token */
372 if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
373 // Parse error. Ignore the token.
374 $this->ignored = true;
375
376 /* A comment token */
377 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
378 /* Append a Comment node to the Document object with the data
379 attribute set to the data given in the comment token. */
380 // XDOM
381 $comment = $this->dom->createComment($token['data']);
382 $this->dom->appendChild($comment);
383
384 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
385 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
386 or U+0020 SPACE */
387 } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
388 /* Ignore the token. */
389 $this->ignored = true;
390
391 /* A start tag whose tag name is "html" */
392 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
393 /* Create an element for the token in the HTML namespace. Append it
394 * to the Document object. Put this element in the stack of open
395 * elements. */
396 // XDOM
397 $html = $this->insertElement($token, false);
398 $this->dom->appendChild($html);
399 $this->stack[] = $html;
400
401 $this->mode = self::BEFORE_HEAD;
402
403 } else {
404 /* Create an html element. Append it to the Document object. Put
405 * this element in the stack of open elements. */
406 // XDOM
407 $html = $this->dom->createElementNS(self::NS_HTML, 'html');
408 $this->dom->appendChild($html);
409 $this->stack[] = $html;
410
411 /* Switch the insertion mode to "before head", then reprocess the
412 * current token. */
413 $this->mode = self::BEFORE_HEAD;
414 $this->emitToken($token);
415 }
416 break;
417
418 case self::BEFORE_HEAD:
419
420 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
421 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
422 or U+0020 SPACE */
423 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
424 /* Ignore the token. */
425 $this->ignored = true;
426
427 /* A comment token */
428 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
429 /* Append a Comment node to the current node with the data attribute
430 set to the data given in the comment token. */
431 $this->insertComment($token['data']);
432
433 /* A DOCTYPE token */
434 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
435 /* Parse error. Ignore the token */
436 $this->ignored = true;
437 // parse error
438
439 /* A start tag token with the tag name "html" */
440 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
441 /* Process the token using the rules for the "in body"
442 * insertion mode. */
443 $this->processWithRulesFor($token, self::IN_BODY);
444
445 /* A start tag token with the tag name "head" */
446 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
447 /* Insert an HTML element for the token. */
448 $element = $this->insertElement($token);
449
450 /* Set the head element pointer to this new element node. */
451 $this->head_pointer = $element;
452
453 /* Change the insertion mode to "in head". */
454 $this->mode = self::IN_HEAD;
455
456 /* An end tag whose tag name is one of: "head", "body", "html", "br" */
457 } elseif(
458 $token['type'] === HTML5_Tokenizer::ENDTAG && (
459 $token['name'] === 'head' || $token['name'] === 'body' ||
460 $token['name'] === 'html' || $token['name'] === 'br'
461 )) {
462 /* Act as if a start tag token with the tag name "head" and no
463 * attributes had been seen, then reprocess the current token. */
464 $this->emitToken(array(
465 'name' => 'head',
466 'type' => HTML5_Tokenizer::STARTTAG,
467 'attr' => array()
468 ));
469 $this->emitToken($token);
470
471 /* Any other end tag */
472 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
473 /* Parse error. Ignore the token. */
474 $this->ignored = true;
475
476 } else {
477 /* Act as if a start tag token with the tag name "head" and no
478 * attributes had been seen, then reprocess the current token.
479 * Note: This will result in an empty head element being
480 * generated, with the current token being reprocessed in the
481 * "after head" insertion mode. */
482 $this->emitToken(array(
483 'name' => 'head',
484 'type' => HTML5_Tokenizer::STARTTAG,
485 'attr' => array()
486 ));
487 $this->emitToken($token);
488 }
489 break;
490
491 case self::IN_HEAD:
492
493 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
494 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
495 or U+0020 SPACE. */
496 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
497 /* Insert the character into the current node. */
498 $this->insertText($token['data']);
499
500 /* A comment token */
501 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
502 /* Append a Comment node to the current node with the data attribute
503 set to the data given in the comment token. */
504 $this->insertComment($token['data']);
505
506 /* A DOCTYPE token */
507 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
508 /* Parse error. Ignore the token. */
509 $this->ignored = true;
510 // parse error
511
512 /* A start tag whose tag name is "html" */
513 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
514 $token['name'] === 'html') {
515 $this->processWithRulesFor($token, self::IN_BODY);
516
517 /* A start tag whose tag name is one of: "base", "command", "link" */
518 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
519 ($token['name'] === 'base' || $token['name'] === 'command' ||
520 $token['name'] === 'link')) {
521 /* Insert an HTML element for the token. Immediately pop the
522 * current node off the stack of open elements. */
523 $this->insertElement($token);
524 array_pop($this->stack);
525
526 // YYY: Acknowledge the token's self-closing flag, if it is set.
527
528 /* A start tag whose tag name is "meta" */
529 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
530 /* Insert an HTML element for the token. Immediately pop the
531 * current node off the stack of open elements. */
532 $this->insertElement($token);
533 array_pop($this->stack);
534
535 // XERROR: Acknowledge the token's self-closing flag, if it is set.
536
537 // XENCODING: If the element has a charset attribute, and its value is a
538 // supported encoding, and the confidence is currently tentative,
539 // then change the encoding to the encoding given by the value of
540 // the charset attribute.
541 //
542 // Otherwise, if the element has a content attribute, and applying
543 // the algorithm for extracting an encoding from a Content-Type to
544 // its value returns a supported encoding encoding, and the
545 // confidence is currently tentative, then change the encoding to
546 // the encoding encoding.
547
548 /* A start tag with the tag name "title" */
549 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
550 $this->insertRCDATAElement($token);
551
552 /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
553 * A start tag whose tag name is one of: "noframes", "style" */
554 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
555 ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
556 // XSCRIPT: Scripting flag not respected
557 $this->insertCDATAElement($token);
558
559 // XSCRIPT: Scripting flag disable not implemented
560
561 /* A start tag with the tag name "script" */
562 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
563 /* 1. Create an element for the token in the HTML namespace. */
564 $node = $this->insertElement($token, false);
565
566 /* 2. Mark the element as being "parser-inserted" */
567 // Uhhh... XSCRIPT
568
569 /* 3. If the parser was originally created for the HTML
570 * fragment parsing algorithm, then mark the script element as
571 * "already executed". (fragment case) */
572 // ditto... XSCRIPT
573
574 /* 4. Append the new element to the current node and push it onto
575 * the stack of open elements. */
576 end($this->stack)->appendChild($node);
577 $this->stack[] = $node;
578 // I guess we could squash these together
579
580 /* 6. Let the original insertion mode be the current insertion mode. */
581 $this->original_mode = $this->mode;
582 /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
583 $this->mode = self::IN_CDATA_RCDATA;
584 /* 5. Switch the tokeniser's content model flag to the CDATA state. */
585 $this->content_model = HTML5_Tokenizer::CDATA;
586
587 /* An end tag with the tag name "head" */
588 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
589 /* Pop the current node (which will be the head element) off the stack of open elements. */
590 array_pop($this->stack);
591
592 /* Change the insertion mode to "after head". */
593 $this->mode = self::AFTER_HEAD;
594
595 // Slight logic inversion here to minimize duplication
596 /* A start tag with the tag name "head". */
597 /* An end tag whose tag name is not one of: "body", "html", "br" */
598 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
599 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
600 $token['name'] !== 'body' && $token['name'] !== 'br')) {
601 // Parse error. Ignore the token.
602 $this->ignored = true;
603
604 /* Anything else */
605 } else {
606 /* Act as if an end tag token with the tag name "head" had been
607 * seen, and reprocess the current token. */
608 $this->emitToken(array(
609 'name' => 'head',
610 'type' => HTML5_Tokenizer::ENDTAG
611 ));
612
613 /* Then, reprocess the current token. */
614 $this->emitToken($token);
615 }
616 break;
617
618 case self::IN_HEAD_NOSCRIPT:
619 if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
620 // parse error
621 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
622 $this->processWithRulesFor($token, self::IN_BODY);
623 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
624 /* Pop the current node (which will be a noscript element) from the
625 * stack of open elements; the new current node will be a head
626 * element. */
627 array_pop($this->stack);
628 $this->mode = self::IN_HEAD;
629 } elseif (
630 ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
631 ($token['type'] === HTML5_Tokenizer::COMMENT) ||
632 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
633 $token['name'] === 'link' || $token['name'] === 'meta' ||
634 $token['name'] === 'noframes' || $token['name'] === 'style'))) {
635 $this->processWithRulesFor($token, self::IN_HEAD);
636 // inverted logic
637 } elseif (
638 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
639 $token['name'] === 'head' || $token['name'] === 'noscript')) ||
640 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
641 $token['name'] !== 'br')) {
642 // parse error
643 } else {
644 // parse error
645 $this->emitToken(array(
646 'type' => HTML5_Tokenizer::ENDTAG,
647 'name' => 'noscript',
648 ));
649 $this->emitToken($token);
650 }
651 break;
652
653 case self::AFTER_HEAD:
654 /* Handle the token as follows: */
655
656 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
657 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
658 or U+0020 SPACE */
659 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
660 /* Append the character to the current node. */
661 $this->insertText($token['data']);
662
663 /* A comment token */
664 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
665 /* Append a Comment node to the current node with the data attribute
666 set to the data given in the comment token. */
667 $this->insertComment($token['data']);
668
669 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
670 // parse error
671
672 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
673 $this->processWithRulesFor($token, self::IN_BODY);
674
675 /* A start tag token with the tag name "body" */
676 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
677 $this->insertElement($token);
678
679 /* Set the frameset-ok flag to "not ok". */
680 $this->flag_frameset_ok = false;
681
682 /* Change the insertion mode to "in body". */
683 $this->mode = self::IN_BODY;
684
685 /* A start tag token with the tag name "frameset" */
686 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
687 /* Insert a frameset element for the token. */
688 $this->insertElement($token);
689
690 /* Change the insertion mode to "in frameset". */
691 $this->mode = self::IN_FRAMESET;
692
693 /* A start tag token whose tag name is one of: "base", "link", "meta",
694 "script", "style", "title" */
695 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
696 array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
697 // parse error
698 /* Push the node pointed to by the head element pointer onto the
699 * stack of open elements. */
700 $this->stack[] = $this->head_pointer;
701 $this->processWithRulesFor($token, self::IN_HEAD);
702 array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
703
704 // inversion of specification
705 } elseif(
706 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
707 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
708 $token['name'] !== 'body' && $token['name'] !== 'html' &&
709 $token['name'] !== 'br')) {
710 // parse error
711
712 /* Anything else */
713 } else {
714 $this->emitToken(array(
715 'name' => 'body',
716 'type' => HTML5_Tokenizer::STARTTAG,
717 'attr' => array()
718 ));
719 $this->flag_frameset_ok = true;
720 $this->emitToken($token);
721 }
722 break;
723
724 case self::IN_BODY:
725 /* Handle the token as follows: */
726
727 switch($token['type']) {
728 /* A character token */
729 case HTML5_Tokenizer::CHARACTER:
730 case HTML5_Tokenizer::SPACECHARACTER:
731 /* Reconstruct the active formatting elements, if any. */
732 $this->reconstructActiveFormattingElements();
733
734 /* Append the token's character to the current node. */
735 $this->insertText($token['data']);
736
737 /* If the token is not one of U+0009 CHARACTER TABULATION,
738 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
739 * SPACE, then set the frameset-ok flag to "not ok". */
740 // i.e., if any of the characters is not whitespace
741 if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
742 $this->flag_frameset_ok = false;
743 }
744 break;
745
746 /* A comment token */
747 case HTML5_Tokenizer::COMMENT:
748 /* Append a Comment node to the current node with the data
749 attribute set to the data given in the comment token. */
750 $this->insertComment($token['data']);
751 break;
752
753 case HTML5_Tokenizer::DOCTYPE:
754 // parse error
755 break;
756
757 case HTML5_Tokenizer::EOF:
758 // parse error
759 break;
760
761 case HTML5_Tokenizer::STARTTAG:
762 switch($token['name']) {
763 case 'html':
764 // parse error
765 /* For each attribute on the token, check to see if the
766 * attribute is already present on the top element of the
767 * stack of open elements. If it is not, add the attribute
768 * and its corresponding value to that element. */
769 foreach($token['attr'] as $attr) {
770 if(!$this->stack[0]->hasAttribute($attr['name'])) {
771 $this->stack[0]->setAttribute($attr['name'], $attr['value']);
772 }
773 }
774 break;
775
776 case 'base': case 'command': case 'link': case 'meta': case 'noframes':
777 case 'script': case 'style': case 'title':
778 /* Process the token as if the insertion mode had been "in
779 head". */
780 $this->processWithRulesFor($token, self::IN_HEAD);
781 break;
782
783 /* A start tag token with the tag name "body" */
784 case 'body':
785 /* Parse error. If the second element on the stack of open
786 elements is not a body element, or, if the stack of open
787 elements has only one node on it, then ignore the token.
788 (fragment case) */
789 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
790 $this->ignored = true;
791 // Ignore
792
793 /* Otherwise, for each attribute on the token, check to see
794 if the attribute is already present on the body element (the
795 second element) on the stack of open elements. If it is not,
796 add the attribute and its corresponding value to that
797 element. */
798 } else {
799 foreach($token['attr'] as $attr) {
800 if(!$this->stack[1]->hasAttribute($attr['name'])) {
801 $this->stack[1]->setAttribute($attr['name'], $attr['value']);
802 }
803 }
804 }
805 break;
806
807 case 'frameset':
808 // parse error
809 /* If the second element on the stack of open elements is
810 * not a body element, or, if the stack of open elements
811 * has only one node on it, then ignore the token.
812 * (fragment case) */
813 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
814 $this->ignored = true;
815 // Ignore
816 } elseif (!$this->flag_frameset_ok) {
817 $this->ignored = true;
818 // Ignore
819 } else {
820 /* 1. Remove the second element on the stack of open
821 * elements from its parent node, if it has one. */
822 if($this->stack[1]->parentNode) {
823 $this->stack[1]->parentNode->removeChild($this->stack[1]);
824 }
825
826 /* 2. Pop all the nodes from the bottom of the stack of
827 * open elements, from the current node up to the root
828 * html element. */
829 array_splice($this->stack, 1);
830
831 $this->insertElement($token);
832 $this->mode = self::IN_FRAMESET;
833 }
834 break;
835
836 // in spec, there is a diversion here
837
838 case 'address': case 'article': case 'aside': case 'blockquote':
839 case 'center': case 'datagrid': case 'details': case 'dir':
840 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
841 case 'header': case 'hgroup': case 'menu': case 'nav':
842 case 'ol': case 'p': case 'section': case 'ul':
843 /* If the stack of open elements has a p element in scope,
844 then act as if an end tag with the tag name p had been
845 seen. */
846 if($this->elementInScope('p')) {
847 $this->emitToken(array(
848 'name' => 'p',
849 'type' => HTML5_Tokenizer::ENDTAG
850 ));
851 }
852
853 /* Insert an HTML element for the token. */
854 $this->insertElement($token);
855 break;
856
857 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
858 "h5", "h6" */
859 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
860 /* If the stack of open elements has a p element in scope,
861 then act as if an end tag with the tag name p had been seen. */
862 if($this->elementInScope('p')) {
863 $this->emitToken(array(
864 'name' => 'p',
865 'type' => HTML5_Tokenizer::ENDTAG
866 ));
867 }
868
869 /* If the current node is an element whose tag name is one
870 * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
871 * parse error; pop the current node off the stack of open
872 * elements. */
873 $peek = array_pop($this->stack);
874 if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
875 // parse error
876 } else {
877 $this->stack[] = $peek;
878 }
879
880 /* Insert an HTML element for the token. */
881 $this->insertElement($token);
882 break;
883
884 case 'pre': case 'listing':
885 /* If the stack of open elements has a p element in scope,
886 then act as if an end tag with the tag name p had been seen. */
887 if($this->elementInScope('p')) {
888 $this->emitToken(array(
889 'name' => 'p',
890 'type' => HTML5_Tokenizer::ENDTAG
891 ));
892 }
893 $this->insertElement($token);
894 /* If the next token is a U+000A LINE FEED (LF) character
895 * token, then ignore that token and move on to the next
896 * one. (Newlines at the start of pre blocks are ignored as
897 * an authoring convenience.) */
898 $this->ignore_lf_token = 2;
899 $this->flag_frameset_ok = false;
900 break;
901
902 /* A start tag whose tag name is "form" */
903 case 'form':
904 /* If the form element pointer is not null, ignore the
905 token with a parse error. */
906 if($this->form_pointer !== null) {
907 $this->ignored = true;
908 // Ignore.
909
910 /* Otherwise: */
911 } else {
912 /* If the stack of open elements has a p element in
913 scope, then act as if an end tag with the tag name p
914 had been seen. */
915 if($this->elementInScope('p')) {
916 $this->emitToken(array(
917 'name' => 'p',
918 'type' => HTML5_Tokenizer::ENDTAG
919 ));
920 }
921
922 /* Insert an HTML element for the token, and set the
923 form element pointer to point to the element created. */
924 $element = $this->insertElement($token);
925 $this->form_pointer = $element;
926 }
927 break;
928
929 // condensed specification
930 case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
931 /* 1. Set the frameset-ok flag to "not ok". */
932 $this->flag_frameset_ok = false;
933
934 $stack_length = count($this->stack) - 1;
935 for($n = $stack_length; 0 <= $n; $n--) {
936 /* 2. Initialise node to be the current node (the
937 bottommost node of the stack). */
938 $stop = false;
939 $node = $this->stack[$n];
940 $cat = $this->getElementCategory($node);
941
942 // for case 'li':
943 /* 3. If node is an li element, then act as if an end
944 * tag with the tag name "li" had been seen, then jump
945 * to the last step. */
946 // for case 'dc': case 'dd': case 'ds': case 'dt':
947 /* If node is a dc, dd, ds or dt element, then act as if an end
948 * tag with the same tag name as node had been seen, then
949 * jump to the last step. */
950 if(($token['name'] === 'li' && $node->tagName === 'li') ||
951 ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
952 $this->emitToken(array(
953 'type' => HTML5_Tokenizer::ENDTAG,
954 'name' => $node->tagName,
955 ));
956 break;
957 }
958
959 /* 4. If node is not in the formatting category, and is
960 not in the phrasing category, and is not an address,
961 div or p element, then stop this algorithm. */
962 if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
963 $node->tagName !== 'address' && $node->tagName !== 'div' &&
964 $node->tagName !== 'p') {
965 break;
966 }
967
968 /* 5. Otherwise, set node to the previous entry in the
969 * stack of open elements and return to step 2. */
970 }
971
972 /* 6. This is the last step. */
973
974 /* If the stack of open elements has a p element in scope,
975 then act as if an end tag with the tag name p had been
976 seen. */
977 if($this->elementInScope('p')) {
978 $this->emitToken(array(
979 'name' => 'p',
980 'type' => HTML5_Tokenizer::ENDTAG
981 ));
982 }
983
984 /* Finally, insert an HTML element with the same tag
985 name as the token's. */
986 $this->insertElement($token);
987 break;
988
989 /* A start tag token whose tag name is "plaintext" */
990 case 'plaintext':
991 /* If the stack of open elements has a p element in scope,
992 then act as if an end tag with the tag name p had been
993 seen. */
994 if($this->elementInScope('p')) {
995 $this->emitToken(array(
996 'name' => 'p',
997 'type' => HTML5_Tokenizer::ENDTAG
998 ));
999 }
1000
1001 /* Insert an HTML element for the token. */
1002 $this->insertElement($token);
1003
1004 $this->content_model = HTML5_Tokenizer::PLAINTEXT;
1005 break;
1006
1007 // more diversions
1008
1009 /* A start tag whose tag name is "a" */
1010 case 'a':
1011 /* If the list of active formatting elements contains
1012 an element whose tag name is "a" between the end of the
1013 list and the last marker on the list (or the start of
1014 the list if there is no marker on the list), then this
1015 is a parse error; act as if an end tag with the tag name
1016 "a" had been seen, then remove that element from the list
1017 of active formatting elements and the stack of open
1018 elements if the end tag didn't already remove it (it
1019 might not have if the element is not in table scope). */
1020 $leng = count($this->a_formatting);
1021
1022 for($n = $leng - 1; $n >= 0; $n--) {
1023 if($this->a_formatting[$n] === self::MARKER) {
1024 break;
1025
1026 } elseif($this->a_formatting[$n]->tagName === 'a') {
1027 $a = $this->a_formatting[$n];
1028 $this->emitToken(array(
1029 'name' => 'a',
1030 'type' => HTML5_Tokenizer::ENDTAG
1031 ));
1032 if (in_array($a, $this->a_formatting)) {
1033 $a_i = array_search($a, $this->a_formatting, true);
1034 if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);
1035 }
1036 if (in_array($a, $this->stack)) {
1037 $a_i = array_search($a, $this->stack, true);
1038 if ($a_i !== false) array_splice($this->stack, $a_i, 1);
1039 }
1040 break;
1041 }
1042 }
1043
1044 /* Reconstruct the active formatting elements, if any. */
1045 $this->reconstructActiveFormattingElements();
1046
1047 /* Insert an HTML element for the token. */
1048 $el = $this->insertElement($token);
1049
1050 /* Add that element to the list of active formatting
1051 elements. */
1052 $this->a_formatting[] = $el;
1053 break;
1054
1055 case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1056 case 's': case 'small': case 'strike':
1057 case 'strong': case 'tt': case 'u':
1058 /* Reconstruct the active formatting elements, if any. */
1059 $this->reconstructActiveFormattingElements();
1060
1061 /* Insert an HTML element for the token. */
1062 $el = $this->insertElement($token);
1063
1064 /* Add that element to the list of active formatting
1065 elements. */
1066 $this->a_formatting[] = $el;
1067 break;
1068
1069 case 'nobr':
1070 /* Reconstruct the active formatting elements, if any. */
1071 $this->reconstructActiveFormattingElements();
1072
1073 /* If the stack of open elements has a nobr element in
1074 * scope, then this is a parse error; act as if an end tag
1075 * with the tag name "nobr" had been seen, then once again
1076 * reconstruct the active formatting elements, if any. */
1077 if ($this->elementInScope('nobr')) {
1078 $this->emitToken(array(
1079 'name' => 'nobr',
1080 'type' => HTML5_Tokenizer::ENDTAG,
1081 ));
1082 $this->reconstructActiveFormattingElements();
1083 }
1084
1085 /* Insert an HTML element for the token. */
1086 $el = $this->insertElement($token);
1087
1088 /* Add that element to the list of active formatting
1089 elements. */
1090 $this->a_formatting[] = $el;
1091 break;
1092
1093 // another diversion
1094
1095 /* A start tag token whose tag name is "button" */
1096 case 'button':
1097 /* If the stack of open elements has a button element in scope,
1098 then this is a parse error; act as if an end tag with the tag
1099 name "button" had been seen, then reprocess the token. (We don't
1100 do that. Unnecessary.) (I hope you're right! -- ezyang) */
1101 if($this->elementInScope('button')) {
1102 $this->emitToken(array(
1103 'name' => 'button',
1104 'type' => HTML5_Tokenizer::ENDTAG
1105 ));
1106 }
1107
1108 /* Reconstruct the active formatting elements, if any. */
1109 $this->reconstructActiveFormattingElements();
1110
1111 /* Insert an HTML element for the token. */
1112 $this->insertElement($token);
1113
1114 /* Insert a marker at the end of the list of active
1115 formatting elements. */
1116 $this->a_formatting[] = self::MARKER;
1117
1118 $this->flag_frameset_ok = false;
1119 break;
1120
1121 case 'applet': case 'marquee': case 'object':
1122 /* Reconstruct the active formatting elements, if any. */
1123 $this->reconstructActiveFormattingElements();
1124
1125 /* Insert an HTML element for the token. */
1126 $this->insertElement($token);
1127
1128 /* Insert a marker at the end of the list of active
1129 formatting elements. */
1130 $this->a_formatting[] = self::MARKER;
1131
1132 $this->flag_frameset_ok = false;
1133 break;
1134
1135 // spec diversion
1136
1137 /* A start tag whose tag name is "table" */
1138 case 'table':
1139 /* If the Document is not set to quirks mode, and the
1140 * stack of open elements has a p element in scope, then
1141 * act as if an end tag with the tag name "p" had been
1142 * seen. */
1143 if($this->quirks_mode !== self::QUIRKS_MODE &&
1144 $this->elementInScope('p')) {
1145 $this->emitToken(array(
1146 'name' => 'p',
1147 'type' => HTML5_Tokenizer::ENDTAG
1148 ));
1149 }
1150
1151 /* Insert an HTML element for the token. */
1152 $this->insertElement($token);
1153
1154 $this->flag_frameset_ok = false;
1155
1156 /* Change the insertion mode to "in table". */
1157 $this->mode = self::IN_TABLE;
1158 break;
1159
1160 /* A start tag whose tag name is one of: "area", "basefont",
1161 "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1162 case 'area': case 'basefont': case 'bgsound': case 'br':
1163 case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1164 case 'wbr':
1165 /* Reconstruct the active formatting elements, if any. */
1166 $this->reconstructActiveFormattingElements();
1167
1168 /* Insert an HTML element for the token. */
1169 $this->insertElement($token);
1170
1171 /* Immediately pop the current node off the stack of open elements. */
1172 array_pop($this->stack);
1173
1174 // YYY: Acknowledge the token's self-closing flag, if it is set.
1175
1176 $this->flag_frameset_ok = false;
1177 break;
1178
1179 case 'param': case 'source':
1180 /* Insert an HTML element for the token. */
1181 $this->insertElement($token);
1182
1183 /* Immediately pop the current node off the stack of open elements. */
1184 array_pop($this->stack);
1185
1186 // YYY: Acknowledge the token's self-closing flag, if it is set.
1187 break;
1188
1189 /* A start tag whose tag name is "hr" */
1190 case 'hr':
1191 /* If the stack of open elements has a p element in scope,
1192 then act as if an end tag with the tag name p had been seen. */
1193 if($this->elementInScope('p')) {
1194 $this->emitToken(array(
1195 'name' => 'p',
1196 'type' => HTML5_Tokenizer::ENDTAG
1197 ));
1198 }
1199
1200 /* Insert an HTML element for the token. */
1201 $this->insertElement($token);
1202
1203 /* Immediately pop the current node off the stack of open elements. */
1204 array_pop($this->stack);
1205
1206 // YYY: Acknowledge the token's self-closing flag, if it is set.
1207
1208 $this->flag_frameset_ok = false;
1209 break;
1210
1211 /* A start tag whose tag name is "image" */
1212 case 'image':
1213 /* Parse error. Change the token's tag name to "img" and
1214 reprocess it. (Don't ask.) */
1215 $token['name'] = 'img';
1216 $this->emitToken($token);
1217 break;
1218
1219 /* A start tag whose tag name is "isindex" */
1220 case 'isindex':
1221 /* Parse error. */
1222
1223 /* If the form element pointer is not null,
1224 then ignore the token. */
1225 if($this->form_pointer === null) {
1226 /* Act as if a start tag token with the tag name "form" had
1227 been seen. */
1228 /* If the token has an attribute called "action", set
1229 * the action attribute on the resulting form
1230 * element to the value of the "action" attribute of
1231 * the token. */
1232 $attr = array();
1233 $action = $this->getAttr($token, 'action');
1234 if ($action !== false) {
1235 $attr[] = array('name' => 'action', 'value' => $action);
1236 }
1237 $this->emitToken(array(
1238 'name' => 'form',
1239 'type' => HTML5_Tokenizer::STARTTAG,
1240 'attr' => $attr
1241 ));
1242
1243 /* Act as if a start tag token with the tag name "hr" had
1244 been seen. */
1245 $this->emitToken(array(
1246 'name' => 'hr',
1247 'type' => HTML5_Tokenizer::STARTTAG,
1248 'attr' => array()
1249 ));
1250
1251 /* Act as if a start tag token with the tag name "label"
1252 had been seen. */
1253 $this->emitToken(array(
1254 'name' => 'label',
1255 'type' => HTML5_Tokenizer::STARTTAG,
1256 'attr' => array()
1257 ));
1258
1259 /* Act as if a stream of character tokens had been seen. */
1260 $prompt = $this->getAttr($token, 'prompt');
1261 if ($prompt === false) {
1262 $prompt = 'This is a searchable index. '.
1263 'Insert your search keywords here: ';
1264 }
1265 $this->emitToken(array(
1266 'data' => $prompt,
1267 'type' => HTML5_Tokenizer::CHARACTER,
1268 ));
1269
1270 /* Act as if a start tag token with the tag name "input"
1271 had been seen, with all the attributes from the "isindex"
1272 token, except with the "name" attribute set to the value
1273 "isindex" (ignoring any explicit "name" attribute). */
1274 $attr = array();
1275 foreach ($token['attr'] as $keypair) {
1276 if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1277 $keypair['name'] === 'prompt') continue;
1278 $attr[] = $keypair;
1279 }
1280 $attr[] = array('name' => 'name', 'value' => 'isindex');
1281
1282 $this->emitToken(array(
1283 'name' => 'input',
1284 'type' => HTML5_Tokenizer::STARTTAG,
1285 'attr' => $attr
1286 ));
1287
1288 /* Act as if an end tag token with the tag name "label"
1289 had been seen. */
1290 $this->emitToken(array(
1291 'name' => 'label',
1292 'type' => HTML5_Tokenizer::ENDTAG
1293 ));
1294
1295 /* Act as if a start tag token with the tag name "hr" had
1296 been seen. */
1297 $this->emitToken(array(
1298 'name' => 'hr',
1299 'type' => HTML5_Tokenizer::STARTTAG
1300 ));
1301
1302 /* Act as if an end tag token with the tag name "form" had
1303 been seen. */
1304 $this->emitToken(array(
1305 'name' => 'form',
1306 'type' => HTML5_Tokenizer::ENDTAG
1307 ));
1308 } else {
1309 $this->ignored = true;
1310 }
1311 break;
1312
1313 /* A start tag whose tag name is "textarea" */
1314 case 'textarea':
1315 $this->insertElement($token);
1316
1317 /* If the next token is a U+000A LINE FEED (LF)
1318 * character token, then ignore that token and move on to
1319 * the next one. (Newlines at the start of textarea
1320 * elements are ignored as an authoring convenience.)
1321 * need flag, see also <pre> */
1322 $this->ignore_lf_token = 2;
1323
1324 $this->original_mode = $this->mode;
1325 $this->flag_frameset_ok = false;
1326 $this->mode = self::IN_CDATA_RCDATA;
1327
1328 /* Switch the tokeniser's content model flag to the
1329 RCDATA state. */
1330 $this->content_model = HTML5_Tokenizer::RCDATA;
1331 break;
1332
1333 /* A start tag token whose tag name is "xmp" */
1334 case 'xmp':
1335 /* If the stack of open elements has a p element in
1336 scope, then act as if an end tag with the tag name
1337 "p" has been seen. */
1338 if ($this->elementInScope('p')) {
1339 $this->emitToken(array(
1340 'name' => 'p',
1341 'type' => HTML5_Tokenizer::ENDTAG
1342 ));
1343 }
1344
1345 /* Reconstruct the active formatting elements, if any. */
1346 $this->reconstructActiveFormattingElements();
1347
1348 $this->flag_frameset_ok = false;
1349
1350 $this->insertCDATAElement($token);
1351 break;
1352
1353 case 'iframe':
1354 $this->flag_frameset_ok = false;
1355 $this->insertCDATAElement($token);
1356 break;
1357
1358 case 'noembed': case 'noscript':
1359 // XSCRIPT: should check scripting flag
1360 $this->insertCDATAElement($token);
1361 break;
1362
1363 /* A start tag whose tag name is "select" */
1364 case 'select':
1365 /* Reconstruct the active formatting elements, if any. */
1366 $this->reconstructActiveFormattingElements();
1367
1368 /* Insert an HTML element for the token. */
1369 $this->insertElement($token);
1370
1371 $this->flag_frameset_ok = false;
1372
1373 /* If the insertion mode is one of "in table", "in caption",
1374 * "in column group", "in table body", "in row", or "in
1375 * cell", then switch the insertion mode to "in select in
1376 * table". Otherwise, switch the insertion mode to "in
1377 * select". */
1378 if (
1379 $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
1380 $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY ||
1381 $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
1382 ) {
1383 $this->mode = self::IN_SELECT_IN_TABLE;
1384 } else {
1385 $this->mode = self::IN_SELECT;
1386 }
1387 break;
1388
1389 case 'option': case 'optgroup':
1390 if ($this->elementInScope('option')) {
1391 $this->emitToken(array(
1392 'name' => 'option',
1393 'type' => HTML5_Tokenizer::ENDTAG,
1394 ));
1395 }
1396 $this->reconstructActiveFormattingElements();
1397 $this->insertElement($token);
1398 break;
1399
1400 case 'rp': case 'rt':
1401 /* If the stack of open elements has a ruby element in scope, then generate
1402 * implied end tags. If the current node is not then a ruby element, this is
1403 * a parse error; pop all the nodes from the current node up to the node
1404 * immediately before the bottommost ruby element on the stack of open elements.
1405 */
1406 if ($this->elementInScope('ruby')) {
1407 $this->generateImpliedEndTags();
1408 }
1409 $peek = false;
1410 do {
1411 if ($peek) {
1412 // parse error
1413 }
1414 $peek = array_pop($this->stack);
1415 } while ($peek->tagName !== 'ruby');
1416 $this->stack[] = $peek; // we popped one too many
1417 $this->insertElement($token);
1418 break;
1419
1420 // spec diversion
1421
1422 case 'math':
1423 $this->reconstructActiveFormattingElements();
1424 $token = $this->adjustMathMLAttributes($token);
1425 $token = $this->adjustForeignAttributes($token);
1426 $this->insertForeignElement($token, self::NS_MATHML);
1427 if (isset($token['self-closing'])) {
1428 // XERROR: acknowledge the token's self-closing flag
1429 array_pop($this->stack);
1430 }
1431 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1432 $this->secondary_mode = $this->mode;
1433 $this->mode = self::IN_FOREIGN_CONTENT;
1434 }
1435 break;
1436
1437 case 'svg':
1438 $this->reconstructActiveFormattingElements();
1439 $token = $this->adjustSVGAttributes($token);
1440 $token = $this->adjustForeignAttributes($token);
1441 $this->insertForeignElement($token, self::NS_SVG);
1442 if (isset($token['self-closing'])) {
1443 // XERROR: acknowledge the token's self-closing flag
1444 array_pop($this->stack);
1445 }
1446 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1447 $this->secondary_mode = $this->mode;
1448 $this->mode = self::IN_FOREIGN_CONTENT;
1449 }
1450 break;
1451
1452 case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1453 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1454 // parse error
1455 break;
1456
1457 /* A start tag token not covered by the previous entries */
1458 default:
1459 /* Reconstruct the active formatting elements, if any. */
1460 $this->reconstructActiveFormattingElements();
1461
1462 $this->insertElement($token);
1463 /* This element will be a phrasing element. */
1464 break;
1465 }
1466 break;
1467
1468 case HTML5_Tokenizer::ENDTAG:
1469 switch($token['name']) {
1470 /* An end tag with the tag name "body" */
1471 case 'body':
1472 /* If the stack of open elements does not have a body
1473 * element in scope, this is a parse error; ignore the
1474 * token. */
1475 if(!$this->elementInScope('body')) {
1476 $this->ignored = true;
1477
1478 /* Otherwise, if there is a node in the stack of open
1479 * elements that is not either a dc element, a dd element,
1480 * a ds element, a dt element, an li element, an optgroup
1481 * element, an option element, a p element, an rp element,
1482 * an rt element, a tbody element, a td element, a tfoot
1483 * element, a th element, a thead element, a tr element,
1484 * the body element, or the html element, then this is a
1485 * parse error.
1486 */
1487 } else {
1488 // XERROR: implement this check for parse error
1489 }
1490
1491 /* Change the insertion mode to "after body". */
1492 $this->mode = self::AFTER_BODY;
1493 break;
1494
1495 /* An end tag with the tag name "html" */
1496 case 'html':
1497 /* Act as if an end tag with tag name "body" had been seen,
1498 then, if that token wasn't ignored, reprocess the current
1499 token. */
1500 $this->emitToken(array(
1501 'name' => 'body',
1502 'type' => HTML5_Tokenizer::ENDTAG
1503 ));
1504
1505 if (!$this->ignored) $this->emitToken($token);
1506 break;
1507
1508 case 'address': case 'article': case 'aside': case 'blockquote':
1509 case 'center': case 'datagrid': case 'details': case 'dir':
1510 case 'div': case 'dl': case 'fieldset': case 'footer':
1511 case 'header': case 'hgroup': case 'listing': case 'menu':
1512 case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
1513 /* If the stack of open elements has an element in scope
1514 with the same tag name as that of the token, then generate
1515 implied end tags. */
1516 if($this->elementInScope($token['name'])) {
1517 $this->generateImpliedEndTags();
1518
1519 /* Now, if the current node is not an element with
1520 the same tag name as that of the token, then this
1521 is a parse error. */
1522 // XERROR: implement parse error logic
1523
1524 /* If the stack of open elements has an element in
1525 scope with the same tag name as that of the token,
1526 then pop elements from this stack until an element
1527 with that tag name has been popped from the stack. */
1528 do {
1529 $node = array_pop($this->stack);
1530 } while ($node->tagName !== $token['name']);
1531 } else {
1532 // parse error
1533 }
1534 break;
1535
1536 /* An end tag whose tag name is "form" */
1537 case 'form':
1538 /* Let node be the element that the form element pointer is set to. */
1539 $node = $this->form_pointer;
1540 /* Set the form element pointer to null. */
1541 $this->form_pointer = null;
1542 /* If node is null or the stack of open elements does not
1543 * have node in scope, then this is a parse error; ignore the token. */
1544 if ($node === null || !in_array($node, $this->stack)) {
1545 // parse error
1546 $this->ignored = true;
1547 } else {
1548 /* 1. Generate implied end tags. */
1549 $this->generateImpliedEndTags();
1550 /* 2. If the current node is not node, then this is a parse error. */
1551 if (end($this->stack) !== $node) {
1552 // parse error
1553 }
1554 /* 3. Remove node from the stack of open elements. */
1555 array_splice($this->stack, array_search($node, $this->stack, true), 1);
1556 }
1557
1558 break;
1559
1560 /* An end tag whose tag name is "p" */
1561 case 'p':
1562 /* If the stack of open elements has a p element in scope,
1563 then generate implied end tags, except for p elements. */
1564 if($this->elementInScope('p')) {
1565 /* Generate implied end tags, except for elements with
1566 * the same tag name as the token. */
1567 $this->generateImpliedEndTags(array('p'));
1568
1569 /* If the current node is not a p element, then this is
1570 a parse error. */
1571 // XERROR: implement
1572
1573 /* Pop elements from the stack of open elements until
1574 * an element with the same tag name as the token has
1575 * been popped from the stack. */
1576 do {
1577 $node = array_pop($this->stack);
1578 } while ($node->tagName !== 'p');
1579
1580 } else {
1581 // parse error
1582 $this->emitToken(array(
1583 'name' => 'p',
1584 'type' => HTML5_Tokenizer::STARTTAG,
1585 ));
1586 $this->emitToken($token);
1587 }
1588 break;
1589
1590 /* An end tag whose tag name is "li" */
1591 case 'li':
1592 /* If the stack of open elements does not have an element
1593 * in list item scope with the same tag name as that of the
1594 * token, then this is a parse error; ignore the token. */
1595 if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
1596 /* Generate implied end tags, except for elements with the
1597 * same tag name as the token. */
1598 $this->generateImpliedEndTags(array($token['name']));
1599 /* If the current node is not an element with the same tag
1600 * name as that of the token, then this is a parse error. */
1601 // XERROR: parse error
1602 /* Pop elements from the stack of open elements until an
1603 * element with the same tag name as the token has been
1604 * popped from the stack. */
1605 do {
1606 $node = array_pop($this->stack);
1607 } while ($node->tagName !== $token['name']);
1608 } else {
1609 // XERROR: parse error
1610 }
1611 break;
1612
1613 /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
1614 case 'dc': case 'dd': case 'ds': case 'dt':
1615 if($this->elementInScope($token['name'])) {
1616 $this->generateImpliedEndTags(array($token['name']));
1617
1618 /* If the current node is not an element with the same
1619 tag name as the token, then this is a parse error. */
1620 // XERROR: implement parse error
1621
1622 /* Pop elements from the stack of open elements until
1623 * an element with the same tag name as the token has
1624 * been popped from the stack. */
1625 do {
1626 $node = array_pop($this->stack);
1627 } while ($node->tagName !== $token['name']);
1628
1629 } else {
1630 // XERROR: parse error
1631 }
1632 break;
1633
1634 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1635 "h5", "h6" */
1636 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1637 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1638
1639 /* If the stack of open elements has in scope an element whose
1640 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1641 generate implied end tags. */
1642 if($this->elementInScope($elements)) {
1643 $this->generateImpliedEndTags();
1644
1645 /* Now, if the current node is not an element with the same
1646 tag name as that of the token, then this is a parse error. */
1647 // XERROR: implement parse error
1648
1649 /* If the stack of open elements has in scope an element
1650 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1651 "h6", then pop elements from the stack until an element
1652 with one of those tag names has been popped from the stack. */
1653 do {
1654 $node = array_pop($this->stack);
1655 } while (!in_array($node->tagName, $elements));
1656 } else {
1657 // parse error
1658 }
1659 break;
1660
1661 /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
1662 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1663 case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1664 case 'i': case 'nobr': case 's': case 'small': case 'strike':
1665 case 'strong': case 'tt': case 'u':
1666 // XERROR: generally speaking this needs parse error logic
1667 /* 1. Let the formatting element be the last element in
1668 the list of active formatting elements that:
1669 * is between the end of the list and the last scope
1670 marker in the list, if any, or the start of the list
1671 otherwise, and
1672 * has the same tag name as the token.
1673 */
1674 while(true) {
1675 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
1676 if($this->a_formatting[$a] === self::MARKER) {
1677 break;
1678
1679 } elseif($this->a_formatting[$a]->tagName === $token['name']) {
1680 $formatting_element = $this->a_formatting[$a];
1681 $in_stack = in_array($formatting_element, $this->stack, true);
1682 $fe_af_pos = $a;
1683 break;
1684 }
1685 }
1686
1687 /* If there is no such node, or, if that node is
1688 also in the stack of open elements but the element
1689 is not in scope, then this is a parse error. Abort
1690 these steps. The token is ignored. */
1691 if(!isset($formatting_element) || ($in_stack &&
1692 !$this->elementInScope($token['name']))) {
1693 $this->ignored = true;
1694 break;
1695
1696 /* Otherwise, if there is such a node, but that node
1697 is not in the stack of open elements, then this is a
1698 parse error; remove the element from the list, and
1699 abort these steps. */
1700 } elseif(isset($formatting_element) && !$in_stack) {
1701 unset($this->a_formatting[$fe_af_pos]);
1702 $this->a_formatting = array_merge($this->a_formatting);
1703 break;
1704 }
1705
1706 /* Otherwise, there is a formatting element and that
1707 * element is in the stack and is in scope. If the
1708 * element is not the current node, this is a parse
1709 * error. In any case, proceed with the algorithm as
1710 * written in the following steps. */
1711 // XERROR: implement me
1712
1713 /* 2. Let the furthest block be the topmost node in the
1714 stack of open elements that is lower in the stack
1715 than the formatting element, and is not an element in
1716 the phrasing or formatting categories. There might
1717 not be one. */
1718 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1719 $length = count($this->stack);
1720
1721 for($s = $fe_s_pos + 1; $s < $length; $s++) {
1722 $category = $this->getElementCategory($this->stack[$s]);
1723
1724 if($category !== self::PHRASING && $category !== self::FORMATTING) {
1725 $furthest_block = $this->stack[$s];
1726 break;
1727 }
1728 }
1729
1730 /* 3. If there is no furthest block, then the UA must
1731 skip the subsequent steps and instead just pop all
1732 the nodes from the bottom of the stack of open
1733 elements, from the current node up to the formatting
1734 element, and remove the formatting element from the
1735 list of active formatting elements. */
1736 if(!isset($furthest_block)) {
1737 for($n = $length - 1; $n >= $fe_s_pos; $n--) {
1738 array_pop($this->stack);
1739 }
1740
1741 unset($this->a_formatting[$fe_af_pos]);
1742 $this->a_formatting = array_merge($this->a_formatting);
1743 break;
1744 }
1745
1746 /* 4. Let the common ancestor be the element
1747 immediately above the formatting element in the stack
1748 of open elements. */
1749 $common_ancestor = $this->stack[$fe_s_pos - 1];
1750
1751 /* 5. Let a bookmark note the position of the
1752 formatting element in the list of active formatting
1753 elements relative to the elements on either side
1754 of it in the list. */
1755 $bookmark = $fe_af_pos;
1756
1757 /* 6. Let node and last node be the furthest block.
1758 Follow these steps: */
1759 $node = $furthest_block;
1760 $last_node = $furthest_block;
1761
1762 while(true) {
1763 for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
1764 /* 6.1 Let node be the element immediately
1765 prior to node in the stack of open elements. */
1766 $node = $this->stack[$n];
1767
1768 /* 6.2 If node is not in the list of active
1769 formatting elements, then remove node from
1770 the stack of open elements and then go back
1771 to step 1. */
1772 if(!in_array($node, $this->a_formatting, true)) {
1773 array_splice($this->stack, $n, 1);
1774
1775 } else {
1776 break;
1777 }
1778 }
1779
1780 /* 6.3 Otherwise, if node is the formatting
1781 element, then go to the next step in the overall
1782 algorithm. */
1783 if($node === $formatting_element) {
1784 break;
1785
1786 /* 6.4 Otherwise, if last node is the furthest
1787 block, then move the aforementioned bookmark to
1788 be immediately after the node in the list of
1789 active formatting elements. */
1790 } elseif($last_node === $furthest_block) {
1791 $bookmark = array_search($node, $this->a_formatting, true) + 1;
1792 }
1793
1794 /* 6.5 Create an element for the token for which
1795 * the element node was created, replace the entry
1796 * for node in the list of active formatting
1797 * elements with an entry for the new element,
1798 * replace the entry for node in the stack of open
1799 * elements with an entry for the new element, and
1800 * let node be the new element. */
1801 // we don't know what the token is anymore
1802 // XDOM
1803 $clone = $node->cloneNode();
1804 $a_pos = array_search($node, $this->a_formatting, true);
1805 $s_pos = array_search($node, $this->stack, true);
1806 $this->a_formatting[$a_pos] = $clone;
1807 $this->stack[$s_pos] = $clone;
1808 $node = $clone;
1809
1810 /* 6.6 Insert last node into node, first removing
1811 it from its previous parent node if any. */
1812 // XDOM
1813 if($last_node->parentNode !== null) {
1814 $last_node->parentNode->removeChild($last_node);
1815 }
1816
1817 // XDOM
1818 $node->appendChild($last_node);
1819
1820 /* 6.7 Let last node be node. */
1821 $last_node = $node;
1822
1823 /* 6.8 Return to step 1 of this inner set of steps. */
1824 }
1825
1826 /* 7. If the common ancestor node is a table, tbody,
1827 * tfoot, thead, or tr element, then, foster parent
1828 * whatever last node ended up being in the previous
1829 * step, first removing it from its previous parent
1830 * node if any. */
1831 // XDOM
1832 if ($last_node->parentNode) { // common step
1833 $last_node->parentNode->removeChild($last_node);
1834 }
1835 if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1836 $this->fosterParent($last_node);
1837 /* Otherwise, append whatever last node ended up being
1838 * in the previous step to the common ancestor node,
1839 * first removing it from its previous parent node if
1840 * any. */
1841 } else {
1842 // XDOM
1843 $common_ancestor->appendChild($last_node);
1844 }
1845
1846 /* 8. Create an element for the token for which the
1847 * formatting element was created. */
1848 // XDOM
1849 $clone = $formatting_element->cloneNode();
1850
1851 /* 9. Take all of the child nodes of the furthest
1852 block and append them to the element created in the
1853 last step. */
1854 // XDOM
1855 while($furthest_block->hasChildNodes()) {
1856 $child = $furthest_block->firstChild;
1857 $furthest_block->removeChild($child);
1858 $clone->appendChild($child);
1859 }
1860
1861 /* 10. Append that clone to the furthest block. */
1862 // XDOM
1863 $furthest_block->appendChild($clone);
1864
1865 /* 11. Remove the formatting element from the list
1866 of active formatting elements, and insert the new element
1867 into the list of active formatting elements at the
1868 position of the aforementioned bookmark. */
1869 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
1870 array_splice($this->a_formatting, $fe_af_pos, 1);
1871
1872 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
1873 $af_part2 = array_slice($this->a_formatting, $bookmark);
1874 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
1875
1876 /* 12. Remove the formatting element from the stack
1877 of open elements, and insert the new element into the stack
1878 of open elements immediately below the position of the
1879 furthest block in that stack. */
1880 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1881 array_splice($this->stack, $fe_s_pos, 1);
1882
1883 $fb_s_pos = array_search($furthest_block, $this->stack, true);
1884 $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
1885 $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
1886 $this->stack = array_merge($s_part1, array($clone), $s_part2);
1887
1888 /* 13. Jump back to step 1 in this series of steps. */
1889 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1890 }
1891 break;
1892
1893 case 'applet': case 'button': case 'marquee': case 'object':
1894 /* If the stack of open elements has an element in scope whose
1895 tag name matches the tag name of the token, then generate implied
1896 tags. */
1897 if($this->elementInScope($token['name'])) {
1898 $this->generateImpliedEndTags();
1899
1900 /* Now, if the current node is not an element with the same
1901 tag name as the token, then this is a parse error. */
1902 // XERROR: implement logic
1903
1904 /* Pop elements from the stack of open elements until
1905 * an element with the same tag name as the token has
1906 * been popped from the stack. */
1907 do {
1908 $node = array_pop($this->stack);
1909 } while ($node->tagName !== $token['name']);
1910
1911 /* Clear the list of active formatting elements up to the
1912 * last marker. */
1913 $keys = array_keys($this->a_formatting, self::MARKER, true);
1914 $marker = end($keys);
1915
1916 for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
1917 array_pop($this->a_formatting);
1918 }
1919 } else {
1920 // parse error
1921 }
1922 break;
1923
1924 case 'br':
1925 // Parse error
1926 $this->emitToken(array(
1927 'name' => 'br',
1928 'type' => HTML5_Tokenizer::STARTTAG,
1929 ));
1930 break;
1931
1932 /* An end tag token not covered by the previous entries */
1933 default:
1934 for($n = count($this->stack) - 1; $n >= 0; $n--) {
1935 /* Initialise node to be the current node (the bottommost
1936 node of the stack). */
1937 $node = $this->stack[$n];
1938
1939 /* If node has the same tag name as the end tag token,
1940 then: */
1941 if($token['name'] === $node->tagName) {
1942 /* Generate implied end tags. */
1943 $this->generateImpliedEndTags();
1944
1945 /* If the tag name of the end tag token does not
1946 match the tag name of the current node, this is a
1947 parse error. */
1948 // XERROR: implement this
1949
1950 /* Pop all the nodes from the current node up to
1951 node, including node, then stop these steps. */
1952 // XSKETCHY
1953 do {
1954 $pop = array_pop($this->stack);
1955 } while ($pop !== $node);
1956 break;
1957
1958 } else {
1959 $category = $this->getElementCategory($node);
1960
1961 if($category !== self::FORMATTING && $category !== self::PHRASING) {
1962 /* Otherwise, if node is in neither the formatting
1963 category nor the phrasing category, then this is a
1964 parse error. Stop this algorithm. The end tag token
1965 is ignored. */
1966 $this->ignored = true;
1967 break;
1968 // parse error
1969 }
1970 }
1971 /* Set node to the previous entry in the stack of open elements. Loop. */
1972 }
1973 break;
1974 }
1975 break;
1976 }
1977 break;
1978
1979 case self::IN_CDATA_RCDATA:
1980 if (
1981 $token['type'] === HTML5_Tokenizer::CHARACTER ||
1982 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
1983 ) {
1984 $this->insertText($token['data']);
1985 } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
1986 // parse error
1987 /* If the current node is a script element, mark the script
1988 * element as "already executed". */
1989 // probably not necessary
1990 array_pop($this->stack);
1991 $this->mode = $this->original_mode;
1992 $this->emitToken($token);
1993 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') {
1994 array_pop($this->stack);
1995 $this->mode = $this->original_mode;
1996 // we're ignoring all of the execution stuff
1997 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) {
1998 array_pop($this->stack);
1999 $this->mode = $this->original_mode;
2000 }
2001 break;
2002
2003 case self::IN_TABLE:
2004 $clear = array('html', 'table');
2005
2006 /* A character token */
2007 if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2008 $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2009 /* Let the pending table character tokens
2010 * be an empty list of tokens. */
2011 $this->pendingTableCharacters = "";
2012 $this->pendingTableCharactersDirty = false;
2013 /* Let the original insertion mode be the current
2014 * insertion mode. */
2015 $this->original_mode = $this->mode;
2016 /* Switch the insertion mode to
2017 * "in table text" and
2018 * reprocess the token. */
2019 $this->mode = self::IN_TABLE_TEXT;
2020 $this->emitToken($token);
2021
2022 /* A comment token */
2023 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2024 /* Append a Comment node to the current node with the data
2025 attribute set to the data given in the comment token. */
2026 $this->insertComment($token['data']);
2027
2028 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2029 // parse error
2030
2031 /* A start tag whose tag name is "caption" */
2032 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2033 $token['name'] === 'caption') {
2034 /* Clear the stack back to a table context. */
2035 $this->clearStackToTableContext($clear);
2036
2037 /* Insert a marker at the end of the list of active
2038 formatting elements. */
2039 $this->a_formatting[] = self::MARKER;
2040
2041 /* Insert an HTML element for the token, then switch the
2042 insertion mode to "in caption". */
2043 $this->insertElement($token);
2044 $this->mode = self::IN_CAPTION;
2045
2046 /* A start tag whose tag name is "colgroup" */
2047 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2048 $token['name'] === 'colgroup') {
2049 /* Clear the stack back to a table context. */
2050 $this->clearStackToTableContext($clear);
2051
2052 /* Insert an HTML element for the token, then switch the
2053 insertion mode to "in column group". */
2054 $this->insertElement($token);
2055 $this->mode = self::IN_COLUMN_GROUP;
2056
2057 /* A start tag whose tag name is "col" */
2058 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2059 $token['name'] === 'col') {
2060 $this->emitToken(array(
2061 'name' => 'colgroup',
2062 'type' => HTML5_Tokenizer::STARTTAG,
2063 'attr' => array()
2064 ));
2065
2066 $this->emitToken($token);
2067
2068 /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
2069 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2070 array('tbody', 'tfoot', 'thead'))) {
2071 /* Clear the stack back to a table context. */
2072 $this->clearStackToTableContext($clear);
2073
2074 /* Insert an HTML element for the token, then switch the insertion
2075 mode to "in table body". */
2076 $this->insertElement($token);
2077 $this->mode = self::IN_TABLE_BODY;
2078
2079 /* A start tag whose tag name is one of: "td", "th", "tr" */
2080 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2081 in_array($token['name'], array('td', 'th', 'tr'))) {
2082 /* Act as if a start tag token with the tag name "tbody" had been
2083 seen, then reprocess the current token. */
2084 $this->emitToken(array(
2085 'name' => 'tbody',
2086 'type' => HTML5_Tokenizer::STARTTAG,
2087 'attr' => array()
2088 ));
2089
2090 $this->emitToken($token);
2091
2092 /* A start tag whose tag name is "table" */
2093 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2094 $token['name'] === 'table') {
2095 /* Parse error. Act as if an end tag token with the tag name "table"
2096 had been seen, then, if that token wasn't ignored, reprocess the
2097 current token. */
2098 $this->emitToken(array(
2099 'name' => 'table',
2100 'type' => HTML5_Tokenizer::ENDTAG
2101 ));
2102
2103 if (!$this->ignored) $this->emitToken($token);
2104
2105 /* An end tag whose tag name is "table" */
2106 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2107 $token['name'] === 'table') {
2108 /* If the stack of open elements does not have an element in table
2109 scope with the same tag name as the token, this is a parse error.
2110 Ignore the token. (fragment case) */
2111 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2112 $this->ignored = true;
2113
2114 /* Otherwise: */
2115 } else {
2116 do {
2117 $node = array_pop($this->stack);
2118 } while ($node->tagName !== 'table');
2119
2120 /* Reset the insertion mode appropriately. */
2121 $this->resetInsertionMode();
2122 }
2123
2124 /* An end tag whose tag name is one of: "body", "caption", "col",
2125 "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2126 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2127 array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
2128 'tfoot', 'th', 'thead', 'tr'))) {
2129 // Parse error. Ignore the token.
2130
2131 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2132 ($token['name'] === 'style' || $token['name'] === 'script')) {
2133 $this->processWithRulesFor($token, self::IN_HEAD);
2134
2135 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' &&
2136 // assignment is intentional
2137 /* If the token does not have an attribute with the name "type", or
2138 * if it does, but that attribute's value is not an ASCII
2139 * case-insensitive match for the string "hidden", then: act as
2140 * described in the "anything else" entry below. */
2141 ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
2142 // I.e., if it is an input with the type attribute == 'hidden'
2143 /* Otherwise */
2144 // parse error
2145 $this->insertElement($token);
2146 array_pop($this->stack);
2147 } elseif ($token['type'] === HTML5_Tokenizer::EOF) {
2148 /* If the current node is not the root html element, then this is a parse error. */
2149 if (end($this->stack)->tagName !== 'html') {
2150 // Note: It can only be the current node in the fragment case.
2151 // parse error
2152 }
2153 /* Stop parsing. */
2154 /* Anything else */
2155 } else {
2156 /* Parse error. Process the token as if the insertion mode was "in
2157 body", with the following exception: */
2158
2159 $old = $this->foster_parent;
2160 $this->foster_parent = true;
2161 $this->processWithRulesFor($token, self::IN_BODY);
2162 $this->foster_parent = $old;
2163 }
2164 break;
2165
2166 case self::IN_TABLE_TEXT:
2167 /* A character token */
2168 if($token['type'] === HTML5_Tokenizer::CHARACTER) {
2169 /* Append the character token to the pending table
2170 * character tokens list. */
2171 $this->pendingTableCharacters .= $token['data'];
2172 $this->pendingTableCharactersDirty = true;
2173 } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2174 $this->pendingTableCharacters .= $token['data'];
2175 /* Anything else */
2176 } else {
2177 if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
2178 /* If any of the tokens in the pending table character tokens list
2179 * are character tokens that are not one of U+0009 CHARACTER
2180 * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
2181 * U+0020 SPACE, then reprocess those character tokens using the
2182 * rules given in the "anything else" entry in the "in table"
2183 * insertion mode.*/
2184 if ($this->pendingTableCharactersDirty) {
2185 /* Parse error. Process the token using the rules for the
2186 * "in body" insertion mode, except that if the current
2187 * node is a table, tbody, tfoot, thead, or tr element,
2188 * then, whenever a node would be inserted into the current
2189 * node, it must instead be foster parented. */
2190 // XERROR
2191 $old = $this->foster_parent;
2192 $this->foster_parent = true;
2193 $text_token = array(
2194 'type' => HTML5_Tokenizer::CHARACTER,
2195 'data' => $this->pendingTableCharacters,
2196 );
2197 $this->processWithRulesFor($text_token, self::IN_BODY);
2198 $this->foster_parent = $old;
2199
2200 /* Otherwise, insert the characters given by the pending table
2201 * character tokens list into the current node. */
2202 } else {
2203 $this->insertText($this->pendingTableCharacters);
2204 }
2205 $this->pendingTableCharacters = null;
2206 $this->pendingTableCharactersNull = null;
2207 }
2208
2209 /* Switch the insertion mode to the original insertion mode and
2210 * reprocess the token.
2211 */
2212 $this->mode = $this->original_mode;
2213 $this->emitToken($token);
2214 }
2215 break;
2216
2217 case self::IN_CAPTION:
2218 /* An end tag whose tag name is "caption" */
2219 if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
2220 /* If the stack of open elements does not have an element in table
2221 scope with the same tag name as the token, this is a parse error.
2222 Ignore the token. (fragment case) */
2223 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2224 $this->ignored = true;
2225 // Ignore
2226
2227 /* Otherwise: */
2228 } else {
2229 /* Generate implied end tags. */
2230 $this->generateImpliedEndTags();
2231
2232 /* Now, if the current node is not a caption element, then this
2233 is a parse error. */
2234 // XERROR: implement
2235
2236 /* Pop elements from this stack until a caption element has
2237 been popped from the stack. */
2238 do {
2239 $node = array_pop($this->stack);
2240 } while ($node->tagName !== 'caption');
2241
2242 /* Clear the list of active formatting elements up to the last
2243 marker. */
2244 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2245
2246 /* Switch the insertion mode to "in table". */
2247 $this->mode = self::IN_TABLE;
2248 }
2249
2250 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2251 "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
2252 name is "table" */
2253 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2254 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2255 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2256 $token['name'] === 'table')) {
2257 /* Parse error. Act as if an end tag with the tag name "caption"
2258 had been seen, then, if that token wasn't ignored, reprocess the
2259 current token. */
2260 $this->emitToken(array(
2261 'name' => 'caption',
2262 'type' => HTML5_Tokenizer::ENDTAG
2263 ));
2264
2265 if (!$this->ignored) $this->emitToken($token);
2266
2267 /* An end tag whose tag name is one of: "body", "col", "colgroup",
2268 "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
2269 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2270 array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',
2271 'thead', 'tr'))) {
2272 // Parse error. Ignore the token.
2273 $this->ignored = true;
2274
2275 /* Anything else */
2276 } else {
2277 /* Process the token as if the insertion mode was "in body". */
2278 $this->processWithRulesFor($token, self::IN_BODY);
2279 }
2280 break;
2281
2282 case self::IN_COLUMN_GROUP:
2283 /* A character token that is one of U+0009 CHARACTER TABULATION,
2284 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2285 or U+0020 SPACE */
2286 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2287 /* Append the character to the current node. */
2288 $this->insertText($token['data']);
2289
2290 /* A comment token */
2291 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2292 /* Append a Comment node to the current node with the data
2293 attribute set to the data given in the comment token. */
2294 $this->insertToken($token['data']);
2295
2296 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2297 // parse error
2298
2299 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2300 $this->processWithRulesFor($token, self::IN_BODY);
2301
2302 /* A start tag whose tag name is "col" */
2303 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
2304 /* Insert a col element for the token. Immediately pop the current
2305 node off the stack of open elements. */
2306 $this->insertElement($token);
2307 array_pop($this->stack);
2308 // XERROR: Acknowledge the token's self-closing flag, if it is set.
2309
2310 /* An end tag whose tag name is "colgroup" */
2311 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2312 $token['name'] === 'colgroup') {
2313 /* If the current node is the root html element, then this is a
2314 parse error, ignore the token. (fragment case) */
2315 if(end($this->stack)->tagName === 'html') {
2316 $this->ignored = true;
2317
2318 /* Otherwise, pop the current node (which will be a colgroup
2319 element) from the stack of open elements. Switch the insertion
2320 mode to "in table". */
2321 } else {
2322 array_pop($this->stack);
2323 $this->mode = self::IN_TABLE;
2324 }
2325
2326 /* An end tag whose tag name is "col" */
2327 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
2328 /* Parse error. Ignore the token. */
2329 $this->ignored = true;
2330
2331 /* An end-of-file token */
2332 /* If the current node is the root html element */
2333 } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
2334 /* Stop parsing */
2335
2336 /* Anything else */
2337 } else {
2338 /* Act as if an end tag with the tag name "colgroup" had been seen,
2339 and then, if that token wasn't ignored, reprocess the current token. */
2340 $this->emitToken(array(
2341 'name' => 'colgroup',
2342 'type' => HTML5_Tokenizer::ENDTAG
2343 ));
2344
2345 if (!$this->ignored) $this->emitToken($token);
2346 }
2347 break;
2348
2349 case self::IN_TABLE_BODY:
2350 $clear = array('tbody', 'tfoot', 'thead', 'html');
2351
2352 /* A start tag whose tag name is "tr" */
2353 if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') {
2354 /* Clear the stack back to a table body context. */
2355 $this->clearStackToTableContext($clear);
2356
2357 /* Insert a tr element for the token, then switch the insertion
2358 mode to "in row". */
2359 $this->insertElement($token);
2360 $this->mode = self::IN_ROW;
2361
2362 /* A start tag whose tag name is one of: "th", "td" */
2363 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2364 ($token['name'] === 'th' || $token['name'] === 'td')) {
2365 /* Parse error. Act as if a start tag with the tag name "tr" had
2366 been seen, then reprocess the current token. */
2367 $this->emitToken(array(
2368 'name' => 'tr',
2369 'type' => HTML5_Tokenizer::STARTTAG,
2370 'attr' => array()
2371 ));
2372
2373 $this->emitToken($token);
2374
2375 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2376 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2377 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2378 /* If the stack of open elements does not have an element in table
2379 scope with the same tag name as the token, this is a parse error.
2380 Ignore the token. */
2381 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2382 // Parse error
2383 $this->ignored = true;
2384
2385 /* Otherwise: */
2386 } else {
2387 /* Clear the stack back to a table body context. */
2388 $this->clearStackToTableContext($clear);
2389
2390 /* Pop the current node from the stack of open elements. Switch
2391 the insertion mode to "in table". */
2392 array_pop($this->stack);
2393 $this->mode = self::IN_TABLE;
2394 }
2395
2396 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2397 "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */
2398 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2399 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
2400 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2401 /* If the stack of open elements does not have a tbody, thead, or
2402 tfoot element in table scope, this is a parse error. Ignore the
2403 token. (fragment case) */
2404 if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
2405 // parse error
2406 $this->ignored = true;
2407
2408 /* Otherwise: */
2409 } else {
2410 /* Clear the stack back to a table body context. */
2411 $this->clearStackToTableContext($clear);
2412
2413 /* Act as if an end tag with the same tag name as the current
2414 node ("tbody", "tfoot", or "thead") had been seen, then
2415 reprocess the current token. */
2416 $this->emitToken(array(
2417 'name' => end($this->stack)->tagName,
2418 'type' => HTML5_Tokenizer::ENDTAG
2419 ));
2420
2421 $this->emitToken($token);
2422 }
2423
2424 /* An end tag whose tag name is one of: "body", "caption", "col",
2425 "colgroup", "html", "td", "th", "tr" */
2426 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2427 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
2428 /* Parse error. Ignore the token. */
2429 $this->ignored = true;
2430
2431 /* Anything else */
2432 } else {
2433 /* Process the token as if the insertion mode was "in table". */
2434 $this->processWithRulesFor($token, self::IN_TABLE);
2435 }
2436 break;
2437
2438 case self::IN_ROW:
2439 $clear = array('tr', 'html');
2440
2441 /* A start tag whose tag name is one of: "th", "td" */
2442 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2443 ($token['name'] === 'th' || $token['name'] === 'td')) {
2444 /* Clear the stack back to a table row context. */
2445 $this->clearStackToTableContext($clear);
2446
2447 /* Insert an HTML element for the token, then switch the insertion
2448 mode to "in cell". */
2449 $this->insertElement($token);
2450 $this->mode = self::IN_CELL;
2451
2452 /* Insert a marker at the end of the list of active formatting
2453 elements. */
2454 $this->a_formatting[] = self::MARKER;
2455
2456 /* An end tag whose tag name is "tr" */
2457 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') {
2458 /* If the stack of open elements does not have an element in table
2459 scope with the same tag name as the token, this is a parse error.
2460 Ignore the token. (fragment case) */
2461 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2462 // Ignore.
2463 $this->ignored = true;
2464
2465 /* Otherwise: */
2466 } else {
2467 /* Clear the stack back to a table row context. */
2468 $this->clearStackToTableContext($clear);
2469
2470 /* Pop the current node (which will be a tr element) from the
2471 stack of open elements. Switch the insertion mode to "in table
2472 body". */
2473 array_pop($this->stack);
2474 $this->mode = self::IN_TABLE_BODY;
2475 }
2476
2477 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2478 "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
2479 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2480 array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
2481 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) {
2482 /* Act as if an end tag with the tag name "tr" had been seen, then,
2483 if that token wasn't ignored, reprocess the current token. */
2484 $this->emitToken(array(
2485 'name' => 'tr',
2486 'type' => HTML5_Tokenizer::ENDTAG
2487 ));
2488 if (!$this->ignored) $this->emitToken($token);
2489
2490 /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
2491 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2492 in_array($token['name'], array('tbody', 'tfoot', 'thead'))) {
2493 /* If the stack of open elements does not have an element in table
2494 scope with the same tag name as the token, this is a parse error.
2495 Ignore the token. */
2496 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2497 $this->ignored = true;
2498
2499 /* Otherwise: */
2500 } else {
2501 /* Otherwise, act as if an end tag with the tag name "tr" had
2502 been seen, then reprocess the current token. */
2503 $this->emitToken(array(
2504 'name' => 'tr',
2505 'type' => HTML5_Tokenizer::ENDTAG
2506 ));
2507
2508 $this->emitToken($token);
2509 }
2510
2511 /* An end tag whose tag name is one of: "body", "caption", "col",
2512 "colgroup", "html", "td", "th" */
2513 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2514 array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
2515 /* Parse error. Ignore the token. */
2516 $this->ignored = true;
2517
2518 /* Anything else */
2519 } else {
2520 /* Process the token as if the insertion mode was "in table". */
2521 $this->processWithRulesFor($token, self::IN_TABLE);
2522 }
2523 break;
2524
2525 case self::IN_CELL:
2526 /* An end tag whose tag name is one of: "td", "th" */
2527 if($token['type'] === HTML5_Tokenizer::ENDTAG &&
2528 ($token['name'] === 'td' || $token['name'] === 'th')) {
2529 /* If the stack of open elements does not have an element in table
2530 scope with the same tag name as that of the token, then this is a
2531 parse error and the token must be ignored. */
2532 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2533 $this->ignored = true;
2534
2535 /* Otherwise: */
2536 } else {
2537 /* Generate implied end tags, except for elements with the same
2538 tag name as the token. */
2539 $this->generateImpliedEndTags(array($token['name']));
2540
2541 /* Now, if the current node is not an element with the same tag
2542 name as the token, then this is a parse error. */
2543 // XERROR: Implement parse error code
2544
2545 /* Pop elements from this stack until an element with the same
2546 tag name as the token has been popped from the stack. */
2547 do {
2548 $node = array_pop($this->stack);
2549 } while ($node->tagName !== $token['name']);
2550
2551 /* Clear the list of active formatting elements up to the last
2552 marker. */
2553 $this->clearTheActiveFormattingElementsUpToTheLastMarker();
2554
2555 /* Switch the insertion mode to "in row". (The current node
2556 will be a tr element at this point.) */
2557 $this->mode = self::IN_ROW;
2558 }
2559
2560 /* A start tag whose tag name is one of: "caption", "col", "colgroup",
2561 "tbody", "td", "tfoot", "th", "thead", "tr" */
2562 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
2563 array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
2564 'thead', 'tr'))) {
2565 /* If the stack of open elements does not have a td or th element
2566 in table scope, then this is a parse error; ignore the token.
2567 (fragment case) */
2568 if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
2569 // parse error
2570 $this->ignored = true;
2571
2572 /* Otherwise, close the cell (see below) and reprocess the current
2573 token. */
2574 } else {
2575 $this->closeCell();
2576 $this->emitToken($token);
2577 }
2578
2579 /* An end tag whose tag name is one of: "body", "caption", "col",
2580 "colgroup", "html" */
2581 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2582 array('body', 'caption', 'col', 'colgroup', 'html'))) {
2583 /* Parse error. Ignore the token. */
2584 $this->ignored = true;
2585
2586 /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
2587 "thead", "tr" */
2588 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
2589 array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
2590 /* If the stack of open elements does not have a td or th element
2591 in table scope, then this is a parse error; ignore the token.
2592 (innerHTML case) */
2593 if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
2594 // Parse error
2595 $this->ignored = true;
2596
2597 /* Otherwise, close the cell (see below) and reprocess the current
2598 token. */
2599 } else {
2600 $this->closeCell();
2601 $this->emitToken($token);
2602 }
2603
2604 /* Anything else */
2605 } else {
2606 /* Process the token as if the insertion mode was "in body". */
2607 $this->processWithRulesFor($token, self::IN_BODY);
2608 }
2609 break;
2610
2611 case self::IN_SELECT:
2612 /* Handle the token as follows: */
2613
2614 /* A character token */
2615 if(
2616 $token['type'] === HTML5_Tokenizer::CHARACTER ||
2617 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2618 ) {
2619 /* Append the token's character to the current node. */
2620 $this->insertText($token['data']);
2621
2622 /* A comment token */
2623 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2624 /* Append a Comment node to the current node with the data
2625 attribute set to the data given in the comment token. */
2626 $this->insertComment($token['data']);
2627
2628 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2629 // parse error
2630
2631 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2632 $this->processWithRulesFor($token, self::INBODY);
2633
2634 /* A start tag token whose tag name is "option" */
2635 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2636 $token['name'] === 'option') {
2637 /* If the current node is an option element, act as if an end tag
2638 with the tag name "option" had been seen. */
2639 if(end($this->stack)->tagName === 'option') {
2640 $this->emitToken(array(
2641 'name' => 'option',
2642 'type' => HTML5_Tokenizer::ENDTAG
2643 ));
2644 }
2645
2646 /* Insert an HTML element for the token. */
2647 $this->insertElement($token);
2648
2649 /* A start tag token whose tag name is "optgroup" */
2650 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2651 $token['name'] === 'optgroup') {
2652 /* If the current node is an option element, act as if an end tag
2653 with the tag name "option" had been seen. */
2654 if(end($this->stack)->tagName === 'option') {
2655 $this->emitToken(array(
2656 'name' => 'option',
2657 'type' => HTML5_Tokenizer::ENDTAG
2658 ));
2659 }
2660
2661 /* If the current node is an optgroup element, act as if an end tag
2662 with the tag name "optgroup" had been seen. */
2663 if(end($this->stack)->tagName === 'optgroup') {
2664 $this->emitToken(array(
2665 'name' => 'optgroup',
2666 'type' => HTML5_Tokenizer::ENDTAG
2667 ));
2668 }
2669
2670 /* Insert an HTML element for the token. */
2671 $this->insertElement($token);
2672
2673 /* An end tag token whose tag name is "optgroup" */
2674 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2675 $token['name'] === 'optgroup') {
2676 /* First, if the current node is an option element, and the node
2677 immediately before it in the stack of open elements is an optgroup
2678 element, then act as if an end tag with the tag name "option" had
2679 been seen. */
2680 $elements_in_stack = count($this->stack);
2681
2682 if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
2683 $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
2684 $this->emitToken(array(
2685 'name' => 'option',
2686 'type' => HTML5_Tokenizer::ENDTAG
2687 ));
2688 }
2689
2690 /* If the current node is an optgroup element, then pop that node
2691 from the stack of open elements. Otherwise, this is a parse error,
2692 ignore the token. */
2693 if(end($this->stack)->tagName === 'optgroup') {
2694 array_pop($this->stack);
2695 } else {
2696 // parse error
2697 $this->ignored = true;
2698 }
2699
2700 /* An end tag token whose tag name is "option" */
2701 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2702 $token['name'] === 'option') {
2703 /* If the current node is an option element, then pop that node
2704 from the stack of open elements. Otherwise, this is a parse error,
2705 ignore the token. */
2706 if(end($this->stack)->tagName === 'option') {
2707 array_pop($this->stack);
2708 } else {
2709 // parse error
2710 $this->ignored = true;
2711 }
2712
2713 /* An end tag whose tag name is "select" */
2714 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2715 $token['name'] === 'select') {
2716 /* If the stack of open elements does not have an element in table
2717 scope with the same tag name as the token, this is a parse error.
2718 Ignore the token. (fragment case) */
2719 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2720 $this->ignored = true;
2721 // parse error
2722
2723 /* Otherwise: */
2724 } else {
2725 /* Pop elements from the stack of open elements until a select
2726 element has been popped from the stack. */
2727 do {
2728 $node = array_pop($this->stack);
2729 } while ($node->tagName !== 'select');
2730
2731 /* Reset the insertion mode appropriately. */
2732 $this->resetInsertionMode();
2733 }
2734
2735 /* A start tag whose tag name is "select" */
2736 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
2737 /* Parse error. Act as if the token had been an end tag with the
2738 tag name "select" instead. */
2739 $this->emitToken(array(
2740 'name' => 'select',
2741 'type' => HTML5_Tokenizer::ENDTAG
2742 ));
2743
2744 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2745 ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) {
2746 // parse error
2747 $this->emitToken(array(
2748 'name' => 'select',
2749 'type' => HTML5_Tokenizer::ENDTAG
2750 ));
2751 $this->emitToken($token);
2752
2753 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
2754 $this->processWithRulesFor($token, self::IN_HEAD);
2755
2756 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2757 // XERROR: If the current node is not the root html element, then this is a parse error.
2758 /* Stop parsing */
2759
2760 /* Anything else */
2761 } else {
2762 /* Parse error. Ignore the token. */
2763 $this->ignored = true;
2764 }
2765 break;
2766
2767 case self::IN_SELECT_IN_TABLE:
2768
2769 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2770 in_array($token['name'], array('caption', 'table', 'tbody',
2771 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2772 // parse error
2773 $this->emitToken(array(
2774 'name' => 'select',
2775 'type' => HTML5_Tokenizer::ENDTAG,
2776 ));
2777 $this->emitToken($token);
2778
2779 /* An end tag whose tag name is one of: "caption", "table", "tbody",
2780 "tfoot", "thead", "tr", "td", "th" */
2781 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2782 in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2783 /* Parse error. */
2784 // parse error
2785
2786 /* If the stack of open elements has an element in table scope with
2787 the same tag name as that of the token, then act as if an end tag
2788 with the tag name "select" had been seen, and reprocess the token.
2789 Otherwise, ignore the token. */
2790 if($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2791 $this->emitToken(array(
2792 'name' => 'select',
2793 'type' => HTML5_Tokenizer::ENDTAG
2794 ));
2795
2796 $this->emitToken($token);
2797 } else {
2798 $this->ignored = true;
2799 }
2800 } else {
2801 $this->processWithRulesFor($token, self::IN_SELECT);
2802 }
2803 break;
2804
2805 case self::IN_FOREIGN_CONTENT:
2806 if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2807 $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2808 $this->insertText($token['data']);
2809 } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2810 $this->insertComment($token['data']);
2811 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2812 // XERROR: parse error
2813 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2814 $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
2815 // XDOM
2816 end($this->stack)->namespaceURI === self::NS_SVG) {
2817 array_pop($this->stack);
2818 // a bunch of script running mumbo jumbo
2819 } elseif (
2820 ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2821 ((
2822 $token['name'] !== 'mglyph' &&
2823 $token['name'] !== 'malignmark' &&
2824 // XDOM
2825 end($this->stack)->namespaceURI === self::NS_MATHML &&
2826 in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2827 ) ||
2828 (
2829 $token['name'] === 'svg' &&
2830 // XDOM
2831 end($this->stack)->namespaceURI === self::NS_MATHML &&
2832 end($this->stack)->tagName === 'annotation-xml'
2833 ) ||
2834 (
2835 // XDOM
2836 end($this->stack)->namespaceURI === self::NS_SVG &&
2837 in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
2838 ) ||
2839 (
2840 // XSKETCHY && XDOM
2841 end($this->stack)->namespaceURI === self::NS_HTML
2842 ))
2843 ) || $token['type'] === HTML5_Tokenizer::ENDTAG
2844 ) {
2845 $this->processWithRulesFor($token, $this->secondary_mode);
2846 /* If, after doing so, the insertion mode is still "in foreign
2847 * content", but there is no element in scope that has a namespace
2848 * other than the HTML namespace, switch the insertion mode to the
2849 * secondary insertion mode. */
2850 if ($this->mode === self::IN_FOREIGN_CONTENT) {
2851 $found = false;
2852 // this basically duplicates elementInScope()
2853 for ($i = count($this->stack) - 1; $i >= 0; $i--) {
2854 // XDOM
2855 $node = $this->stack[$i];
2856 if ($node->namespaceURI !== self::NS_HTML) {
2857 $found = true;
2858 break;
2859 } elseif (in_array($node->tagName, array('table', 'html',
2860 'applet', 'caption', 'td', 'th', 'button', 'marquee',
2861 'object')) || ($node->tagName === 'foreignObject' &&
2862 $node->namespaceURI === self::NS_SVG)) {
2863 break;
2864 }
2865 }
2866 if (!$found) {
2867 $this->mode = $this->secondary_mode;
2868 }
2869 }
2870 } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
2871 $token['type'] === HTML5_Tokenizer::STARTTAG &&
2872 (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
2873 "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
2874 "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
2875 "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
2876 "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
2877 "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2878 $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2879 // XERROR: parse error
2880 do {
2881 $node = array_pop($this->stack);
2882 // XDOM
2883 } while ($node->namespaceURI !== self::NS_HTML);
2884 $this->stack[] = $node;
2885 $this->mode = $this->secondary_mode;
2886 $this->emitToken($token);
2887 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
2888 static $svg_lookup = array(
2889 'altglyph' => 'altGlyph',
2890 'altglyphdef' => 'altGlyphDef',
2891 'altglyphitem' => 'altGlyphItem',
2892 'animatecolor' => 'animateColor',
2893 'animatemotion' => 'animateMotion',
2894 'animatetransform' => 'animateTransform',
2895 'clippath' => 'clipPath',
2896 'feblend' => 'feBlend',
2897 'fecolormatrix' => 'feColorMatrix',
2898 'fecomponenttransfer' => 'feComponentTransfer',
2899 'fecomposite' => 'feComposite',
2900 'feconvolvematrix' => 'feConvolveMatrix',
2901 'fediffuselighting' => 'feDiffuseLighting',
2902 'fedisplacementmap' => 'feDisplacementMap',
2903 'fedistantlight' => 'feDistantLight',
2904 'feflood' => 'feFlood',
2905 'fefunca' => 'feFuncA',
2906 'fefuncb' => 'feFuncB',
2907 'fefuncg' => 'feFuncG',
2908 'fefuncr' => 'feFuncR',
2909 'fegaussianblur' => 'feGaussianBlur',
2910 'feimage' => 'feImage',
2911 'femerge' => 'feMerge',
2912 'femergenode' => 'feMergeNode',
2913 'femorphology' => 'feMorphology',
2914 'feoffset' => 'feOffset',
2915 'fepointlight' => 'fePointLight',
2916 'fespecularlighting' => 'feSpecularLighting',
2917 'fespotlight' => 'feSpotLight',
2918 'fetile' => 'feTile',
2919 'feturbulence' => 'feTurbulence',
2920 'foreignobject' => 'foreignObject',
2921 'glyphref' => 'glyphRef',
2922 'lineargradient' => 'linearGradient',
2923 'radialgradient' => 'radialGradient',
2924 'textpath' => 'textPath',
2925 );
2926 // XDOM
2927 $current = end($this->stack);
2928 if ($current->namespaceURI === self::NS_MATHML) {
2929 $token = $this->adjustMathMLAttributes($token);
2930 }
2931 if ($current->namespaceURI === self::NS_SVG &&
2932 isset($svg_lookup[$token['name']])) {
2933 $token['name'] = $svg_lookup[$token['name']];
2934 }
2935 if ($current->namespaceURI === self::NS_SVG) {
2936 $token = $this->adjustSVGAttributes($token);
2937 }
2938 $token = $this->adjustForeignAttributes($token);
2939 $this->insertForeignElement($token, $current->namespaceURI);
2940 if (isset($token['self-closing'])) {
2941 array_pop($this->stack);
2942 // XERROR: acknowledge self-closing flag
2943 }
2944 }
2945 break;
2946
2947 case self::AFTER_BODY:
2948 /* Handle the token as follows: */
2949
2950 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2951 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2952 or U+0020 SPACE */
2953 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2954 /* Process the token as it would be processed if the insertion mode
2955 was "in body". */
2956 $this->processWithRulesFor($token, self::IN_BODY);
2957
2958 /* A comment token */
2959 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2960 /* Append a Comment node to the first element in the stack of open
2961 elements (the html element), with the data attribute set to the
2962 data given in the comment token. */
2963 // XDOM
2964 $comment = $this->dom->createComment($token['data']);
2965 $this->stack[0]->appendChild($comment);
2966
2967 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2968 // parse error
2969
2970 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2971 $this->processWithRulesFor($token, self::IN_BODY);
2972
2973 /* An end tag with the tag name "html" */
2974 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
2975 /* If the parser was originally created as part of the HTML
2976 * fragment parsing algorithm, this is a parse error; ignore
2977 * the token. (fragment case) */
2978 $this->ignored = true;
2979 // XERROR: implement this
2980
2981 $this->mode = self::AFTER_AFTER_BODY;
2982
2983 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2984 /* Stop parsing */
2985
2986 /* Anything else */
2987 } else {
2988 /* Parse error. Set the insertion mode to "in body" and reprocess
2989 the token. */
2990 $this->mode = self::IN_BODY;
2991 $this->emitToken($token);
2992 }
2993 break;
2994
2995 case self::IN_FRAMESET:
2996 /* Handle the token as follows: */
2997
2998 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2999 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3000 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3001 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3002 /* Append the character to the current node. */
3003 $this->insertText($token['data']);
3004
3005 /* A comment token */
3006 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
3007 /* Append a Comment node to the current node with the data
3008 attribute set to the data given in the comment token. */
3009 $this->insertComment($token['data']);
3010
3011 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3012 // parse error
3013
3014 /* A start tag with the tag name "frameset" */
3015 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3016 $token['name'] === 'frameset') {
3017 $this->insertElement($token);
3018
3019 /* An end tag with the tag name "frameset" */
3020 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
3021 $token['name'] === 'frameset') {
3022 /* If the current node is the root html element, then this is a
3023 parse error; ignore the token. (fragment case) */
3024 if(end($this->stack)->tagName === 'html') {
3025 $this->ignored = true;
3026 // Parse error
3027
3028 } else {
3029 /* Otherwise, pop the current node from the stack of open
3030 elements. */
3031 array_pop($this->stack);
3032
3033 /* If the parser was not originally created as part of the HTML
3034 * fragment parsing algorithm (fragment case), and the current
3035 * node is no longer a frameset element, then switch the
3036 * insertion mode to "after frameset". */
3037 $this->mode = self::AFTER_FRAMESET;
3038 }
3039
3040 /* A start tag with the tag name "frame" */
3041 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3042 $token['name'] === 'frame') {
3043 /* Insert an HTML element for the token. */
3044 $this->insertElement($token);
3045
3046 /* Immediately pop the current node off the stack of open elements. */
3047 array_pop($this->stack);
3048
3049 // XERROR: Acknowledge the token's self-closing flag, if it is set.
3050
3051 /* A start tag with the tag name "noframes" */
3052 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3053 $token['name'] === 'noframes') {
3054 /* Process the token using the rules for the "in head" insertion mode. */
3055 $this->processwithRulesFor($token, self::IN_HEAD);
3056
3057 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3058 // XERROR: If the current node is not the root html element, then this is a parse error.
3059 /* Stop parsing */
3060 /* Anything else */
3061 } else {
3062 /* Parse error. Ignore the token. */
3063 $this->ignored = true;
3064 }
3065 break;
3066
3067 case self::AFTER_FRAMESET:
3068 /* Handle the token as follows: */
3069
3070 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3071 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3072 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3073 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3074 /* Append the character to the current node. */
3075 $this->insertText($token['data']);
3076
3077 /* A comment token */
3078 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
3079 /* Append a Comment node to the current node with the data
3080 attribute set to the data given in the comment token. */
3081 $this->insertComment($token['data']);
3082
3083 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3084 // parse error
3085
3086 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
3087 $this->processWithRulesFor($token, self::IN_BODY);
3088
3089 /* An end tag with the tag name "html" */
3090 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
3091 $token['name'] === 'html') {
3092 $this->mode = self::AFTER_AFTER_FRAMESET;
3093
3094 /* A start tag with the tag name "noframes" */
3095 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3096 $token['name'] === 'noframes') {
3097 $this->processWithRulesFor($token, self::IN_HEAD);
3098
3099 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3100 /* Stop parsing */
3101
3102 /* Anything else */
3103 } else {
3104 /* Parse error. Ignore the token. */
3105 $this->ignored = true;
3106 }
3107 break;
3108
3109 case self::AFTER_AFTER_BODY:
3110 /* A comment token */
3111 if($token['type'] === HTML5_Tokenizer::COMMENT) {
3112 /* Append a Comment node to the Document object with the data
3113 attribute set to the data given in the comment token. */
3114 // XDOM
3115 $comment = $this->dom->createComment($token['data']);
3116 $this->dom->appendChild($comment);
3117
3118 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3119 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3120 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3121 $this->processWithRulesFor($token, self::IN_BODY);
3122
3123 /* An end-of-file token */
3124 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3125 /* OMG DONE!! */
3126 } else {
3127 // parse error
3128 $this->mode = self::IN_BODY;
3129 $this->emitToken($token);
3130 }
3131 break;
3132
3133 case self::AFTER_AFTER_FRAMESET:
3134 /* A comment token */
3135 if($token['type'] === HTML5_Tokenizer::COMMENT) {
3136 /* Append a Comment node to the Document object with the data
3137 attribute set to the data given in the comment token. */
3138 // XDOM
3139 $comment = $this->dom->createComment($token['data']);
3140 $this->dom->appendChild($comment);
3141
3142 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3143 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3144 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3145 $this->processWithRulesFor($token, self::IN_BODY);
3146
3147 /* An end-of-file token */
3148 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3149 /* OMG DONE!! */
3150 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
3151 $this->processWithRulesFor($token, self::IN_HEAD);
3152 } else {
3153 // parse error
3154 }
3155 break;
3156 }
3157 // end funky indenting
3158 }
3159
/**
 * Creates an element for a start tag token and, optionally, inserts it
 * into the tree.
 *
 * @param array $token  Token to build the element from. Reads 'name' and,
 *                      when present and non-empty, 'attr' (iterated as a
 *                      list of arrays with 'name' and 'value' keys).
 * @param bool  $append When true, the element is appended through
 *                      appendToRealParent() and pushed onto the stack of
 *                      open elements; when false it is only created.
 * @return mixed The element returned by $this->dom->createElementNS().
 */
private function insertElement($token, $append = true) {
    // strpos() is used for its truthiness: a colon at offset 0 and a
    // missing colon both select NS_HTML; a colon anywhere later selects
    // NS_XHTML.
    $namespaceURI = strpos($token['name'], ':') ? self::NS_XHTML : self::NS_HTML;
    $el = $this->dom->createElementNS($namespaceURI, $token['name']);

    if (!empty($token['attr'])) {
        foreach ($token['attr'] as $attr) {
            // mike@macgirvin.com 2011-11-17, check attribute name for
            // validity (ignoring extenders and combiners) as illegal chars
            // in names causes everything to abort.
            //
            // The trailing class is quantified with "*" (it used to be "+")
            // so that one-character names such as "x" are accepted too.
            $valid = preg_match('/^[a-zA-Z\_\:][\-a-zA-Z0-9\_\:\.]*$/', $attr['name']);

            // The first occurrence of an attribute wins; later duplicates
            // are ignored.
            if ($attr['name'] && !$el->hasAttribute($attr['name']) && $valid) {
                $el->setAttribute($attr['name'], $attr['value']);
            }
        }
    }

    if ($append) {
        $this->appendToRealParent($el);
        $this->stack[] = $el;
    }

    return $el;
}
3185
/**
 * Appends a text node holding $data through appendToRealParent().
 *
 * Nothing is inserted when $data is the empty string, or when it becomes
 * empty after the leading line feed has been dropped because
 * $this->ignore_lf_token is truthy.
 *
 * @param string $data Character data to insert.
 * @return void
 */
private function insertText($data) {
    if ($data === '') {
        return;
    }

    if ($this->ignore_lf_token && $data[0] === "\n") {
        $data = substr($data, 1);

        // substr() reports "nothing left" as false before PHP 8.0 and as
        // '' from 8.0 on; handle both so no empty text node is created.
        if ($data === false || $data === '') {
            return;
        }
    }

    $text = $this->dom->createTextNode($data);
    $this->appendToRealParent($text);
}
3197
/**
 * Builds a comment node from $data and hands it to appendToRealParent().
 *
 * @param string $data Text of the comment.
 * @return void
 */
private function insertComment($data) {
    $this->appendToRealParent($this->dom->createComment($data));
}
3202
/**
 * Inserts $node either into the current node or, in the foster-parent
 * case, through fosterParent().
 *
 * @param mixed $node Node to insert.
 * @return void
 */
private function appendToRealParent($node) {
    /* While foster parenting is switched on and the current node is a
    table, tbody, tfoot, thead, or tr element, a node that would be
    inserted into the current node goes to the foster parent instead. */
    $use_foster_parent = $this->foster_parent &&
        in_array(end($this->stack)->tagName,
            array('table', 'tbody', 'tfoot', 'thead', 'tr'));

    if ($use_foster_parent) {
        $this->fosterParent($node);
        return;
    }

    // Ordinary case: the node becomes the last child of the current node.
    end($this->stack)->appendChild($node);
}
3215
/**
 * Tells whether the stack of open elements has a given element in scope.
 *
 * The stack is walked from the current (last) node towards the first one.
 * The walk ends with true at the first node whose tagName equals $el and
 * with false at the first scope boundary met before that.
 *
 * @param string|array $el    Tag name to look for, or an array of tag
 *                            names; an array matches when any of its
 *                            names is in scope.
 * @param mixed        $scope One of self::SCOPE, self::SCOPE_TABLE or
 *                            self::SCOPE_LISTITEM.
 * @return bool True when in scope; false otherwise, including when the
 *              whole stack was walked without a match or a boundary.
 */
private function elementInScope($el, $scope = self::SCOPE) {
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $scope)) {
                return true;
            }
        }

        return false;
    }

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];

        /* If node is the target node, terminate in a match state. */
        if ($node->tagName === $el) {
            return true;
        }

        // We've expanded the logic for these states a little differently;
        // Hixie's refactoring into "specific scope" is more general, but
        // this "gets the job done".

        // Boundaries common to all scopes.
        if ($node->tagName === 'table' || $node->tagName === 'html') {
            return false;
        }

        // Boundaries for "in scope" and "in list item scope" (everything
        // except table scope).
        if ($scope !== self::SCOPE_TABLE && (
            in_array($node->tagName, array('applet', 'caption', 'td',
                'th', 'button', 'marquee', 'object')) ||
            ($node->tagName === 'foreignObject' && $node->namespaceURI === self::NS_SVG)
        )) {
            return false;
        }

        // Extra boundaries for "in list item scope".
        if ($scope === self::SCOPE_LISTITEM && in_array($node->tagName, array('ol', 'ul'))) {
            return false;
        }

        /* Otherwise, move on to the previous entry in the stack of open
        elements. */
    }

    // The stack was exhausted (or empty). Previously the function fell
    // off the end here and returned null; report "not in scope" explicitly.
    return false;
}
3265
/**
 * Reconstructs the active formatting elements.
 *
 * Starting from the end of $this->a_formatting, the list is rewound to the
 * entry just after the last marker or the last element that is still on
 * the stack of open elements. Every entry from there to the end of the
 * list is then shallow-cloned, inserted through appendToRealParent(),
 * pushed onto the stack and substituted for the original list entry.
 *
 * Assumes $this->a_formatting is indexed 0..n-1 without gaps, as the
 * index arithmetic below relies on it.
 *
 * @return false|null false when there was nothing to reconstruct; no
 *                    explicit value otherwise.
 */
private function reconstructActiveFormattingElements() {
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $formatting_elements = count($this->a_formatting);

    if ($formatting_elements === 0) {
        return false;
    }

    /* 3. Let entry be the last (most recently added) element in the list
    of active formatting elements. */
    $entry = end($this->a_formatting);

    /* 2. If the last (most recently added) entry in the list of active
    formatting elements is a marker, or if it is an element that is in the
    stack of open elements, then there is nothing to reconstruct; stop this
    algorithm. */
    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    // Whether step 7 (advance to the next entry) has to run before the
    // next clone. It stays true when the rewind below stops on a marker or
    // an open element: that entry must be skipped, not cloned. The flag
    // used to be left unset on that path, so the marker / open element
    // itself was handed to cloneNode().
    $step_seven = true;
    $a = $formatting_elements - 1;

    while (true) {
        /* 4. If there are no entries before entry in the list of active
        formatting elements, then jump to step 8. */
        if ($a === 0) {
            $step_seven = false;
            break;
        }

        /* 5. Let entry be the entry one earlier than entry in the list of
        active formatting elements. */
        $a--;
        $entry = $this->a_formatting[$a];

        /* 6. If entry is neither a marker nor an element that is also in
        the stack of open elements, go to step 4. Otherwise continue with
        step 7. */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            break;
        }
    }

    while (true) {
        /* 7. Let entry be the element one later than entry in the list of
        active formatting elements. */
        if ($step_seven) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $entry->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        $this->appendToRealParent($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;

        /* 11. If the entry for clone in the list of active formatting
        elements is not the last entry in the list, return to step 7. */
        if (end($this->a_formatting) === $clone) {
            break;
        }

        $step_seven = true;
    }
}
3336
/**
 * Clears the list of active formatting elements up to the last marker.
 *
 * Entries are removed from the end of $this->a_formatting until a marker
 * has been removed. If the list holds no marker, it is simply emptied
 * (the loop used to spin forever in that situation).
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    while (!empty($this->a_formatting)) {
        /* 1. Let entry be the last (most recently added) entry in the list
        of active formatting elements.
        2. Remove entry from the list of active formatting elements. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
3357
/**
 * Generates implied end tags: pops the current node off the stack of open
 * elements for as long as it is a dc, dd, ds, dt, li, p, td, th or tr
 * element that is not listed in $exclude.
 *
 * NOTE(review): the comment this replaces listed option, optgroup, rp and
 * rt and did not list td, th or tr; the element list in the code has been
 * kept exactly as it was - confirm which of the two is intended.
 *
 * @param array $exclude Tag names that must not be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags($exclude = array()) {
    $elements = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    // Stop on an empty stack instead of reading tagName off the false that
    // end() returns for an empty array.
    while (!empty($this->stack) && in_array(end($this->stack)->tagName, $elements, true)) {
        array_pop($this->stack);
    }
}
3371
/**
 * Classifies a node by looking its tagName up in the $special, $scoping
 * and $formatting lists, in that order.
 *
 * @param mixed $node Node to classify; a backtrace is printed when it is
 *                    not an object.
 * @return mixed self::SPECIAL, self::SCOPING or self::FORMATTING for the
 *               first list that contains the tag name, self::PHRASING
 *               when none does.
 */
private function getElementCategory($node) {
    if (!is_object($node)) {
        debug_print_backtrace();
    }

    $tag = $node->tagName;

    if (in_array($tag, $this->special)) {
        return self::SPECIAL;
    }

    if (in_array($tag, $this->scoping)) {
        return self::SCOPING;
    }

    if (in_array($tag, $this->formatting)) {
        return self::FORMATTING;
    }

    return self::PHRASING;
}
3387
/**
 * Clears the stack of open elements back to a table context: pops
 * elements while the current node's tag name is not one of $elements.
 *
 * The loop also ends when the stack runs empty; it used to spin forever
 * when none of the listed elements was on the stack.
 *
 * @param array $elements Tag names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements) {
    while (!empty($this->stack) && !in_array(end($this->stack)->tagName, $elements)) {
        array_pop($this->stack);
    }
}
3403
/**
 * Resets the insertion mode appropriately.
 *
 * Walks the stack of open elements from the current node towards the
 * first one and sets $this->mode according to the first node that
 * matches one of the numbered steps below. Step 11 also sets
 * $this->secondary_mode.
 *
 * @param mixed $context Context element substituted for the first node of
 *                       the stack (fragment case), or null when there is
 *                       none.
 * @return void
 */
private function resetInsertionMode($context = null) {
    /* 1. Let last be false. */
    $last = false;
    $leng = count($this->stack);

    for ($n = $leng - 1; $n >= 0; $n--) {
        /* 2. Let node be the last node in the stack of open elements. */
        $node = $this->stack[$n];

        /* 3. If node is the first node in the stack of open elements, then
         * set last to true and set node to the context element. (fragment
         * case) */
        if ($this->stack[0]->isSameNode($node)) {
            $last = true;
            $node = $context;
        }

        // Without a context element there is nothing to inspect at the
        // root. Every comparison below used to be made against null
        // (one notice/warning per property read) before ending up in
        // step 16; go there directly.
        if ($node === null) {
            $this->mode = self::IN_BODY;
            break;
        }

        /* 4. If node is a select element, then switch the insertion mode to
        "in select" and abort these steps. (fragment case) */
        if ($node->tagName === 'select') {
            $this->mode = self::IN_SELECT;
            break;

        /* 5. If node is a td or th element, then switch the insertion mode
        to "in cell" and abort these steps. */
        } elseif ($node->tagName === 'td' || $node->tagName === 'th') {
            $this->mode = self::IN_CELL;
            break;

        /* 6. If node is a tr element, then switch the insertion mode to
        "in row" and abort these steps. */
        } elseif ($node->tagName === 'tr') {
            $this->mode = self::IN_ROW;
            break;

        /* 7. If node is a tbody, thead, or tfoot element, then switch the
        insertion mode to "in table body" and abort these steps. */
        } elseif (in_array($node->tagName, array('tbody', 'thead', 'tfoot'))) {
            $this->mode = self::IN_TABLE_BODY;
            break;

        /* 8. If node is a caption element, then switch the insertion mode
        to "in caption" and abort these steps. */
        } elseif ($node->tagName === 'caption') {
            $this->mode = self::IN_CAPTION;
            break;

        /* 9. If node is a colgroup element, then switch the insertion mode
        to "in column group" and abort these steps. (innerHTML case) */
        } elseif ($node->tagName === 'colgroup') {
            $this->mode = self::IN_COLUMN_GROUP;
            break;

        /* 10. If node is a table element, then switch the insertion mode
        to "in table" and abort these steps. */
        } elseif ($node->tagName === 'table') {
            $this->mode = self::IN_TABLE;
            break;

        /* 11. If node is an element from the MathML namespace or the SVG
         * namespace, then switch the insertion mode to "in foreign
         * content", let the secondary insertion mode be "in body", and
         * abort these steps. */
        } elseif ($node->namespaceURI === self::NS_SVG ||
        $node->namespaceURI === self::NS_MATHML) {
            $this->mode = self::IN_FOREIGN_CONTENT;
            $this->secondary_mode = self::IN_BODY;
            break;

        /* 12. If node is a head element, then switch the insertion mode
        to "in body" ("in body"! not "in head"!) and abort these steps.
        (fragment case) */
        } elseif ($node->tagName === 'head') {
            $this->mode = self::IN_BODY;
            break;

        /* 13. If node is a body element, then switch the insertion mode to
        "in body" and abort these steps. */
        } elseif ($node->tagName === 'body') {
            $this->mode = self::IN_BODY;
            break;

        /* 14. If node is a frameset element, then switch the insertion
        mode to "in frameset" and abort these steps. (fragment case) */
        } elseif ($node->tagName === 'frameset') {
            $this->mode = self::IN_FRAMESET;
            break;

        /* 15. If node is an html element, then: if the head element
        pointer is null, switch the insertion mode to "before head",
        otherwise, switch the insertion mode to "after head". In either
        case, abort these steps. (fragment case) */
        } elseif ($node->tagName === 'html') {
            $this->mode = ($this->head_pointer === null)
                ? self::BEFORE_HEAD
                : self::AFTER_HEAD;

            break;

        /* 16. If last is true, then set the insertion mode to "in body"
        and abort these steps. (fragment case) */
        } elseif ($last) {
            $this->mode = self::IN_BODY;
            break;
        }
    }
}
3511
private function closeCell() {
    /* If the stack of open elements has a td or th element in table scope,
    then act as if an end tag token with that tag name had been seen.
    td is probed first; th is only probed when td is not in scope. */
    if ($this->elementInScope('td', self::SCOPE_TABLE)) {
        $cellName = 'td';
    } elseif ($this->elementInScope('th', self::SCOPE_TABLE)) {
        $cellName = 'th';
    } else {
        // Neither kind of cell is open: nothing to close.
        return;
    }

    $endTag = array(
        'name' => $cellName,
        'type' => HTML5_Tokenizer::ENDTAG
    );
    $this->emitToken($endTag);
}
3526
private function processWithRulesFor($token, $mode) {
    /* "using the rules for the m insertion mode", where m is one of these
     * modes, the user agent must use the rules described under the m
     * insertion mode's section, but must leave the insertion mode
     * unchanged unless the rules in m themselves switch the insertion mode
     * to a new value. */
    // Hands the token to emitToken() together with the mode whose rules
    // should be applied, and passes emitToken()'s result straight back.
    $outcome = $this->emitToken($token, $mode);
    return $outcome;
}
3535
private function insertCDATAElement($token) {
    // Insert the element first, then remember the insertion mode that was
    // active so it can be restored later, and switch both the tree builder
    // mode and the tokenizer content model over to CDATA handling.
    $this->insertElement($token);

    $previousMode = $this->mode;
    $this->content_model = HTML5_Tokenizer::CDATA;
    $this->mode = self::IN_CDATA_RCDATA;
    $this->original_mode = $previousMode;
}
3542
private function insertRCDATAElement($token) {
    // Insert the element first, then remember the insertion mode that was
    // active so it can be restored later, and switch both the tree builder
    // mode and the tokenizer content model over to RCDATA handling.
    $this->insertElement($token);

    $previousMode = $this->mode;
    $this->content_model = HTML5_Tokenizer::RCDATA;
    $this->mode = self::IN_CDATA_RCDATA;
    $this->original_mode = $previousMode;
}
3549
private function getAttr($token, $key) {
    // Looks up the value of the attribute named $key on a token.
    // Returns false when the token carries no 'attr' entry or no attribute
    // has that name; when several attributes share the name, the value of
    // the last one is returned.
    $value = false;
    if (isset($token['attr'])) {
        foreach ($token['attr'] as $attribute) {
            if ($attribute['name'] === $key) {
                $value = $attribute['value'];
            }
        }
    }
    return $value;
}
3558
private function getCurrentTable() {
    /* The current table is the last table element in the stack of open
     * elements, if there is one. If there is no table element in the stack
     * of open elements (fragment case), then the current table is the
     * first element in the stack of open elements (the html element). */
    $position = count($this->stack);
    while ($position-- > 0) {
        $candidate = $this->stack[$position];
        if ($candidate->tagName === 'table') {
            return $candidate;
        }
    }
    // No table is open: fall back to the bottom of the stack.
    return $this->stack[0];
}
3571
/**
 * Determines the foster parent element for the current stack of open
 * elements.
 *
 * @return DOMNode the parent of the last open table when that parent is an
 *         element; the first stack entry when no table is open; otherwise
 *         the stack entry immediately below the last open table.
 */
private function getFosterParent() {
    /* The foster parent element is the parent element of the last
    table element in the stack of open elements, if there is a
    table element and it has such a parent element. If there is no
    table element in the stack of open elements (innerHTML case),
    then the foster parent element is the first element in the
    stack of open elements (the html element). Otherwise, if there
    is a table element in the stack of open elements, but the last
    table element in the stack of open elements has no parent, or
    its parent node is not an element, then the foster parent
    element is the element before the last table element in the
    stack of open elements. */
    $table = null;
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        if ($this->stack[$n]->tagName === 'table') {
            $table = $this->stack[$n];
            break;
        }
    }

    // No table is open (innerHTML case): use the bottom of the stack.
    if ($table === null) {
        return $this->stack[0];
    }

    // Only an *element* parent qualifies. Previously any non-null parent
    // (e.g. a Document node) was returned, which made the nodeType check
    // further down unreachable.
    $parent = $table->parentNode;
    if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
        return $parent;
    }

    // The table is detached, or its parent is not an element: use the
    // stack entry just below the table.
    // NOTE(review): assumes the table is never the first stack entry.
    return $this->stack[$n - 1];
}
3602
/**
 * Foster parents a node: inserts it next to, rather than inside, the
 * current table.
 *
 * @param DOMNode $node the node to insert
 */
public function fosterParent($node) {
    $foster_parent = $this->getFosterParent();
    $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
    /* When a node node is to be foster parented, the node node must be
     * inserted into the foster parent element. */
    /* If the foster parent element is the parent element of the last table
     * element in the stack of open elements, then node must be inserted
     * immediately before the last table element in the stack of open
     * elements in the foster parent element; otherwise, node must be
     * appended to the foster parent element. */
    // A table without a parent cannot be a child of the foster parent;
    // checking for null first avoids calling isSameNode() on null.
    $table_is_child_of_foster_parent =
        $table->tagName === 'table'
        && $table->parentNode !== null
        && $table->parentNode->isSameNode($foster_parent);

    if ($table_is_child_of_foster_parent) {
        $foster_parent->insertBefore($node, $table);
    } else {
        $foster_parent->appendChild($node);
    }
}
3619
/**
 * For debugging: echoes the tag names of the stack of open elements,
 * bottom of the stack first.
 */
private function printStack() {
    $tags = array();
    foreach ($this->stack as $open) {
        array_push($tags, $open->tagName);
    }
    $joined = implode(', ', $tags);
    echo " -> stack [" . $joined . "]\n";
}
3630
/**
 * For debugging: echoes the list of active formatting elements, showing
 * marker entries as "MARKER". Prints nothing when the list is empty.
 */
private function printActiveFormattingElements() {
    if (!$this->a_formatting) {
        return;
    }
    $labels = array();
    foreach ($this->a_formatting as $entry) {
        $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
    }
    $joined = implode(', ', $labels);
    echo " -> active formatting [" . $joined . "]\n";
}
3643
public function currentTableIsTainted() {
    // True when the current table carries a non-empty "tainted" property.
    $table = $this->getCurrentTable();
    if (empty($table->tainted)) {
        return false;
    }
    return true;
}
3647
/**
 * Sets up the tree constructor for building a fragment.
 *
 * The fragment flag is always raised. When a context tag name is given,
 * the tokenizer content model, the root element, the stack of open
 * elements, the insertion mode and the form pointer are initialised too.
 */
public function setupContext($context = null) {
    $this->fragment = true;
    if (!$context) {
        return;
    }

    $context = $this->dom->createElementNS(self::NS_HTML, $context);

    /* 4.1. Set the HTML parser's tokenization stage's content model
     * flag according to the context element, as follows: */
    $contextName = $context->tagName;
    $rcdataNames = array('title', 'textarea');
    // XSCRIPT: noscript is listed here on the assumption that scripting
    // is enabled
    $cdataNames = array(
        'style', 'script', 'xmp', 'iframe', 'noembed', 'noframes',
        'noscript',
    );
    if (in_array($contextName, $rcdataNames, true)) {
        $this->content_model = HTML5_Tokenizer::RCDATA;
    } elseif (in_array($contextName, $cdataNames, true)) {
        $this->content_model = HTML5_Tokenizer::CDATA;
    } elseif ($contextName === 'plaintext') {
        $this->content_model = HTML5_Tokenizer::PLAINTEXT;
    }

    /* 4.2. Let root be a new html element with no attributes. */
    $root = $this->dom->createElementNS(self::NS_HTML, 'html');
    $this->root = $root;

    /* 4.3 Append the element root to the Document node created above. */
    $this->dom->appendChild($root);

    /* 4.4 Set up the parser's stack of open elements so that it
     * contains just the single element root. */
    $this->stack = array($root);

    /* 4.5 Reset the parser's insertion mode appropriately. */
    $this->resetInsertionMode($context);

    /* 4.6 Set the parser's form element pointer to the nearest node
     * to the context element that is a form element (going straight up
     * the ancestor chain, and including the element itself, if it is a
     * form element), or, if there is no such form element, to null. */
    for ($ancestor = $context; $ancestor; $ancestor = $ancestor->parentNode) {
        if ($ancestor->tagName === 'form') {
            $this->form_pointer = $ancestor;
            break;
        }
    }
}
3696
/**
 * Restores the mixed-case spelling of MathML attribute names: an
 * attribute named "definitionurl" is renamed to "definitionURL".
 *
 * @param array $token tag token; its optional 'attr' entry is a list of
 *        array('name' => ..., 'value' => ...) pairs
 * @return array the token with adjusted attribute names; a token without
 *         attributes is returned unchanged
 */
public function adjustMathMLAttributes($token) {
    // Tokens without attributes need no work. Guarding here also avoids
    // iterating a missing 'attr' entry by reference, which would add a
    // null 'attr' key to the returned token.
    if (empty($token['attr'])) {
        return $token;
    }
    // Index-based writes instead of a by-reference loop, so no reference
    // is left dangling inside the returned array.
    foreach ($token['attr'] as $index => $kp) {
        if ($kp['name'] === 'definitionurl') {
            $token['attr'][$index]['name'] = 'definitionURL';
        }
    }
    return $token;
}
3705
/**
 * Restores the mixed-case spelling of SVG attribute names (for example
 * "viewbox" becomes "viewBox"). Names not in the table are left as is.
 *
 * @param array $token tag token; its optional 'attr' entry is a list of
 *        array('name' => ..., 'value' => ...) pairs
 * @return array the token with adjusted attribute names; a token without
 *         attributes is returned unchanged
 */
public function adjustSVGAttributes($token) {
    // Lower-cased attribute name => spelling to restore.
    static $lookup = array(
        'attributename' => 'attributeName',
        'attributetype' => 'attributeType',
        'basefrequency' => 'baseFrequency',
        'baseprofile' => 'baseProfile',
        'calcmode' => 'calcMode',
        'clippathunits' => 'clipPathUnits',
        'contentscripttype' => 'contentScriptType',
        'contentstyletype' => 'contentStyleType',
        'diffuseconstant' => 'diffuseConstant',
        'edgemode' => 'edgeMode',
        'externalresourcesrequired' => 'externalResourcesRequired',
        'filterres' => 'filterRes',
        'filterunits' => 'filterUnits',
        'glyphref' => 'glyphRef',
        'gradienttransform' => 'gradientTransform',
        'gradientunits' => 'gradientUnits',
        'kernelmatrix' => 'kernelMatrix',
        'kernelunitlength' => 'kernelUnitLength',
        'keypoints' => 'keyPoints',
        'keysplines' => 'keySplines',
        'keytimes' => 'keyTimes',
        'lengthadjust' => 'lengthAdjust',
        'limitingconeangle' => 'limitingConeAngle',
        'markerheight' => 'markerHeight',
        'markerunits' => 'markerUnits',
        'markerwidth' => 'markerWidth',
        'maskcontentunits' => 'maskContentUnits',
        'maskunits' => 'maskUnits',
        'numoctaves' => 'numOctaves',
        'pathlength' => 'pathLength',
        'patterncontentunits' => 'patternContentUnits',
        'patterntransform' => 'patternTransform',
        'patternunits' => 'patternUnits',
        'pointsatx' => 'pointsAtX',
        'pointsaty' => 'pointsAtY',
        'pointsatz' => 'pointsAtZ',
        'preservealpha' => 'preserveAlpha',
        'preserveaspectratio' => 'preserveAspectRatio',
        'primitiveunits' => 'primitiveUnits',
        'refx' => 'refX',
        'refy' => 'refY',
        'repeatcount' => 'repeatCount',
        'repeatdur' => 'repeatDur',
        'requiredextensions' => 'requiredExtensions',
        'requiredfeatures' => 'requiredFeatures',
        'specularconstant' => 'specularConstant',
        'specularexponent' => 'specularExponent',
        'spreadmethod' => 'spreadMethod',
        'startoffset' => 'startOffset',
        'stddeviation' => 'stdDeviation',
        'stitchtiles' => 'stitchTiles',
        'surfacescale' => 'surfaceScale',
        'systemlanguage' => 'systemLanguage',
        'tablevalues' => 'tableValues',
        'targetx' => 'targetX',
        'targety' => 'targetY',
        'textlength' => 'textLength',
        'viewbox' => 'viewBox',
        'viewtarget' => 'viewTarget',
        'xchannelselector' => 'xChannelSelector',
        'ychannelselector' => 'yChannelSelector',
        'zoomandpan' => 'zoomAndPan',
    );
    // Tokens without attributes need no work. Guarding here also avoids
    // iterating a missing 'attr' entry by reference, which would add a
    // null 'attr' key to the returned token.
    if (empty($token['attr'])) {
        return $token;
    }
    // Index-based writes instead of a by-reference loop, so no reference
    // is left dangling inside the returned array.
    foreach ($token['attr'] as $index => $kp) {
        if (isset($lookup[$kp['name']])) {
            $token['attr'][$index]['name'] = $lookup[$kp['name']];
        }
    }
    return $token;
}
3778
/**
 * Replaces the names of known namespaced attributes (xlink:*, xml:*,
 * xmlns, xmlns:xlink) with an array(prefix, local name, namespace URI)
 * triple. Other attribute names are left as plain strings.
 *
 * @param array $token tag token; its optional 'attr' entry is a list of
 *        array('name' => ..., 'value' => ...) pairs
 * @return array the token with adjusted attribute names; a token without
 *         attributes is returned unchanged
 */
public function adjustForeignAttributes($token) {
    // Qualified attribute name => array(prefix, local name, namespace).
    static $lookup = array(
        'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
        'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
        'xlink:href' => array('xlink', 'href', self::NS_XLINK),
        'xlink:role' => array('xlink', 'role', self::NS_XLINK),
        'xlink:show' => array('xlink', 'show', self::NS_XLINK),
        'xlink:title' => array('xlink', 'title', self::NS_XLINK),
        'xlink:type' => array('xlink', 'type', self::NS_XLINK),
        'xml:base' => array('xml', 'base', self::NS_XML),
        'xml:lang' => array('xml', 'lang', self::NS_XML),
        'xml:space' => array('xml', 'space', self::NS_XML),
        'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
        'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
    );
    // Tokens without attributes need no work. Guarding here also avoids
    // iterating a missing 'attr' entry by reference, which would add a
    // null 'attr' key to the returned token.
    if (empty($token['attr'])) {
        return $token;
    }
    // Index-based writes instead of a by-reference loop, so no reference
    // is left dangling inside the returned array.
    foreach ($token['attr'] as $index => $kp) {
        if (isset($lookup[$kp['name']])) {
            $token['attr'][$index]['name'] = $lookup[$kp['name']];
        }
    }
    return $token;
}
3801
public function insertForeignElement($token, $namespaceURI) {
    // Creates an element in the given namespace from a tag token, copies
    // the token's attributes onto it, appends it via appendToRealParent()
    // and pushes it onto the stack of open elements.
    $element = $this->dom->createElementNS($namespaceURI, $token['name']);

    $attributes = empty($token['attr']) ? array() : $token['attr'];
    foreach ($attributes as $pair) {
        // An array name is a (prefix, local name, namespace) triple;
        // a plain string name is looked up under the HTML namespace.
        if (is_array($pair['name'])) {
            $attrNamespace = $pair['name'][2];
            $attrName = $pair['name'][1];
        } else {
            $attrNamespace = self::NS_HTML;
            $attrName = $pair['name'];
        }

        // An attribute that is already present is not overwritten.
        if ($element->hasAttributeNS($attrNamespace, $attrName)) {
            continue;
        }

        // XSKETCHY: work around godawful libxml bug
        if ($attrNamespace === self::NS_XLINK) {
            $element->setAttribute('xlink:' . $attrName, $pair['value']);
        } elseif ($attrNamespace === self::NS_HTML) {
            // Another godawful libxml bug
            $element->setAttribute($attrName, $pair['value']);
        } else {
            $element->setAttributeNS($attrNamespace, $attrName, $pair['value']);
        }
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;
    // XERROR: see below
    /* If the newly created element has an xmlns attribute in the XMLNS
     * namespace whose value is not exactly the same as the element's
     * namespace, that is a parse error. Similarly, if the newly created
     * element has an xmlns:xlink attribute in the XMLNS namespace whose
     * value is not the XLink Namespace, that is a parse error. */
}
3835
public function save() {
    // Normalizes the document, then returns the document itself for a
    // full parse. For a fragment parse it returns the child nodes of the
    // fragment root when one was set up, else those of the document.
    $this->dom->normalize();

    if (!$this->fragment) {
        return $this->dom;
    }

    $container = $this->root ? $this->root : $this->dom;
    return $container->childNodes;
}
3848}
3849