]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/libraries/html5/TreeBuilder.php
2f5244f9fd744d1543fa83d4c5432dc7a7ff8773
[github/wallabag/wallabag.git] / inc / 3rdparty / libraries / html5 / TreeBuilder.php
1 <?php
2
3 /*
4
5 Copyright 2007 Jeroen van der Meer <http://jero.net/>
6 Copyright 2009 Edward Z. Yang <edwardzyang@thewritingpot.com>
7
8 Permission is hereby granted, free of charge, to any person obtaining a
9 copy of this software and associated documentation files (the
10 "Software"), to deal in the Software without restriction, including
11 without limitation the rights to use, copy, modify, merge, publish,
12 distribute, sublicense, and/or sell copies of the Software, and to
13 permit persons to whom the Software is furnished to do so, subject to
14 the following conditions:
15
16 The above copyright notice and this permission notice shall be included
17 in all copies or substantial portions of the Software.
18
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
23 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
27 */
28
29 // Tags for FIX ME!!!: (in order of priority)
30 // XXX - should be fixed NAO!
31 // XERROR - with regards to parse errors
32 // XSCRIPT - with regards to scripting mode
33 // XENCODING - with regards to encoding (for reparsing tests)
34 // XDOM - DOM specific code (tagName is explicitly not marked).
35 // this is not (yet) in helper functions.
36
37 class HTML5_TreeBuilder {
38 public $stack = array();
39 public $content_model;
40
41 private $mode;
42 private $original_mode;
43 private $secondary_mode;
44 private $dom;
45 // Whether or not normal insertion of nodes should actually foster
46 // parent (used in one case in spec)
47 private $foster_parent = false;
48 private $a_formatting = array();
49
50 private $head_pointer = null;
51 private $form_pointer = null;
52
53 private $flag_frameset_ok = true;
54 private $flag_force_quirks = false;
55 private $ignored = false;
56 private $quirks_mode = null;
57 // this is set to 2 when we want to ignore the next LF character, and
58 // is decremented at the beginning of each processed token (this way,
59 // code can check for (bool)$ignore_lf_token, but it phases out
60 // appropriately)
61 private $ignore_lf_token = 0;
62 private $fragment = false;
63 private $root;
64
65 private $scoping = array('applet','button','caption','html','marquee','object','table','td','th', 'svg:foreignObject');
66 private $formatting = array('a','b','big','code','em','font','i','nobr','s','small','strike','strong','tt','u');
67 // dl and ds are speculative
68 private $special = array('address','area','article','aside','base','basefont','bgsound',
69 'blockquote','body','br','center','col','colgroup','command','dc','dd','details','dir','div','dl','ds',
70 'dt','embed','fieldset','figure','footer','form','frame','frameset','h1','h2','h3','h4','h5',
71 'h6','head','header','hgroup','hr','iframe','img','input','isindex','li','link',
72 'listing','menu','meta','nav','noembed','noframes','noscript','ol',
73 'p','param','plaintext','pre','script','select','spacer','style',
74 'tbody','textarea','tfoot','thead','title','tr','ul','wbr');
75
76 private $pendingTableCharacters;
77 private $pendingTableCharactersDirty;
78
79 // Tree construction modes
80 const INITIAL = 0;
81 const BEFORE_HTML = 1;
82 const BEFORE_HEAD = 2;
83 const IN_HEAD = 3;
84 const IN_HEAD_NOSCRIPT = 4;
85 const AFTER_HEAD = 5;
86 const IN_BODY = 6;
87 const IN_CDATA_RCDATA = 7;
88 const IN_TABLE = 8;
89 const IN_TABLE_TEXT = 9;
90 const IN_CAPTION = 10;
91 const IN_COLUMN_GROUP = 11;
92 const IN_TABLE_BODY = 12;
93 const IN_ROW = 13;
94 const IN_CELL = 14;
95 const IN_SELECT = 15;
96 const IN_SELECT_IN_TABLE= 16;
97 const IN_FOREIGN_CONTENT= 17;
98 const AFTER_BODY = 18;
99 const IN_FRAMESET = 19;
100 const AFTER_FRAMESET = 20;
101 const AFTER_AFTER_BODY = 21;
102 const AFTER_AFTER_FRAMESET = 22;
103
/**
 * Converts a magic number to a readable constant name. Use for debugging.
 *
 * The lookup table is built once, by reflecting over this class's
 * constants, and cached in a static variable for later calls.
 *
 * Several integer constants share a value (the insertion modes INITIAL,
 * BEFORE_HTML and BEFORE_HEAD are 0, 1 and 2, and so are SCOPE,
 * SCOPE_LISTITEM and SCOPE_TABLE). The first name encountered for a value
 * is kept, so that later declarations do not silently replace the
 * insertion-mode names this helper is used to print.
 *
 * @param int $number Value of one of this class's integer constants.
 * @return string|null Name of the matching constant, or null when no
 *                     integer constant has that value.
 */
private function strConst($number) {
    static $lookup;
    if (!$lookup) {
        $lookup = array();
        $r = new ReflectionClass('HTML5_TreeBuilder');
        foreach ($r->getConstants() as $const => $num) {
            // Skip non-integer constants (e.g. the namespace URIs and
            // NS_HTML = null); they are not magic numbers.
            if (!is_int($num)) continue;
            // Keep the first name seen for a value; in practice
            // getConstants() yields constants in declaration order, so
            // the insertion modes win over the later SCOPE_* constants.
            if (isset($lookup[$num])) continue;
            $lookup[$num] = $const;
        }
    }
    // Unknown values yield null instead of an undefined-index notice.
    return isset($lookup[$number]) ? $lookup[$number] : null;
}
120
121 // The different types of elements.
122 const SPECIAL = 100;
123 const SCOPING = 101;
124 const FORMATTING = 102;
125 const PHRASING = 103;
126
127 // Quirks modes in $quirks_mode
128 const NO_QUIRKS = 200;
129 const QUIRKS_MODE = 201;
130 const LIMITED_QUIRKS_MODE = 202;
131
132 // Marker to be placed in $a_formatting
133 const MARKER = 300;
134
135 // Namespaces for foreign content
136 const NS_HTML = null; // to prevent DOM from requiring NS on everything
137 const NS_MATHML = 'http://www.w3.org/1998/Math/MathML';
138 const NS_SVG = 'http://www.w3.org/2000/svg';
139 const NS_XLINK = 'http://www.w3.org/1999/xlink';
140 const NS_XML = 'http://www.w3.org/XML/1998/namespace';
141 const NS_XMLNS = 'http://www.w3.org/2000/xmlns/';
142
143 // Different types of scopes to test for elements
144 const SCOPE = 0;
145 const SCOPE_LISTITEM = 1;
146 const SCOPE_TABLE = 2;
147
/**
 * Sets up a fresh tree builder: starts in the INITIAL insertion mode and
 * creates the DOMDocument that the parsed tree will be built into.
 */
public function __construct() {
    // Configure the document first, then attach it to the builder.
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    $document->strictErrorChecking = false;

    $this->dom = $document;
    $this->mode = self::INITIAL;
}
157
158 // Process tag tokens
159 public function emitToken($token, $mode = null) {
160 // XXX: ignore parse errors... why are we emitting them, again?
161 if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return;
162 if ($mode === null) $mode = $this->mode;
163
164 /*
165 $backtrace = debug_backtrace();
166 if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n";
167 echo $this->strConst($mode);
168 if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")";
169 echo "\n ";
170 token_dump($token);
171 $this->printStack();
172 $this->printActiveFormattingElements();
173 if ($this->foster_parent) echo " -> this is a foster parent mode\n";
174 if ($this->flag_frameset_ok) echo " -> frameset ok\n";
175 */
176
177 if ($this->ignore_lf_token) $this->ignore_lf_token--;
178 $this->ignored = false;
179 // indenting is a little wonky, this can be changed later on
180 switch ($mode) {
181
182 case self::INITIAL:
183
184 /* A character token that is one of U+0009 CHARACTER TABULATION,
185 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */
186 if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
187 /* Ignore the token. */
188 $this->ignored = true;
189 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
190 if (
191 $token['name'] !== 'html' || !empty($token['public']) ||
192 !empty($token['system']) || $token !== 'about:legacy-compat'
193 ) {
194 /* If the DOCTYPE token's name is not a case-sensitive match
195 * for the string "html", or if the token's public identifier
196 * is not missing, or if the token's system identifier is
197 * neither missing nor a case-sensitive match for the string
198 * "about:legacy-compat", then there is a parse error (this
199 * is the DOCTYPE parse error). */
200 // DOCTYPE parse error
201 }
202 /* Append a DocumentType node to the Document node, with the name
203 * attribute set to the name given in the DOCTYPE token, or the
204 * empty string if the name was missing; the publicId attribute
205 * set to the public identifier given in the DOCTYPE token, or
206 * the empty string if the public identifier was missing; the
207 * systemId attribute set to the system identifier given in the
208 * DOCTYPE token, or the empty string if the system identifier
209 * was missing; and the other attributes specific to
210 * DocumentType objects set to null and empty lists as
211 * appropriate. Associate the DocumentType node with the
212 * Document object so that it is returned as the value of the
213 * doctype attribute of the Document object. */
214 if (!isset($token['public'])) $token['public'] = null;
215 if (!isset($token['system'])) $token['system'] = null;
216 // XDOM
217 // Yes this is hacky. I'm kind of annoyed that I can't appendChild
218 // a doctype to DOMDocument. Maybe I haven't chanted the right
219 // syllables.
220 $impl = new DOMImplementation();
221 // This call can fail for particularly pathological cases (namely,
222 // the qualifiedName parameter ($token['name']) could be missing.
223 if ($token['name']) {
224 $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']);
225 $this->dom->appendChild($doctype);
226 } else {
227 // It looks like libxml's not actually *able* to express this case.
228 // So... don't.
229 $this->dom->emptyDoctype = true;
230 }
231 $public = is_null($token['public']) ? false : strtolower($token['public']);
232 $system = is_null($token['system']) ? false : strtolower($token['system']);
233 $publicStartsWithForQuirks = array(
234 "+//silmaril//dtd html pro v0r11 19970101//",
235 "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",
236 "-//as//dtd html 3.0 aswedit + extensions//",
237 "-//ietf//dtd html 2.0 level 1//",
238 "-//ietf//dtd html 2.0 level 2//",
239 "-//ietf//dtd html 2.0 strict level 1//",
240 "-//ietf//dtd html 2.0 strict level 2//",
241 "-//ietf//dtd html 2.0 strict//",
242 "-//ietf//dtd html 2.0//",
243 "-//ietf//dtd html 2.1e//",
244 "-//ietf//dtd html 3.0//",
245 "-//ietf//dtd html 3.2 final//",
246 "-//ietf//dtd html 3.2//",
247 "-//ietf//dtd html 3//",
248 "-//ietf//dtd html level 0//",
249 "-//ietf//dtd html level 1//",
250 "-//ietf//dtd html level 2//",
251 "-//ietf//dtd html level 3//",
252 "-//ietf//dtd html strict level 0//",
253 "-//ietf//dtd html strict level 1//",
254 "-//ietf//dtd html strict level 2//",
255 "-//ietf//dtd html strict level 3//",
256 "-//ietf//dtd html strict//",
257 "-//ietf//dtd html//",
258 "-//metrius//dtd metrius presentational//",
259 "-//microsoft//dtd internet explorer 2.0 html strict//",
260 "-//microsoft//dtd internet explorer 2.0 html//",
261 "-//microsoft//dtd internet explorer 2.0 tables//",
262 "-//microsoft//dtd internet explorer 3.0 html strict//",
263 "-//microsoft//dtd internet explorer 3.0 html//",
264 "-//microsoft//dtd internet explorer 3.0 tables//",
265 "-//netscape comm. corp.//dtd html//",
266 "-//netscape comm. corp.//dtd strict html//",
267 "-//o'reilly and associates//dtd html 2.0//",
268 "-//o'reilly and associates//dtd html extended 1.0//",
269 "-//o'reilly and associates//dtd html extended relaxed 1.0//",
270 "-//spyglass//dtd html 2.0 extended//",
271 "-//sq//dtd html 2.0 hotmetal + extensions//",
272 "-//sun microsystems corp.//dtd hotjava html//",
273 "-//sun microsystems corp.//dtd hotjava strict html//",
274 "-//w3c//dtd html 3 1995-03-24//",
275 "-//w3c//dtd html 3.2 draft//",
276 "-//w3c//dtd html 3.2 final//",
277 "-//w3c//dtd html 3.2//",
278 "-//w3c//dtd html 3.2s draft//",
279 "-//w3c//dtd html 4.0 frameset//",
280 "-//w3c//dtd html 4.0 transitional//",
281 "-//w3c//dtd html experimental 19960712//",
282 "-//w3c//dtd html experimental 970421//",
283 "-//w3c//dtd w3 html//",
284 "-//w3o//dtd w3 html 3.0//",
285 "-//webtechs//dtd mozilla html 2.0//",
286 "-//webtechs//dtd mozilla html//",
287 );
288 $publicSetToForQuirks = array(
289 "-//w3o//dtd w3 html strict 3.0//",
290 "-/w3c/dtd html 4.0 transitional/en",
291 "html",
292 );
293 $publicStartsWithAndSystemForQuirks = array(
294 "-//w3c//dtd html 4.01 frameset//",
295 "-//w3c//dtd html 4.01 transitional//",
296 );
297 $publicStartsWithForLimitedQuirks = array(
298 "-//w3c//dtd xhtml 1.0 frameset//",
299 "-//w3c//dtd xhtml 1.0 transitional//",
300 );
301 $publicStartsWithAndSystemForLimitedQuirks = array(
302 "-//w3c//dtd html 4.01 frameset//",
303 "-//w3c//dtd html 4.01 transitional//",
304 );
305 // first, do easy checks
306 if (
307 !empty($token['force-quirks']) ||
308 strtolower($token['name']) !== 'html'
309 ) {
310 $this->quirks_mode = self::QUIRKS_MODE;
311 } else {
312 do {
313 if ($system) {
314 foreach ($publicStartsWithAndSystemForQuirks as $x) {
315 if (strncmp($public, $x, strlen($x)) === 0) {
316 $this->quirks_mode = self::QUIRKS_MODE;
317 break;
318 }
319 }
320 if (!is_null($this->quirks_mode)) break;
321 foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) {
322 if (strncmp($public, $x, strlen($x)) === 0) {
323 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
324 break;
325 }
326 }
327 if (!is_null($this->quirks_mode)) break;
328 }
329 foreach ($publicSetToForQuirks as $x) {
330 if ($public === $x) {
331 $this->quirks_mode = self::QUIRKS_MODE;
332 break;
333 }
334 }
335 if (!is_null($this->quirks_mode)) break;
336 foreach ($publicStartsWithForLimitedQuirks as $x) {
337 if (strncmp($public, $x, strlen($x)) === 0) {
338 $this->quirks_mode = self::LIMITED_QUIRKS_MODE;
339 }
340 }
341 if (!is_null($this->quirks_mode)) break;
342 if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") {
343 $this->quirks_mode = self::QUIRKS_MODE;
344 break;
345 }
346 foreach ($publicStartsWithForQuirks as $x) {
347 if (strncmp($public, $x, strlen($x)) === 0) {
348 $this->quirks_mode = self::QUIRKS_MODE;
349 break;
350 }
351 }
352 if (is_null($this->quirks_mode)) {
353 $this->quirks_mode = self::NO_QUIRKS;
354 }
355 } while (false);
356 }
357 $this->mode = self::BEFORE_HTML;
358 } else {
359 // parse error
360 /* Switch the insertion mode to "before html", then reprocess the
361 * current token. */
362 $this->mode = self::BEFORE_HTML;
363 $this->quirks_mode = self::QUIRKS_MODE;
364 $this->emitToken($token);
365 }
366 break;
367
368 case self::BEFORE_HTML:
369
370 /* A DOCTYPE token */
371 if($token['type'] === HTML5_Tokenizer::DOCTYPE) {
372 // Parse error. Ignore the token.
373 $this->ignored = true;
374
375 /* A comment token */
376 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
377 /* Append a Comment node to the Document object with the data
378 attribute set to the data given in the comment token. */
379 // XDOM
380 $comment = $this->dom->createComment($token['data']);
381 $this->dom->appendChild($comment);
382
383 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
384 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
385 or U+0020 SPACE */
386 } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
387 /* Ignore the token. */
388 $this->ignored = true;
389
390 /* A start tag whose tag name is "html" */
391 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') {
392 /* Create an element for the token in the HTML namespace. Append it
393 * to the Document object. Put this element in the stack of open
394 * elements. */
395 // XDOM
396 $html = $this->insertElement($token, false);
397 $this->dom->appendChild($html);
398 $this->stack[] = $html;
399
400 $this->mode = self::BEFORE_HEAD;
401
402 } else {
403 /* Create an html element. Append it to the Document object. Put
404 * this element in the stack of open elements. */
405 // XDOM
406 $html = $this->dom->createElementNS(self::NS_HTML, 'html');
407 $this->dom->appendChild($html);
408 $this->stack[] = $html;
409
410 /* Switch the insertion mode to "before head", then reprocess the
411 * current token. */
412 $this->mode = self::BEFORE_HEAD;
413 $this->emitToken($token);
414 }
415 break;
416
417 case self::BEFORE_HEAD:
418
419 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
420 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
421 or U+0020 SPACE */
422 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
423 /* Ignore the token. */
424 $this->ignored = true;
425
426 /* A comment token */
427 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
428 /* Append a Comment node to the current node with the data attribute
429 set to the data given in the comment token. */
430 $this->insertComment($token['data']);
431
432 /* A DOCTYPE token */
433 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
434 /* Parse error. Ignore the token */
435 $this->ignored = true;
436 // parse error
437
438 /* A start tag token with the tag name "html" */
439 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
440 /* Process the token using the rules for the "in body"
441 * insertion mode. */
442 $this->processWithRulesFor($token, self::IN_BODY);
443
444 /* A start tag token with the tag name "head" */
445 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') {
446 /* Insert an HTML element for the token. */
447 $element = $this->insertElement($token);
448
449 /* Set the head element pointer to this new element node. */
450 $this->head_pointer = $element;
451
452 /* Change the insertion mode to "in head". */
453 $this->mode = self::IN_HEAD;
454
455 /* An end tag whose tag name is one of: "head", "body", "html", "br" */
456 } elseif(
457 $token['type'] === HTML5_Tokenizer::ENDTAG && (
458 $token['name'] === 'head' || $token['name'] === 'body' ||
459 $token['name'] === 'html' || $token['name'] === 'br'
460 )) {
461 /* Act as if a start tag token with the tag name "head" and no
462 * attributes had been seen, then reprocess the current token. */
463 $this->emitToken(array(
464 'name' => 'head',
465 'type' => HTML5_Tokenizer::STARTTAG,
466 'attr' => array()
467 ));
468 $this->emitToken($token);
469
470 /* Any other end tag */
471 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) {
472 /* Parse error. Ignore the token. */
473 $this->ignored = true;
474
475 } else {
476 /* Act as if a start tag token with the tag name "head" and no
477 * attributes had been seen, then reprocess the current token.
478 * Note: This will result in an empty head element being
479 * generated, with the current token being reprocessed in the
480 * "after head" insertion mode. */
481 $this->emitToken(array(
482 'name' => 'head',
483 'type' => HTML5_Tokenizer::STARTTAG,
484 'attr' => array()
485 ));
486 $this->emitToken($token);
487 }
488 break;
489
490 case self::IN_HEAD:
491
492 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
493 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
494 or U+0020 SPACE. */
495 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
496 /* Insert the character into the current node. */
497 $this->insertText($token['data']);
498
499 /* A comment token */
500 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
501 /* Append a Comment node to the current node with the data attribute
502 set to the data given in the comment token. */
503 $this->insertComment($token['data']);
504
505 /* A DOCTYPE token */
506 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
507 /* Parse error. Ignore the token. */
508 $this->ignored = true;
509 // parse error
510
511 /* A start tag whose tag name is "html" */
512 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
513 $token['name'] === 'html') {
514 $this->processWithRulesFor($token, self::IN_BODY);
515
516 /* A start tag whose tag name is one of: "base", "command", "link" */
517 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
518 ($token['name'] === 'base' || $token['name'] === 'command' ||
519 $token['name'] === 'link')) {
520 /* Insert an HTML element for the token. Immediately pop the
521 * current node off the stack of open elements. */
522 $this->insertElement($token);
523 array_pop($this->stack);
524
525 // YYY: Acknowledge the token's self-closing flag, if it is set.
526
527 /* A start tag whose tag name is "meta" */
528 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') {
529 /* Insert an HTML element for the token. Immediately pop the
530 * current node off the stack of open elements. */
531 $this->insertElement($token);
532 array_pop($this->stack);
533
534 // XERROR: Acknowledge the token's self-closing flag, if it is set.
535
536 // XENCODING: If the element has a charset attribute, and its value is a
537 // supported encoding, and the confidence is currently tentative,
538 // then change the encoding to the encoding given by the value of
539 // the charset attribute.
540 //
541 // Otherwise, if the element has a content attribute, and applying
542 // the algorithm for extracting an encoding from a Content-Type to
543 // its value returns a supported encoding encoding, and the
544 // confidence is currently tentative, then change the encoding to
545 // the encoding encoding.
546
547 /* A start tag with the tag name "title" */
548 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') {
549 $this->insertRCDATAElement($token);
550
551 /* A start tag whose tag name is "noscript", if the scripting flag is enabled, or
552 * A start tag whose tag name is one of: "noframes", "style" */
553 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
554 ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) {
555 // XSCRIPT: Scripting flag not respected
556 $this->insertCDATAElement($token);
557
558 // XSCRIPT: Scripting flag disable not implemented
559
560 /* A start tag with the tag name "script" */
561 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
562 /* 1. Create an element for the token in the HTML namespace. */
563 $node = $this->insertElement($token, false);
564
565 /* 2. Mark the element as being "parser-inserted" */
566 // Uhhh... XSCRIPT
567
568 /* 3. If the parser was originally created for the HTML
569 * fragment parsing algorithm, then mark the script element as
570 * "already executed". (fragment case) */
571 // ditto... XSCRIPT
572
573 /* 4. Append the new element to the current node and push it onto
574 * the stack of open elements. */
575 end($this->stack)->appendChild($node);
576 $this->stack[] = $node;
577 // I guess we could squash these together
578
579 /* 6. Let the original insertion mode be the current insertion mode. */
580 $this->original_mode = $this->mode;
581 /* 7. Switch the insertion mode to "in CDATA/RCDATA" */
582 $this->mode = self::IN_CDATA_RCDATA;
583 /* 5. Switch the tokeniser's content model flag to the CDATA state. */
584 $this->content_model = HTML5_Tokenizer::CDATA;
585
586 /* An end tag with the tag name "head" */
587 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') {
588 /* Pop the current node (which will be the head element) off the stack of open elements. */
589 array_pop($this->stack);
590
591 /* Change the insertion mode to "after head". */
592 $this->mode = self::AFTER_HEAD;
593
594 // Slight logic inversion here to minimize duplication
595 /* A start tag with the tag name "head". */
596 /* An end tag whose tag name is not one of: "body", "html", "br" */
597 } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
598 ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' &&
599 $token['name'] !== 'body' && $token['name'] !== 'br')) {
600 // Parse error. Ignore the token.
601 $this->ignored = true;
602
603 /* Anything else */
604 } else {
605 /* Act as if an end tag token with the tag name "head" had been
606 * seen, and reprocess the current token. */
607 $this->emitToken(array(
608 'name' => 'head',
609 'type' => HTML5_Tokenizer::ENDTAG
610 ));
611
612 /* Then, reprocess the current token. */
613 $this->emitToken($token);
614 }
615 break;
616
617 case self::IN_HEAD_NOSCRIPT:
618 if ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
619 // parse error
620 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
621 $this->processWithRulesFor($token, self::IN_BODY);
622 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') {
623 /* Pop the current node (which will be a noscript element) from the
624 * stack of open elements; the new current node will be a head
625 * element. */
626 array_pop($this->stack);
627 $this->mode = self::IN_HEAD;
628 } elseif (
629 ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) ||
630 ($token['type'] === HTML5_Tokenizer::COMMENT) ||
631 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
632 $token['name'] === 'link' || $token['name'] === 'meta' ||
633 $token['name'] === 'noframes' || $token['name'] === 'style'))) {
634 $this->processWithRulesFor($token, self::IN_HEAD);
635 // inverted logic
636 } elseif (
637 ($token['type'] === HTML5_Tokenizer::STARTTAG && (
638 $token['name'] === 'head' || $token['name'] === 'noscript')) ||
639 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
640 $token['name'] !== 'br')) {
641 // parse error
642 } else {
643 // parse error
644 $this->emitToken(array(
645 'type' => HTML5_Tokenizer::ENDTAG,
646 'name' => 'noscript',
647 ));
648 $this->emitToken($token);
649 }
650 break;
651
652 case self::AFTER_HEAD:
653 /* Handle the token as follows: */
654
655 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
656 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
657 or U+0020 SPACE */
658 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
659 /* Append the character to the current node. */
660 $this->insertText($token['data']);
661
662 /* A comment token */
663 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
664 /* Append a Comment node to the current node with the data attribute
665 set to the data given in the comment token. */
666 $this->insertComment($token['data']);
667
668 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
669 // parse error
670
671 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
672 $this->processWithRulesFor($token, self::IN_BODY);
673
674 /* A start tag token with the tag name "body" */
675 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') {
676 $this->insertElement($token);
677
678 /* Set the frameset-ok flag to "not ok". */
679 $this->flag_frameset_ok = false;
680
681 /* Change the insertion mode to "in body". */
682 $this->mode = self::IN_BODY;
683
684 /* A start tag token with the tag name "frameset" */
685 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') {
686 /* Insert a frameset element for the token. */
687 $this->insertElement($token);
688
689 /* Change the insertion mode to "in frameset". */
690 $this->mode = self::IN_FRAMESET;
691
692 /* A start tag token whose tag name is one of: "base", "link", "meta",
693 "script", "style", "title" */
694 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
695 array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) {
696 // parse error
697 /* Push the node pointed to by the head element pointer onto the
698 * stack of open elements. */
699 $this->stack[] = $this->head_pointer;
700 $this->processWithRulesFor($token, self::IN_HEAD);
701 array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1);
702
703 // inversion of specification
704 } elseif(
705 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') ||
706 ($token['type'] === HTML5_Tokenizer::ENDTAG &&
707 $token['name'] !== 'body' && $token['name'] !== 'html' &&
708 $token['name'] !== 'br')) {
709 // parse error
710
711 /* Anything else */
712 } else {
713 $this->emitToken(array(
714 'name' => 'body',
715 'type' => HTML5_Tokenizer::STARTTAG,
716 'attr' => array()
717 ));
718 $this->flag_frameset_ok = true;
719 $this->emitToken($token);
720 }
721 break;
722
723 case self::IN_BODY:
724 /* Handle the token as follows: */
725
726 switch($token['type']) {
727 /* A character token */
728 case HTML5_Tokenizer::CHARACTER:
729 case HTML5_Tokenizer::SPACECHARACTER:
730 /* Reconstruct the active formatting elements, if any. */
731 $this->reconstructActiveFormattingElements();
732
733 /* Append the token's character to the current node. */
734 $this->insertText($token['data']);
735
736 /* If the token is not one of U+0009 CHARACTER TABULATION,
737 * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020
738 * SPACE, then set the frameset-ok flag to "not ok". */
739 // i.e., if any of the characters is not whitespace
740 if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) {
741 $this->flag_frameset_ok = false;
742 }
743 break;
744
745 /* A comment token */
746 case HTML5_Tokenizer::COMMENT:
747 /* Append a Comment node to the current node with the data
748 attribute set to the data given in the comment token. */
749 $this->insertComment($token['data']);
750 break;
751
752 case HTML5_Tokenizer::DOCTYPE:
753 // parse error
754 break;
755
756 case HTML5_Tokenizer::EOF:
757 // parse error
758 break;
759
760 case HTML5_Tokenizer::STARTTAG:
761 switch($token['name']) {
762 case 'html':
763 // parse error
764 /* For each attribute on the token, check to see if the
765 * attribute is already present on the top element of the
766 * stack of open elements. If it is not, add the attribute
767 * and its corresponding value to that element. */
768 foreach($token['attr'] as $attr) {
769 if(!$this->stack[0]->hasAttribute($attr['name'])) {
770 $this->stack[0]->setAttribute($attr['name'], $attr['value']);
771 }
772 }
773 break;
774
775 case 'base': case 'command': case 'link': case 'meta': case 'noframes':
776 case 'script': case 'style': case 'title':
777 /* Process the token as if the insertion mode had been "in
778 head". */
779 $this->processWithRulesFor($token, self::IN_HEAD);
780 break;
781
782 /* A start tag token with the tag name "body" */
783 case 'body':
784 /* Parse error. If the second element on the stack of open
785 elements is not a body element, or, if the stack of open
786 elements has only one node on it, then ignore the token.
787 (fragment case) */
788 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
789 $this->ignored = true;
790 // Ignore
791
792 /* Otherwise, for each attribute on the token, check to see
793 if the attribute is already present on the body element (the
794 second element) on the stack of open elements. If it is not,
795 add the attribute and its corresponding value to that
796 element. */
797 } else {
798 foreach($token['attr'] as $attr) {
799 if(!$this->stack[1]->hasAttribute($attr['name'])) {
800 $this->stack[1]->setAttribute($attr['name'], $attr['value']);
801 }
802 }
803 }
804 break;
805
806 case 'frameset':
807 // parse error
808 /* If the second element on the stack of open elements is
809 * not a body element, or, if the stack of open elements
810 * has only one node on it, then ignore the token.
811 * (fragment case) */
812 if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') {
813 $this->ignored = true;
814 // Ignore
815 } elseif (!$this->flag_frameset_ok) {
816 $this->ignored = true;
817 // Ignore
818 } else {
819 /* 1. Remove the second element on the stack of open
820 * elements from its parent node, if it has one. */
821 if($this->stack[1]->parentNode) {
822 $this->stack[1]->parentNode->removeChild($this->stack[1]);
823 }
824
825 /* 2. Pop all the nodes from the bottom of the stack of
826 * open elements, from the current node up to the root
827 * html element. */
828 array_splice($this->stack, 1);
829
830 $this->insertElement($token);
831 $this->mode = self::IN_FRAMESET;
832 }
833 break;
834
835 // in spec, there is a diversion here
836
837 case 'address': case 'article': case 'aside': case 'blockquote':
838 case 'center': case 'datagrid': case 'details': case 'dir':
839 case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer':
840 case 'header': case 'hgroup': case 'menu': case 'nav':
841 case 'ol': case 'p': case 'section': case 'ul':
842 /* If the stack of open elements has a p element in scope,
843 then act as if an end tag with the tag name p had been
844 seen. */
845 if($this->elementInScope('p')) {
846 $this->emitToken(array(
847 'name' => 'p',
848 'type' => HTML5_Tokenizer::ENDTAG
849 ));
850 }
851
852 /* Insert an HTML element for the token. */
853 $this->insertElement($token);
854 break;
855
856 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",
857 "h5", "h6" */
858 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
859 /* If the stack of open elements has a p element in scope,
860 then act as if an end tag with the tag name p had been seen. */
861 if($this->elementInScope('p')) {
862 $this->emitToken(array(
863 'name' => 'p',
864 'type' => HTML5_Tokenizer::ENDTAG
865 ));
866 }
867
868 /* If the current node is an element whose tag name is one
869 * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a
870 * parse error; pop the current node off the stack of open
871 * elements. */
872 $peek = array_pop($this->stack);
873 if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) {
874 // parse error
875 } else {
876 $this->stack[] = $peek;
877 }
878
879 /* Insert an HTML element for the token. */
880 $this->insertElement($token);
881 break;
882
883 case 'pre': case 'listing':
884 /* If the stack of open elements has a p element in scope,
885 then act as if an end tag with the tag name p had been seen. */
886 if($this->elementInScope('p')) {
887 $this->emitToken(array(
888 'name' => 'p',
889 'type' => HTML5_Tokenizer::ENDTAG
890 ));
891 }
892 $this->insertElement($token);
893 /* If the next token is a U+000A LINE FEED (LF) character
894 * token, then ignore that token and move on to the next
895 * one. (Newlines at the start of pre blocks are ignored as
896 * an authoring convenience.) */
897 $this->ignore_lf_token = 2;
898 $this->flag_frameset_ok = false;
899 break;
900
901 /* A start tag whose tag name is "form" */
902 case 'form':
903 /* If the form element pointer is not null, ignore the
904 token with a parse error. */
905 if($this->form_pointer !== null) {
906 $this->ignored = true;
907 // Ignore.
908
909 /* Otherwise: */
910 } else {
911 /* If the stack of open elements has a p element in
912 scope, then act as if an end tag with the tag name p
913 had been seen. */
914 if($this->elementInScope('p')) {
915 $this->emitToken(array(
916 'name' => 'p',
917 'type' => HTML5_Tokenizer::ENDTAG
918 ));
919 }
920
921 /* Insert an HTML element for the token, and set the
922 form element pointer to point to the element created. */
923 $element = $this->insertElement($token);
924 $this->form_pointer = $element;
925 }
926 break;
927
928 // condensed specification
929 case 'li': case 'dc': case 'dd': case 'ds': case 'dt':
930 /* 1. Set the frameset-ok flag to "not ok". */
931 $this->flag_frameset_ok = false;
932
933 $stack_length = count($this->stack) - 1;
934 for($n = $stack_length; 0 <= $n; $n--) {
935 /* 2. Initialise node to be the current node (the
936 bottommost node of the stack). */
937 $stop = false;
938 $node = $this->stack[$n];
939 $cat = $this->getElementCategory($node);
940
941 // for case 'li':
942 /* 3. If node is an li element, then act as if an end
943 * tag with the tag name "li" had been seen, then jump
944 * to the last step. */
945 // for case 'dc': case 'dd': case 'ds': case 'dt':
946 /* If node is a dc, dd, ds or dt element, then act as if an end
947 * tag with the same tag name as node had been seen, then
948 * jump to the last step. */
949 if(($token['name'] === 'li' && $node->tagName === 'li') ||
950 ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional
951 $this->emitToken(array(
952 'type' => HTML5_Tokenizer::ENDTAG,
953 'name' => $node->tagName,
954 ));
955 break;
956 }
957
958 /* 4. If node is not in the formatting category, and is
959 not in the phrasing category, and is not an address,
960 div or p element, then stop this algorithm. */
961 if($cat !== self::FORMATTING && $cat !== self::PHRASING &&
962 $node->tagName !== 'address' && $node->tagName !== 'div' &&
963 $node->tagName !== 'p') {
964 break;
965 }
966
967 /* 5. Otherwise, set node to the previous entry in the
968 * stack of open elements and return to step 2. */
969 }
970
971 /* 6. This is the last step. */
972
973 /* If the stack of open elements has a p element in scope,
974 then act as if an end tag with the tag name p had been
975 seen. */
976 if($this->elementInScope('p')) {
977 $this->emitToken(array(
978 'name' => 'p',
979 'type' => HTML5_Tokenizer::ENDTAG
980 ));
981 }
982
983 /* Finally, insert an HTML element with the same tag
984 name as the token's. */
985 $this->insertElement($token);
986 break;
987
988 /* A start tag token whose tag name is "plaintext" */
989 case 'plaintext':
990 /* If the stack of open elements has a p element in scope,
991 then act as if an end tag with the tag name p had been
992 seen. */
993 if($this->elementInScope('p')) {
994 $this->emitToken(array(
995 'name' => 'p',
996 'type' => HTML5_Tokenizer::ENDTAG
997 ));
998 }
999
1000 /* Insert an HTML element for the token. */
1001 $this->insertElement($token);
1002
1003 $this->content_model = HTML5_Tokenizer::PLAINTEXT;
1004 break;
1005
1006 // more diversions
1007
/* A start tag whose tag name is "a" */
case 'a':
    /* If the list of active formatting elements contains an element
       whose tag name is "a" between the end of the list and the last
       marker on the list (or the start of the list if there is no
       marker on the list), then this is a parse error; act as if an
       end tag with the tag name "a" had been seen, then remove that
       element from the list of active formatting elements and the
       stack of open elements if the end tag didn't already remove it
       (it might not have if the element is not in table scope). */
    $leng = count($this->a_formatting);

    for($n = $leng - 1; $n >= 0; $n--) {
        if($this->a_formatting[$n] === self::MARKER) {
            // Only entries after the last marker are considered.
            break;

        } elseif($this->a_formatting[$n]->tagName === 'a') {
            $a = $this->a_formatting[$n];
            $this->emitToken(array(
                'name' => 'a',
                'type' => HTML5_Tokenizer::ENDTAG
            ));

            /* The emitted end tag may or may not have removed $a, so
               look it up again by identity. A strict array_search()
               is sufficient on its own; the former loose in_array()
               guard compared the element by value against every
               entry (MARKER entries included) and added nothing. */
            $a_i = array_search($a, $this->a_formatting, true);
            if($a_i !== false) array_splice($this->a_formatting, $a_i, 1);

            $a_i = array_search($a, $this->stack, true);
            if($a_i !== false) array_splice($this->stack, $a_i, 1);
            break;
        }
    }

    /* Reconstruct the active formatting elements, if any. */
    $this->reconstructActiveFormattingElements();

    /* Insert an HTML element for the token. */
    $el = $this->insertElement($token);

    /* Add that element to the list of active formatting
       elements. */
    $this->a_formatting[] = $el;
    break;
1053
1054 case 'b': case 'big': case 'code': case 'em': case 'font': case 'i':
1055 case 's': case 'small': case 'strike':
1056 case 'strong': case 'tt': case 'u':
1057 /* Reconstruct the active formatting elements, if any. */
1058 $this->reconstructActiveFormattingElements();
1059
1060 /* Insert an HTML element for the token. */
1061 $el = $this->insertElement($token);
1062
1063 /* Add that element to the list of active formatting
1064 elements. */
1065 $this->a_formatting[] = $el;
1066 break;
1067
1068 case 'nobr':
1069 /* Reconstruct the active formatting elements, if any. */
1070 $this->reconstructActiveFormattingElements();
1071
1072 /* If the stack of open elements has a nobr element in
1073 * scope, then this is a parse error; act as if an end tag
1074 * with the tag name "nobr" had been seen, then once again
1075 * reconstruct the active formatting elements, if any. */
1076 if ($this->elementInScope('nobr')) {
1077 $this->emitToken(array(
1078 'name' => 'nobr',
1079 'type' => HTML5_Tokenizer::ENDTAG,
1080 ));
1081 $this->reconstructActiveFormattingElements();
1082 }
1083
1084 /* Insert an HTML element for the token. */
1085 $el = $this->insertElement($token);
1086
1087 /* Add that element to the list of active formatting
1088 elements. */
1089 $this->a_formatting[] = $el;
1090 break;
1091
1092 // another diversion
1093
1094 /* A start tag token whose tag name is "button" */
1095 case 'button':
1096 /* If the stack of open elements has a button element in scope,
1097 then this is a parse error; act as if an end tag with the tag
1098 name "button" had been seen, then reprocess the token. (We don't
1099 do that. Unnecessary.) (I hope you're right! -- ezyang) */
1100 if($this->elementInScope('button')) {
1101 $this->emitToken(array(
1102 'name' => 'button',
1103 'type' => HTML5_Tokenizer::ENDTAG
1104 ));
1105 }
1106
1107 /* Reconstruct the active formatting elements, if any. */
1108 $this->reconstructActiveFormattingElements();
1109
1110 /* Insert an HTML element for the token. */
1111 $this->insertElement($token);
1112
1113 /* Insert a marker at the end of the list of active
1114 formatting elements. */
1115 $this->a_formatting[] = self::MARKER;
1116
1117 $this->flag_frameset_ok = false;
1118 break;
1119
1120 case 'applet': case 'marquee': case 'object':
1121 /* Reconstruct the active formatting elements, if any. */
1122 $this->reconstructActiveFormattingElements();
1123
1124 /* Insert an HTML element for the token. */
1125 $this->insertElement($token);
1126
1127 /* Insert a marker at the end of the list of active
1128 formatting elements. */
1129 $this->a_formatting[] = self::MARKER;
1130
1131 $this->flag_frameset_ok = false;
1132 break;
1133
1134 // spec diversion
1135
1136 /* A start tag whose tag name is "table" */
1137 case 'table':
1138 /* If the Document is not set to quirks mode, and the
1139 * stack of open elements has a p element in scope, then
1140 * act as if an end tag with the tag name "p" had been
1141 * seen. */
1142 if($this->quirks_mode !== self::QUIRKS_MODE &&
1143 $this->elementInScope('p')) {
1144 $this->emitToken(array(
1145 'name' => 'p',
1146 'type' => HTML5_Tokenizer::ENDTAG
1147 ));
1148 }
1149
1150 /* Insert an HTML element for the token. */
1151 $this->insertElement($token);
1152
1153 $this->flag_frameset_ok = false;
1154
1155 /* Change the insertion mode to "in table". */
1156 $this->mode = self::IN_TABLE;
1157 break;
1158
1159 /* A start tag whose tag name is one of: "area", "basefont", "bgsound",
1160 "br", "embed", "img", "input", "keygen", "spacer", "wbr" */
1161 case 'area': case 'basefont': case 'bgsound': case 'br':
1162 case 'embed': case 'img': case 'input': case 'keygen': case 'spacer':
1163 case 'wbr':
1164 /* Reconstruct the active formatting elements, if any. */
1165 $this->reconstructActiveFormattingElements();
1166
1167 /* Insert an HTML element for the token. */
1168 $this->insertElement($token);
1169
1170 /* Immediately pop the current node off the stack of open elements. */
1171 array_pop($this->stack);
1172
1173 // YYY: Acknowledge the token's self-closing flag, if it is set.
1174
1175 $this->flag_frameset_ok = false;
1176 break;
1177
1178 case 'param': case 'source':
1179 /* Insert an HTML element for the token. */
1180 $this->insertElement($token);
1181
1182 /* Immediately pop the current node off the stack of open elements. */
1183 array_pop($this->stack);
1184
1185 // YYY: Acknowledge the token's self-closing flag, if it is set.
1186 break;
1187
1188 /* A start tag whose tag name is "hr" */
1189 case 'hr':
1190 /* If the stack of open elements has a p element in scope,
1191 then act as if an end tag with the tag name p had been seen. */
1192 if($this->elementInScope('p')) {
1193 $this->emitToken(array(
1194 'name' => 'p',
1195 'type' => HTML5_Tokenizer::ENDTAG
1196 ));
1197 }
1198
1199 /* Insert an HTML element for the token. */
1200 $this->insertElement($token);
1201
1202 /* Immediately pop the current node off the stack of open elements. */
1203 array_pop($this->stack);
1204
1205 // YYY: Acknowledge the token's self-closing flag, if it is set.
1206
1207 $this->flag_frameset_ok = false;
1208 break;
1209
1210 /* A start tag whose tag name is "image" */
1211 case 'image':
1212 /* Parse error. Change the token's tag name to "img" and
1213 reprocess it. (Don't ask.) */
1214 $token['name'] = 'img';
1215 $this->emitToken($token);
1216 break;
1217
1218 /* A start tag whose tag name is "isindex" */
1219 case 'isindex':
1220 /* Parse error. */
1221
1222 /* If the form element pointer is not null,
1223 then ignore the token. */
1224 if($this->form_pointer === null) {
1225 /* Act as if a start tag token with the tag name "form" had
1226 been seen. */
1227 /* If the token has an attribute called "action", set
1228 * the action attribute on the resulting form
1229 * element to the value of the "action" attribute of
1230 * the token. */
1231 $attr = array();
1232 $action = $this->getAttr($token, 'action');
1233 if ($action !== false) {
1234 $attr[] = array('name' => 'action', 'value' => $action);
1235 }
1236 $this->emitToken(array(
1237 'name' => 'form',
1238 'type' => HTML5_Tokenizer::STARTTAG,
1239 'attr' => $attr
1240 ));
1241
1242 /* Act as if a start tag token with the tag name "hr" had
1243 been seen. */
1244 $this->emitToken(array(
1245 'name' => 'hr',
1246 'type' => HTML5_Tokenizer::STARTTAG,
1247 'attr' => array()
1248 ));
1249
1250 /* Act as if a start tag token with the tag name "label"
1251 had been seen. */
1252 $this->emitToken(array(
1253 'name' => 'label',
1254 'type' => HTML5_Tokenizer::STARTTAG,
1255 'attr' => array()
1256 ));
1257
1258 /* Act as if a stream of character tokens had been seen. */
1259 $prompt = $this->getAttr($token, 'prompt');
1260 if ($prompt === false) {
1261 $prompt = 'This is a searchable index. '.
1262 'Insert your search keywords here: ';
1263 }
1264 $this->emitToken(array(
1265 'data' => $prompt,
1266 'type' => HTML5_Tokenizer::CHARACTER,
1267 ));
1268
1269 /* Act as if a start tag token with the tag name "input"
1270 had been seen, with all the attributes from the "isindex"
1271 token, except with the "name" attribute set to the value
1272 "isindex" (ignoring any explicit "name" attribute). */
1273 $attr = array();
1274 foreach ($token['attr'] as $keypair) {
1275 if ($keypair['name'] === 'name' || $keypair['name'] === 'action' ||
1276 $keypair['name'] === 'prompt') continue;
1277 $attr[] = $keypair;
1278 }
1279 $attr[] = array('name' => 'name', 'value' => 'isindex');
1280
1281 $this->emitToken(array(
1282 'name' => 'input',
1283 'type' => HTML5_Tokenizer::STARTTAG,
1284 'attr' => $attr
1285 ));
1286
1287 /* Act as if an end tag token with the tag name "label"
1288 had been seen. */
1289 $this->emitToken(array(
1290 'name' => 'label',
1291 'type' => HTML5_Tokenizer::ENDTAG
1292 ));
1293
1294 /* Act as if a start tag token with the tag name "hr" had
1295 been seen. */
1296 $this->emitToken(array(
1297 'name' => 'hr',
1298 'type' => HTML5_Tokenizer::STARTTAG
1299 ));
1300
1301 /* Act as if an end tag token with the tag name "form" had
1302 been seen. */
1303 $this->emitToken(array(
1304 'name' => 'form',
1305 'type' => HTML5_Tokenizer::ENDTAG
1306 ));
1307 } else {
1308 $this->ignored = true;
1309 }
1310 break;
1311
1312 /* A start tag whose tag name is "textarea" */
1313 case 'textarea':
1314 $this->insertElement($token);
1315
1316 /* If the next token is a U+000A LINE FEED (LF)
1317 * character token, then ignore that token and move on to
1318 * the next one. (Newlines at the start of textarea
1319 * elements are ignored as an authoring convenience.)
1320 * need flag, see also <pre> */
1321 $this->ignore_lf_token = 2;
1322
1323 $this->original_mode = $this->mode;
1324 $this->flag_frameset_ok = false;
1325 $this->mode = self::IN_CDATA_RCDATA;
1326
1327 /* Switch the tokeniser's content model flag to the
1328 RCDATA state. */
1329 $this->content_model = HTML5_Tokenizer::RCDATA;
1330 break;
1331
1332 /* A start tag token whose tag name is "xmp" */
1333 case 'xmp':
1334 /* If the stack of open elements has a p element in
1335 scope, then act as if an end tag with the tag name
1336 "p" has been seen. */
1337 if ($this->elementInScope('p')) {
1338 $this->emitToken(array(
1339 'name' => 'p',
1340 'type' => HTML5_Tokenizer::ENDTAG
1341 ));
1342 }
1343
1344 /* Reconstruct the active formatting elements, if any. */
1345 $this->reconstructActiveFormattingElements();
1346
1347 $this->flag_frameset_ok = false;
1348
1349 $this->insertCDATAElement($token);
1350 break;
1351
1352 case 'iframe':
1353 $this->flag_frameset_ok = false;
1354 $this->insertCDATAElement($token);
1355 break;
1356
1357 case 'noembed': case 'noscript':
1358 // XSCRIPT: should check scripting flag
1359 $this->insertCDATAElement($token);
1360 break;
1361
/* A start tag whose tag name is "select" */
case 'select':
    /* Reconstruct the active formatting elements, if any. */
    $this->reconstructActiveFormattingElements();

    /* Insert an HTML element for the token. */
    $this->insertElement($token);

    $this->flag_frameset_ok = false;

    /* If the insertion mode is one of "in table", "in caption",
     * "in column group", "in table body", "in row", or "in
     * cell", then switch the insertion mode to "in select in
     * table". Otherwise, switch the insertion mode to "in
     * select". */
    // Every comparison is strict; the IN_TABLE_BODY check used to
    // read `==+self::IN_TABLE_BODY` (loose compare against unary plus).
    if (
        $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION ||
        $this->mode === self::IN_COLUMN_GROUP || $this->mode === self::IN_TABLE_BODY ||
        $this->mode === self::IN_ROW || $this->mode === self::IN_CELL
    ) {
        $this->mode = self::IN_SELECT_IN_TABLE;
    } else {
        $this->mode = self::IN_SELECT;
    }
    break;
1387
1388 case 'option': case 'optgroup':
1389 if ($this->elementInScope('option')) {
1390 $this->emitToken(array(
1391 'name' => 'option',
1392 'type' => HTML5_Tokenizer::ENDTAG,
1393 ));
1394 }
1395 $this->reconstructActiveFormattingElements();
1396 $this->insertElement($token);
1397 break;
1398
case 'rp': case 'rt':
    /* If the stack of open elements has a ruby element in scope, then generate
     * implied end tags. If the current node is not then a ruby element, this is
     * a parse error; pop all the nodes from the current node up to the node
     * immediately before the bottommost ruby element on the stack of open elements.
     */
    if ($this->elementInScope('ruby')) {
        $this->generateImpliedEndTags();

        /* Unwind to the ruby element only inside this branch. Without a
         * ruby element on the stack the loop would pop every open
         * element and then spin forever on the null that array_pop()
         * returns for an empty array.
         * NOTE(review): assumes elementInScope('ruby') implies a ruby
         * element is on $this->stack -- confirm against its definition. */
        $peek = false;
        do {
            if ($peek) {
                // parse error
            }
            $peek = array_pop($this->stack);
        } while ($peek->tagName !== 'ruby');
        $this->stack[] = $peek; // we popped one too many
    }

    /* Insert an HTML element for the token. */
    $this->insertElement($token);
    break;
1418
1419 // spec diversion
1420
1421 case 'math':
1422 $this->reconstructActiveFormattingElements();
1423 $token = $this->adjustMathMLAttributes($token);
1424 $token = $this->adjustForeignAttributes($token);
1425 $this->insertForeignElement($token, self::NS_MATHML);
1426 if (isset($token['self-closing'])) {
1427 // XERROR: acknowledge the token's self-closing flag
1428 array_pop($this->stack);
1429 }
1430 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1431 $this->secondary_mode = $this->mode;
1432 $this->mode = self::IN_FOREIGN_CONTENT;
1433 }
1434 break;
1435
1436 case 'svg':
1437 $this->reconstructActiveFormattingElements();
1438 $token = $this->adjustSVGAttributes($token);
1439 $token = $this->adjustForeignAttributes($token);
1440 $this->insertForeignElement($token, self::NS_SVG);
1441 if (isset($token['self-closing'])) {
1442 // XERROR: acknowledge the token's self-closing flag
1443 array_pop($this->stack);
1444 }
1445 if ($this->mode !== self::IN_FOREIGN_CONTENT) {
1446 $this->secondary_mode = $this->mode;
1447 $this->mode = self::IN_FOREIGN_CONTENT;
1448 }
1449 break;
1450
1451 case 'caption': case 'col': case 'colgroup': case 'frame': case 'head':
1452 case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr':
1453 // parse error
1454 break;
1455
1456 /* A start tag token not covered by the previous entries */
1457 default:
1458 /* Reconstruct the active formatting elements, if any. */
1459 $this->reconstructActiveFormattingElements();
1460
1461 $this->insertElement($token);
1462 /* This element will be a phrasing element. */
1463 break;
1464 }
1465 break;
1466
1467 case HTML5_Tokenizer::ENDTAG:
1468 switch($token['name']) {
/* An end tag with the tag name "body" */
case 'body':
    /* If the stack of open elements does not have a body
     * element in scope, this is a parse error; ignore the
     * token. */
    if(!$this->elementInScope('body')) {
        // Ignored means no state change: the insertion mode is left
        // alone, and the "html" end tag handler checks this flag
        // before reprocessing its own token.
        $this->ignored = true;

    /* Otherwise, if there is a node in the stack of open
     * elements that is not either a dc element, a dd element,
     * a ds element, a dt element, an li element, an optgroup
     * element, an option element, a p element, an rp element,
     * an rt element, a tbody element, a td element, a tfoot
     * element, a th element, a thead element, a tr element,
     * the body element, or the html element, then this is a
     * parse error.
     */
    } else {
        // XERROR: implement this check for parse error

        /* Change the insertion mode to "after body". */
        $this->mode = self::AFTER_BODY;
    }
    break;
1493
1494 /* An end tag with the tag name "html" */
1495 case 'html':
1496 /* Act as if an end tag with tag name "body" had been seen,
1497 then, if that token wasn't ignored, reprocess the current
1498 token. */
1499 $this->emitToken(array(
1500 'name' => 'body',
1501 'type' => HTML5_Tokenizer::ENDTAG
1502 ));
1503
1504 if (!$this->ignored) $this->emitToken($token);
1505 break;
1506
1507 case 'address': case 'article': case 'aside': case 'blockquote':
1508 case 'center': case 'datagrid': case 'details': case 'dir':
1509 case 'div': case 'dl': case 'fieldset': case 'footer':
1510 case 'header': case 'hgroup': case 'listing': case 'menu':
1511 case 'nav': case 'ol': case 'pre': case 'section': case 'ul':
1512 /* If the stack of open elements has an element in scope
1513 with the same tag name as that of the token, then generate
1514 implied end tags. */
1515 if($this->elementInScope($token['name'])) {
1516 $this->generateImpliedEndTags();
1517
1518 /* Now, if the current node is not an element with
1519 the same tag name as that of the token, then this
1520 is a parse error. */
1521 // XERROR: implement parse error logic
1522
1523 /* If the stack of open elements has an element in
1524 scope with the same tag name as that of the token,
1525 then pop elements from this stack until an element
1526 with that tag name has been popped from the stack. */
1527 do {
1528 $node = array_pop($this->stack);
1529 } while ($node->tagName !== $token['name']);
1530 } else {
1531 // parse error
1532 }
1533 break;
1534
/* An end tag whose tag name is "form" */
case 'form':
    /* Let node be the element that the form element pointer is set to. */
    $node = $this->form_pointer;
    /* Set the form element pointer to null. */
    $this->form_pointer = null;
    /* If node is null or the stack of open elements does not
     * have node in scope, then this is a parse error; ignore the token. */
    // The membership test is by identity (strict). A loose in_array()
    // compares objects by value and can report a different element
    // as a match.
    // NOTE(review): this checks presence on the stack, not scope.
    if ($node === null || !in_array($node, $this->stack, true)) {
        // parse error
        $this->ignored = true;
    } else {
        /* 1. Generate implied end tags. */
        $this->generateImpliedEndTags();
        /* 2. If the current node is not node, then this is a parse error. */
        if (end($this->stack) !== $node) {
            // parse error
        }
        /* 3. Remove node from the stack of open elements. */
        // Guard the lookup: array_splice() would treat a false offset
        // as 0 and remove the first (root) entry of the stack.
        $node_pos = array_search($node, $this->stack, true);
        if ($node_pos !== false) {
            array_splice($this->stack, $node_pos, 1);
        }
    }

    break;
1558
1559 /* An end tag whose tag name is "p" */
1560 case 'p':
1561 /* If the stack of open elements has a p element in scope,
1562 then generate implied end tags, except for p elements. */
1563 if($this->elementInScope('p')) {
1564 /* Generate implied end tags, except for elements with
1565 * the same tag name as the token. */
1566 $this->generateImpliedEndTags(array('p'));
1567
1568 /* If the current node is not a p element, then this is
1569 a parse error. */
1570 // XERROR: implement
1571
1572 /* Pop elements from the stack of open elements until
1573 * an element with the same tag name as the token has
1574 * been popped from the stack. */
1575 do {
1576 $node = array_pop($this->stack);
1577 } while ($node->tagName !== 'p');
1578
1579 } else {
1580 // parse error
1581 $this->emitToken(array(
1582 'name' => 'p',
1583 'type' => HTML5_Tokenizer::STARTTAG,
1584 ));
1585 $this->emitToken($token);
1586 }
1587 break;
1588
1589 /* An end tag whose tag name is "li" */
1590 case 'li':
1591 /* If the stack of open elements does not have an element
1592 * in list item scope with the same tag name as that of the
1593 * token, then this is a parse error; ignore the token. */
1594 if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) {
1595 /* Generate implied end tags, except for elements with the
1596 * same tag name as the token. */
1597 $this->generateImpliedEndTags(array($token['name']));
1598 /* If the current node is not an element with the same tag
1599 * name as that of the token, then this is a parse error. */
1600 // XERROR: parse error
1601 /* Pop elements from the stack of open elements until an
1602 * element with the same tag name as the token has been
1603 * popped from the stack. */
1604 do {
1605 $node = array_pop($this->stack);
1606 } while ($node->tagName !== $token['name']);
1607 } else {
1608 // XERROR: parse error
1609 }
1610 break;
1611
1612 /* An end tag whose tag name is "dc", "dd", "ds", "dt" */
1613 case 'dc': case 'dd': case 'ds': case 'dt':
1614 if($this->elementInScope($token['name'])) {
1615 $this->generateImpliedEndTags(array($token['name']));
1616
1617 /* If the current node is not an element with the same
1618 tag name as the token, then this is a parse error. */
1619 // XERROR: implement parse error
1620
1621 /* Pop elements from the stack of open elements until
1622 * an element with the same tag name as the token has
1623 * been popped from the stack. */
1624 do {
1625 $node = array_pop($this->stack);
1626 } while ($node->tagName !== $token['name']);
1627
1628 } else {
1629 // XERROR: parse error
1630 }
1631 break;
1632
1633 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",
1634 "h5", "h6" */
1635 case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6':
1636 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');
1637
1638 /* If the stack of open elements has in scope an element whose
1639 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then
1640 generate implied end tags. */
1641 if($this->elementInScope($elements)) {
1642 $this->generateImpliedEndTags();
1643
1644 /* Now, if the current node is not an element with the same
1645 tag name as that of the token, then this is a parse error. */
1646 // XERROR: implement parse error
1647
1648 /* If the stack of open elements has in scope an element
1649 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or
1650 "h6", then pop elements from the stack until an element
1651 with one of those tag names has been popped from the stack. */
1652 do {
1653 $node = array_pop($this->stack);
1654 } while (!in_array($node->tagName, $elements));
1655 } else {
1656 // parse error
1657 }
1658 break;
1659
1660 /* An end tag whose tag name is one of: "a", "b", "big", "code", "em",
1661 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */
1662 case 'a': case 'b': case 'big': case 'code': case 'em': case 'font':
1663 case 'i': case 'nobr': case 's': case 'small': case 'strike':
1664 case 'strong': case 'tt': case 'u':
1665 // XERROR: generally speaking this needs parse error logic
1666 /* 1. Let the formatting element be the last element in
1667 the list of active formatting elements that:
1668 * is between the end of the list and the last scope
1669 marker in the list, if any, or the start of the list
1670 otherwise, and
1671 * has the same tag name as the token.
1672 */
1673 while(true) {
1674 for($a = count($this->a_formatting) - 1; $a >= 0; $a--) {
1675 if($this->a_formatting[$a] === self::MARKER) {
1676 break;
1677
1678 } elseif($this->a_formatting[$a]->tagName === $token['name']) {
1679 $formatting_element = $this->a_formatting[$a];
1680 $in_stack = in_array($formatting_element, $this->stack, true);
1681 $fe_af_pos = $a;
1682 break;
1683 }
1684 }
1685
1686 /* If there is no such node, or, if that node is
1687 also in the stack of open elements but the element
1688 is not in scope, then this is a parse error. Abort
1689 these steps. The token is ignored. */
1690 if(!isset($formatting_element) || ($in_stack &&
1691 !$this->elementInScope($token['name']))) {
1692 $this->ignored = true;
1693 break;
1694
1695 /* Otherwise, if there is such a node, but that node
1696 is not in the stack of open elements, then this is a
1697 parse error; remove the element from the list, and
1698 abort these steps. */
1699 } elseif(isset($formatting_element) && !$in_stack) {
1700 unset($this->a_formatting[$fe_af_pos]);
1701 $this->a_formatting = array_merge($this->a_formatting);
1702 break;
1703 }
1704
1705 /* Otherwise, there is a formatting element and that
1706 * element is in the stack and is in scope. If the
1707 * element is not the current node, this is a parse
1708 * error. In any case, proceed with the algorithm as
1709 * written in the following steps. */
1710 // XERROR: implement me
1711
1712 /* 2. Let the furthest block be the topmost node in the
1713 stack of open elements that is lower in the stack
1714 than the formatting element, and is not an element in
1715 the phrasing or formatting categories. There might
1716 not be one. */
1717 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1718 $length = count($this->stack);
1719
1720 for($s = $fe_s_pos + 1; $s < $length; $s++) {
1721 $category = $this->getElementCategory($this->stack[$s]);
1722
1723 if($category !== self::PHRASING && $category !== self::FORMATTING) {
1724 $furthest_block = $this->stack[$s];
1725 break;
1726 }
1727 }
1728
1729 /* 3. If there is no furthest block, then the UA must
1730 skip the subsequent steps and instead just pop all
1731 the nodes from the bottom of the stack of open
1732 elements, from the current node up to the formatting
1733 element, and remove the formatting element from the
1734 list of active formatting elements. */
1735 if(!isset($furthest_block)) {
1736 for($n = $length - 1; $n >= $fe_s_pos; $n--) {
1737 array_pop($this->stack);
1738 }
1739
1740 unset($this->a_formatting[$fe_af_pos]);
1741 $this->a_formatting = array_merge($this->a_formatting);
1742 break;
1743 }
1744
1745 /* 4. Let the common ancestor be the element
1746 immediately above the formatting element in the stack
1747 of open elements. */
1748 $common_ancestor = $this->stack[$fe_s_pos - 1];
1749
1750 /* 5. Let a bookmark note the position of the
1751 formatting element in the list of active formatting
1752 elements relative to the elements on either side
1753 of it in the list. */
1754 $bookmark = $fe_af_pos;
1755
1756 /* 6. Let node and last node be the furthest block.
1757 Follow these steps: */
1758 $node = $furthest_block;
1759 $last_node = $furthest_block;
1760
1761 while(true) {
1762 for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {
1763 /* 6.1 Let node be the element immediately
1764 prior to node in the stack of open elements. */
1765 $node = $this->stack[$n];
1766
1767 /* 6.2 If node is not in the list of active
1768 formatting elements, then remove node from
1769 the stack of open elements and then go back
1770 to step 1. */
1771 if(!in_array($node, $this->a_formatting, true)) {
1772 array_splice($this->stack, $n, 1);
1773
1774 } else {
1775 break;
1776 }
1777 }
1778
1779 /* 6.3 Otherwise, if node is the formatting
1780 element, then go to the next step in the overall
1781 algorithm. */
1782 if($node === $formatting_element) {
1783 break;
1784
1785 /* 6.4 Otherwise, if last node is the furthest
1786 block, then move the aforementioned bookmark to
1787 be immediately after the node in the list of
1788 active formatting elements. */
1789 } elseif($last_node === $furthest_block) {
1790 $bookmark = array_search($node, $this->a_formatting, true) + 1;
1791 }
1792
1793 /* 6.5 Create an element for the token for which
1794 * the element node was created, replace the entry
1795 * for node in the list of active formatting
1796 * elements with an entry for the new element,
1797 * replace the entry for node in the stack of open
1798 * elements with an entry for the new element, and
1799 * let node be the new element. */
1800 // we don't know what the token is anymore
1801 // XDOM
1802 $clone = $node->cloneNode();
1803 $a_pos = array_search($node, $this->a_formatting, true);
1804 $s_pos = array_search($node, $this->stack, true);
1805 $this->a_formatting[$a_pos] = $clone;
1806 $this->stack[$s_pos] = $clone;
1807 $node = $clone;
1808
1809 /* 6.6 Insert last node into node, first removing
1810 it from its previous parent node if any. */
1811 // XDOM
1812 if($last_node->parentNode !== null) {
1813 $last_node->parentNode->removeChild($last_node);
1814 }
1815
1816 // XDOM
1817 $node->appendChild($last_node);
1818
1819 /* 6.7 Let last node be node. */
1820 $last_node = $node;
1821
1822 /* 6.8 Return to step 1 of this inner set of steps. */
1823 }
1824
1825 /* 7. If the common ancestor node is a table, tbody,
1826 * tfoot, thead, or tr element, then, foster parent
1827 * whatever last node ended up being in the previous
1828 * step, first removing it from its previous parent
1829 * node if any. */
1830 // XDOM
1831 if ($last_node->parentNode) { // common step
1832 $last_node->parentNode->removeChild($last_node);
1833 }
1834 if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
1835 $this->fosterParent($last_node);
1836 /* Otherwise, append whatever last node ended up being
1837 * in the previous step to the common ancestor node,
1838 * first removing it from its previous parent node if
1839 * any. */
1840 } else {
1841 // XDOM
1842 $common_ancestor->appendChild($last_node);
1843 }
1844
1845 /* 8. Create an element for the token for which the
1846 * formatting element was created. */
1847 // XDOM
1848 $clone = $formatting_element->cloneNode();
1849
1850 /* 9. Take all of the child nodes of the furthest
1851 block and append them to the element created in the
1852 last step. */
1853 // XDOM
1854 while($furthest_block->hasChildNodes()) {
1855 $child = $furthest_block->firstChild;
1856 $furthest_block->removeChild($child);
1857 $clone->appendChild($child);
1858 }
1859
1860 /* 10. Append that clone to the furthest block. */
1861 // XDOM
1862 $furthest_block->appendChild($clone);
1863
1864 /* 11. Remove the formatting element from the list
1865 of active formatting elements, and insert the new element
1866 into the list of active formatting elements at the
1867 position of the aforementioned bookmark. */
1868 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);
1869 array_splice($this->a_formatting, $fe_af_pos, 1);
1870
1871 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);
1872 $af_part2 = array_slice($this->a_formatting, $bookmark);
1873 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);
1874
1875 /* 12. Remove the formatting element from the stack
1876 of open elements, and insert the new element into the stack
1877 of open elements immediately below the position of the
1878 furthest block in that stack. */
1879 $fe_s_pos = array_search($formatting_element, $this->stack, true);
1880 array_splice($this->stack, $fe_s_pos, 1);
1881
1882 $fb_s_pos = array_search($furthest_block, $this->stack, true);
1883 $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1);
1884 $s_part2 = array_slice($this->stack, $fb_s_pos + 1);
1885 $this->stack = array_merge($s_part1, array($clone), $s_part2);
1886
1887 /* 13. Jump back to step 1 in this series of steps. */
1888 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);
1889 }
1890 break;
1891
1892 case 'applet': case 'button': case 'marquee': case 'object':
1893 /* If the stack of open elements has an element in scope whose
1894 tag name matches the tag name of the token, then generate implied
1895 tags. */
1896 if($this->elementInScope($token['name'])) {
1897 $this->generateImpliedEndTags();
1898
1899 /* Now, if the current node is not an element with the same
1900 tag name as the token, then this is a parse error. */
1901 // XERROR: implement logic
1902
1903 /* Pop elements from the stack of open elements until
1904 * an element with the same tag name as the token has
1905 * been popped from the stack. */
1906 do {
1907 $node = array_pop($this->stack);
1908 } while ($node->tagName !== $token['name']);
1909
1910 /* Clear the list of active formatting elements up to the
1911 * last marker. */
1912 $keys = array_keys($this->a_formatting, self::MARKER, true);
1913 $marker = end($keys);
1914
1915 for($n = count($this->a_formatting) - 1; $n > $marker; $n--) {
1916 array_pop($this->a_formatting);
1917 }
1918 } else {
1919 // parse error
1920 }
1921 break;
1922
1923 case 'br':
1924 // Parse error
1925 $this->emitToken(array(
1926 'name' => 'br',
1927 'type' => HTML5_Tokenizer::STARTTAG,
1928 ));
1929 break;
1930
1931 /* An end tag token not covered by the previous entries */
1932 default:
1933 for($n = count($this->stack) - 1; $n >= 0; $n--) {
1934 /* Initialise node to be the current node (the bottommost
1935 node of the stack). */
1936 $node = $this->stack[$n];
1937
1938 /* If node has the same tag name as the end tag token,
1939 then: */
1940 if($token['name'] === $node->tagName) {
1941 /* Generate implied end tags. */
1942 $this->generateImpliedEndTags();
1943
1944 /* If the tag name of the end tag token does not
1945 match the tag name of the current node, this is a
1946 parse error. */
1947 // XERROR: implement this
1948
1949 /* Pop all the nodes from the current node up to
1950 node, including node, then stop these steps. */
1951 // XSKETCHY
1952 do {
1953 $pop = array_pop($this->stack);
1954 } while ($pop !== $node);
1955 break;
1956
1957 } else {
1958 $category = $this->getElementCategory($node);
1959
1960 if($category !== self::FORMATTING && $category !== self::PHRASING) {
1961 /* Otherwise, if node is in neither the formatting
1962 category nor the phrasing category, then this is a
1963 parse error. Stop this algorithm. The end tag token
1964 is ignored. */
1965 $this->ignored = true;
1966 break;
1967 // parse error
1968 }
1969 }
1970 /* Set node to the previous entry in the stack of open elements. Loop. */
1971 }
1972 break;
1973 }
1974 break;
1975 }
1976 break;
1977
case self::IN_CDATA_RCDATA:
    /* Handles a token while the insertion mode is IN_CDATA_RCDATA.
     * Text is appended to the current node; an end tag or end-of-file
     * pops the current node and restores the insertion mode that was
     * saved in original_mode. Every other token type is left alone. */
    $token_type = $token['type'];

    if (
        $token_type === HTML5_Tokenizer::CHARACTER ||
        $token_type === HTML5_Tokenizer::SPACECHARACTER
    ) {
        $this->insertText($token['data']);

    } elseif (
        $token_type === HTML5_Tokenizer::EOF ||
        $token_type === HTML5_Tokenizer::ENDTAG
    ) {
        /* An end tag named "script", any other end tag, and EOF all
         * start the same way: script execution is not implemented, so
         * there is nothing script-specific to do. */
        array_pop($this->stack);
        $this->mode = $this->original_mode;

        if ($token_type === HTML5_Tokenizer::EOF) {
            /* Parse error. The EOF token is reprocessed under the
             * restored insertion mode. */
            $this->emitToken($token);
        }
    }
    break;
2001
case self::IN_TABLE:
    /* Handles a token while the insertion mode is IN_TABLE. $clear is
     * the list of tag names handed to clearStackToTableContext() for
     * "clear the stack back to a table context". */
    $clear = array('html', 'table');

    $token_type   = $token['type'];
    $is_start_tag = $token_type === HTML5_Tokenizer::STARTTAG;
    $is_end_tag   = $token_type === HTML5_Tokenizer::ENDTAG;
    /* The tag name is only looked up for start and end tag tokens. */
    $tag_name     = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

    /* A character token: start with an empty pending-table-characters
     * buffer, remember the current insertion mode, switch to "in table
     * text" and reprocess the token there. */
    if (
        $token_type === HTML5_Tokenizer::CHARACTER ||
        $token_type === HTML5_Tokenizer::SPACECHARACTER
    ) {
        $this->pendingTableCharacters = "";
        $this->pendingTableCharactersDirty = false;
        $this->original_mode = $this->mode;
        $this->mode = self::IN_TABLE_TEXT;
        $this->emitToken($token);

    /* A comment token: append a Comment node to the current node. */
    } elseif ($token_type === HTML5_Tokenizer::COMMENT) {
        $this->insertComment($token['data']);

    /* A DOCTYPE token */
    } elseif ($token_type === HTML5_Tokenizer::DOCTYPE) {
        // parse error

    /* Start tag "caption": clear the stack back to a table context,
     * push a marker onto the list of active formatting elements,
     * insert the element and switch to "in caption". */
    } elseif ($is_start_tag && $tag_name === 'caption') {
        $this->clearStackToTableContext($clear);
        $this->a_formatting[] = self::MARKER;
        $this->insertElement($token);
        $this->mode = self::IN_CAPTION;

    /* Start tag "colgroup": clear the stack back to a table context,
     * insert the element and switch to "in column group". */
    } elseif ($is_start_tag && $tag_name === 'colgroup') {
        $this->clearStackToTableContext($clear);
        $this->insertElement($token);
        $this->mode = self::IN_COLUMN_GROUP;

    /* Start tag "col": act as if a "colgroup" start tag had been
     * seen, then reprocess the current token. */
    } elseif ($is_start_tag && $tag_name === 'col') {
        $this->emitToken(array(
            'name' => 'colgroup',
            'type' => HTML5_Tokenizer::STARTTAG,
            'attr' => array()
        ));

        $this->emitToken($token);

    /* Start tag "tbody", "tfoot" or "thead": clear the stack back to
     * a table context, insert the element and switch to "in table
     * body". */
    } elseif ($is_start_tag && in_array($tag_name, array('tbody', 'tfoot', 'thead'))) {
        $this->clearStackToTableContext($clear);
        $this->insertElement($token);
        $this->mode = self::IN_TABLE_BODY;

    /* Start tag "td", "th" or "tr": act as if a "tbody" start tag
     * had been seen, then reprocess the current token. */
    } elseif ($is_start_tag && in_array($tag_name, array('td', 'th', 'tr'))) {
        $this->emitToken(array(
            'name' => 'tbody',
            'type' => HTML5_Tokenizer::STARTTAG,
            'attr' => array()
        ));

        $this->emitToken($token);

    /* Start tag "table": parse error. Act as if a "table" end tag
     * had been seen and, unless that end tag was ignored, reprocess
     * the current token. */
    } elseif ($is_start_tag && $tag_name === 'table') {
        $this->emitToken(array(
            'name' => 'table',
            'type' => HTML5_Tokenizer::ENDTAG
        ));

        if (!$this->ignored) {
            $this->emitToken($token);
        }

    /* End tag "table" */
    } elseif ($is_end_tag && $tag_name === 'table') {
        if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
            /* Pop until a table element has come off the stack, then
             * reset the insertion mode appropriately. */
            do {
                $popped = array_pop($this->stack);
            } while ($popped->tagName !== 'table');

            $this->resetInsertionMode();

        } else {
            /* No table element in table scope: parse error, ignore
             * the token. (fragment case) */
            $this->ignored = true;
        }

    /* End tag "body", "caption", "col", "colgroup", "html", "tbody",
     * "td", "tfoot", "th", "thead" or "tr" */
    } elseif ($is_end_tag && in_array($tag_name,
        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
        'tfoot', 'th', 'thead', 'tr'))) {
        // Parse error. Ignore the token.

    /* Start tag "style" or "script": handled by the "in head" rules. */
    } elseif ($is_start_tag && ($tag_name === 'style' || $tag_name === 'script')) {
        $this->processWithRulesFor($token, self::IN_HEAD);

    /* Start tag "input" whose "type" attribute is an ASCII
     * case-insensitive match for "hidden". An input without such an
     * attribute falls through to "anything else" below. The
     * assignment to $type inside the condition is intentional. */
    } elseif ($is_start_tag && $tag_name === 'input' &&
        ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') {
        // parse error
        $this->insertElement($token);
        array_pop($this->stack);

    /* An end-of-file token */
    } elseif ($token_type === HTML5_Tokenizer::EOF) {
        /* If the current node is not the root html element, then this
         * is a parse error. */
        if (end($this->stack)->tagName !== 'html') {
            // Note: It can only be the current node in the fragment case.
            // parse error
        }
        /* Stop parsing. */

    /* Anything else */
    } else {
        /* Parse error. Process the token with the "in body" rules while
         * foster parenting is switched on, then put the previous
         * foster_parent setting back. */
        $saved_foster_parent = $this->foster_parent;
        $this->foster_parent = true;
        $this->processWithRulesFor($token, self::IN_BODY);
        $this->foster_parent = $saved_foster_parent;
    }
    break;
2164
case self::IN_TABLE_TEXT:
    /* Handles a token while the insertion mode is IN_TABLE_TEXT.
     * Character data is accumulated in pendingTableCharacters; the first
     * token of any other type flushes that buffer, restores the
     * insertion mode saved in original_mode and is then reprocessed. */

    /* A character token */
    if($token['type'] === HTML5_Tokenizer::CHARACTER) {
        /* Append the character token to the pending table
         * character tokens list. A CHARACTER token marks the buffer
         * as dirty; a SPACECHARACTER token (below) does not. */
        $this->pendingTableCharacters .= $token['data'];
        $this->pendingTableCharactersDirty = true;
    } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
        $this->pendingTableCharacters .= $token['data'];
    /* Anything else */
    } else {
        if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) {
            /* If any of the tokens in the pending table character tokens list
             * are character tokens that are not one of U+0009 CHARACTER
             * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or
             * U+0020 SPACE, then reprocess those character tokens using the
             * rules given in the "anything else" entry in the "in table"
             * insertion mode. */
            if ($this->pendingTableCharactersDirty) {
                /* Parse error. Process the token using the rules for the
                 * "in body" insertion mode, except that if the current
                 * node is a table, tbody, tfoot, thead, or tr element,
                 * then, whenever a node would be inserted into the current
                 * node, it must instead be foster parented. */
                // XERROR
                $old = $this->foster_parent;
                $this->foster_parent = true;
                $text_token = array(
                    'type' => HTML5_Tokenizer::CHARACTER,
                    'data' => $this->pendingTableCharacters,
                );
                $this->processWithRulesFor($text_token, self::IN_BODY);
                $this->foster_parent = $old;

            /* Otherwise, insert the characters given by the pending table
             * character tokens list into the current node. */
            } else {
                $this->insertText($this->pendingTableCharacters);
            }

            /* The buffer has been consumed: clear it together with its
             * dirty flag. (This used to assign to a misspelled
             * "pendingTableCharactersNull" property, which left the
             * real flag untouched.) */
            $this->pendingTableCharacters = null;
            $this->pendingTableCharactersDirty = false;
        }

        /* Switch the insertion mode to the original insertion mode and
         * reprocess the token.
         */
        $this->mode = $this->original_mode;
        $this->emitToken($token);
    }
    break;
2215
case self::IN_CAPTION:
    /* Handles a token while the insertion mode is IN_CAPTION. Tokens
     * that are not table-structure tags are delegated to the "in body"
     * rules. */

    /* An end tag whose tag name is "caption" */
    if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (fragment case) */
        if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
            $this->ignored = true;
            // Ignore

        /* Otherwise: */
        } else {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* Now, if the current node is not a caption element, then this
            is a parse error. */
            // XERROR: implement

            /* Pop elements from this stack until a caption element has
            been popped from the stack. */
            do {
                $node = array_pop($this->stack);
            } while ($node->tagName !== 'caption');

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in table". */
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    name is "table" */
    } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],
    array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG &&
    $token['name'] === 'table')) {
        /* Parse error. Act as if an end tag with the tag name "caption"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. */
        $this->emitToken(array(
            'name' => 'caption',
            'type' => HTML5_Tokenizer::ENDTAG
        ));

        if (!$this->ignored) $this->emitToken($token);

    /* An end tag whose tag name is one of: "body", "col", "colgroup",
    "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],
    array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
    'thead', 'tr'))) {
        // Parse error. Ignore the token.
        // ("td" was missing from the list above although the rule names
        // it, so a stray </td> used to fall through to "anything else".)
        $this->ignored = true;

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->processWithRulesFor($token, self::IN_BODY);
    }
    break;
2280
case self::IN_COLUMN_GROUP:
    /* Handles a token while the insertion mode is IN_COLUMN_GROUP.
     * Anything that is not explicitly handled here closes the colgroup
     * (by acting as if a "colgroup" end tag had been seen) and is then
     * reprocessed. */

    /* A character token that is one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        // This used to call insertToken(); insertComment() is the helper
        // the other insertion modes use for exactly this step.
        $this->insertComment($token['data']);

    } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
        // parse error

    } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
        $this->processWithRulesFor($token, self::IN_BODY);

    /* A start tag whose tag name is "col" */
    } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') {
        /* Insert a col element for the token. Immediately pop the current
        node off the stack of open elements. */
        $this->insertElement($token);
        array_pop($this->stack);
        // XERROR: Acknowledge the token's self-closing flag, if it is set.

    /* An end tag whose tag name is "colgroup" */
    } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
    $token['name'] === 'colgroup') {
        /* If the current node is the root html element, then this is a
        parse error, ignore the token. (fragment case) */
        if(end($this->stack)->tagName === 'html') {
            $this->ignored = true;

        /* Otherwise, pop the current node (which will be a colgroup
        element) from the stack of open elements. Switch the insertion
        mode to "in table". */
        } else {
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* An end tag whose tag name is "col" */
    } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') {
        /* Parse error. Ignore the token. */
        $this->ignored = true;

    /* An end-of-file token */
    /* If the current node is the root html element */
    } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') {
        /* Stop parsing */

    /* Anything else */
    } else {
        /* Act as if an end tag with the tag name "colgroup" had been seen,
        and then, if that token wasn't ignored, reprocess the current token. */
        $this->emitToken(array(
            'name' => 'colgroup',
            'type' => HTML5_Tokenizer::ENDTAG
        ));

        if (!$this->ignored) $this->emitToken($token);
    }
    break;
2347
case self::IN_TABLE_BODY:
    /* Handles a token while the insertion mode is IN_TABLE_BODY. $clear
     * is the list of tag names handed to clearStackToTableContext() for
     * "clear the stack back to a table body context". */
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    $token_type   = $token['type'];
    $is_start_tag = $token_type === HTML5_Tokenizer::STARTTAG;
    $is_end_tag   = $token_type === HTML5_Tokenizer::ENDTAG;
    /* The tag name is only looked up for start and end tag tokens. */
    $tag_name     = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

    /* Start tag "tr": clear the stack back to a table body context,
     * insert the tr element and switch to "in row". */
    if ($is_start_tag && $tag_name === 'tr') {
        $this->clearStackToTableContext($clear);
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    /* Start tag "th" or "td": parse error. Act as if a "tr" start tag
     * had been seen, then reprocess the current token. */
    } elseif ($is_start_tag && ($tag_name === 'th' || $tag_name === 'td')) {
        $this->emitToken(array(
            'name' => 'tr',
            'type' => HTML5_Tokenizer::STARTTAG,
            'attr' => array()
        ));

        $this->emitToken($token);

    /* End tag "tbody", "tfoot" or "thead" */
    } elseif ($is_end_tag && in_array($tag_name, array('tbody', 'tfoot', 'thead'))) {
        if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
            /* Clear the stack back to a table body context, pop the
             * current node and switch to "in table". */
            $this->clearStackToTableContext($clear);
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;

        } else {
            /* No element with that tag name in table scope: parse
             * error, ignore the token. */
            $this->ignored = true;
        }

    /* Start tag "caption", "col", "colgroup", "tbody", "tfoot" or
     * "thead", or end tag "table" */
    } elseif (
        ($is_start_tag && in_array($tag_name,
            array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) ||
        ($is_end_tag && $tag_name === 'table')
    ) {
        if ($this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag named after the current node
             * ("tbody", "tfoot" or "thead") had been seen, then
             * reprocess the current token. */
            $this->emitToken(array(
                'name' => end($this->stack)->tagName,
                'type' => HTML5_Tokenizer::ENDTAG
            ));

            $this->emitToken($token);

        } else {
            /* No tbody, thead or tfoot in table scope: parse error,
             * ignore the token. (fragment case) */
            $this->ignored = true;
        }

    /* End tag "body", "caption", "col", "colgroup", "html", "td", "th"
     * or "tr": parse error, ignore the token. */
    } elseif ($is_end_tag && in_array($tag_name,
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) {
        $this->ignored = true;

    /* Anything else: process the token with the "in table" rules. */
    } else {
        $this->processWithRulesFor($token, self::IN_TABLE);
    }
    break;
2436
case self::IN_ROW:
    /* Handles a token while the insertion mode is IN_ROW. $clear is the
     * list of tag names handed to clearStackToTableContext() for "clear
     * the stack back to a table row context". */
    $clear = array('tr', 'html');

    $token_type   = $token['type'];
    $is_start_tag = $token_type === HTML5_Tokenizer::STARTTAG;
    $is_end_tag   = $token_type === HTML5_Tokenizer::ENDTAG;
    /* The tag name is only looked up for start and end tag tokens. */
    $tag_name     = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

    /* Start tag "th" or "td": clear the stack back to a table row
     * context, insert the element, switch to "in cell" and push a
     * marker onto the list of active formatting elements. */
    if ($is_start_tag && ($tag_name === 'th' || $tag_name === 'td')) {
        $this->clearStackToTableContext($clear);
        $this->insertElement($token);
        $this->mode = self::IN_CELL;
        $this->a_formatting[] = self::MARKER;

    /* End tag "tr" */
    } elseif ($is_end_tag && $tag_name === 'tr') {
        if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
            /* Clear the stack back to a table row context, pop the
             * current node (which will be a tr element) and switch to
             * "in table body". */
            $this->clearStackToTableContext($clear);
            array_pop($this->stack);
            $this->mode = self::IN_TABLE_BODY;

        } else {
            /* No tr element in table scope: parse error, ignore the
             * token. (fragment case) */
            $this->ignored = true;
        }

    /* Start tag "caption", "col", "colgroup", "tbody", "tfoot", "thead"
     * or "tr", or end tag "table": act as if a "tr" end tag had been
     * seen and, unless that end tag was ignored, reprocess the current
     * token. */
    } elseif (
        ($is_start_tag && in_array($tag_name,
            array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) ||
        ($is_end_tag && $tag_name === 'table')
    ) {
        $this->emitToken(array(
            'name' => 'tr',
            'type' => HTML5_Tokenizer::ENDTAG
        ));

        if (!$this->ignored) {
            $this->emitToken($token);
        }

    /* End tag "tbody", "tfoot" or "thead" */
    } elseif ($is_end_tag && in_array($tag_name, array('tbody', 'tfoot', 'thead'))) {
        if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
            /* Act as if a "tr" end tag had been seen, then reprocess
             * the current token. */
            $this->emitToken(array(
                'name' => 'tr',
                'type' => HTML5_Tokenizer::ENDTAG
            ));

            $this->emitToken($token);

        } else {
            /* No element with that tag name in table scope: parse
             * error, ignore the token. */
            $this->ignored = true;
        }

    /* End tag "body", "caption", "col", "colgroup", "html", "td" or
     * "th": parse error, ignore the token. */
    } elseif ($is_end_tag && in_array($tag_name,
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) {
        $this->ignored = true;

    /* Anything else: process the token with the "in table" rules. */
    } else {
        $this->processWithRulesFor($token, self::IN_TABLE);
    }
    break;
2523
case self::IN_CELL:
    /* Handles a token while the insertion mode is IN_CELL. Tokens that
     * are not table-structure tags are delegated to the "in body"
     * rules. */
    $token_type   = $token['type'];
    $is_start_tag = $token_type === HTML5_Tokenizer::STARTTAG;
    $is_end_tag   = $token_type === HTML5_Tokenizer::ENDTAG;
    /* The tag name is only looked up for start and end tag tokens. */
    $tag_name     = ($is_start_tag || $is_end_tag) ? $token['name'] : null;

    /* End tag "td" or "th" */
    if ($is_end_tag && ($tag_name === 'td' || $tag_name === 'th')) {
        if ($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
            /* Generate implied end tags, except for elements with the
             * same tag name as the token. */
            $this->generateImpliedEndTags(array($token['name']));

            /* Now, if the current node is not an element with the same
             * tag name as the token, then this is a parse error. */
            // XERROR: Implement parse error code

            /* Pop until an element with the token's tag name has come
             * off the stack. */
            do {
                $popped = array_pop($this->stack);
            } while ($popped->tagName !== $token['name']);

            /* Clear the list of active formatting elements up to the
             * last marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch to "in row". (The current node will be a tr
             * element at this point.) */
            $this->mode = self::IN_ROW;

        } else {
            /* No element with that tag name in table scope: parse
             * error, ignore the token. */
            $this->ignored = true;
        }

    /* Start tag "caption", "col", "colgroup", "tbody", "td", "tfoot",
     * "th", "thead" or "tr", or end tag "table", "tbody", "tfoot",
     * "thead" or "tr". Both groups are handled the same way. */
    } elseif (
        ($is_start_tag && in_array($tag_name,
            array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
            'thead', 'tr'))) ||
        ($is_end_tag && in_array($tag_name,
            array('table', 'tbody', 'tfoot', 'thead', 'tr')))
    ) {
        if ($this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) {
            /* Close the cell and reprocess the current token. */
            $this->closeCell();
            $this->emitToken($token);

        } else {
            /* No td or th element in table scope: parse error, ignore
             * the token. (fragment / innerHTML case) */
            $this->ignored = true;
        }

    /* End tag "body", "caption", "col", "colgroup" or "html": parse
     * error, ignore the token. */
    } elseif ($is_end_tag && in_array($tag_name,
        array('body', 'caption', 'col', 'colgroup', 'html'))) {
        $this->ignored = true;

    /* Anything else: process the token with the "in body" rules. */
    } else {
        $this->processWithRulesFor($token, self::IN_BODY);
    }
    break;
2609
2610 case self::IN_SELECT:
2611 /* Handle the token as follows: */
2612
2613 /* A character token */
2614 if(
2615 $token['type'] === HTML5_Tokenizer::CHARACTER ||
2616 $token['type'] === HTML5_Tokenizer::SPACECHARACTER
2617 ) {
2618 /* Append the token's character to the current node. */
2619 $this->insertText($token['data']);
2620
2621 /* A comment token */
2622 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2623 /* Append a Comment node to the current node with the data
2624 attribute set to the data given in the comment token. */
2625 $this->insertComment($token['data']);
2626
2627 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2628 // parse error
2629
2630 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2631 $this->processWithRulesFor($token, self::INBODY);
2632
2633 /* A start tag token whose tag name is "option" */
2634 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2635 $token['name'] === 'option') {
2636 /* If the current node is an option element, act as if an end tag
2637 with the tag name "option" had been seen. */
2638 if(end($this->stack)->tagName === 'option') {
2639 $this->emitToken(array(
2640 'name' => 'option',
2641 'type' => HTML5_Tokenizer::ENDTAG
2642 ));
2643 }
2644
2645 /* Insert an HTML element for the token. */
2646 $this->insertElement($token);
2647
2648 /* A start tag token whose tag name is "optgroup" */
2649 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2650 $token['name'] === 'optgroup') {
2651 /* If the current node is an option element, act as if an end tag
2652 with the tag name "option" had been seen. */
2653 if(end($this->stack)->tagName === 'option') {
2654 $this->emitToken(array(
2655 'name' => 'option',
2656 'type' => HTML5_Tokenizer::ENDTAG
2657 ));
2658 }
2659
2660 /* If the current node is an optgroup element, act as if an end tag
2661 with the tag name "optgroup" had been seen. */
2662 if(end($this->stack)->tagName === 'optgroup') {
2663 $this->emitToken(array(
2664 'name' => 'optgroup',
2665 'type' => HTML5_Tokenizer::ENDTAG
2666 ));
2667 }
2668
2669 /* Insert an HTML element for the token. */
2670 $this->insertElement($token);
2671
2672 /* An end tag token whose tag name is "optgroup" */
2673 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2674 $token['name'] === 'optgroup') {
2675 /* First, if the current node is an option element, and the node
2676 immediately before it in the stack of open elements is an optgroup
2677 element, then act as if an end tag with the tag name "option" had
2678 been seen. */
2679 $elements_in_stack = count($this->stack);
2680
2681 if($this->stack[$elements_in_stack - 1]->tagName === 'option' &&
2682 $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') {
2683 $this->emitToken(array(
2684 'name' => 'option',
2685 'type' => HTML5_Tokenizer::ENDTAG
2686 ));
2687 }
2688
2689 /* If the current node is an optgroup element, then pop that node
2690 from the stack of open elements. Otherwise, this is a parse error,
2691 ignore the token. */
2692 if(end($this->stack)->tagName === 'optgroup') {
2693 array_pop($this->stack);
2694 } else {
2695 // parse error
2696 $this->ignored = true;
2697 }
2698
2699 /* An end tag token whose tag name is "option" */
2700 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2701 $token['name'] === 'option') {
2702 /* If the current node is an option element, then pop that node
2703 from the stack of open elements. Otherwise, this is a parse error,
2704 ignore the token. */
2705 if(end($this->stack)->tagName === 'option') {
2706 array_pop($this->stack);
2707 } else {
2708 // parse error
2709 $this->ignored = true;
2710 }
2711
2712 /* An end tag whose tag name is "select" */
2713 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2714 $token['name'] === 'select') {
2715 /* If the stack of open elements does not have an element in table
2716 scope with the same tag name as the token, this is a parse error.
2717 Ignore the token. (fragment case) */
2718 if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2719 $this->ignored = true;
2720 // parse error
2721
2722 /* Otherwise: */
2723 } else {
2724 /* Pop elements from the stack of open elements until a select
2725 element has been popped from the stack. */
2726 do {
2727 $node = array_pop($this->stack);
2728 } while ($node->tagName !== 'select');
2729
2730 /* Reset the insertion mode appropriately. */
2731 $this->resetInsertionMode();
2732 }
2733
2734 /* A start tag whose tag name is "select" */
2735 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') {
2736 /* Parse error. Act as if the token had been an end tag with the
2737 tag name "select" instead. */
2738 $this->emitToken(array(
2739 'name' => 'select',
2740 'type' => HTML5_Tokenizer::ENDTAG
2741 ));
2742
2743 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
2744 ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) {
2745 // parse error
2746 $this->emitToken(array(
2747 'name' => 'select',
2748 'type' => HTML5_Tokenizer::ENDTAG
2749 ));
2750 $this->emitToken($token);
2751
2752 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') {
2753 $this->processWithRulesFor($token, self::IN_HEAD);
2754
2755 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2756 // XERROR: If the current node is not the root html element, then this is a parse error.
2757 /* Stop parsing */
2758
2759 /* Anything else */
2760 } else {
2761 /* Parse error. Ignore the token. */
2762 $this->ignored = true;
2763 }
2764 break;
2765
2766 case self::IN_SELECT_IN_TABLE:
2767
2768 if($token['type'] === HTML5_Tokenizer::STARTTAG &&
2769 in_array($token['name'], array('caption', 'table', 'tbody',
2770 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2771 // parse error
2772 $this->emitToken(array(
2773 'name' => 'select',
2774 'type' => HTML5_Tokenizer::ENDTAG,
2775 ));
2776 $this->emitToken($token);
2777
2778 /* An end tag whose tag name is one of: "caption", "table", "tbody",
2779 "tfoot", "thead", "tr", "td", "th" */
2780 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
2781 in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) {
2782 /* Parse error. */
2783 // parse error
2784
2785 /* If the stack of open elements has an element in table scope with
2786 the same tag name as that of the token, then act as if an end tag
2787 with the tag name "select" had been seen, and reprocess the token.
2788 Otherwise, ignore the token. */
2789 if($this->elementInScope($token['name'], self::SCOPE_TABLE)) {
2790 $this->emitToken(array(
2791 'name' => 'select',
2792 'type' => HTML5_Tokenizer::ENDTAG
2793 ));
2794
2795 $this->emitToken($token);
2796 } else {
2797 $this->ignored = true;
2798 }
2799 } else {
2800 $this->processWithRulesFor($token, self::IN_SELECT);
2801 }
2802 break;
2803
2804 case self::IN_FOREIGN_CONTENT:
2805 if ($token['type'] === HTML5_Tokenizer::CHARACTER ||
2806 $token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2807 $this->insertText($token['data']);
2808 } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) {
2809 $this->insertComment($token['data']);
2810 } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2811 // XERROR: parse error
2812 } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG &&
2813 $token['name'] === 'script' && end($this->stack)->tagName === 'script' &&
2814 // XDOM
2815 end($this->stack)->namespaceURI === self::NS_SVG) {
2816 array_pop($this->stack);
2817 // a bunch of script running mumbo jumbo
2818 } elseif (
2819 ($token['type'] === HTML5_Tokenizer::STARTTAG &&
2820 ((
2821 $token['name'] !== 'mglyph' &&
2822 $token['name'] !== 'malignmark' &&
2823 // XDOM
2824 end($this->stack)->namespaceURI === self::NS_MATHML &&
2825 in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext'))
2826 ) ||
2827 (
2828 $token['name'] === 'svg' &&
2829 // XDOM
2830 end($this->stack)->namespaceURI === self::NS_MATHML &&
2831 end($this->stack)->tagName === 'annotation-xml'
2832 ) ||
2833 (
2834 // XDOM
2835 end($this->stack)->namespaceURI === self::NS_SVG &&
2836 in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title'))
2837 ) ||
2838 (
2839 // XSKETCHY && XDOM
2840 end($this->stack)->namespaceURI === self::NS_HTML
2841 ))
2842 ) || $token['type'] === HTML5_Tokenizer::ENDTAG
2843 ) {
2844 $this->processWithRulesFor($token, $this->secondary_mode);
2845 /* If, after doing so, the insertion mode is still "in foreign
2846 * content", but there is no element in scope that has a namespace
2847 * other than the HTML namespace, switch the insertion mode to the
2848 * secondary insertion mode. */
2849 if ($this->mode === self::IN_FOREIGN_CONTENT) {
2850 $found = false;
2851 // this basically duplicates elementInScope()
2852 for ($i = count($this->stack) - 1; $i >= 0; $i--) {
2853 // XDOM
2854 $node = $this->stack[$i];
2855 if ($node->namespaceURI !== self::NS_HTML) {
2856 $found = true;
2857 break;
2858 } elseif (in_array($node->tagName, array('table', 'html',
2859 'applet', 'caption', 'td', 'th', 'button', 'marquee',
2860 'object')) || ($node->tagName === 'foreignObject' &&
2861 $node->namespaceURI === self::NS_SVG)) {
2862 break;
2863 }
2864 }
2865 if (!$found) {
2866 $this->mode = $this->secondary_mode;
2867 }
2868 }
2869 } elseif ($token['type'] === HTML5_Tokenizer::EOF || (
2870 $token['type'] === HTML5_Tokenizer::STARTTAG &&
2871 (in_array($token['name'], array('b', "big", "blockquote", "body", "br",
2872 "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",
2873 "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",
2874 "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",
2875 "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",
2876 "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') ||
2877 $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) {
2878 // XERROR: parse error
2879 do {
2880 $node = array_pop($this->stack);
2881 // XDOM
2882 } while ($node->namespaceURI !== self::NS_HTML);
2883 $this->stack[] = $node;
2884 $this->mode = $this->secondary_mode;
2885 $this->emitToken($token);
2886 } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) {
2887 static $svg_lookup = array(
2888 'altglyph' => 'altGlyph',
2889 'altglyphdef' => 'altGlyphDef',
2890 'altglyphitem' => 'altGlyphItem',
2891 'animatecolor' => 'animateColor',
2892 'animatemotion' => 'animateMotion',
2893 'animatetransform' => 'animateTransform',
2894 'clippath' => 'clipPath',
2895 'feblend' => 'feBlend',
2896 'fecolormatrix' => 'feColorMatrix',
2897 'fecomponenttransfer' => 'feComponentTransfer',
2898 'fecomposite' => 'feComposite',
2899 'feconvolvematrix' => 'feConvolveMatrix',
2900 'fediffuselighting' => 'feDiffuseLighting',
2901 'fedisplacementmap' => 'feDisplacementMap',
2902 'fedistantlight' => 'feDistantLight',
2903 'feflood' => 'feFlood',
2904 'fefunca' => 'feFuncA',
2905 'fefuncb' => 'feFuncB',
2906 'fefuncg' => 'feFuncG',
2907 'fefuncr' => 'feFuncR',
2908 'fegaussianblur' => 'feGaussianBlur',
2909 'feimage' => 'feImage',
2910 'femerge' => 'feMerge',
2911 'femergenode' => 'feMergeNode',
2912 'femorphology' => 'feMorphology',
2913 'feoffset' => 'feOffset',
2914 'fepointlight' => 'fePointLight',
2915 'fespecularlighting' => 'feSpecularLighting',
2916 'fespotlight' => 'feSpotLight',
2917 'fetile' => 'feTile',
2918 'feturbulence' => 'feTurbulence',
2919 'foreignobject' => 'foreignObject',
2920 'glyphref' => 'glyphRef',
2921 'lineargradient' => 'linearGradient',
2922 'radialgradient' => 'radialGradient',
2923 'textpath' => 'textPath',
2924 );
2925 // XDOM
2926 $current = end($this->stack);
2927 if ($current->namespaceURI === self::NS_MATHML) {
2928 $token = $this->adjustMathMLAttributes($token);
2929 }
2930 if ($current->namespaceURI === self::NS_SVG &&
2931 isset($svg_lookup[$token['name']])) {
2932 $token['name'] = $svg_lookup[$token['name']];
2933 }
2934 if ($current->namespaceURI === self::NS_SVG) {
2935 $token = $this->adjustSVGAttributes($token);
2936 }
2937 $token = $this->adjustForeignAttributes($token);
2938 $this->insertForeignElement($token, $current->namespaceURI);
2939 if (isset($token['self-closing'])) {
2940 array_pop($this->stack);
2941 // XERROR: acknowledge self-closing flag
2942 }
2943 }
2944 break;
2945
2946 case self::AFTER_BODY:
2947 /* Handle the token as follows: */
2948
2949 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2950 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2951 or U+0020 SPACE */
2952 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
2953 /* Process the token as it would be processed if the insertion mode
2954 was "in body". */
2955 $this->processWithRulesFor($token, self::IN_BODY);
2956
2957 /* A comment token */
2958 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
2959 /* Append a Comment node to the first element in the stack of open
2960 elements (the html element), with the data attribute set to the
2961 data given in the comment token. */
2962 // XDOM
2963 $comment = $this->dom->createComment($token['data']);
2964 $this->stack[0]->appendChild($comment);
2965
2966 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
2967 // parse error
2968
2969 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
2970 $this->processWithRulesFor($token, self::IN_BODY);
2971
2972 /* An end tag with the tag name "html" */
2973 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') {
2974 /* If the parser was originally created as part of the HTML
2975 * fragment parsing algorithm, this is a parse error; ignore
2976 * the token. (fragment case) */
2977 $this->ignored = true;
2978 // XERROR: implement this
2979
2980 $this->mode = self::AFTER_AFTER_BODY;
2981
2982 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
2983 /* Stop parsing */
2984
2985 /* Anything else */
2986 } else {
2987 /* Parse error. Set the insertion mode to "in body" and reprocess
2988 the token. */
2989 $this->mode = self::IN_BODY;
2990 $this->emitToken($token);
2991 }
2992 break;
2993
2994 case self::IN_FRAMESET:
2995 /* Handle the token as follows: */
2996
2997 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
2998 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
2999 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3000 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3001 /* Append the character to the current node. */
3002 $this->insertText($token['data']);
3003
3004 /* A comment token */
3005 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
3006 /* Append a Comment node to the current node with the data
3007 attribute set to the data given in the comment token. */
3008 $this->insertComment($token['data']);
3009
3010 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3011 // parse error
3012
3013 /* A start tag with the tag name "frameset" */
3014 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3015 $token['name'] === 'frameset') {
3016 $this->insertElement($token);
3017
3018 /* An end tag with the tag name "frameset" */
3019 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
3020 $token['name'] === 'frameset') {
3021 /* If the current node is the root html element, then this is a
3022 parse error; ignore the token. (fragment case) */
3023 if(end($this->stack)->tagName === 'html') {
3024 $this->ignored = true;
3025 // Parse error
3026
3027 } else {
3028 /* Otherwise, pop the current node from the stack of open
3029 elements. */
3030 array_pop($this->stack);
3031
3032 /* If the parser was not originally created as part of the HTML
3033 * fragment parsing algorithm (fragment case), and the current
3034 * node is no longer a frameset element, then switch the
3035 * insertion mode to "after frameset". */
3036 $this->mode = self::AFTER_FRAMESET;
3037 }
3038
3039 /* A start tag with the tag name "frame" */
3040 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3041 $token['name'] === 'frame') {
3042 /* Insert an HTML element for the token. */
3043 $this->insertElement($token);
3044
3045 /* Immediately pop the current node off the stack of open elements. */
3046 array_pop($this->stack);
3047
3048 // XERROR: Acknowledge the token's self-closing flag, if it is set.
3049
3050 /* A start tag with the tag name "noframes" */
3051 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3052 $token['name'] === 'noframes') {
3053 /* Process the token using the rules for the "in head" insertion mode. */
3054 $this->processwithRulesFor($token, self::IN_HEAD);
3055
3056 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3057 // XERROR: If the current node is not the root html element, then this is a parse error.
3058 /* Stop parsing */
3059 /* Anything else */
3060 } else {
3061 /* Parse error. Ignore the token. */
3062 $this->ignored = true;
3063 }
3064 break;
3065
3066 case self::AFTER_FRAMESET:
3067 /* Handle the token as follows: */
3068
3069 /* A character token that is one of one of U+0009 CHARACTER TABULATION,
3070 U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
3071 U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */
3072 if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) {
3073 /* Append the character to the current node. */
3074 $this->insertText($token['data']);
3075
3076 /* A comment token */
3077 } elseif($token['type'] === HTML5_Tokenizer::COMMENT) {
3078 /* Append a Comment node to the current node with the data
3079 attribute set to the data given in the comment token. */
3080 $this->insertComment($token['data']);
3081
3082 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) {
3083 // parse error
3084
3085 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') {
3086 $this->processWithRulesFor($token, self::IN_BODY);
3087
3088 /* An end tag with the tag name "html" */
3089 } elseif($token['type'] === HTML5_Tokenizer::ENDTAG &&
3090 $token['name'] === 'html') {
3091 $this->mode = self::AFTER_AFTER_FRAMESET;
3092
3093 /* A start tag with the tag name "noframes" */
3094 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG &&
3095 $token['name'] === 'noframes') {
3096 $this->processWithRulesFor($token, self::IN_HEAD);
3097
3098 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3099 /* Stop parsing */
3100
3101 /* Anything else */
3102 } else {
3103 /* Parse error. Ignore the token. */
3104 $this->ignored = true;
3105 }
3106 break;
3107
3108 case self::AFTER_AFTER_BODY:
3109 /* A comment token */
3110 if($token['type'] === HTML5_Tokenizer::COMMENT) {
3111 /* Append a Comment node to the Document object with the data
3112 attribute set to the data given in the comment token. */
3113 // XDOM
3114 $comment = $this->dom->createComment($token['data']);
3115 $this->dom->appendChild($comment);
3116
3117 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3118 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3119 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3120 $this->processWithRulesFor($token, self::IN_BODY);
3121
3122 /* An end-of-file token */
3123 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3124 /* OMG DONE!! */
3125 } else {
3126 // parse error
3127 $this->mode = self::IN_BODY;
3128 $this->emitToken($token);
3129 }
3130 break;
3131
3132 case self::AFTER_AFTER_FRAMESET:
3133 /* A comment token */
3134 if($token['type'] === HTML5_Tokenizer::COMMENT) {
3135 /* Append a Comment node to the Document object with the data
3136 attribute set to the data given in the comment token. */
3137 // XDOM
3138 $comment = $this->dom->createComment($token['data']);
3139 $this->dom->appendChild($comment);
3140
3141 } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE ||
3142 $token['type'] === HTML5_Tokenizer::SPACECHARACTER ||
3143 ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) {
3144 $this->processWithRulesFor($token, self::IN_BODY);
3145
3146 /* An end-of-file token */
3147 } elseif($token['type'] === HTML5_Tokenizer::EOF) {
3148 /* OMG DONE!! */
3149 } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') {
3150 $this->processWithRulesFor($token, self::IN_HEAD);
3151 } else {
3152 // parse error
3153 }
3154 break;
3155 }
3156 // end funky indenting
3157 }
3158
/**
 * Builds an element in the HTML namespace from a start tag token and,
 * unless told otherwise, attaches it to the tree and opens it.
 *
 * @param array $token  Tag token; 'name' is required, 'attr' (a list of
 *                      arrays with 'name' and 'value' keys) is optional.
 * @param bool  $append When true the element is appended through
 *                      appendToRealParent() and pushed onto $this->stack.
 * @return mixed The element returned by $this->dom->createElementNS().
 */
private function insertElement($token, $append = true) {
    $element = $this->dom->createElementNS(self::NS_HTML, $token['name']);

    $attributes = empty($token['attr']) ? array() : $token['attr'];
    foreach ($attributes as $attribute) {
        // An attribute name that is already set keeps its first value.
        if ($element->hasAttribute($attribute['name'])) {
            continue;
        }
        $element->setAttribute($attribute['name'], $attribute['value']);
    }

    if (!$append) {
        return $element;
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
3176
/**
 * Appends character data to the tree as a text node.
 *
 * Nothing is inserted for an empty string. While $this->ignore_lf_token is
 * truthy, a single leading line feed is removed first, and nothing is
 * inserted if that leaves no data.
 *
 * @param string $data Character data taken from a character token.
 * @return void
 */
private function insertText($data) {
    if ($data === '') {
        return;
    }

    if ($this->ignore_lf_token && $data[0] === "\n") {
        $data = substr($data, 1);
        // substr() reports "nothing left" as false before PHP 8 and as ''
        // from PHP 8 on; treat both the same so that no empty text node is
        // ever appended.
        if ($data === false || $data === '') {
            return;
        }
    }

    $text = $this->dom->createTextNode($data);
    $this->appendToRealParent($text);
}
3188
/**
 * Creates a comment node holding $data and appends it through
 * appendToRealParent().
 *
 * @param string $data Comment text taken from a comment token.
 * @return void
 */
private function insertComment($data) {
    $this->appendToRealParent($this->dom->createComment($data));
}
3193
/**
 * Appends $node to the current node, or hands it to fosterParent() when
 * foster parenting is switched on and the current node is a table, tbody,
 * tfoot, thead or tr element.
 *
 * @param mixed $node Node to insert (callers in this class pass elements,
 *                    text nodes and comments).
 * @return void
 */
private function appendToRealParent($node) {
    $tableContext = array('table', 'tbody', 'tfoot', 'thead', 'tr');

    // Foster parenting only applies while the flag is set AND the current
    // node is part of a table's structure.
    if ($this->foster_parent && in_array(end($this->stack)->tagName, $tableContext)) {
        $this->fosterParent($node);
        return;
    }

    end($this->stack)->appendChild($node);
}
3206
/**
 * Tells whether the stack of open elements has an element with the given
 * tag name "in scope".
 *
 * The stack is walked from the current node towards the root. The walk
 * ends with true on the first node whose tagName equals $el, and with
 * false on the first scope boundary met before such a node.
 *
 * @param string|array $el    Tag name to look for, or a list of tag names
 *                            (true as soon as any one of them is in scope).
 * @param mixed        $scope One of the self::SCOPE* constants selecting
 *                            which boundaries apply.
 * @return bool Always a real boolean; false is also returned when the
 *              stack is empty or exhausted without a decision.
 */
private function elementInScope($el, $scope = self::SCOPE) {
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $scope)) {
                return true;
            }
        }

        return false;
    }

    for ($i = count($this->stack) - 1; $i >= 0; $i--) {
        $node = $this->stack[$i];
        $name = $node->tagName;

        // The target itself: match.
        if ($name === $el) {
            return true;
        }

        // Boundaries shared by every kind of scope.
        if ($name === 'table' || $name === 'html') {
            return false;
        }

        // Extra boundaries for every scope except table scope.
        if ($scope !== self::SCOPE_TABLE &&
            (in_array($name, array('applet', 'caption', 'td', 'th',
                'button', 'marquee', 'object')) ||
            ($name === 'foreignObject' && $node->namespaceURI === self::NS_SVG))) {
            return false;
        }

        // Extra boundaries for list item scope only.
        if ($scope === self::SCOPE_LISTITEM && in_array($name, array('ol', 'ul'))) {
            return false;
        }
    }

    // Previously the function fell off the end here and returned null.
    return false;
}
3256
/**
 * Re-opens formatting elements that are still listed in
 * $this->a_formatting but are no longer on the stack of open elements.
 *
 * Starting from the newest entry, the list is rewound to the nearest
 * marker or still-open element (or to the start of the list). Every entry
 * after that point is then shallow-cloned, appended via
 * appendToRealParent(), pushed onto $this->stack, and its list entry is
 * replaced by the clone.
 *
 * @return false|null false when there was nothing to reconstruct, null
 *                    after entries have been reconstructed.
 */
private function reconstructActiveFormattingElements() {
    /* 1. Nothing in the list: nothing to reconstruct. */
    $total = count($this->a_formatting);
    if ($total === 0) {
        return false;
    }

    /* 2./3. If the newest entry is a marker or is still open, there is
    nothing to reconstruct either. */
    $entry = end($this->a_formatting);
    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    /* 4.-6. Rewind: step back while the entry is neither a marker nor an
    open element and earlier entries remain. */
    $a = $total - 1;
    $advance = false;
    while ($a > 0) {
        $a--;
        $entry = $this->a_formatting[$a];

        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            // Stopped ON a marker / open element: it must not be cloned
            // itself, so the first pass below has to move one entry
            // forward. (This flag used to stay unset here, which cloned
            // the marker or duplicated the open element.)
            $advance = true;
            break;
        }
    }

    while (true) {
        /* 7. Advance: take the entry one later in the list. Skipped only
        when the rewind ran out of entries, in which case entry is already
        the first one to re-create. */
        if ($advance) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Shallow-clone the entry. */
        $clone = $entry->cloneNode();

        /* 9. Append the clone and make it the new current node. */
        $this->appendToRealParent($clone);
        $this->stack[] = $clone;

        /* 10. The list now refers to the clone instead of the original. */
        $this->a_formatting[$a] = $clone;

        /* 11. Done once the clone sits at the end of the list. */
        if (end($this->a_formatting) === $clone) {
            break;
        }
        $advance = true;
    }
}
3327
/**
 * Removes entries from the end of $this->a_formatting up to and including
 * the most recently added marker.
 *
 * If the list contains no marker it is simply emptied; the loop stops
 * once nothing is left instead of spinning forever.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker() {
    while (!empty($this->a_formatting)) {
        /* 1./2. Take the newest entry off the list. */
        $entry = array_pop($this->a_formatting);

        /* 3. A marker ends the clean-up. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
3348
/**
 * Generates implied end tags: pops the current node off the stack of open
 * elements for as long as it is one of the implicitly closable elements.
 *
 * The closable set is dc, dd, ds, dt, li, option, optgroup, p, rp and rt,
 * plus td, th and tr, minus whatever is named in $exclude.
 *
 * NOTE(review): td, th and tr are not part of the list this method's
 * original comment described; they are kept because existing callers may
 * rely on them -- confirm before removing.
 *
 * @param array $exclude Tag names that must NOT be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags($exclude = array()) {
    $elements = array_diff(
        array('dc', 'dd', 'ds', 'dt', 'li', 'option', 'optgroup', 'p',
            'rp', 'rt', 'td', 'th', 'tr'),
        $exclude
    );

    // Stop on an empty stack rather than dereferencing end()'s false.
    while (!empty($this->stack) &&
        in_array(end($this->stack)->tagName, $elements, true)) {
        array_pop($this->stack);
    }
}
3362
/**
 * Classifies a node by tag name as special, scoping, formatting or
 * phrasing, using the $this->special, $this->scoping and
 * $this->formatting name lists (checked in that order).
 *
 * @param mixed $node Node whose tagName is looked up.
 * @return mixed self::SPECIAL, self::SCOPING, self::FORMATTING or
 *               self::PHRASING.
 */
private function getElementCategory($node) {
    if (!is_object($node)) {
        // A non-node (for instance a marker) has no tag name. Report it
        // through PHP's error channel instead of dumping a backtrace into
        // the output, and use the catch-all category.
        // NOTE(review): the old code fell through with a null name, which
        // also ends in PHRASING unless one of the name lists contains a
        // null-like value -- confirm against the list definitions.
        trigger_error(
            'HTML5_TreeBuilder::getElementCategory() expects a node, ' . gettype($node) . ' given',
            E_USER_WARNING
        );
        return self::PHRASING;
    }

    $name = $node->tagName;

    if (in_array($name, $this->special)) {
        return self::SPECIAL;
    }
    if (in_array($name, $this->scoping)) {
        return self::SCOPING;
    }
    if (in_array($name, $this->formatting)) {
        return self::FORMATTING;
    }

    return self::PHRASING;
}
3378
/**
 * Clears the stack of open elements back to a table context: pops nodes
 * until the current node's tag name is one of $elements.
 *
 * @param array $elements Tag names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements) {
    // Keep discarding the current node until it is one of the stop tags.
    while (!in_array(end($this->stack)->tagName, $elements)) {
        array_pop($this->stack);
    }
}
3394
/**
 * Resets the insertion mode appropriately by inspecting the stack of open
 * elements from the current node towards the root.
 *
 * The first node that matches one of the rules below decides
 * $this->mode (and, for foreign content, $this->secondary_mode).
 *
 * @param mixed $context Context element for the fragment case, or null.
 *                       Only when it is given does it stand in for the
 *                       first node of the stack; without it the root node
 *                       itself is examined, so a null context is never
 *                       dereferenced.
 * @return void
 */
private function resetInsertionMode($context = null) {
    /* 1. Let last be false. */
    $last = false;

    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        /* 2. Let node be the node under inspection. */
        $node = $this->stack[$n];

        /* 3. If node is the first node in the stack of open elements, set
         * last to true and, in the fragment case, set node to the context
         * element. */
        if ($this->stack[0]->isSameNode($node)) {
            $last = true;
            if ($context !== null) {
                $node = $context;
            }
        }

        $name = $node->tagName;

        /* 4. select: "in select". (fragment case) */
        if ($name === 'select') {
            $this->mode = self::IN_SELECT;
            return;
        }

        /* 5. td or th: "in cell". */
        if ($name === 'td' || $name === 'th') {
            $this->mode = self::IN_CELL;
            return;
        }

        /* 6. tr: "in row". */
        if ($name === 'tr') {
            $this->mode = self::IN_ROW;
            return;
        }

        /* 7. tbody, thead or tfoot: "in table body". */
        if (in_array($name, array('tbody', 'thead', 'tfoot'))) {
            $this->mode = self::IN_TABLE_BODY;
            return;
        }

        /* 8. caption: "in caption". */
        if ($name === 'caption') {
            $this->mode = self::IN_CAPTION;
            return;
        }

        /* 9. colgroup: "in column group". (innerHTML case) */
        if ($name === 'colgroup') {
            $this->mode = self::IN_COLUMN_GROUP;
            return;
        }

        /* 10. table: "in table". */
        if ($name === 'table') {
            $this->mode = self::IN_TABLE;
            return;
        }

        /* 11. An element from the MathML or SVG namespace: "in foreign
         * content", with "in body" as the secondary insertion mode. */
        if ($node->namespaceURI === self::NS_SVG ||
            $node->namespaceURI === self::NS_MATHML) {
            $this->mode = self::IN_FOREIGN_CONTENT;
            $this->secondary_mode = self::IN_BODY;
            return;
        }

        /* 12./13. head ("in body"! not "in head"!) or body: "in body". */
        if ($name === 'head' || $name === 'body') {
            $this->mode = self::IN_BODY;
            return;
        }

        /* 14. frameset: "in frameset". (fragment case) */
        if ($name === 'frameset') {
            $this->mode = self::IN_FRAMESET;
            return;
        }

        /* 15. html: "before head" while the head element pointer is null,
         * "after head" otherwise. */
        if ($name === 'html') {
            $this->mode = ($this->head_pointer === null)
                ? self::BEFORE_HEAD
                : self::AFTER_HEAD;
            return;
        }

        /* 16. If last is true: "in body". (fragment case) */
        if ($last) {
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
3502
/**
 * Closes the currently open table cell, if there is one.
 *
 * Checks for a td element and then a th element in table scope; for the
 * first one found, a synthetic end tag token carrying that tag name is
 * passed to emitToken(). Nothing happens when neither is in table scope.
 */
private function closeCell() {
    $open_cell = null;

    /* If the stack of open elements has a td or th element in table scope,
    then act as if an end tag token with that tag name had been seen. */
    if ($this->elementInScope('td', self::SCOPE_TABLE)) {
        $open_cell = 'td';
    } elseif ($this->elementInScope('th', self::SCOPE_TABLE)) {
        $open_cell = 'th';
    }

    if ($open_cell === null) {
        return;
    }

    $end_tag = array(
        'name' => $open_cell,
        'type' => HTML5_Tokenizer::ENDTAG
    );
    $this->emitToken($end_tag);
}
3517
/**
 * Processes a token "using the rules for" another insertion mode.
 *
 * The token is forwarded to emitToken() together with the requested
 * mode; the current insertion mode is not assigned here.
 *
 * @param array $token Token to process.
 * @param mixed $mode  Insertion mode whose rules should be applied.
 * @return mixed Whatever emitToken() returns.
 */
private function processWithRulesFor($token, $mode) {
    /* "using the rules for the m insertion mode", where m is one of these
     * modes, the user agent must use the rules described under the m
     * insertion mode's section, but must leave the insertion mode
     * unchanged unless the rules in m themselves switch the insertion mode
     * to a new value. */
    $result = $this->emitToken($token, $mode);
    return $result;
}
3526
/**
 * Inserts an element for the token and switches to CDATA parsing.
 *
 * After insertion the tokenizer content model is set to CDATA, the
 * current insertion mode is remembered in $original_mode and the
 * insertion mode becomes IN_CDATA_RCDATA.
 *
 * @param array $token Start tag token to insert.
 */
private function insertCDATAElement($token) {
    $this->insertElement($token);

    $this->content_model = HTML5_Tokenizer::CDATA;

    // Remember the current mode before replacing it.
    $this->original_mode = $this->mode;
    $this->mode = self::IN_CDATA_RCDATA;
}
3533
/**
 * Inserts an element for the token and switches to RCDATA parsing.
 *
 * After insertion the tokenizer content model is set to RCDATA, the
 * current insertion mode is remembered in $original_mode and the
 * insertion mode becomes IN_CDATA_RCDATA.
 *
 * @param array $token Start tag token to insert.
 */
private function insertRCDATAElement($token) {
    $this->insertElement($token);

    $this->content_model = HTML5_Tokenizer::RCDATA;

    // Remember the current mode before replacing it.
    $this->original_mode = $this->mode;
    $this->mode = self::IN_CDATA_RCDATA;
}
3540
/**
 * Looks up the value of a named attribute on a token.
 *
 * When the same attribute name occurs more than once, the FIRST
 * occurrence wins: the HTML tokenization rules say a later duplicate
 * attribute is dropped from the token, so the earlier value is the
 * effective one.
 *
 * @param array  $token Token whose 'attr' entry, if present, is a list of
 *                      arrays with 'name' and 'value' keys.
 * @param string $key   Attribute name to look for (compared with ===).
 * @return mixed The attribute value, or false when the token has no
 *               attributes or no attribute has that name.
 */
private function getAttr($token, $key) {
    if (empty($token['attr'])) {
        return false;
    }
    foreach ($token['attr'] as $keypair) {
        if ($keypair['name'] === $key) {
            // First match is authoritative; later duplicates are ignored.
            return $keypair['value'];
        }
    }
    return false;
}
3549
/**
 * Returns the current table.
 *
 * Walks the stack of open elements from the top down and returns the
 * first element whose tagName is 'table'; when none is found, the bottom
 * entry of the stack is returned instead.
 *
 * @return mixed The matching stack entry, or $this->stack[0].
 */
private function getCurrentTable() {
    /* The current table is the last table element in the stack of open
     * elements, if there is one. If there is no table element in the stack
     * of open elements (fragment case), then the current table is the
     * first element in the stack of open elements (the html element). */
    $position = count($this->stack);
    while ($position-- > 0) {
        $candidate = $this->stack[$position];
        if ($candidate->tagName === 'table') {
            return $candidate;
        }
    }
    return $this->stack[0];
}
3562
/**
 * Determines the foster parent element.
 *
 * - No table element on the stack: the bottom entry of the stack.
 * - Last table element has a parent that is an element: that parent.
 * - Last table element has no parent, or its parent is not an element:
 *   the stack entry immediately before the table.
 *
 * The previous implementation returned any non-null parent straight
 * away, so the "parent is not an element" case could never be reached.
 *
 * @return mixed The node that fostered content should be inserted into.
 */
private function getFosterParent() {
    /* The foster parent element is the parent element of the last
    table element in the stack of open elements, if there is a
    table element and it has such a parent element. If there is no
    table element in the stack of open elements (innerHTML case),
    then the foster parent element is the first element in the
    stack of open elements (the html element). Otherwise, if there
    is a table element in the stack of open elements, but the last
    table element in the stack of open elements has no parent, or
    its parent node is not an element, then the foster parent
    element is the element before the last table element in the
    stack of open elements. */
    $table = null;
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        if ($this->stack[$n]->tagName === 'table') {
            $table = $this->stack[$n];
            break;
        }
    }

    if ($table === null) {
        return $this->stack[0];
    }

    $parent = $table->parentNode;
    if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
        return $parent;
    }

    // Table is detached or hangs off a non-element node: use the entry
    // that precedes it on the stack ($n still indexes the table).
    return $this->stack[$n - 1];
}
3593
/**
 * Foster parents a node.
 *
 * The node is inserted immediately before the current table when that
 * table's parent is the foster parent; otherwise it is appended to the
 * foster parent.
 *
 * A table without a parent node is handled explicitly: previously
 * isSameNode() was called on a null parentNode, which is a fatal error,
 * even though getFosterParent() anticipates parentless tables.
 *
 * @param mixed $node Node to insert (passed to insertBefore/appendChild).
 */
public function fosterParent($node) {
    $foster_parent = $this->getFosterParent();
    $table = $this->getCurrentTable(); // almost equivalent to last table element, except it can be html
    /* When a node node is to be foster parented, the node node must be
     * be inserted into the foster parent element. */
    /* If the foster parent element is the parent element of the last table
     * element in the stack of open elements, then node must be inserted
     * immediately before the last table element in the stack of open
     * elements in the foster parent element; otherwise, node must be
     * appended to the foster parent element. */
    $table_is_child_of_foster_parent =
        $table->tagName === 'table'
        && $table->parentNode !== null
        && $table->parentNode->isSameNode($foster_parent);

    if ($table_is_child_of_foster_parent) {
        $foster_parent->insertBefore($node, $table);
    } else {
        $foster_parent->appendChild($node);
    }
}
3610
/**
 * Debugging aid: echoes the tag names on the stack of open elements,
 * bottom first, as a single comma-separated line.
 */
private function printStack() {
    $tag_names = array();
    foreach ($this->stack as $element) {
        array_push($tag_names, $element->tagName);
    }
    echo ' -> stack [', implode(', ', $tag_names), "]\n";
}
3621
/**
 * Debugging aid: echoes the list of active formatting elements as a
 * single comma-separated line. Marker entries are shown as 'MARKER';
 * every other entry is shown by its tagName. Prints nothing when the
 * list is empty.
 */
private function printActiveFormattingElements() {
    if (!$this->a_formatting) {
        return;
    }
    $labels = array();
    foreach ($this->a_formatting as $entry) {
        $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
    }
    echo ' -> active formatting [', implode(', ', $labels), "]\n";
}
3634
/**
 * Reports whether the current table carries a truthy 'tainted' property.
 *
 * @return bool True when getCurrentTable() returns a node whose
 *              'tainted' property is set and non-empty.
 */
public function currentTableIsTainted() {
    $table = $this->getCurrentTable();
    if (!isset($table->tainted)) {
        return false;
    }
    return (bool) $table->tainted;
}
3638
/**
 * Sets up the tree constructor for building a fragment.
 *
 * Always marks the builder as being in fragment mode. When a context
 * tag name is supplied, a context element is created from it, the
 * tokenizer content model is chosen from that element's tag name, a
 * fresh html root is created, appended to the document and made the
 * only entry on the stack, the insertion mode is reset against the
 * context element, and the form pointer is set to the nearest form
 * element found going up from the context element.
 *
 * @param string|null $context Tag name of the context element, if any.
 */
public function setupContext($context = null) {
    $this->fragment = true;
    if (!$context) {
        return;
    }

    $context = $this->dom->createElementNS(self::NS_HTML, $context);

    /* 4.1. Set the HTML parser's tokenization stage's content model
     * flag according to the context element, as follows: */
    $tag = $context->tagName;
    if ($tag === 'title' || $tag === 'textarea') {
        $this->content_model = HTML5_Tokenizer::RCDATA;
    } elseif ($tag === 'style' || $tag === 'script' || $tag === 'xmp'
        || $tag === 'iframe' || $tag === 'noembed' || $tag === 'noframes') {
        $this->content_model = HTML5_Tokenizer::CDATA;
    } elseif ($tag === 'noscript') {
        // XSCRIPT: assuming scripting is enabled
        $this->content_model = HTML5_Tokenizer::CDATA;
    } elseif ($tag === 'plaintext') {
        $this->content_model = HTML5_Tokenizer::PLAINTEXT;
    }

    /* 4.2. Let root be a new html element with no attributes. */
    $root = $this->dom->createElementNS(self::NS_HTML, 'html');
    $this->root = $root;

    /* 4.3 Append the element root to the Document node created above. */
    $this->dom->appendChild($root);

    /* 4.4 Set up the parser's stack of open elements so that it
     * contains just the single element root. */
    $this->stack = array($root);

    /* 4.5 Reset the parser's insertion mode appropriately. */
    $this->resetInsertionMode($context);

    /* 4.6 Set the parser's form element pointer to the nearest node
     * to the context element that is a form element (going straight up
     * the ancestor chain, and including the element itself, if it is a
     * form element), or, if there is no such form element, to null. */
    for ($node = $context; $node; $node = $node->parentNode) {
        if ($node->tagName === 'form') {
            $this->form_pointer = $node;
            break;
        }
    }
}
3687
/**
 * Adjusts MathML attribute names on a token.
 *
 * Renames the lowercased attribute 'definitionurl' to 'definitionURL'.
 * A token with no (or an empty) 'attr' list is returned unchanged; the
 * previous by-reference loop added an 'attr' => null entry and raised a
 * warning in that case, and left a dangling reference behind.
 *
 * @param array $token Token whose 'attr' entry, if present, is a list of
 *                     arrays with a 'name' key.
 * @return array The token with adjusted attribute names.
 */
public function adjustMathMLAttributes($token) {
    if (empty($token['attr']) || !is_array($token['attr'])) {
        return $token;
    }
    // Write back through the index instead of iterating by reference.
    foreach ($token['attr'] as $index => $kp) {
        if ($kp['name'] === 'definitionurl') {
            $token['attr'][$index]['name'] = 'definitionURL';
        }
    }
    return $token;
}
3696
/**
 * Adjusts SVG attribute names on a token.
 *
 * Attribute names listed in the lookup table (all-lowercase spellings)
 * are replaced by their mixed-case SVG spellings; other attributes are
 * left alone. A token with no (or an empty) 'attr' list is returned
 * unchanged; the previous by-reference loop added an 'attr' => null
 * entry and raised a warning in that case, and left a dangling
 * reference behind.
 *
 * @param array $token Token whose 'attr' entry, if present, is a list of
 *                     arrays with a 'name' key.
 * @return array The token with adjusted attribute names.
 */
public function adjustSVGAttributes($token) {
    static $lookup = array(
        'attributename' => 'attributeName',
        'attributetype' => 'attributeType',
        'basefrequency' => 'baseFrequency',
        'baseprofile' => 'baseProfile',
        'calcmode' => 'calcMode',
        'clippathunits' => 'clipPathUnits',
        'contentscripttype' => 'contentScriptType',
        'contentstyletype' => 'contentStyleType',
        'diffuseconstant' => 'diffuseConstant',
        'edgemode' => 'edgeMode',
        'externalresourcesrequired' => 'externalResourcesRequired',
        'filterres' => 'filterRes',
        'filterunits' => 'filterUnits',
        'glyphref' => 'glyphRef',
        'gradienttransform' => 'gradientTransform',
        'gradientunits' => 'gradientUnits',
        'kernelmatrix' => 'kernelMatrix',
        'kernelunitlength' => 'kernelUnitLength',
        'keypoints' => 'keyPoints',
        'keysplines' => 'keySplines',
        'keytimes' => 'keyTimes',
        'lengthadjust' => 'lengthAdjust',
        'limitingconeangle' => 'limitingConeAngle',
        'markerheight' => 'markerHeight',
        'markerunits' => 'markerUnits',
        'markerwidth' => 'markerWidth',
        'maskcontentunits' => 'maskContentUnits',
        'maskunits' => 'maskUnits',
        'numoctaves' => 'numOctaves',
        'pathlength' => 'pathLength',
        'patterncontentunits' => 'patternContentUnits',
        'patterntransform' => 'patternTransform',
        'patternunits' => 'patternUnits',
        'pointsatx' => 'pointsAtX',
        'pointsaty' => 'pointsAtY',
        'pointsatz' => 'pointsAtZ',
        'preservealpha' => 'preserveAlpha',
        'preserveaspectratio' => 'preserveAspectRatio',
        'primitiveunits' => 'primitiveUnits',
        'refx' => 'refX',
        'refy' => 'refY',
        'repeatcount' => 'repeatCount',
        'repeatdur' => 'repeatDur',
        'requiredextensions' => 'requiredExtensions',
        'requiredfeatures' => 'requiredFeatures',
        'specularconstant' => 'specularConstant',
        'specularexponent' => 'specularExponent',
        'spreadmethod' => 'spreadMethod',
        'startoffset' => 'startOffset',
        'stddeviation' => 'stdDeviation',
        'stitchtiles' => 'stitchTiles',
        'surfacescale' => 'surfaceScale',
        'systemlanguage' => 'systemLanguage',
        'tablevalues' => 'tableValues',
        'targetx' => 'targetX',
        'targety' => 'targetY',
        'textlength' => 'textLength',
        'viewbox' => 'viewBox',
        'viewtarget' => 'viewTarget',
        'xchannelselector' => 'xChannelSelector',
        'ychannelselector' => 'yChannelSelector',
        'zoomandpan' => 'zoomAndPan',
    );
    if (empty($token['attr']) || !is_array($token['attr'])) {
        return $token;
    }
    // Write back through the index instead of iterating by reference.
    foreach ($token['attr'] as $index => $kp) {
        $name = $kp['name'];
        // Only plain string names can be looked up in the table.
        if (is_string($name) && isset($lookup[$name])) {
            $token['attr'][$index]['name'] = $lookup[$name];
        }
    }
    return $token;
}
3769
/**
 * Adjusts foreign (namespaced) attribute names on a token.
 *
 * Attribute names found in the lookup table are replaced by a
 * three-element array of (prefix, local name, namespace URI); other
 * attributes are left alone. A token with no (or an empty) 'attr' list
 * is returned unchanged; the previous by-reference loop added an
 * 'attr' => null entry and raised a warning in that case, and left a
 * dangling reference behind. Names that are not plain strings (for
 * example already expanded to arrays) are skipped.
 *
 * @param array $token Token whose 'attr' entry, if present, is a list of
 *                     arrays with a 'name' key.
 * @return array The token with adjusted attribute names.
 */
public function adjustForeignAttributes($token) {
    static $lookup = array(
        'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
        'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
        'xlink:href' => array('xlink', 'href', self::NS_XLINK),
        'xlink:role' => array('xlink', 'role', self::NS_XLINK),
        'xlink:show' => array('xlink', 'show', self::NS_XLINK),
        'xlink:title' => array('xlink', 'title', self::NS_XLINK),
        'xlink:type' => array('xlink', 'type', self::NS_XLINK),
        'xml:base' => array('xml', 'base', self::NS_XML),
        'xml:lang' => array('xml', 'lang', self::NS_XML),
        'xml:space' => array('xml', 'space', self::NS_XML),
        'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
        'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
    );
    if (empty($token['attr']) || !is_array($token['attr'])) {
        return $token;
    }
    // Write back through the index instead of iterating by reference.
    foreach ($token['attr'] as $index => $kp) {
        $name = $kp['name'];
        if (is_string($name) && isset($lookup[$name])) {
            $token['attr'][$index]['name'] = $lookup[$name];
        }
    }
    return $token;
}
3792
/**
 * Creates an element in the given namespace from a token, copies the
 * token's attributes onto it, appends it via appendToRealParent() and
 * pushes it onto the stack of open elements.
 *
 * An attribute name may be a plain string or a three-element array
 * whose index 1 is used as the attribute name and index 2 as its
 * namespace URI.
 *
 * @param array  $token        Token with a 'name' and optionally 'attr'.
 * @param string $namespaceURI Namespace for the new element.
 */
public function insertForeignElement($token, $namespaceURI) {
    $el = $this->dom->createElementNS($namespaceURI, $token['name']);

    $pairs = empty($token['attr']) ? array() : $token['attr'];
    foreach ($pairs as $kp) {
        $attr = $kp['name'];
        $ns = self::NS_HTML;
        if (is_array($attr)) {
            // Namespace must be read before $attr is narrowed to the name.
            $ns = $attr[2];
            $attr = $attr[1];
        }

        if ($el->hasAttributeNS($ns, $attr)) {
            continue;
        }

        // XSKETCHY: work around godawful libxml bug
        if ($ns === self::NS_XLINK) {
            $el->setAttribute('xlink:'.$attr, $kp['value']);
        } elseif ($ns === self::NS_HTML) {
            // Another godawful libxml bug
            $el->setAttribute($attr, $kp['value']);
        } else {
            $el->setAttributeNS($ns, $attr, $kp['value']);
        }
    }

    $this->appendToRealParent($el);
    $this->stack[] = $el;
    // XERROR: see below
    /* If the newly created element has an xmlns attribute in the XMLNS
     * namespace whose value is not exactly the same as the element's
     * namespace, that is a parse error. Similarly, if the newly created
     * element has an xmlns:xlink attribute in the XMLNS namespace whose
     * value is not the XLink Namespace, that is a parse error. */
}
3826
/**
 * Normalizes the document and returns the parse result.
 *
 * @return mixed The document itself when not in fragment mode;
 *               otherwise the child nodes of the fragment root when one
 *               is set, or of the document when it is not.
 */
public function save() {
    $this->dom->normalize();

    if (!$this->fragment) {
        return $this->dom;
    }

    $container = $this->root ? $this->root : $this->dom;
    return $container->childNodes;
}
3839 }
3840