]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/htmlpurifier/HTMLPurifier/Lexer/PH5P.php
remove autoload section in composer.json
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / Lexer / PH5P.php
CommitLineData
d4949327
NL
1<?php\r
2\r
3/**\r
4 * Experimental HTML5-based parser using Jeroen van der Meer's PH5P library.\r
5 * Occupies space in the HTML5 pseudo-namespace, which may cause conflicts.\r
6 *\r
7 * @note\r
8 * Recent changes to PHP's DOM extension have resulted in some fatal\r
9 * error conditions with the original version of PH5P. Pending changes,\r
10 * this lexer will punt to DirectLex if DOM throws an exception.\r
11 */\r
12\r
class HTMLPurifier_Lexer_PH5P extends HTMLPurifier_Lexer_DOMLex
{
    /**
     * Tokenizes HTML by parsing it with the PH5P HTML5 parser and walking the
     * resulting DOM. If the parser throws a DOMException, the original input
     * is handed to DirectLex instead.
     *
     * @param string $html
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_Token[]
     */
    public function tokenizeHTML($html, $config, $context)
    {
        $prepared = $this->wrapHTML(
            $this->normalize($html, $config, $context),
            $config,
            $context
        );

        try {
            $ph5p = new HTML5($prepared);
            $document = $ph5p->save();
        } catch (DOMException $failure) {
            // PH5P could not produce a document: fall back to DirectLex on the
            // untouched input, and record the exception in the context so the
            // fallback can be detected.
            $fallback = new HTMLPurifier_Lexer_DirectLex();
            $context->register('PH5PError', $failure);
            return $fallback->tokenizeHTML($html, $config, $context);
        }

        // Descend to the first <div> inside the first <body> of the first
        // <html> element; that node is the root handed to tokenizeDOM().
        $htmlNode = $document->getElementsByTagName('html')->item(0);
        $bodyNode = $htmlNode->getElementsByTagName('body')->item(0);
        $rootNode = $bodyNode->getElementsByTagName('div')->item(0);

        $tokens = array();
        $this->tokenizeDOM($rootNode, $tokens);
        return $tokens;
    }
}
45\r
46/*\r
47\r
48Copyright 2007 Jeroen van der Meer <http://jero.net/>\r
49\r
50Permission is hereby granted, free of charge, to any person obtaining a\r
51copy of this software and associated documentation files (the\r
52"Software"), to deal in the Software without restriction, including\r
53without limitation the rights to use, copy, modify, merge, publish,\r
54distribute, sublicense, and/or sell copies of the Software, and to\r
55permit persons to whom the Software is furnished to do so, subject to\r
56the following conditions:\r
57\r
58The above copyright notice and this permission notice shall be included\r
59in all copies or substantial portions of the Software.\r
60\r
61THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS\r
62OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF\r
63MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.\r
64IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY\r
65CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,\r
66TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE\r
67SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.\r
68\r
69*/\r
70\r
71class HTML5\r
72{\r
73 private $data;\r
74 private $char;\r
75 private $EOF;\r
76 private $state;\r
77 private $tree;\r
78 private $token;\r
79 private $content_model;\r
80 private $escape = false;\r
81 private $entities = array(\r
82 'AElig;',\r
83 'AElig',\r
84 'AMP;',\r
85 'AMP',\r
86 'Aacute;',\r
87 'Aacute',\r
88 'Acirc;',\r
89 'Acirc',\r
90 'Agrave;',\r
91 'Agrave',\r
92 'Alpha;',\r
93 'Aring;',\r
94 'Aring',\r
95 'Atilde;',\r
96 'Atilde',\r
97 'Auml;',\r
98 'Auml',\r
99 'Beta;',\r
100 'COPY;',\r
101 'COPY',\r
102 'Ccedil;',\r
103 'Ccedil',\r
104 'Chi;',\r
105 'Dagger;',\r
106 'Delta;',\r
107 'ETH;',\r
108 'ETH',\r
109 'Eacute;',\r
110 'Eacute',\r
111 'Ecirc;',\r
112 'Ecirc',\r
113 'Egrave;',\r
114 'Egrave',\r
115 'Epsilon;',\r
116 'Eta;',\r
117 'Euml;',\r
118 'Euml',\r
119 'GT;',\r
120 'GT',\r
121 'Gamma;',\r
122 'Iacute;',\r
123 'Iacute',\r
124 'Icirc;',\r
125 'Icirc',\r
126 'Igrave;',\r
127 'Igrave',\r
128 'Iota;',\r
129 'Iuml;',\r
130 'Iuml',\r
131 'Kappa;',\r
132 'LT;',\r
133 'LT',\r
134 'Lambda;',\r
135 'Mu;',\r
136 'Ntilde;',\r
137 'Ntilde',\r
138 'Nu;',\r
139 'OElig;',\r
140 'Oacute;',\r
141 'Oacute',\r
142 'Ocirc;',\r
143 'Ocirc',\r
144 'Ograve;',\r
145 'Ograve',\r
146 'Omega;',\r
147 'Omicron;',\r
148 'Oslash;',\r
149 'Oslash',\r
150 'Otilde;',\r
151 'Otilde',\r
152 'Ouml;',\r
153 'Ouml',\r
154 'Phi;',\r
155 'Pi;',\r
156 'Prime;',\r
157 'Psi;',\r
158 'QUOT;',\r
159 'QUOT',\r
160 'REG;',\r
161 'REG',\r
162 'Rho;',\r
163 'Scaron;',\r
164 'Sigma;',\r
165 'THORN;',\r
166 'THORN',\r
167 'TRADE;',\r
168 'Tau;',\r
169 'Theta;',\r
170 'Uacute;',\r
171 'Uacute',\r
172 'Ucirc;',\r
173 'Ucirc',\r
174 'Ugrave;',\r
175 'Ugrave',\r
176 'Upsilon;',\r
177 'Uuml;',\r
178 'Uuml',\r
179 'Xi;',\r
180 'Yacute;',\r
181 'Yacute',\r
182 'Yuml;',\r
183 'Zeta;',\r
184 'aacute;',\r
185 'aacute',\r
186 'acirc;',\r
187 'acirc',\r
188 'acute;',\r
189 'acute',\r
190 'aelig;',\r
191 'aelig',\r
192 'agrave;',\r
193 'agrave',\r
194 'alefsym;',\r
195 'alpha;',\r
196 'amp;',\r
197 'amp',\r
198 'and;',\r
199 'ang;',\r
200 'apos;',\r
201 'aring;',\r
202 'aring',\r
203 'asymp;',\r
204 'atilde;',\r
205 'atilde',\r
206 'auml;',\r
207 'auml',\r
208 'bdquo;',\r
209 'beta;',\r
210 'brvbar;',\r
211 'brvbar',\r
212 'bull;',\r
213 'cap;',\r
214 'ccedil;',\r
215 'ccedil',\r
216 'cedil;',\r
217 'cedil',\r
218 'cent;',\r
219 'cent',\r
220 'chi;',\r
221 'circ;',\r
222 'clubs;',\r
223 'cong;',\r
224 'copy;',\r
225 'copy',\r
226 'crarr;',\r
227 'cup;',\r
228 'curren;',\r
229 'curren',\r
230 'dArr;',\r
231 'dagger;',\r
232 'darr;',\r
233 'deg;',\r
234 'deg',\r
235 'delta;',\r
236 'diams;',\r
237 'divide;',\r
238 'divide',\r
239 'eacute;',\r
240 'eacute',\r
241 'ecirc;',\r
242 'ecirc',\r
243 'egrave;',\r
244 'egrave',\r
245 'empty;',\r
246 'emsp;',\r
247 'ensp;',\r
248 'epsilon;',\r
249 'equiv;',\r
250 'eta;',\r
251 'eth;',\r
252 'eth',\r
253 'euml;',\r
254 'euml',\r
255 'euro;',\r
256 'exist;',\r
257 'fnof;',\r
258 'forall;',\r
259 'frac12;',\r
260 'frac12',\r
261 'frac14;',\r
262 'frac14',\r
263 'frac34;',\r
264 'frac34',\r
265 'frasl;',\r
266 'gamma;',\r
267 'ge;',\r
268 'gt;',\r
269 'gt',\r
270 'hArr;',\r
271 'harr;',\r
272 'hearts;',\r
273 'hellip;',\r
274 'iacute;',\r
275 'iacute',\r
276 'icirc;',\r
277 'icirc',\r
278 'iexcl;',\r
279 'iexcl',\r
280 'igrave;',\r
281 'igrave',\r
282 'image;',\r
283 'infin;',\r
284 'int;',\r
285 'iota;',\r
286 'iquest;',\r
287 'iquest',\r
288 'isin;',\r
289 'iuml;',\r
290 'iuml',\r
291 'kappa;',\r
292 'lArr;',\r
293 'lambda;',\r
294 'lang;',\r
295 'laquo;',\r
296 'laquo',\r
297 'larr;',\r
298 'lceil;',\r
299 'ldquo;',\r
300 'le;',\r
301 'lfloor;',\r
302 'lowast;',\r
303 'loz;',\r
304 'lrm;',\r
305 'lsaquo;',\r
306 'lsquo;',\r
307 'lt;',\r
308 'lt',\r
309 'macr;',\r
310 'macr',\r
311 'mdash;',\r
312 'micro;',\r
313 'micro',\r
314 'middot;',\r
315 'middot',\r
316 'minus;',\r
317 'mu;',\r
318 'nabla;',\r
319 'nbsp;',\r
320 'nbsp',\r
321 'ndash;',\r
322 'ne;',\r
323 'ni;',\r
324 'not;',\r
325 'not',\r
326 'notin;',\r
327 'nsub;',\r
328 'ntilde;',\r
329 'ntilde',\r
330 'nu;',\r
331 'oacute;',\r
332 'oacute',\r
333 'ocirc;',\r
334 'ocirc',\r
335 'oelig;',\r
336 'ograve;',\r
337 'ograve',\r
338 'oline;',\r
339 'omega;',\r
340 'omicron;',\r
341 'oplus;',\r
342 'or;',\r
343 'ordf;',\r
344 'ordf',\r
345 'ordm;',\r
346 'ordm',\r
347 'oslash;',\r
348 'oslash',\r
349 'otilde;',\r
350 'otilde',\r
351 'otimes;',\r
352 'ouml;',\r
353 'ouml',\r
354 'para;',\r
355 'para',\r
356 'part;',\r
357 'permil;',\r
358 'perp;',\r
359 'phi;',\r
360 'pi;',\r
361 'piv;',\r
362 'plusmn;',\r
363 'plusmn',\r
364 'pound;',\r
365 'pound',\r
366 'prime;',\r
367 'prod;',\r
368 'prop;',\r
369 'psi;',\r
370 'quot;',\r
371 'quot',\r
372 'rArr;',\r
373 'radic;',\r
374 'rang;',\r
375 'raquo;',\r
376 'raquo',\r
377 'rarr;',\r
378 'rceil;',\r
379 'rdquo;',\r
380 'real;',\r
381 'reg;',\r
382 'reg',\r
383 'rfloor;',\r
384 'rho;',\r
385 'rlm;',\r
386 'rsaquo;',\r
387 'rsquo;',\r
388 'sbquo;',\r
389 'scaron;',\r
390 'sdot;',\r
391 'sect;',\r
392 'sect',\r
393 'shy;',\r
394 'shy',\r
395 'sigma;',\r
396 'sigmaf;',\r
397 'sim;',\r
398 'spades;',\r
399 'sub;',\r
400 'sube;',\r
401 'sum;',\r
402 'sup1;',\r
403 'sup1',\r
404 'sup2;',\r
405 'sup2',\r
406 'sup3;',\r
407 'sup3',\r
408 'sup;',\r
409 'supe;',\r
410 'szlig;',\r
411 'szlig',\r
412 'tau;',\r
413 'there4;',\r
414 'theta;',\r
415 'thetasym;',\r
416 'thinsp;',\r
417 'thorn;',\r
418 'thorn',\r
419 'tilde;',\r
420 'times;',\r
421 'times',\r
422 'trade;',\r
423 'uArr;',\r
424 'uacute;',\r
425 'uacute',\r
426 'uarr;',\r
427 'ucirc;',\r
428 'ucirc',\r
429 'ugrave;',\r
430 'ugrave',\r
431 'uml;',\r
432 'uml',\r
433 'upsih;',\r
434 'upsilon;',\r
435 'uuml;',\r
436 'uuml',\r
437 'weierp;',\r
438 'xi;',\r
439 'yacute;',\r
440 'yacute',\r
441 'yen;',\r
442 'yen',\r
443 'yuml;',\r
444 'yuml',\r
445 'zeta;',\r
446 'zwj;',\r
447 'zwnj;'\r
448 );\r
449\r
450 const PCDATA = 0;\r
451 const RCDATA = 1;\r
452 const CDATA = 2;\r
453 const PLAINTEXT = 3;\r
454\r
455 const DOCTYPE = 0;\r
456 const STARTTAG = 1;\r
457 const ENDTAG = 2;\r
458 const COMMENT = 3;\r
459 const CHARACTR = 4;\r
460 const EOF = 5;\r
461\r
462 public function __construct($data)\r
463 {\r
464 $this->data = $data;\r
465 $this->char = -1;\r
466 $this->EOF = strlen($data);\r
467 $this->tree = new HTML5TreeConstructer;\r
468 $this->content_model = self::PCDATA;\r
469\r
470 $this->state = 'data';\r
471\r
472 while ($this->state !== null) {\r
473 $this->{$this->state . 'State'}();\r
474 }\r
475 }\r
476\r
477 public function save()\r
478 {\r
479 return $this->tree->save();\r
480 }\r
481\r
482 private function char()\r
483 {\r
484 return ($this->char < $this->EOF)\r
485 ? $this->data[$this->char]\r
486 : false;\r
487 }\r
488\r
489 private function character($s, $l = 0)\r
490 {\r
491 if ($s + $l < $this->EOF) {\r
492 if ($l === 0) {\r
493 return $this->data[$s];\r
494 } else {\r
495 return substr($this->data, $s, $l);\r
496 }\r
497 }\r
498 }\r
499\r
500 private function characters($char_class, $start)\r
501 {\r
502 return preg_replace('#^([' . $char_class . ']+).*#s', '\\1', substr($this->data, $start));\r
503 }\r
504\r
505 private function dataState()\r
506 {\r
507 // Consume the next input character\r
508 $this->char++;\r
509 $char = $this->char();\r
510\r
511 if ($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) {\r
512 /* U+0026 AMPERSAND (&)\r
513 When the content model flag is set to one of the PCDATA or RCDATA\r
514 states: switch to the entity data state. Otherwise: treat it as per\r
515 the "anything else" entry below. */\r
516 $this->state = 'entityData';\r
517\r
518 } elseif ($char === '-') {\r
519 /* If the content model flag is set to either the RCDATA state or\r
520 the CDATA state, and the escape flag is false, and there are at\r
521 least three characters before this one in the input stream, and the\r
522 last four characters in the input stream, including this one, are\r
523 U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS,\r
524 and U+002D HYPHEN-MINUS ("<!--"), then set the escape flag to true. */\r
525 if (($this->content_model === self::RCDATA || $this->content_model ===\r
526 self::CDATA) && $this->escape === false &&\r
527 $this->char >= 3 && $this->character($this->char - 4, 4) === '<!--'\r
528 ) {\r
529 $this->escape = true;\r
530 }\r
531\r
532 /* In any case, emit the input character as a character token. Stay\r
533 in the data state. */\r
534 $this->emitToken(\r
535 array(\r
536 'type' => self::CHARACTR,\r
537 'data' => $char\r
538 )\r
539 );\r
540\r
541 /* U+003C LESS-THAN SIGN (<) */\r
542 } elseif ($char === '<' && ($this->content_model === self::PCDATA ||\r
543 (($this->content_model === self::RCDATA ||\r
544 $this->content_model === self::CDATA) && $this->escape === false))\r
545 ) {\r
546 /* When the content model flag is set to the PCDATA state: switch\r
547 to the tag open state.\r
548\r
549 When the content model flag is set to either the RCDATA state or\r
550 the CDATA state and the escape flag is false: switch to the tag\r
551 open state.\r
552\r
553 Otherwise: treat it as per the "anything else" entry below. */\r
554 $this->state = 'tagOpen';\r
555\r
556 /* U+003E GREATER-THAN SIGN (>) */\r
557 } elseif ($char === '>') {\r
558 /* If the content model flag is set to either the RCDATA state or\r
559 the CDATA state, and the escape flag is true, and the last three\r
560 characters in the input stream including this one are U+002D\r
561 HYPHEN-MINUS, U+002D HYPHEN-MINUS, U+003E GREATER-THAN SIGN ("-->"),\r
562 set the escape flag to false. */\r
563 if (($this->content_model === self::RCDATA ||\r
564 $this->content_model === self::CDATA) && $this->escape === true &&\r
565 $this->character($this->char, 3) === '-->'\r
566 ) {\r
567 $this->escape = false;\r
568 }\r
569\r
570 /* In any case, emit the input character as a character token.\r
571 Stay in the data state. */\r
572 $this->emitToken(\r
573 array(\r
574 'type' => self::CHARACTR,\r
575 'data' => $char\r
576 )\r
577 );\r
578\r
579 } elseif ($this->char === $this->EOF) {\r
580 /* EOF\r
581 Emit an end-of-file token. */\r
582 $this->EOF();\r
583\r
584 } elseif ($this->content_model === self::PLAINTEXT) {\r
585 /* When the content model flag is set to the PLAINTEXT state\r
586 THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of\r
587 the text and emit it as a character token. */\r
588 $this->emitToken(\r
589 array(\r
590 'type' => self::CHARACTR,\r
591 'data' => substr($this->data, $this->char)\r
592 )\r
593 );\r
594\r
595 $this->EOF();\r
596\r
597 } else {\r
598 /* Anything else\r
599 THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that\r
600 otherwise would also be treated as a character token and emit it\r
601 as a single character token. Stay in the data state. */\r
602 $len = strcspn($this->data, '<&', $this->char);\r
603 $char = substr($this->data, $this->char, $len);\r
604 $this->char += $len - 1;\r
605\r
606 $this->emitToken(\r
607 array(\r
608 'type' => self::CHARACTR,\r
609 'data' => $char\r
610 )\r
611 );\r
612\r
613 $this->state = 'data';\r
614 }\r
615 }\r
616\r
617 private function entityDataState()\r
618 {\r
619 // Attempt to consume an entity.\r
620 $entity = $this->entity();\r
621\r
622 // If nothing is returned, emit a U+0026 AMPERSAND character token.\r
623 // Otherwise, emit the character token that was returned.\r
624 $char = (!$entity) ? '&' : $entity;\r
625 $this->emitToken(\r
626 array(\r
627 'type' => self::CHARACTR,\r
628 'data' => $char\r
629 )\r
630 );\r
631\r
632 // Finally, switch to the data state.\r
633 $this->state = 'data';\r
634 }\r
635\r
636 private function tagOpenState()\r
637 {\r
638 switch ($this->content_model) {\r
639 case self::RCDATA:\r
640 case self::CDATA:\r
641 /* If the next input character is a U+002F SOLIDUS (/) character,\r
642 consume it and switch to the close tag open state. If the next\r
643 input character is not a U+002F SOLIDUS (/) character, emit a\r
644 U+003C LESS-THAN SIGN character token and switch to the data\r
645 state to process the next input character. */\r
646 if ($this->character($this->char + 1) === '/') {\r
647 $this->char++;\r
648 $this->state = 'closeTagOpen';\r
649\r
650 } else {\r
651 $this->emitToken(\r
652 array(\r
653 'type' => self::CHARACTR,\r
654 'data' => '<'\r
655 )\r
656 );\r
657\r
658 $this->state = 'data';\r
659 }\r
660 break;\r
661\r
662 case self::PCDATA:\r
663 // If the content model flag is set to the PCDATA state\r
664 // Consume the next input character:\r
665 $this->char++;\r
666 $char = $this->char();\r
667\r
668 if ($char === '!') {\r
669 /* U+0021 EXCLAMATION MARK (!)\r
670 Switch to the markup declaration open state. */\r
671 $this->state = 'markupDeclarationOpen';\r
672\r
673 } elseif ($char === '/') {\r
674 /* U+002F SOLIDUS (/)\r
675 Switch to the close tag open state. */\r
676 $this->state = 'closeTagOpen';\r
677\r
678 } elseif (preg_match('/^[A-Za-z]$/', $char)) {\r
679 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z\r
680 Create a new start tag token, set its tag name to the lowercase\r
681 version of the input character (add 0x0020 to the character's code\r
682 point), then switch to the tag name state. (Don't emit the token\r
683 yet; further details will be filled in before it is emitted.) */\r
684 $this->token = array(\r
685 'name' => strtolower($char),\r
686 'type' => self::STARTTAG,\r
687 'attr' => array()\r
688 );\r
689\r
690 $this->state = 'tagName';\r
691\r
692 } elseif ($char === '>') {\r
693 /* U+003E GREATER-THAN SIGN (>)\r
694 Parse error. Emit a U+003C LESS-THAN SIGN character token and a\r
695 U+003E GREATER-THAN SIGN character token. Switch to the data state. */\r
696 $this->emitToken(\r
697 array(\r
698 'type' => self::CHARACTR,\r
699 'data' => '<>'\r
700 )\r
701 );\r
702\r
703 $this->state = 'data';\r
704\r
705 } elseif ($char === '?') {\r
706 /* U+003F QUESTION MARK (?)\r
707 Parse error. Switch to the bogus comment state. */\r
708 $this->state = 'bogusComment';\r
709\r
710 } else {\r
711 /* Anything else\r
712 Parse error. Emit a U+003C LESS-THAN SIGN character token and\r
713 reconsume the current input character in the data state. */\r
714 $this->emitToken(\r
715 array(\r
716 'type' => self::CHARACTR,\r
717 'data' => '<'\r
718 )\r
719 );\r
720\r
721 $this->char--;\r
722 $this->state = 'data';\r
723 }\r
724 break;\r
725 }\r
726 }\r
727\r
728 private function closeTagOpenState()\r
729 {\r
730 $next_node = strtolower($this->characters('A-Za-z', $this->char + 1));\r
731 $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName;\r
732\r
733 if (($this->content_model === self::RCDATA || $this->content_model === self::CDATA) &&\r
734 (!$the_same || ($the_same && (!preg_match(\r
735 '/[\t\n\x0b\x0c >\/]/',\r
736 $this->character($this->char + 1 + strlen($next_node))\r
737 ) || $this->EOF === $this->char)))\r
738 ) {\r
739 /* If the content model flag is set to the RCDATA or CDATA states then\r
740 examine the next few characters. If they do not match the tag name of\r
741 the last start tag token emitted (case insensitively), or if they do but\r
742 they are not immediately followed by one of the following characters:\r
743 * U+0009 CHARACTER TABULATION\r
744 * U+000A LINE FEED (LF)\r
745 * U+000B LINE TABULATION\r
746 * U+000C FORM FEED (FF)\r
747 * U+0020 SPACE\r
748 * U+003E GREATER-THAN SIGN (>)\r
749 * U+002F SOLIDUS (/)\r
750 * EOF\r
751 ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character\r
752 token, a U+002F SOLIDUS character token, and switch to the data state\r
753 to process the next input character. */\r
754 $this->emitToken(\r
755 array(\r
756 'type' => self::CHARACTR,\r
757 'data' => '</'\r
758 )\r
759 );\r
760\r
761 $this->state = 'data';\r
762\r
763 } else {\r
764 /* Otherwise, if the content model flag is set to the PCDATA state,\r
765 or if the next few characters do match that tag name, consume the\r
766 next input character: */\r
767 $this->char++;\r
768 $char = $this->char();\r
769\r
770 if (preg_match('/^[A-Za-z]$/', $char)) {\r
771 /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z\r
772 Create a new end tag token, set its tag name to the lowercase version\r
773 of the input character (add 0x0020 to the character's code point), then\r
774 switch to the tag name state. (Don't emit the token yet; further details\r
775 will be filled in before it is emitted.) */\r
776 $this->token = array(\r
777 'name' => strtolower($char),\r
778 'type' => self::ENDTAG\r
779 );\r
780\r
781 $this->state = 'tagName';\r
782\r
783 } elseif ($char === '>') {\r
784 /* U+003E GREATER-THAN SIGN (>)\r
785 Parse error. Switch to the data state. */\r
786 $this->state = 'data';\r
787\r
788 } elseif ($this->char === $this->EOF) {\r
789 /* EOF\r
790 Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F\r
791 SOLIDUS character token. Reconsume the EOF character in the data state. */\r
792 $this->emitToken(\r
793 array(\r
794 'type' => self::CHARACTR,\r
795 'data' => '</'\r
796 )\r
797 );\r
798\r
799 $this->char--;\r
800 $this->state = 'data';\r
801\r
802 } else {\r
803 /* Parse error. Switch to the bogus comment state. */\r
804 $this->state = 'bogusComment';\r
805 }\r
806 }\r
807 }\r
808\r
809 private function tagNameState()\r
810 {\r
811 // Consume the next input character:\r
812 $this->char++;\r
813 $char = $this->character($this->char);\r
814\r
815 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {\r
816 /* U+0009 CHARACTER TABULATION\r
817 U+000A LINE FEED (LF)\r
818 U+000B LINE TABULATION\r
819 U+000C FORM FEED (FF)\r
820 U+0020 SPACE\r
821 Switch to the before attribute name state. */\r
822 $this->state = 'beforeAttributeName';\r
823\r
824 } elseif ($char === '>') {\r
825 /* U+003E GREATER-THAN SIGN (>)\r
826 Emit the current tag token. Switch to the data state. */\r
827 $this->emitToken($this->token);\r
828 $this->state = 'data';\r
829\r
830 } elseif ($this->char === $this->EOF) {\r
831 /* EOF\r
832 Parse error. Emit the current tag token. Reconsume the EOF\r
833 character in the data state. */\r
834 $this->emitToken($this->token);\r
835\r
836 $this->char--;\r
837 $this->state = 'data';\r
838\r
839 } elseif ($char === '/') {\r
840 /* U+002F SOLIDUS (/)\r
841 Parse error unless this is a permitted slash. Switch to the before\r
842 attribute name state. */\r
843 $this->state = 'beforeAttributeName';\r
844\r
845 } else {\r
846 /* Anything else\r
847 Append the current input character to the current tag token's tag name.\r
848 Stay in the tag name state. */\r
849 $this->token['name'] .= strtolower($char);\r
850 $this->state = 'tagName';\r
851 }\r
852 }\r
853\r
854 private function beforeAttributeNameState()\r
855 {\r
856 // Consume the next input character:\r
857 $this->char++;\r
858 $char = $this->character($this->char);\r
859\r
860 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {\r
861 /* U+0009 CHARACTER TABULATION\r
862 U+000A LINE FEED (LF)\r
863 U+000B LINE TABULATION\r
864 U+000C FORM FEED (FF)\r
865 U+0020 SPACE\r
866 Stay in the before attribute name state. */\r
867 $this->state = 'beforeAttributeName';\r
868\r
869 } elseif ($char === '>') {\r
870 /* U+003E GREATER-THAN SIGN (>)\r
871 Emit the current tag token. Switch to the data state. */\r
872 $this->emitToken($this->token);\r
873 $this->state = 'data';\r
874\r
875 } elseif ($char === '/') {\r
876 /* U+002F SOLIDUS (/)\r
877 Parse error unless this is a permitted slash. Stay in the before\r
878 attribute name state. */\r
879 $this->state = 'beforeAttributeName';\r
880\r
881 } elseif ($this->char === $this->EOF) {\r
882 /* EOF\r
883 Parse error. Emit the current tag token. Reconsume the EOF\r
884 character in the data state. */\r
885 $this->emitToken($this->token);\r
886\r
887 $this->char--;\r
888 $this->state = 'data';\r
889\r
890 } else {\r
891 /* Anything else\r
892 Start a new attribute in the current tag token. Set that attribute's\r
893 name to the current input character, and its value to the empty string.\r
894 Switch to the attribute name state. */\r
895 $this->token['attr'][] = array(\r
896 'name' => strtolower($char),\r
897 'value' => null\r
898 );\r
899\r
900 $this->state = 'attributeName';\r
901 }\r
902 }\r
903\r
904 private function attributeNameState()\r
905 {\r
906 // Consume the next input character:\r
907 $this->char++;\r
908 $char = $this->character($this->char);\r
909\r
910 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {\r
911 /* U+0009 CHARACTER TABULATION\r
912 U+000A LINE FEED (LF)\r
913 U+000B LINE TABULATION\r
914 U+000C FORM FEED (FF)\r
915 U+0020 SPACE\r
916 Stay in the before attribute name state. */\r
917 $this->state = 'afterAttributeName';\r
918\r
919 } elseif ($char === '=') {\r
920 /* U+003D EQUALS SIGN (=)\r
921 Switch to the before attribute value state. */\r
922 $this->state = 'beforeAttributeValue';\r
923\r
924 } elseif ($char === '>') {\r
925 /* U+003E GREATER-THAN SIGN (>)\r
926 Emit the current tag token. Switch to the data state. */\r
927 $this->emitToken($this->token);\r
928 $this->state = 'data';\r
929\r
930 } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {\r
931 /* U+002F SOLIDUS (/)\r
932 Parse error unless this is a permitted slash. Switch to the before\r
933 attribute name state. */\r
934 $this->state = 'beforeAttributeName';\r
935\r
936 } elseif ($this->char === $this->EOF) {\r
937 /* EOF\r
938 Parse error. Emit the current tag token. Reconsume the EOF\r
939 character in the data state. */\r
940 $this->emitToken($this->token);\r
941\r
942 $this->char--;\r
943 $this->state = 'data';\r
944\r
945 } else {\r
946 /* Anything else\r
947 Append the current input character to the current attribute's name.\r
948 Stay in the attribute name state. */\r
949 $last = count($this->token['attr']) - 1;\r
950 $this->token['attr'][$last]['name'] .= strtolower($char);\r
951\r
952 $this->state = 'attributeName';\r
953 }\r
954 }\r
955\r
956 private function afterAttributeNameState()\r
957 {\r
958 // Consume the next input character:\r
959 $this->char++;\r
960 $char = $this->character($this->char);\r
961\r
962 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {\r
963 /* U+0009 CHARACTER TABULATION\r
964 U+000A LINE FEED (LF)\r
965 U+000B LINE TABULATION\r
966 U+000C FORM FEED (FF)\r
967 U+0020 SPACE\r
968 Stay in the after attribute name state. */\r
969 $this->state = 'afterAttributeName';\r
970\r
971 } elseif ($char === '=') {\r
972 /* U+003D EQUALS SIGN (=)\r
973 Switch to the before attribute value state. */\r
974 $this->state = 'beforeAttributeValue';\r
975\r
976 } elseif ($char === '>') {\r
977 /* U+003E GREATER-THAN SIGN (>)\r
978 Emit the current tag token. Switch to the data state. */\r
979 $this->emitToken($this->token);\r
980 $this->state = 'data';\r
981\r
982 } elseif ($char === '/' && $this->character($this->char + 1) !== '>') {\r
983 /* U+002F SOLIDUS (/)\r
984 Parse error unless this is a permitted slash. Switch to the\r
985 before attribute name state. */\r
986 $this->state = 'beforeAttributeName';\r
987\r
988 } elseif ($this->char === $this->EOF) {\r
989 /* EOF\r
990 Parse error. Emit the current tag token. Reconsume the EOF\r
991 character in the data state. */\r
992 $this->emitToken($this->token);\r
993\r
994 $this->char--;\r
995 $this->state = 'data';\r
996\r
997 } else {\r
998 /* Anything else\r
999 Start a new attribute in the current tag token. Set that attribute's\r
1000 name to the current input character, and its value to the empty string.\r
1001 Switch to the attribute name state. */\r
1002 $this->token['attr'][] = array(\r
1003 'name' => strtolower($char),\r
1004 'value' => null\r
1005 );\r
1006\r
1007 $this->state = 'attributeName';\r
1008 }\r
1009 }\r
1010\r
1011 private function beforeAttributeValueState()\r
1012 {\r
1013 // Consume the next input character:\r
1014 $this->char++;\r
1015 $char = $this->character($this->char);\r
1016\r
1017 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {\r
1018 /* U+0009 CHARACTER TABULATION\r
1019 U+000A LINE FEED (LF)\r
1020 U+000B LINE TABULATION\r
1021 U+000C FORM FEED (FF)\r
1022 U+0020 SPACE\r
1023 Stay in the before attribute value state. */\r
1024 $this->state = 'beforeAttributeValue';\r
1025\r
1026 } elseif ($char === '"') {\r
1027 /* U+0022 QUOTATION MARK (")\r
1028 Switch to the attribute value (double-quoted) state. */\r
1029 $this->state = 'attributeValueDoubleQuoted';\r
1030\r
1031 } elseif ($char === '&') {\r
1032 /* U+0026 AMPERSAND (&)\r
1033 Switch to the attribute value (unquoted) state and reconsume\r
1034 this input character. */\r
1035 $this->char--;\r
1036 $this->state = 'attributeValueUnquoted';\r
1037\r
1038 } elseif ($char === '\'') {\r
1039 /* U+0027 APOSTROPHE (')\r
1040 Switch to the attribute value (single-quoted) state. */\r
1041 $this->state = 'attributeValueSingleQuoted';\r
1042\r
1043 } elseif ($char === '>') {\r
1044 /* U+003E GREATER-THAN SIGN (>)\r
1045 Emit the current tag token. Switch to the data state. */\r
1046 $this->emitToken($this->token);\r
1047 $this->state = 'data';\r
1048\r
1049 } else {\r
1050 /* Anything else\r
1051 Append the current input character to the current attribute's value.\r
1052 Switch to the attribute value (unquoted) state. */\r
1053 $last = count($this->token['attr']) - 1;\r
1054 $this->token['attr'][$last]['value'] .= $char;\r
1055\r
1056 $this->state = 'attributeValueUnquoted';\r
1057 }\r
1058 }\r
1059\r
1060 private function attributeValueDoubleQuotedState()\r
1061 {\r
1062 // Consume the next input character:\r
1063 $this->char++;\r
1064 $char = $this->character($this->char);\r
1065\r
1066 if ($char === '"') {\r
1067 /* U+0022 QUOTATION MARK (")\r
1068 Switch to the before attribute name state. */\r
1069 $this->state = 'beforeAttributeName';\r
1070\r
1071 } elseif ($char === '&') {\r
1072 /* U+0026 AMPERSAND (&)\r
1073 Switch to the entity in attribute value state. */\r
1074 $this->entityInAttributeValueState('double');\r
1075\r
1076 } elseif ($this->char === $this->EOF) {\r
1077 /* EOF\r
1078 Parse error. Emit the current tag token. Reconsume the character\r
1079 in the data state. */\r
1080 $this->emitToken($this->token);\r
1081\r
1082 $this->char--;\r
1083 $this->state = 'data';\r
1084\r
1085 } else {\r
1086 /* Anything else\r
1087 Append the current input character to the current attribute's value.\r
1088 Stay in the attribute value (double-quoted) state. */\r
1089 $last = count($this->token['attr']) - 1;\r
1090 $this->token['attr'][$last]['value'] .= $char;\r
1091\r
1092 $this->state = 'attributeValueDoubleQuoted';\r
1093 }\r
1094 }\r
1095\r
1096 private function attributeValueSingleQuotedState()\r
1097 {\r
1098 // Consume the next input character:\r
1099 $this->char++;\r
1100 $char = $this->character($this->char);\r
1101\r
1102 if ($char === '\'') {\r
1103 /* U+0022 QUOTATION MARK (')\r
1104 Switch to the before attribute name state. */\r
1105 $this->state = 'beforeAttributeName';\r
1106\r
1107 } elseif ($char === '&') {\r
1108 /* U+0026 AMPERSAND (&)\r
1109 Switch to the entity in attribute value state. */\r
1110 $this->entityInAttributeValueState('single');\r
1111\r
1112 } elseif ($this->char === $this->EOF) {\r
1113 /* EOF\r
1114 Parse error. Emit the current tag token. Reconsume the character\r
1115 in the data state. */\r
1116 $this->emitToken($this->token);\r
1117\r
1118 $this->char--;\r
1119 $this->state = 'data';\r
1120\r
1121 } else {\r
1122 /* Anything else\r
1123 Append the current input character to the current attribute's value.\r
1124 Stay in the attribute value (single-quoted) state. */\r
1125 $last = count($this->token['attr']) - 1;\r
1126 $this->token['attr'][$last]['value'] .= $char;\r
1127\r
1128 $this->state = 'attributeValueSingleQuoted';\r
1129 }\r
1130 }\r
1131\r
1132 private function attributeValueUnquotedState()\r
1133 {\r
1134 // Consume the next input character:\r
1135 $this->char++;\r
1136 $char = $this->character($this->char);\r
1137\r
1138 if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {\r
1139 /* U+0009 CHARACTER TABULATION\r
1140 U+000A LINE FEED (LF)\r
1141 U+000B LINE TABULATION\r
1142 U+000C FORM FEED (FF)\r
1143 U+0020 SPACE\r
1144 Switch to the before attribute name state. */\r
1145 $this->state = 'beforeAttributeName';\r
1146\r
1147 } elseif ($char === '&') {\r
1148 /* U+0026 AMPERSAND (&)\r
1149 Switch to the entity in attribute value state. */\r
1150 $this->entityInAttributeValueState();\r
1151\r
1152 } elseif ($char === '>') {\r
1153 /* U+003E GREATER-THAN SIGN (>)\r
1154 Emit the current tag token. Switch to the data state. */\r
1155 $this->emitToken($this->token);\r
1156 $this->state = 'data';\r
1157\r
1158 } else {\r
1159 /* Anything else\r
1160 Append the current input character to the current attribute's value.\r
1161 Stay in the attribute value (unquoted) state. */\r
1162 $last = count($this->token['attr']) - 1;\r
1163 $this->token['attr'][$last]['value'] .= $char;\r
1164\r
1165 $this->state = 'attributeValueUnquoted';\r
1166 }\r
1167 }\r
1168\r
/**
 * Decodes an entity that starts at the current U+0026 AMPERSAND and
 * appends the result to the value of the last attribute on the current
 * tag token.
 *
 * @return void
 */
private function entityInAttributeValueState()
{
    // Attempt to consume an entity.
    $entity = $this->entity();

    // entity() reports "no entity here" with boolean false. Compare against
    // false exactly: a successfully decoded "0" (for instance from a decimal
    // reference to U+0030) is falsy in PHP and must not be replaced by a
    // literal ampersand.
    $char = ($entity === false)
        ? '&'
        : $entity;

    $last = count($this->token['attr']) - 1;
    $this->token['attr'][$last]['value'] .= $char;
}
1184\r
/**
 * Tokeniser state: bogus comment.
 *
 * Takes the run of input returned by characters() for the class "^>"
 * starting at the current position, emits it as a single comment token
 * and hands control back to the data state.
 *
 * @return void
 */
private function bogusCommentState()
{
    // Everything from the character that triggered this state up to (but
    // not including) the next U+003E GREATER-THAN SIGN, or up to the end
    // of the input, becomes the comment's data.
    $comment = $this->characters('^>', $this->char);

    $comment_token = array(
        'data' => $comment,
        'type' => self::COMMENT
    );
    $this->emitToken($comment_token);

    // Skip over what was just consumed and switch to the data state.
    $this->char += strlen($comment);
    $this->state = 'data';

    // If the end of the file was reached, step back one position so that
    // the EOF position is consumed again by the next state.
    if ($this->char === $this->EOF) {
        $this->char--;
    }
}
1213\r
/**
 * Tokeniser state: markup declaration open (input so far is "<!").
 *
 * Decides between a comment, a DOCTYPE and a bogus comment by looking at
 * the characters that follow the current position.
 *
 * @return void
 */
private function markupDeclarationOpenState()
{
    // Two U+002D HYPHEN-MINUS characters: consume them, start a comment
    // token with no data yet and switch to the comment state.
    $dashes = $this->character($this->char + 1, 2);
    if ($dashes === '--') {
        $this->token = array(
            'data' => null,
            'type' => self::COMMENT
        );
        $this->state = 'comment';
        $this->char += 2;
        return;
    }

    // Seven characters matching "DOCTYPE" case-insensitively: consume
    // them and switch to the DOCTYPE state.
    $keyword = strtolower($this->character($this->char + 1, 7));
    if ($keyword === 'doctype') {
        $this->state = 'doctype';
        $this->char += 7;
        return;
    }

    // Otherwise it is a parse error. Switch to the bogus comment state;
    // the character now pointed at is the first one of that comment.
    $this->state = 'bogusComment';
    $this->char++;
}
1242\r
/**
 * Tokeniser state: comment.
 *
 * Consumes one character and either moves towards the end of the comment,
 * emits the comment at end of input, or appends the character to the
 * comment token's data.
 *
 * @return void
 */
private function commentState()
{
    // Consume the next input character.
    $this->char++;
    $current = $this->char();

    // U+002D HYPHEN-MINUS: possibly the start of the closing "-->".
    if ($current === '-') {
        $this->state = 'commentDash';
        return;
    }

    // Anything that is not EOF is comment data; stay in this state.
    if ($this->char !== $this->EOF) {
        $this->token['data'] .= $current;
        return;
    }

    // EOF: parse error. Emit the comment token and reconsume the EOF
    // position in the data state.
    $this->emitToken($this->token);
    $this->char--;
    $this->state = 'data';
}
1269\r
/**
 * Tokeniser state: comment dash (one "-" has been seen inside a comment).
 *
 * @return void
 */
private function commentDashState()
{
    // Consume the next input character.
    $this->char++;
    $current = $this->char();

    // A second U+002D HYPHEN-MINUS: switch to the comment end state.
    if ($current === '-') {
        $this->state = 'commentEnd';
        return;
    }

    // EOF: parse error. Emit the comment token and reconsume the EOF
    // position in the data state.
    if ($this->char === $this->EOF) {
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';
        return;
    }

    // Anything else: the pending dash was ordinary data. Append it together
    // with the current character and go back to the comment state.
    $this->token['data'] .= '-' . $current;
    $this->state = 'comment';
}
1297\r
/**
 * Tokeniser state: comment end ("--" has been seen inside a comment).
 *
 * @return void
 */
private function commentEndState()
{
    // Consume the next input character.
    $this->char++;
    $current = $this->char();
    $at_eof = ($this->char === $this->EOF);

    // ">" closes the comment; EOF ends it prematurely. Both emit the
    // comment token and continue in the data state. At EOF the position
    // is stepped back so the data state consumes EOF again.
    if ($current === '>' || $at_eof) {
        $this->emitToken($this->token);
        if ($at_eof) {
            $this->char--;
        }
        $this->state = 'data';
        return;
    }

    // One more dash: keep a "-" as data and stay in this state.
    if ($current === '-') {
        $this->token['data'] .= '-';
        return;
    }

    // Anything else: the two pending dashes were data after all.
    $this->token['data'] .= '--' . $current;
    $this->state = 'comment';
}
1321\r
/**
 * Tokeniser state: DOCTYPE (the keyword itself has just been consumed).
 *
 * A single whitespace character after the keyword is swallowed; any other
 * character is left in place to be consumed again. Either way the next
 * state is "before DOCTYPE name".
 *
 * @return void
 */
private function doctypeState()
{
    // Consume the next input character.
    $this->char++;

    // Not whitespace: step back so the character is reconsumed.
    if (!preg_match('/^[\t\n\x0b\x0c ]$/', $this->char())) {
        $this->char--;
    }

    $this->state = 'beforeDoctypeName';
}
1336\r
/**
 * Tokeniser state: before DOCTYPE name.
 *
 * Skips whitespace, then either starts a DOCTYPE token from the first
 * name character or, on ">" / EOF, emits a nameless DOCTYPE token that is
 * marked as being in error.
 *
 * @return void
 */
private function beforeDoctypeNameState()
{
    // Consume the next input character.
    $this->char++;
    $current = $this->char();

    // Whitespace: stay in the before DOCTYPE name state.
    if (preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
        return;
    }

    $at_eof = ($this->char === $this->EOF);

    // ">" or EOF before any name character: emit an erroneous DOCTYPE
    // token without a name and return to the data state. At EOF the
    // position is stepped back so the data state consumes EOF again.
    if ($current === '>' || $at_eof) {
        $this->emitToken(
            array(
                'name' => null,
                'type' => self::DOCTYPE,
                'error' => true
            )
        );

        if ($at_eof) {
            $this->char--;
        }
        $this->state = 'data';
        return;
    }

    // First character of the name. Lowercase ASCII letters are stored
    // uppercased; every other character is stored as it is. The token
    // starts out flagged as erroneous.
    $first = preg_match('/^[a-z]$/', $current)
        ? strtoupper($current)
        : $current;

    $this->token = array(
        'name' => $first,
        'type' => self::DOCTYPE,
        'error' => true
    );
    $this->state = 'doctypeName';
}
1388\r
/**
 * Tokeniser state: DOCTYPE name.
 *
 * Accumulates the DOCTYPE name one character at a time (lowercase ASCII
 * letters are uppercased) until whitespace, ">" or EOF ends it. After
 * every step the token's error flag is recomputed: it is false only while
 * the accumulated name is exactly "HTML".
 *
 * @return void
 */
private function doctypeNameState()
{
    /* Consume the next input character: */
    $this->char++;
    $char = $this->char();

    if (preg_match('/^[\t\n\x0b\x0c ]$/', $char)) {
        // Spelled in lowerCamel like every other state name so it matches
        // the afterDoctypeNameState() method exactly; the former
        // 'AfterDoctypeName' resolved only because PHP method lookup is
        // case-insensitive.
        $this->state = 'afterDoctypeName';

    } elseif ($char === '>') {
        $this->emitToken($this->token);
        $this->state = 'data';

    } elseif (preg_match('/^[a-z]$/', $char)) {
        $this->token['name'] .= strtoupper($char);

    } elseif ($this->char === $this->EOF) {
        // Parse error: emit what has been collected and reconsume the EOF
        // position in the data state.
        $this->emitToken($this->token);
        $this->char--;
        $this->state = 'data';

    } else {
        $this->token['name'] .= $char;
    }

    // Runs on every call, including the ones that have just emitted the
    // token; in those cases the name did not change in this call, so the
    // flag on the emitted token was already up to date.
    $this->token['error'] = ($this->token['name'] === 'HTML')
        ? false
        : true;
}
1418\r
/**
 * Tokeniser state: after DOCTYPE name.
 *
 * Skips whitespace after the name. ">" or EOF emits the DOCTYPE token;
 * any other character marks the token as erroneous and switches to the
 * bogus DOCTYPE state.
 *
 * @return void
 */
private function afterDoctypeNameState()
{
    // Consume the next input character.
    $this->char++;
    $current = $this->char();

    // Whitespace: stay in the after DOCTYPE name state.
    if (preg_match('/^[\t\n\x0b\x0c ]$/', $current)) {
        return;
    }

    $at_eof = ($this->char === $this->EOF);

    if ($current === '>' || $at_eof) {
        $this->emitToken($this->token);
        if ($at_eof) {
            // Reconsume the EOF position in the data state.
            $this->char--;
        }
        $this->state = 'data';
        return;
    }

    // Unexpected content after the name.
    $this->token['error'] = true;
    $this->state = 'bogusDoctype';
}
1442\r
/**
 * Tokeniser state: bogus DOCTYPE.
 *
 * Discards characters until ">" or EOF, then emits the current DOCTYPE
 * token and returns to the data state.
 *
 * @return void
 */
private function bogusDoctypeState()
{
    // Consume the next input character.
    $this->char++;
    $current = $this->char();
    $at_eof = ($this->char === $this->EOF);

    // Anything other than ">" or EOF is skipped; stay in this state.
    if ($current !== '>' && !$at_eof) {
        return;
    }

    $this->emitToken($this->token);
    if ($at_eof) {
        // Reconsume the EOF position in the data state.
        $this->char--;
    }
    $this->state = 'data';
}
1462\r
/**
 * Attempts to consume an entity. Used for entities in text and in
 * attribute values.
 *
 * On entry $this->char is the position of the U+0026 AMPERSAND that
 * introduced the entity. On success $this->char is left on the last
 * character that belongs to the entity (including a terminating ";" when
 * there is one). On failure $this->char is restored to the ampersand and
 * nothing is consumed.
 *
 * Fixes over the previous implementation:
 *  - The hexadecimal marker is looked up two characters after the
 *    ampersand. The old inner switch re-read the character directly after
 *    it, which is always "#", so the hexadecimal case could never match.
 *  - The reference handed to html_entity_decode() is assembled from the
 *    digits that were actually scanned. The old code passed the absolute
 *    position $this->char as a length and started at the ampersand itself.
 *  - The input position is advanced over the digits and an optional ";".
 *  - Named entities use the longest matching identifier, as the comment
 *    on that branch always required; the old loop stopped at the shortest
 *    matching prefix. A ";" that is part of the matched identifier is no
 *    longer duplicated when the reference is rebuilt for decoding.
 *
 * @return string|false The decoded character(s), or false if no entity
 *                      could be matched at the current position.
 */
private function entity()
{
    // Position of the U+0026 AMPERSAND character.
    $start = $this->char;
    $cond = false;

    // The behaviour depends on the identity of the next character (the
    // one immediately after the U+0026 AMPERSAND character):
    switch ($this->character($start + 1)) {
        // U+0023 NUMBER SIGN (#)
        case '#':
            // The behaviour further depends on the character after the
            // U+0023 NUMBER SIGN: "x" / "X" selects hexadecimal digits
            // (0-9, A-F, a-f), anything else selects decimal digits (0-9).
            $marker = $this->character($start + 2);
            $is_hex = ($marker === 'x' || $marker === 'X');
            $digit_pattern = $is_hex ? '/^[0-9A-Fa-f]$/' : '/^[0-9]$/';

            // Consume as many characters as match the range of characters
            // given above. $pos always points one past the last digit.
            $pos = $start + ($is_hex ? 3 : 2);
            $e_name = '';
            while ($pos < $this->EOF) {
                $digit = $this->character($pos);
                if (!is_string($digit) || !preg_match($digit_pattern, $digit)) {
                    break;
                }
                $e_name .= $digit;
                $pos++;
            }

            $cond = ($e_name !== '');
            if ($cond) {
                $entity = '#' . ($is_hex ? 'x' : '') . $e_name;

                // A terminating semicolon belongs to the reference.
                if ($pos < $this->EOF && $this->character($pos) === ';') {
                    $pos++;
                }
                $this->char = $pos - 1;
            }

            // The rest of the parsing happens below.
            break;

        // Anything else
        default:
            // Consume the maximum number of characters possible, with the
            // consumed characters case-sensitively matching one of the
            // identifiers in the entities table. Candidates are tried from
            // the longest prefix down to the shortest.
            $e_name = $this->characters('0-9A-Za-z;', $start + 1);
            $len = strlen($e_name);

            for ($c = $len; $c >= 1; $c--) {
                $id = substr($e_name, 0, $c);

                if (!in_array($id, $this->entities, true)) {
                    continue;
                }

                $consumed = $c;
                if ($id[$c - 1] !== ';' && $c < $len && $e_name[$c] === ';') {
                    $consumed++; // consume extra semicolon
                }

                $this->char = $start + $consumed;
                // The ";" is re-added when the reference is decoded below.
                $entity = rtrim($id, ';');
                $cond = true;
                break;
            }

            // The rest of the parsing happens below.
            break;
    }

    if (!$cond) {
        // If no match can be made, then this is a parse error. No
        // characters are consumed, and nothing is returned.
        $this->char = $start;
        return false;
    }

    // Return a character token for the character corresponding to the
    // entity. References html_entity_decode() cannot decode come back as
    // their literal text.
    return html_entity_decode('&' . $entity . ';', ENT_QUOTES, 'UTF-8');
}
1553\r
/**
 * Passes a token to the tree constructor and updates the tokeniser's
 * content model from the outcome.
 *
 * @param array $token Token to emit; its 'type' entry is read here.
 * @return void
 */
private function emitToken($token)
{
    $result = $this->tree->emitToken($token);

    // An integer result is taken as the content model to use from now on.
    if (is_int($result)) {
        $this->content_model = $result;
        return;
    }

    // Otherwise an end tag resets the content model to PCDATA.
    if ($token['type'] === self::ENDTAG) {
        $this->content_model = self::PCDATA;
    }
}
1565\r
/**
 * Ends tokenisation: clears the current state and sends an end-of-file
 * token to the tree constructor.
 *
 * @return void
 */
private function EOF()
{
    $this->state = null;

    $eof_token = array('type' => self::EOF);
    $this->tree->emitToken($eof_token);
}
1575}\r
1576\r
1577class HTML5TreeConstructer\r
1578{\r
/** @var array Stack of open elements; the root html element is pushed first. */
public $stack = array();

/** @var int Current tree construction phase (one of the *_PHASE constants). */
private $phase;

/** @var int Current insertion mode of the main phase. */
private $mode;

/** @var DOMDocument Document that is being built. */
private $dom;

// NOTE(review): presumably the foster parent used for misplaced table
// content; not referenced in this part of the file, confirm.
private $foster_parent = null;

// NOTE(review): presumably the list of active formatting elements;
// not referenced in this part of the file, confirm.
private $a_formatting = array();

/** @var DOMElement|null The head element, once one has been inserted. */
private $head_pointer = null;

// NOTE(review): presumably the current form element; confirm.
private $form_pointer = null;

// Tag names per element category (see the SPECIAL / SCOPING / FORMATTING
// constants below).
private $scoping = array(
    'button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th'
);
private $formatting = array(
    'a', 'b', 'big', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike',
    'strong', 'tt', 'u'
);
private $special = array(
    'address', 'area', 'base', 'basefont', 'bgsound', 'blockquote', 'body',
    'br', 'center', 'col', 'colgroup', 'dd', 'dir', 'div', 'dl', 'dt',
    'embed', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'head', 'hr', 'iframe', 'image', 'img', 'input',
    'isindex', 'li', 'link', 'listing', 'menu', 'meta', 'noembed',
    'noframes', 'noscript', 'ol', 'optgroup', 'option', 'p', 'param',
    'plaintext', 'pre', 'script', 'select', 'spacer', 'style', 'tbody',
    'textarea', 'tfoot', 'thead', 'title', 'tr', 'ul', 'wbr'
);

// The different phases.
const INIT_PHASE = 0;
const ROOT_PHASE = 1;
const MAIN_PHASE = 2;
const END_PHASE = 3;

// The different insertion modes for the main phase. These are numbered
// independently of the phases above, so values overlap (IN_BODY and
// END_PHASE are both 3).
const BEFOR_HEAD = 0;
const IN_HEAD = 1;
const AFTER_HEAD = 2;
const IN_BODY = 3;
const IN_TABLE = 4;
const IN_CAPTION = 5;
const IN_CGROUP = 6;
const IN_TBODY = 7;
const IN_ROW = 8;
const IN_CELL = 9;
const IN_SELECT = 10;
const AFTER_BODY = 11;
const IN_FRAME = 12;
const AFTR_FRAME = 13;

// The different types of elements.
const SPECIAL = 0;
const SCOPING = 1;
const FORMATTING = 2;
const PHRASING = 3;

const MARKER = 0;
1699\r
/**
 * Starts tree construction in the initial phase with the "before head"
 * insertion mode and an empty, non-strict UTF-8 DOM document.
 */
public function __construct()
{
    $document = new DOMDocument();
    $document->encoding = 'UTF-8';
    $document->preserveWhiteSpace = true;
    $document->substituteEntities = true;
    // Turn off the DOM extension's strict error checking for this document.
    $document->strictErrorChecking = false;

    $this->dom = $document;
    $this->phase = self::INIT_PHASE;
    $this->mode = self::BEFOR_HEAD;
}
1711\r
1712 // Process tag tokens\r
/**
 * Entry point for the tokeniser: routes a token to the handler of the
 * current tree construction phase.
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed Whatever the phase handler returns; null when the current
 *               phase matches none of the known phases.
 */
public function emitToken($token)
{
    $phase = $this->phase;

    if ($phase == self::INIT_PHASE) {
        return $this->initPhase($token);
    }
    if ($phase == self::ROOT_PHASE) {
        return $this->rootElementPhase($token);
    }
    if ($phase == self::MAIN_PHASE) {
        return $this->mainPhase($token);
    }
    if ($phase == self::END_PHASE) {
        return $this->trailingEndPhase($token);
    }
}
1730\r
/**
 * Initial phase of tree construction: handles everything that may come
 * before the root element.
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed Result of rootElementPhase() when the token is forwarded
 *               there, otherwise null.
 */
private function initPhase($token)
{
    $whitespace = '/^[\t\n\x0b\x0c ]+$/';
    $has_error_flag = isset($token['error']);

    /* Tokens for which the specification does not define the handling:
    a DOCTYPE token that is marked as being in error, a comment token, a
    start tag token, an end tag token, an end-of-file token, or a character
    token that is not made up solely of U+0009, U+000A, U+000B, U+000C or
    U+0020. */
    $undefined_case =
        ($has_error_flag && $token['error']) ||
        in_array(
            $token['type'],
            array(HTML5::COMMENT, HTML5::STARTTAG, HTML5::ENDTAG, HTML5::EOF),
            true
        ) ||
        ($token['type'] === HTML5::CHARACTR && isset($token['data']) &&
            !preg_match($whitespace, $token['data']));

    if ($undefined_case) {
        // Move on to the root element phase and let it handle the token.
        $this->phase = self::ROOT_PHASE;
        return $this->rootElementPhase($token);
    }

    /* A DOCTYPE token marked as being correct (the error flag is present
    but not set). */
    if ($has_error_flag) {
        // NOTE(review): the specification asks for a DocumentType node to
        // be appended to the Document; this object is created but never
        // attached to $this->dom.
        $doctype = new DOMDocumentType(null, null, 'HTML');

        /* Then, switch to the root element phase of the tree construction
        stage. */
        $this->phase = self::ROOT_PHASE;
        return;
    }

    /* A token whose data consists only of whitespace characters: append
    that text to the Document node. */
    if (isset($token['data']) && preg_match($whitespace, $token['data'])) {
        $text = $this->dom->createTextNode($token['data']);
        $this->dom->appendChild($text);
    }
}
1785\r
/**
 * Root element phase: handles tokens until the html element has been
 * created, then hands over to the main phase.
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed Result of mainPhase() when the token is reprocessed
 *               there, otherwise null.
 */
private function rootElementPhase($token)
{
    $type = $token['type'];

    /* A DOCTYPE token: parse error. Ignore the token. */
    if ($type === HTML5::DOCTYPE) {
        return;
    }

    /* A comment token: append a Comment node to the Document object with
    the data attribute set to the data given in the comment token. */
    if ($type === HTML5::COMMENT) {
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);
        return;
    }

    if ($type === HTML5::CHARACTR) {
        /* A character token made up only of U+0009, U+000A, U+000B, U+000C
        or U+0020: append it to the Document node. Any other character
        token falls through to the root element creation below. */
        if (preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
            $text = $this->dom->createTextNode($token['data']);
            $this->dom->appendChild($text);
            return;
        }

    } elseif ($type !== HTML5::STARTTAG &&
        $type !== HTML5::ENDTAG &&
        $type !== HTML5::EOF
    ) {
        // No rule for any other token type.
        return;
    }

    /* Create an element with the tag name html, append it to the Document
    object and make it the first entry on the stack of open elements.
    Switch to the main phase and reprocess the current token. */
    $html = $this->dom->createElement('html');
    $this->dom->appendChild($html);
    $this->stack[] = $html;

    $this->phase = self::MAIN_PHASE;
    return $this->mainPhase($token);
}
1835\r
/**
 * Main phase: handles the tokens that are treated the same way in every
 * insertion mode and dispatches all others to the handler of the current
 * insertion mode.
 *
 * The former "case self::END_PHASE" entry of the dispatch switch has been
 * removed. END_PHASE is a phase constant, not an insertion mode, and its
 * value (3) equals IN_BODY, whose case comes first, so that entry could
 * never be selected.
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed Result of the insertion mode handler, or null for the
 *               tokens handled here directly.
 */
private function mainPhase($token)
{
    /* Tokens in the main phase must be handled as follows: */

    /* A DOCTYPE token */
    if ($token['type'] === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

    /* A start tag token with the tag name "html" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') {
        /* If this start tag token was not the first start tag token, then
        it is a parse error. */

        /* For each attribute on the token, check to see if the attribute
        is already present on the first element of the stack of open
        elements (the root element). If it is not, add the attribute and
        its corresponding value to that element. */
        foreach ($token['attr'] as $attr) {
            if (!$this->stack[0]->hasAttribute($attr['name'])) {
                $this->stack[0]->setAttribute($attr['name'], $attr['value']);
            }
        }

    /* An end-of-file token */
    } elseif ($token['type'] === HTML5::EOF) {
        /* Generate implied end tags. */
        $this->generateImpliedEndTags();

    /* Anything else. */
    } else {
        /* Depends on the insertion mode: */
        switch ($this->mode) {
            case self::BEFOR_HEAD:
                return $this->beforeHead($token);
            case self::IN_HEAD:
                return $this->inHead($token);
            case self::AFTER_HEAD:
                return $this->afterHead($token);
            case self::IN_BODY:
                return $this->inBody($token);
            case self::IN_TABLE:
                return $this->inTable($token);
            case self::IN_CAPTION:
                return $this->inCaption($token);
            case self::IN_CGROUP:
                return $this->inColumnGroup($token);
            case self::IN_TBODY:
                return $this->inTableBody($token);
            case self::IN_ROW:
                return $this->inRow($token);
            case self::IN_CELL:
                return $this->inCell($token);
            case self::IN_SELECT:
                return $this->inSelect($token);
            case self::AFTER_BODY:
                return $this->afterBody($token);
            case self::IN_FRAME:
                return $this->inFrameset($token);
            case self::AFTR_FRAME:
                return $this->afterFrameset($token);
        }
    }
}
1916\r
/**
 * Insertion mode "before head".
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed Result of inHead() when the token is reprocessed there,
 *               otherwise null.
 */
private function beforeHead($token)
{
    $type = $token['type'];

    /* A character token made up only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE: append it to the current node. */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);
        return;
    }

    /* A comment token: append a Comment node to the current node with the
    data attribute set to the data given in the comment token. */
    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    /* A start tag token with the tag name "head": insert the element,
    remember it as the head element and change the insertion mode to
    "in head". */
    if ($type === HTML5::STARTTAG && $token['name'] === 'head') {
        $this->head_pointer = $this->insertElement($token);
        $this->mode = self::IN_HEAD;
        return;
    }

    /* Any other start tag token, an end tag with the tag name "html", or
    a character token that was not handled above. */
    $implies_head =
        $type === HTML5::STARTTAG ||
        ($type === HTML5::ENDTAG && $token['name'] === 'html') ||
        ($type === HTML5::CHARACTR && !preg_match(
            '/^[\t\n\x0b\x0c ]$/',
            $token['data']
        ));

    if ($implies_head) {
        /* Act as if a start tag token with the tag name "head" and no
        attributes had been seen, then reprocess the current token. */
        $implied_head = array(
            'name' => 'head',
            'type' => HTML5::STARTTAG,
            'attr' => array()
        );
        $this->beforeHead($implied_head);

        return $this->inHead($token);
    }

    /* Any other end tag: parse error. Ignore the token. */
}
1977\r
/**
 * Insertion mode "in head".
 *
 * Besides head content this handler is also given "script", "style",
 * "base", "link", "meta" and "title" start tags by the "after head" and
 * "in body" handlers.
 *
 * The head element pointer starts out as null and the "title", "style",
 * "base", "link" and "meta" branches already allow for that. The "script"
 * branch and the two isSameNode() checks now do the same instead of
 * calling a method on null.
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed A content model constant (HTML5::PCDATA, HTML5::RCDATA or
 *               HTML5::CDATA) when the tokeniser has to switch, the result
 *               of afterHead() when the token is reprocessed there,
 *               otherwise null.
 */
private function inHead($token)
{
    /* Handle the token as follows: */

    /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE.

    THIS DIFFERS FROM THE SPEC: If the current node is either a title, style
    or script element, append the character to the current node regardless
    of its content. */
    if (($token['type'] === HTML5::CHARACTR &&
            preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || (
            $token['type'] === HTML5::CHARACTR && in_array(
                end($this->stack)->nodeName,
                array('title', 'style', 'script'),
                true
            ))
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data attribute
        set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag for "title", "style" or "script": leave the element and
    have the tokeniser return to the PCDATA content model. */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('title', 'style', 'script'), true)
    ) {
        array_pop($this->stack);
        return HTML5::PCDATA;

    /* A start tag with the tag name "title" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $element = $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag to the RCDATA state. */
        return HTML5::RCDATA;

    /* A start tag with the tag name "style" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag to the CDATA state. */
        return HTML5::CDATA;

    /* A start tag with the tag name "script" */
    } elseif ($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') {
        /* Create an element for the token and append it to the head
        element, or, when there is no head element pointer, to the current
        node, as the "title" and "style" branches above do. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);

        } else {
            $this->insertElement($token);
        }

        /* Switch the tokeniser's content model flag to the CDATA state. */
        return HTML5::CDATA;

    /* A start tag with the tag name "base", "link", or "meta" */
    } elseif ($token['type'] === HTML5::STARTTAG && in_array(
            $token['name'],
            array('base', 'link', 'meta'),
            true
        )
    ) {
        /* Create an element for the token and append the new element to the
        node pointed to by the head element pointer, or, if that is null
        (innerHTML case), to the current node. */
        if ($this->head_pointer !== null) {
            $element = $this->insertElement($token, false);
            $this->head_pointer->appendChild($element);
            array_pop($this->stack);

        } else {
            $this->insertElement($token);
        }

    /* An end tag with the tag name "head" */
    } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') {
        /* If the current node is a head element, pop the current node off
        the stack of open elements. Otherwise, this is a parse error and
        nothing is popped. */
        if ($this->head_pointer !== null &&
            $this->head_pointer->isSameNode(end($this->stack))
        ) {
            array_pop($this->stack);
        }

        /* Change the insertion mode to "after head". */
        $this->mode = self::AFTER_HEAD;

    /* A start tag with the tag name "head" or an end tag except "html". */
    } elseif (($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')
    ) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* If the current node is a head element, act as if an end tag
        token with the tag name "head" had been seen. */
        if ($this->head_pointer !== null &&
            $this->head_pointer->isSameNode(end($this->stack))
        ) {
            $this->inHead(
                array(
                    'name' => 'head',
                    'type' => HTML5::ENDTAG
                )
            );

        /* Otherwise, change the insertion mode to "after head". */
        } else {
            $this->mode = self::AFTER_HEAD;
        }

        /* Then, reprocess the current token. */
        return $this->afterHead($token);
    }
}
2112\r
/**
 * Insertion mode "after head".
 *
 * @param array $token Token produced by the tokeniser.
 * @return mixed Result of inHead() or inBody() when the token is
 *               reprocessed there, otherwise null.
 */
private function afterHead($token)
{
    $type = $token['type'];

    /* A character token made up only of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE: append it to the current node. */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->insertText($token['data']);
        return;
    }

    /* A comment token: append a Comment node to the current node with the
    data attribute set to the data given in the comment token. */
    if ($type === HTML5::COMMENT) {
        $this->insertComment($token['data']);
        return;
    }

    if ($type === HTML5::STARTTAG) {
        $name = $token['name'];

        /* "body": insert the element and change the insertion mode to
        "in body". */
        if ($name === 'body') {
            $this->insertElement($token);
            $this->mode = self::IN_BODY;
            return;
        }

        /* "frameset": insert the element and change the insertion mode to
        "in frameset". */
        if ($name === 'frameset') {
            $this->insertElement($token);
            $this->mode = self::IN_FRAME;
            return;
        }

        /* "base", "link", "meta", "script", "style", "title": parse error.
        Switch the insertion mode back to "in head" and reprocess the
        token. */
        $head_only = array('base', 'link', 'meta', 'script', 'style', 'title');
        if (in_array($name, $head_only)) {
            $this->mode = self::IN_HEAD;
            return $this->inHead($token);
        }
    }

    /* Anything else: act as if a start tag token with the tag name "body"
    and no attributes had been seen, and then reprocess the current
    token. */
    $implied_body = array(
        'name' => 'body',
        'type' => HTML5::STARTTAG,
        'attr' => array()
    );
    $this->afterHead($implied_body);

    return $this->inBody($token);
}
2175\r
2176 private function inBody($token)\r
2177 {\r
2178 /* Handle the token as follows: */\r
2179\r
2180 switch ($token['type']) {\r
2181 /* A character token */\r
2182 case HTML5::CHARACTR:\r
2183 /* Reconstruct the active formatting elements, if any. */\r
2184 $this->reconstructActiveFormattingElements();\r
2185\r
2186 /* Append the token's character to the current node. */\r
2187 $this->insertText($token['data']);\r
2188 break;\r
2189\r
2190 /* A comment token */\r
2191 case HTML5::COMMENT:\r
2192 /* Append a Comment node to the current node with the data\r
2193 attribute set to the data given in the comment token. */\r
2194 $this->insertComment($token['data']);\r
2195 break;\r
2196\r
2197 case HTML5::STARTTAG:\r
2198 switch ($token['name']) {\r
2199 /* A start tag token whose tag name is one of: "script",\r
2200 "style" */\r
2201 case 'script':\r
2202 case 'style':\r
2203 /* Process the token as if the insertion mode had been "in\r
2204 head". */\r
2205 return $this->inHead($token);\r
2206 break;\r
2207\r
2208 /* A start tag token whose tag name is one of: "base", "link",\r
2209 "meta", "title" */\r
2210 case 'base':\r
2211 case 'link':\r
2212 case 'meta':\r
2213 case 'title':\r
2214 /* Parse error. Process the token as if the insertion mode\r
2215 had been "in head". */\r
2216 return $this->inHead($token);\r
2217 break;\r
2218\r
2219 /* A start tag token with the tag name "body" */\r
2220 case 'body':\r
2221 /* Parse error. If the second element on the stack of open\r
2222 elements is not a body element, or, if the stack of open\r
2223 elements has only one node on it, then ignore the token.\r
2224 (innerHTML case) */\r
2225 if (count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') {\r
2226 // Ignore\r
2227\r
2228 /* Otherwise, for each attribute on the token, check to see\r
2229 if the attribute is already present on the body element (the\r
2230 second element) on the stack of open elements. If it is not,\r
2231 add the attribute and its corresponding value to that\r
2232 element. */\r
2233 } else {\r
2234 foreach ($token['attr'] as $attr) {\r
2235 if (!$this->stack[1]->hasAttribute($attr['name'])) {\r
2236 $this->stack[1]->setAttribute($attr['name'], $attr['value']);\r
2237 }\r
2238 }\r
2239 }\r
2240 break;\r
2241\r
2242 /* A start tag whose tag name is one of: "address",\r
2243 "blockquote", "center", "dir", "div", "dl", "fieldset",\r
2244 "listing", "menu", "ol", "p", "ul" */\r
2245 case 'address':\r
2246 case 'blockquote':\r
2247 case 'center':\r
2248 case 'dir':\r
2249 case 'div':\r
2250 case 'dl':\r
2251 case 'fieldset':\r
2252 case 'listing':\r
2253 case 'menu':\r
2254 case 'ol':\r
2255 case 'p':\r
2256 case 'ul':\r
2257 /* If the stack of open elements has a p element in scope,\r
2258 then act as if an end tag with the tag name p had been\r
2259 seen. */\r
2260 if ($this->elementInScope('p')) {\r
2261 $this->emitToken(\r
2262 array(\r
2263 'name' => 'p',\r
2264 'type' => HTML5::ENDTAG\r
2265 )\r
2266 );\r
2267 }\r
2268\r
2269 /* Insert an HTML element for the token. */\r
2270 $this->insertElement($token);\r
2271 break;\r
2272\r
2273 /* A start tag whose tag name is "form" */\r
2274 case 'form':\r
2275 /* If the form element pointer is not null, ignore the\r
2276 token with a parse error. */\r
2277 if ($this->form_pointer !== null) {\r
2278 // Ignore.\r
2279\r
2280 /* Otherwise: */\r
2281 } else {\r
2282 /* If the stack of open elements has a p element in\r
2283 scope, then act as if an end tag with the tag name p\r
2284 had been seen. */\r
2285 if ($this->elementInScope('p')) {\r
2286 $this->emitToken(\r
2287 array(\r
2288 'name' => 'p',\r
2289 'type' => HTML5::ENDTAG\r
2290 )\r
2291 );\r
2292 }\r
2293\r
2294 /* Insert an HTML element for the token, and set the\r
2295 form element pointer to point to the element created. */\r
2296 $element = $this->insertElement($token);\r
2297 $this->form_pointer = $element;\r
2298 }\r
2299 break;\r
2300\r
2301 /* A start tag whose tag name is "li", "dd" or "dt" */\r
2302 case 'li':\r
2303 case 'dd':\r
2304 case 'dt':\r
2305 /* If the stack of open elements has a p element in scope,\r
2306 then act as if an end tag with the tag name p had been\r
2307 seen. */\r
2308 if ($this->elementInScope('p')) {\r
2309 $this->emitToken(\r
2310 array(\r
2311 'name' => 'p',\r
2312 'type' => HTML5::ENDTAG\r
2313 )\r
2314 );\r
2315 }\r
2316\r
2317 $stack_length = count($this->stack) - 1;\r
2318\r
2319 for ($n = $stack_length; 0 <= $n; $n--) {\r
2320 /* 1. Initialise node to be the current node (the\r
2321 bottommost node of the stack). */\r
2322 $stop = false;\r
2323 $node = $this->stack[$n];\r
2324 $cat = $this->getElementCategory($node->tagName);\r
2325\r
2326 /* 2. If node is an li, dd or dt element, then pop all\r
2327 the nodes from the current node up to node, including\r
2328 node, then stop this algorithm. */\r
2329 if ($token['name'] === $node->tagName || ($token['name'] !== 'li'\r
2330 && ($node->tagName === 'dd' || $node->tagName === 'dt'))\r
2331 ) {\r
2332 for ($x = $stack_length; $x >= $n; $x--) {\r
2333 array_pop($this->stack);\r
2334 }\r
2335\r
2336 break;\r
2337 }\r
2338\r
2339 /* 3. If node is not in the formatting category, and is\r
2340 not in the phrasing category, and is not an address or\r
2341 div element, then stop this algorithm. */\r
2342 if ($cat !== self::FORMATTING && $cat !== self::PHRASING &&\r
2343 $node->tagName !== 'address' && $node->tagName !== 'div'\r
2344 ) {\r
2345 break;\r
2346 }\r
2347 }\r
2348\r
2349 /* Finally, insert an HTML element with the same tag\r
2350 name as the token's. */\r
2351 $this->insertElement($token);\r
2352 break;\r
2353\r
2354 /* A start tag token whose tag name is "plaintext" */\r
2355 case 'plaintext':\r
2356 /* If the stack of open elements has a p element in scope,\r
2357 then act as if an end tag with the tag name p had been\r
2358 seen. */\r
2359 if ($this->elementInScope('p')) {\r
2360 $this->emitToken(\r
2361 array(\r
2362 'name' => 'p',\r
2363 'type' => HTML5::ENDTAG\r
2364 )\r
2365 );\r
2366 }\r
2367\r
2368 /* Insert an HTML element for the token. */\r
2369 $this->insertElement($token);\r
2370\r
2371 return HTML5::PLAINTEXT;\r
2372 break;\r
2373\r
2374 /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4",\r
2375 "h5", "h6" */\r
2376 case 'h1':\r
2377 case 'h2':\r
2378 case 'h3':\r
2379 case 'h4':\r
2380 case 'h5':\r
2381 case 'h6':\r
2382 /* If the stack of open elements has a p element in scope,\r
2383 then act as if an end tag with the tag name p had been seen. */\r
2384 if ($this->elementInScope('p')) {\r
2385 $this->emitToken(\r
2386 array(\r
2387 'name' => 'p',\r
2388 'type' => HTML5::ENDTAG\r
2389 )\r
2390 );\r
2391 }\r
2392\r
2393 /* If the stack of open elements has in scope an element whose\r
2394 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then\r
2395 this is a parse error; pop elements from the stack until an\r
2396 element with one of those tag names has been popped from the\r
2397 stack. */\r
2398 while ($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) {\r
2399 array_pop($this->stack);\r
2400 }\r
2401\r
2402 /* Insert an HTML element for the token. */\r
2403 $this->insertElement($token);\r
2404 break;\r
2405\r
2406 /* A start tag whose tag name is "a" */\r
2407 case 'a':\r
2408 /* If the list of active formatting elements contains\r
2409 an element whose tag name is "a" between the end of the\r
2410 list and the last marker on the list (or the start of\r
2411 the list if there is no marker on the list), then this\r
2412 is a parse error; act as if an end tag with the tag name\r
2413 "a" had been seen, then remove that element from the list\r
2414 of active formatting elements and the stack of open\r
2415 elements if the end tag didn't already remove it (it\r
2416 might not have if the element is not in table scope). */\r
2417 $leng = count($this->a_formatting);\r
2418\r
2419 for ($n = $leng - 1; $n >= 0; $n--) {\r
2420 if ($this->a_formatting[$n] === self::MARKER) {\r
2421 break;\r
2422\r
2423 } elseif ($this->a_formatting[$n]->nodeName === 'a') {\r
2424 $this->emitToken(\r
2425 array(\r
2426 'name' => 'a',\r
2427 'type' => HTML5::ENDTAG\r
2428 )\r
2429 );\r
2430 break;\r
2431 }\r
2432 }\r
2433\r
2434 /* Reconstruct the active formatting elements, if any. */\r
2435 $this->reconstructActiveFormattingElements();\r
2436\r
2437 /* Insert an HTML element for the token. */\r
2438 $el = $this->insertElement($token);\r
2439\r
2440 /* Add that element to the list of active formatting\r
2441 elements. */\r
2442 $this->a_formatting[] = $el;\r
2443 break;\r
2444\r
2445 /* A start tag whose tag name is one of: "b", "big", "em", "font",\r
2446 "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */\r
2447 case 'b':\r
2448 case 'big':\r
2449 case 'em':\r
2450 case 'font':\r
2451 case 'i':\r
2452 case 'nobr':\r
2453 case 's':\r
2454 case 'small':\r
2455 case 'strike':\r
2456 case 'strong':\r
2457 case 'tt':\r
2458 case 'u':\r
2459 /* Reconstruct the active formatting elements, if any. */\r
2460 $this->reconstructActiveFormattingElements();\r
2461\r
2462 /* Insert an HTML element for the token. */\r
2463 $el = $this->insertElement($token);\r
2464\r
2465 /* Add that element to the list of active formatting\r
2466 elements. */\r
2467 $this->a_formatting[] = $el;\r
2468 break;\r
2469\r
2470 /* A start tag token whose tag name is "button" */\r
2471 case 'button':\r
2472 /* If the stack of open elements has a button element in scope,\r
2473 then this is a parse error; act as if an end tag with the tag\r
2474 name "button" had been seen, then reprocess the token. (We don't\r
2475 do that. Unnecessary.) */\r
2476 if ($this->elementInScope('button')) {\r
2477 $this->inBody(\r
2478 array(\r
2479 'name' => 'button',\r
2480 'type' => HTML5::ENDTAG\r
2481 )\r
2482 );\r
2483 }\r
2484\r
2485 /* Reconstruct the active formatting elements, if any. */\r
2486 $this->reconstructActiveFormattingElements();\r
2487\r
2488 /* Insert an HTML element for the token. */\r
2489 $this->insertElement($token);\r
2490\r
2491 /* Insert a marker at the end of the list of active\r
2492 formatting elements. */\r
2493 $this->a_formatting[] = self::MARKER;\r
2494 break;\r
2495\r
2496 /* A start tag token whose tag name is one of: "marquee", "object" */\r
2497 case 'marquee':\r
2498 case 'object':\r
2499 /* Reconstruct the active formatting elements, if any. */\r
2500 $this->reconstructActiveFormattingElements();\r
2501\r
2502 /* Insert an HTML element for the token. */\r
2503 $this->insertElement($token);\r
2504\r
2505 /* Insert a marker at the end of the list of active\r
2506 formatting elements. */\r
2507 $this->a_formatting[] = self::MARKER;\r
2508 break;\r
2509\r
2510 /* A start tag token whose tag name is "xmp" */\r
2511 case 'xmp':\r
2512 /* Reconstruct the active formatting elements, if any. */\r
2513 $this->reconstructActiveFormattingElements();\r
2514\r
2515 /* Insert an HTML element for the token. */\r
2516 $this->insertElement($token);\r
2517\r
2518 /* Switch the content model flag to the CDATA state. */\r
2519 return HTML5::CDATA;\r
2520 break;\r
2521\r
2522 /* A start tag whose tag name is "table" */\r
2523 case 'table':\r
2524 /* If the stack of open elements has a p element in scope,\r
2525 then act as if an end tag with the tag name p had been seen. */\r
2526 if ($this->elementInScope('p')) {\r
2527 $this->emitToken(\r
2528 array(\r
2529 'name' => 'p',\r
2530 'type' => HTML5::ENDTAG\r
2531 )\r
2532 );\r
2533 }\r
2534\r
2535 /* Insert an HTML element for the token. */\r
2536 $this->insertElement($token);\r
2537\r
2538 /* Change the insertion mode to "in table". */\r
2539 $this->mode = self::IN_TABLE;\r
2540 break;\r
2541\r
2542 /* A start tag whose tag name is one of: "area", "basefont",\r
2543 "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */\r
2544 case 'area':\r
2545 case 'basefont':\r
2546 case 'bgsound':\r
2547 case 'br':\r
2548 case 'embed':\r
2549 case 'img':\r
2550 case 'param':\r
2551 case 'spacer':\r
2552 case 'wbr':\r
2553 /* Reconstruct the active formatting elements, if any. */\r
2554 $this->reconstructActiveFormattingElements();\r
2555\r
2556 /* Insert an HTML element for the token. */\r
2557 $this->insertElement($token);\r
2558\r
2559 /* Immediately pop the current node off the stack of open elements. */\r
2560 array_pop($this->stack);\r
2561 break;\r
2562\r
2563 /* A start tag whose tag name is "hr" */\r
2564 case 'hr':\r
2565 /* If the stack of open elements has a p element in scope,\r
2566 then act as if an end tag with the tag name p had been seen. */\r
2567 if ($this->elementInScope('p')) {\r
2568 $this->emitToken(\r
2569 array(\r
2570 'name' => 'p',\r
2571 'type' => HTML5::ENDTAG\r
2572 )\r
2573 );\r
2574 }\r
2575\r
2576 /* Insert an HTML element for the token. */\r
2577 $this->insertElement($token);\r
2578\r
2579 /* Immediately pop the current node off the stack of open elements. */\r
2580 array_pop($this->stack);\r
2581 break;\r
2582\r
2583 /* A start tag whose tag name is "image" */\r
2584 case 'image':\r
2585 /* Parse error. Change the token's tag name to "img" and\r
2586 reprocess it. (Don't ask.) */\r
2587 $token['name'] = 'img';\r
2588 return $this->inBody($token);\r
2589 break;\r
2590\r
2591 /* A start tag whose tag name is "input" */\r
2592 case 'input':\r
2593 /* Reconstruct the active formatting elements, if any. */\r
2594 $this->reconstructActiveFormattingElements();\r
2595\r
2596 /* Insert an input element for the token. */\r
2597 $element = $this->insertElement($token, false);\r
2598\r
2599 /* If the form element pointer is not null, then associate the\r
2600 input element with the form element pointed to by the form\r
2601 element pointer. */\r
2602 $this->form_pointer !== null\r
2603 ? $this->form_pointer->appendChild($element)\r
2604 : end($this->stack)->appendChild($element);\r
2605\r
2606 /* Pop that input element off the stack of open elements. */\r
2607 array_pop($this->stack);\r
2608 break;\r
2609\r
2610 /* A start tag whose tag name is "isindex" */\r
2611 case 'isindex':\r
2612 /* Parse error. */\r
2613 // w/e\r
2614\r
2615 /* If the form element pointer is not null,\r
2616 then ignore the token. */\r
2617 if ($this->form_pointer === null) {\r
2618 /* Act as if a start tag token with the tag name "form" had\r
2619 been seen. */\r
2620 $this->inBody(\r
2621 array(\r
2622 'name' => 'body',\r
2623 'type' => HTML5::STARTTAG,\r
2624 'attr' => array()\r
2625 )\r
2626 );\r
2627\r
2628 /* Act as if a start tag token with the tag name "hr" had\r
2629 been seen. */\r
2630 $this->inBody(\r
2631 array(\r
2632 'name' => 'hr',\r
2633 'type' => HTML5::STARTTAG,\r
2634 'attr' => array()\r
2635 )\r
2636 );\r
2637\r
2638 /* Act as if a start tag token with the tag name "p" had\r
2639 been seen. */\r
2640 $this->inBody(\r
2641 array(\r
2642 'name' => 'p',\r
2643 'type' => HTML5::STARTTAG,\r
2644 'attr' => array()\r
2645 )\r
2646 );\r
2647\r
2648 /* Act as if a start tag token with the tag name "label"\r
2649 had been seen. */\r
2650 $this->inBody(\r
2651 array(\r
2652 'name' => 'label',\r
2653 'type' => HTML5::STARTTAG,\r
2654 'attr' => array()\r
2655 )\r
2656 );\r
2657\r
2658 /* Act as if a stream of character tokens had been seen. */\r
2659 $this->insertText(\r
2660 'This is a searchable index. ' .\r
2661 'Insert your search keywords here: '\r
2662 );\r
2663\r
2664 /* Act as if a start tag token with the tag name "input"\r
2665 had been seen, with all the attributes from the "isindex"\r
2666 token, except with the "name" attribute set to the value\r
2667 "isindex" (ignoring any explicit "name" attribute). */\r
2668 $attr = $token['attr'];\r
2669 $attr[] = array('name' => 'name', 'value' => 'isindex');\r
2670\r
2671 $this->inBody(\r
2672 array(\r
2673 'name' => 'input',\r
2674 'type' => HTML5::STARTTAG,\r
2675 'attr' => $attr\r
2676 )\r
2677 );\r
2678\r
2679 /* Act as if a stream of character tokens had been seen\r
2680 (see below for what they should say). */\r
2681 $this->insertText(\r
2682 'This is a searchable index. ' .\r
2683 'Insert your search keywords here: '\r
2684 );\r
2685\r
2686 /* Act as if an end tag token with the tag name "label"\r
2687 had been seen. */\r
2688 $this->inBody(\r
2689 array(\r
2690 'name' => 'label',\r
2691 'type' => HTML5::ENDTAG\r
2692 )\r
2693 );\r
2694\r
2695 /* Act as if an end tag token with the tag name "p" had\r
2696 been seen. */\r
2697 $this->inBody(\r
2698 array(\r
2699 'name' => 'p',\r
2700 'type' => HTML5::ENDTAG\r
2701 )\r
2702 );\r
2703\r
2704 /* Act as if a start tag token with the tag name "hr" had\r
2705 been seen. */\r
2706 $this->inBody(\r
2707 array(\r
2708 'name' => 'hr',\r
2709 'type' => HTML5::ENDTAG\r
2710 )\r
2711 );\r
2712\r
2713 /* Act as if an end tag token with the tag name "form" had\r
2714 been seen. */\r
2715 $this->inBody(\r
2716 array(\r
2717 'name' => 'form',\r
2718 'type' => HTML5::ENDTAG\r
2719 )\r
2720 );\r
2721 }\r
2722 break;\r
2723\r
2724 /* A start tag whose tag name is "textarea" */\r
2725 case 'textarea':\r
2726 $this->insertElement($token);\r
2727\r
2728 /* Switch the tokeniser's content model flag to the\r
2729 RCDATA state. */\r
2730 return HTML5::RCDATA;\r
2731 break;\r
2732\r
2733 /* A start tag whose tag name is one of: "iframe", "noembed",\r
2734 "noframes" */\r
2735 case 'iframe':\r
2736 case 'noembed':\r
2737 case 'noframes':\r
2738 $this->insertElement($token);\r
2739\r
2740 /* Switch the tokeniser's content model flag to the CDATA state. */\r
2741 return HTML5::CDATA;\r
2742 break;\r
2743\r
2744 /* A start tag whose tag name is "select" */\r
2745 case 'select':\r
2746 /* Reconstruct the active formatting elements, if any. */\r
2747 $this->reconstructActiveFormattingElements();\r
2748\r
2749 /* Insert an HTML element for the token. */\r
2750 $this->insertElement($token);\r
2751\r
2752 /* Change the insertion mode to "in select". */\r
2753 $this->mode = self::IN_SELECT;\r
2754 break;\r
2755\r
2756 /* A start or end tag whose tag name is one of: "caption", "col",\r
2757 "colgroup", "frame", "frameset", "head", "option", "optgroup",\r
2758 "tbody", "td", "tfoot", "th", "thead", "tr". */\r
2759 case 'caption':\r
2760 case 'col':\r
2761 case 'colgroup':\r
2762 case 'frame':\r
2763 case 'frameset':\r
2764 case 'head':\r
2765 case 'option':\r
2766 case 'optgroup':\r
2767 case 'tbody':\r
2768 case 'td':\r
2769 case 'tfoot':\r
2770 case 'th':\r
2771 case 'thead':\r
2772 case 'tr':\r
2773 // Parse error. Ignore the token.\r
2774 break;\r
2775\r
2776 /* A start or end tag whose tag name is one of: "event-source",\r
2777 "section", "nav", "article", "aside", "header", "footer",\r
2778 "datagrid", "command" */\r
2779 case 'event-source':\r
2780 case 'section':\r
2781 case 'nav':\r
2782 case 'article':\r
2783 case 'aside':\r
2784 case 'header':\r
2785 case 'footer':\r
2786 case 'datagrid':\r
2787 case 'command':\r
2788 // Work in progress!\r
2789 break;\r
2790\r
2791 /* A start tag token not covered by the previous entries */\r
2792 default:\r
2793 /* Reconstruct the active formatting elements, if any. */\r
2794 $this->reconstructActiveFormattingElements();\r
2795\r
2796 $this->insertElement($token, true, true);\r
2797 break;\r
2798 }\r
2799 break;\r
2800\r
2801 case HTML5::ENDTAG:\r
2802 switch ($token['name']) {\r
2803 /* An end tag with the tag name "body" */\r
2804 case 'body':\r
2805 /* If the second element in the stack of open elements is\r
2806 not a body element, this is a parse error. Ignore the token.\r
2807 (innerHTML case) */\r
2808 if (count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') {\r
2809 // Ignore.\r
2810\r
2811 /* If the current node is not the body element, then this\r
2812 is a parse error. */\r
2813 } elseif (end($this->stack)->nodeName !== 'body') {\r
2814 // Parse error.\r
2815 }\r
2816\r
2817 /* Change the insertion mode to "after body". */\r
2818 $this->mode = self::AFTER_BODY;\r
2819 break;\r
2820\r
2821 /* An end tag with the tag name "html" */\r
2822 case 'html':\r
2823 /* Act as if an end tag with tag name "body" had been seen,\r
2824 then, if that token wasn't ignored, reprocess the current\r
2825 token. */\r
2826 $this->inBody(\r
2827 array(\r
2828 'name' => 'body',\r
2829 'type' => HTML5::ENDTAG\r
2830 )\r
2831 );\r
2832\r
2833 return $this->afterBody($token);\r
2834 break;\r
2835\r
2836 /* An end tag whose tag name is one of: "address", "blockquote",\r
2837 "center", "dir", "div", "dl", "fieldset", "listing", "menu",\r
2838 "ol", "pre", "ul" */\r
2839 case 'address':\r
2840 case 'blockquote':\r
2841 case 'center':\r
2842 case 'dir':\r
2843 case 'div':\r
2844 case 'dl':\r
2845 case 'fieldset':\r
2846 case 'listing':\r
2847 case 'menu':\r
2848 case 'ol':\r
2849 case 'pre':\r
2850 case 'ul':\r
2851 /* If the stack of open elements has an element in scope\r
2852 with the same tag name as that of the token, then generate\r
2853 implied end tags. */\r
2854 if ($this->elementInScope($token['name'])) {\r
2855 $this->generateImpliedEndTags();\r
2856\r
2857 /* Now, if the current node is not an element with\r
2858 the same tag name as that of the token, then this\r
2859 is a parse error. */\r
2860 // w/e\r
2861\r
2862 /* If the stack of open elements has an element in\r
2863 scope with the same tag name as that of the token,\r
2864 then pop elements from this stack until an element\r
2865 with that tag name has been popped from the stack. */\r
2866 for ($n = count($this->stack) - 1; $n >= 0; $n--) {\r
2867 if ($this->stack[$n]->nodeName === $token['name']) {\r
2868 $n = -1;\r
2869 }\r
2870\r
2871 array_pop($this->stack);\r
2872 }\r
2873 }\r
2874 break;\r
2875\r
2876 /* An end tag whose tag name is "form" */\r
2877 case 'form':\r
2878 /* If the stack of open elements has an element in scope\r
2879 with the same tag name as that of the token, then generate\r
2880 implied end tags. */\r
2881 if ($this->elementInScope($token['name'])) {\r
2882 $this->generateImpliedEndTags();\r
2883\r
2884 }\r
2885\r
2886 if (end($this->stack)->nodeName !== $token['name']) {\r
2887 /* Now, if the current node is not an element with the\r
2888 same tag name as that of the token, then this is a parse\r
2889 error. */\r
2890 // w/e\r
2891\r
2892 } else {\r
2893 /* Otherwise, if the current node is an element with\r
2894 the same tag name as that of the token pop that element\r
2895 from the stack. */\r
2896 array_pop($this->stack);\r
2897 }\r
2898\r
2899 /* In any case, set the form element pointer to null. */\r
2900 $this->form_pointer = null;\r
2901 break;\r
2902\r
2903 /* An end tag whose tag name is "p" */\r
2904 case 'p':\r
2905 /* If the stack of open elements has a p element in scope,\r
2906 then generate implied end tags, except for p elements. */\r
2907 if ($this->elementInScope('p')) {\r
2908 $this->generateImpliedEndTags(array('p'));\r
2909\r
2910 /* If the current node is not a p element, then this is\r
2911 a parse error. */\r
2912 // k\r
2913\r
2914 /* If the stack of open elements has a p element in\r
2915 scope, then pop elements from this stack until the stack\r
2916 no longer has a p element in scope. */\r
2917 for ($n = count($this->stack) - 1; $n >= 0; $n--) {\r
2918 if ($this->elementInScope('p')) {\r
2919 array_pop($this->stack);\r
2920\r
2921 } else {\r
2922 break;\r
2923 }\r
2924 }\r
2925 }\r
2926 break;\r
2927\r
2928 /* An end tag whose tag name is "dd", "dt", or "li" */\r
2929 case 'dd':\r
2930 case 'dt':\r
2931 case 'li':\r
2932 /* If the stack of open elements has an element in scope\r
2933 whose tag name matches the tag name of the token, then\r
2934 generate implied end tags, except for elements with the\r
2935 same tag name as the token. */\r
2936 if ($this->elementInScope($token['name'])) {\r
2937 $this->generateImpliedEndTags(array($token['name']));\r
2938\r
2939 /* If the current node is not an element with the same\r
2940 tag name as the token, then this is a parse error. */\r
2941 // w/e\r
2942\r
2943 /* If the stack of open elements has an element in scope\r
2944 whose tag name matches the tag name of the token, then\r
2945 pop elements from this stack until an element with that\r
2946 tag name has been popped from the stack. */\r
2947 for ($n = count($this->stack) - 1; $n >= 0; $n--) {\r
2948 if ($this->stack[$n]->nodeName === $token['name']) {\r
2949 $n = -1;\r
2950 }\r
2951\r
2952 array_pop($this->stack);\r
2953 }\r
2954 }\r
2955 break;\r
2956\r
2957 /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4",\r
2958 "h5", "h6" */\r
2959 case 'h1':\r
2960 case 'h2':\r
2961 case 'h3':\r
2962 case 'h4':\r
2963 case 'h5':\r
2964 case 'h6':\r
2965 $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6');\r
2966\r
2967 /* If the stack of open elements has in scope an element whose\r
2968 tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then\r
2969 generate implied end tags. */\r
2970 if ($this->elementInScope($elements)) {\r
2971 $this->generateImpliedEndTags();\r
2972\r
2973 /* Now, if the current node is not an element with the same\r
2974 tag name as that of the token, then this is a parse error. */\r
2975 // w/e\r
2976\r
2977 /* If the stack of open elements has in scope an element\r
2978 whose tag name is one of "h1", "h2", "h3", "h4", "h5", or\r
2979 "h6", then pop elements from the stack until an element\r
2980 with one of those tag names has been popped from the stack. */\r
2981 while ($this->elementInScope($elements)) {\r
2982 array_pop($this->stack);\r
2983 }\r
2984 }\r
2985 break;\r
2986\r
2987 /* An end tag whose tag name is one of: "a", "b", "big", "em",\r
2988 "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */\r
2989 case 'a':\r
2990 case 'b':\r
2991 case 'big':\r
2992 case 'em':\r
2993 case 'font':\r
2994 case 'i':\r
2995 case 'nobr':\r
2996 case 's':\r
2997 case 'small':\r
2998 case 'strike':\r
2999 case 'strong':\r
3000 case 'tt':\r
3001 case 'u':\r
3002 /* 1. Let the formatting element be the last element in\r
3003 the list of active formatting elements that:\r
3004 * is between the end of the list and the last scope\r
3005 marker in the list, if any, or the start of the list\r
3006 otherwise, and\r
3007 * has the same tag name as the token.\r
3008 */\r
3009 while (true) {\r
3010 for ($a = count($this->a_formatting) - 1; $a >= 0; $a--) {\r
3011 if ($this->a_formatting[$a] === self::MARKER) {\r
3012 break;\r
3013\r
3014 } elseif ($this->a_formatting[$a]->tagName === $token['name']) {\r
3015 $formatting_element = $this->a_formatting[$a];\r
3016 $in_stack = in_array($formatting_element, $this->stack, true);\r
3017 $fe_af_pos = $a;\r
3018 break;\r
3019 }\r
3020 }\r
3021\r
3022 /* If there is no such node, or, if that node is\r
3023 also in the stack of open elements but the element\r
3024 is not in scope, then this is a parse error. Abort\r
3025 these steps. The token is ignored. */\r
3026 if (!isset($formatting_element) || ($in_stack &&\r
3027 !$this->elementInScope($token['name']))\r
3028 ) {\r
3029 break;\r
3030\r
3031 /* Otherwise, if there is such a node, but that node\r
3032 is not in the stack of open elements, then this is a\r
3033 parse error; remove the element from the list, and\r
3034 abort these steps. */\r
3035 } elseif (isset($formatting_element) && !$in_stack) {\r
3036 unset($this->a_formatting[$fe_af_pos]);\r
3037 $this->a_formatting = array_merge($this->a_formatting);\r
3038 break;\r
3039 }\r
3040\r
3041 /* 2. Let the furthest block be the topmost node in the\r
3042 stack of open elements that is lower in the stack\r
3043 than the formatting element, and is not an element in\r
3044 the phrasing or formatting categories. There might\r
3045 not be one. */\r
3046 $fe_s_pos = array_search($formatting_element, $this->stack, true);\r
3047 $length = count($this->stack);\r
3048\r
3049 for ($s = $fe_s_pos + 1; $s < $length; $s++) {\r
3050 $category = $this->getElementCategory($this->stack[$s]->nodeName);\r
3051\r
3052 if ($category !== self::PHRASING && $category !== self::FORMATTING) {\r
3053 $furthest_block = $this->stack[$s];\r
3054 }\r
3055 }\r
3056\r
3057 /* 3. If there is no furthest block, then the UA must\r
3058 skip the subsequent steps and instead just pop all\r
3059 the nodes from the bottom of the stack of open\r
3060 elements, from the current node up to the formatting\r
3061 element, and remove the formatting element from the\r
3062 list of active formatting elements. */\r
3063 if (!isset($furthest_block)) {\r
3064 for ($n = $length - 1; $n >= $fe_s_pos; $n--) {\r
3065 array_pop($this->stack);\r
3066 }\r
3067\r
3068 unset($this->a_formatting[$fe_af_pos]);\r
3069 $this->a_formatting = array_merge($this->a_formatting);\r
3070 break;\r
3071 }\r
3072\r
3073 /* 4. Let the common ancestor be the element\r
3074 immediately above the formatting element in the stack\r
3075 of open elements. */\r
3076 $common_ancestor = $this->stack[$fe_s_pos - 1];\r
3077\r
3078 /* 5. If the furthest block has a parent node, then\r
3079 remove the furthest block from its parent node. */\r
3080 if ($furthest_block->parentNode !== null) {\r
3081 $furthest_block->parentNode->removeChild($furthest_block);\r
3082 }\r
3083\r
3084 /* 6. Let a bookmark note the position of the\r
3085 formatting element in the list of active formatting\r
3086 elements relative to the elements on either side\r
3087 of it in the list. */\r
3088 $bookmark = $fe_af_pos;\r
3089\r
3090 /* 7. Let node and last node be the furthest block.\r
3091 Follow these steps: */\r
3092 $node = $furthest_block;\r
3093 $last_node = $furthest_block;\r
3094\r
3095 while (true) {\r
3096 for ($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) {\r
3097 /* 7.1 Let node be the element immediately\r
3098 prior to node in the stack of open elements. */\r
3099 $node = $this->stack[$n];\r
3100\r
3101 /* 7.2 If node is not in the list of active\r
3102 formatting elements, then remove node from\r
3103 the stack of open elements and then go back\r
3104 to step 1. */\r
3105 if (!in_array($node, $this->a_formatting, true)) {\r
3106 unset($this->stack[$n]);\r
3107 $this->stack = array_merge($this->stack);\r
3108\r
3109 } else {\r
3110 break;\r
3111 }\r
3112 }\r
3113\r
3114 /* 7.3 Otherwise, if node is the formatting\r
3115 element, then go to the next step in the overall\r
3116 algorithm. */\r
3117 if ($node === $formatting_element) {\r
3118 break;\r
3119\r
3120 /* 7.4 Otherwise, if last node is the furthest\r
3121 block, then move the aforementioned bookmark to\r
3122 be immediately after the node in the list of\r
3123 active formatting elements. */\r
3124 } elseif ($last_node === $furthest_block) {\r
3125 $bookmark = array_search($node, $this->a_formatting, true) + 1;\r
3126 }\r
3127\r
3128 /* 7.5 If node has any children, perform a\r
3129 shallow clone of node, replace the entry for\r
3130 node in the list of active formatting elements\r
3131 with an entry for the clone, replace the entry\r
3132 for node in the stack of open elements with an\r
3133 entry for the clone, and let node be the clone. */\r
3134 if ($node->hasChildNodes()) {\r
3135 $clone = $node->cloneNode();\r
3136 $s_pos = array_search($node, $this->stack, true);\r
3137 $a_pos = array_search($node, $this->a_formatting, true);\r
3138\r
3139 $this->stack[$s_pos] = $clone;\r
3140 $this->a_formatting[$a_pos] = $clone;\r
3141 $node = $clone;\r
3142 }\r
3143\r
3144 /* 7.6 Insert last node into node, first removing\r
3145 it from its previous parent node if any. */\r
3146 if ($last_node->parentNode !== null) {\r
3147 $last_node->parentNode->removeChild($last_node);\r
3148 }\r
3149\r
3150 $node->appendChild($last_node);\r
3151\r
3152 /* 7.7 Let last node be node. */\r
3153 $last_node = $node;\r
3154 }\r
3155\r
3156 /* 8. Insert whatever last node ended up being in\r
3157 the previous step into the common ancestor node,\r
3158 first removing it from its previous parent node if\r
3159 any. */\r
3160 if ($last_node->parentNode !== null) {\r
3161 $last_node->parentNode->removeChild($last_node);\r
3162 }\r
3163\r
3164 $common_ancestor->appendChild($last_node);\r
3165\r
3166 /* 9. Perform a shallow clone of the formatting\r
3167 element. */\r
3168 $clone = $formatting_element->cloneNode();\r
3169\r
3170 /* 10. Take all of the child nodes of the furthest\r
3171 block and append them to the clone created in the\r
3172 last step. */\r
3173 while ($furthest_block->hasChildNodes()) {\r
3174 $child = $furthest_block->firstChild;\r
3175 $furthest_block->removeChild($child);\r
3176 $clone->appendChild($child);\r
3177 }\r
3178\r
3179 /* 11. Append that clone to the furthest block. */\r
3180 $furthest_block->appendChild($clone);\r
3181\r
3182 /* 12. Remove the formatting element from the list\r
3183 of active formatting elements, and insert the clone\r
3184 into the list of active formatting elements at the\r
3185 position of the aforementioned bookmark. */\r
3186 $fe_af_pos = array_search($formatting_element, $this->a_formatting, true);\r
3187 unset($this->a_formatting[$fe_af_pos]);\r
3188 $this->a_formatting = array_merge($this->a_formatting);\r
3189\r
3190 $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1);\r
3191 $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting));\r
3192 $this->a_formatting = array_merge($af_part1, array($clone), $af_part2);\r
3193\r
3194 /* 13. Remove the formatting element from the stack\r
3195 of open elements, and insert the clone into the stack\r
3196 of open elements immediately after (i.e. in a more\r
3197 deeply nested position than) the position of the\r
3198 furthest block in that stack. */\r
3199 $fe_s_pos = array_search($formatting_element, $this->stack, true);\r
3200 $fb_s_pos = array_search($furthest_block, $this->stack, true);\r
3201 unset($this->stack[$fe_s_pos]);\r
3202\r
3203 $s_part1 = array_slice($this->stack, 0, $fb_s_pos);\r
3204 $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack));\r
3205 $this->stack = array_merge($s_part1, array($clone), $s_part2);\r
3206\r
3207 /* 14. Jump back to step 1 in this series of steps. */\r
3208 unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block);\r
3209 }\r
3210 break;\r
3211\r
3212 /* An end tag token whose tag name is one of: "button",\r
3213 "marquee", "object" */\r
3214 case 'button':\r
3215 case 'marquee':\r
3216 case 'object':\r
3217 /* If the stack of open elements has an element in scope whose\r
3218 tag name matches the tag name of the token, then generate implied\r
3219 tags. */\r
3220 if ($this->elementInScope($token['name'])) {\r
3221 $this->generateImpliedEndTags();\r
3222\r
3223 /* Now, if the current node is not an element with the same\r
3224 tag name as the token, then this is a parse error. */\r
3225 // k\r
3226\r
3227 /* Now, if the stack of open elements has an element in scope\r
3228 whose tag name matches the tag name of the token, then pop\r
3229 elements from the stack until that element has been popped from\r
3230 the stack, and clear the list of active formatting elements up\r
3231 to the last marker. */\r
3232 for ($n = count($this->stack) - 1; $n >= 0; $n--) {\r
3233 if ($this->stack[$n]->nodeName === $token['name']) {\r
3234 $n = -1;\r
3235 }\r
3236\r
3237 array_pop($this->stack);\r
3238 }\r
3239\r
3240 $marker = end(array_keys($this->a_formatting, self::MARKER, true));\r
3241\r
3242 for ($n = count($this->a_formatting) - 1; $n > $marker; $n--) {\r
3243 array_pop($this->a_formatting);\r
3244 }\r
3245 }\r
3246 break;\r
3247\r
3248 /* Or an end tag whose tag name is one of: "area", "basefont",\r
3249 "bgsound", "br", "embed", "hr", "iframe", "image", "img",\r
3250 "input", "isindex", "noembed", "noframes", "param", "select",\r
3251 "spacer", "table", "textarea", "wbr" */\r
3252 case 'area':\r
3253 case 'basefont':\r
3254 case 'bgsound':\r
3255 case 'br':\r
3256 case 'embed':\r
3257 case 'hr':\r
3258 case 'iframe':\r
3259 case 'image':\r
3260 case 'img':\r
3261 case 'input':\r
3262 case 'isindex':\r
3263 case 'noembed':\r
3264 case 'noframes':\r
3265 case 'param':\r
3266 case 'select':\r
3267 case 'spacer':\r
3268 case 'table':\r
3269 case 'textarea':\r
3270 case 'wbr':\r
3271 // Parse error. Ignore the token.\r
3272 break;\r
3273\r
3274 /* An end tag token not covered by the previous entries */\r
3275 default:\r
3276 for ($n = count($this->stack) - 1; $n >= 0; $n--) {\r
3277 /* Initialise node to be the current node (the bottommost\r
3278 node of the stack). */\r
3279 $node = end($this->stack);\r
3280\r
3281 /* If node has the same tag name as the end tag token,\r
3282 then: */\r
3283 if ($token['name'] === $node->nodeName) {\r
3284 /* Generate implied end tags. */\r
3285 $this->generateImpliedEndTags();\r
3286\r
3287 /* If the tag name of the end tag token does not\r
3288 match the tag name of the current node, this is a\r
3289 parse error. */\r
3290 // k\r
3291\r
3292 /* Pop all the nodes from the current node up to\r
3293 node, including node, then stop this algorithm. */\r
3294 for ($x = count($this->stack) - $n; $x >= $n; $x--) {\r
3295 array_pop($this->stack);\r
3296 }\r
3297\r
3298 } else {\r
3299 $category = $this->getElementCategory($node);\r
3300\r
3301 if ($category !== self::SPECIAL && $category !== self::SCOPING) {\r
3302 /* Otherwise, if node is in neither the formatting\r
3303 category nor the phrasing category, then this is a\r
3304 parse error. Stop this algorithm. The end tag token\r
3305 is ignored. */\r
3306 return false;\r
3307 }\r
3308 }\r
3309 }\r
3310 break;\r
3311 }\r
3312 break;\r
3313 }\r
3314 }\r
3315\r
/**
 * Processes one token while the insertion mode is "in table".
 *
 * Table-structure start tags open the matching element and switch the
 * insertion mode; anything that does not belong directly inside a table
 * is handed to inBody() after the foster parent has been determined.
 *
 * @param array $token Token array; 'type' is always read, 'name' is read
 *                     for tag tokens and 'data' for character/comment tokens.
 * @return mixed false when an end tag "table" (real or emulated) is ignored
 *               because no table element is in table scope; otherwise null
 *               or whatever the handler the token was forwarded to returns.
 */
private function inTable($token)
{
    $clear = array('html', 'table');

    /* A character token that is one of one of U+0009 CHARACTER TABULATION,
    U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
    or U+0020 SPACE */
    if ($token['type'] === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $text = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($text);

    /* A comment token */
    } elseif ($token['type'] === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($comment);

    /* A start tag whose tag name is "caption" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'caption'
    ) {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert a marker at the end of the list of active
        formatting elements. */
        $this->a_formatting[] = self::MARKER;

        /* Insert an HTML element for the token, then switch the
        insertion mode to "in caption". */
        $this->insertElement($token);
        $this->mode = self::IN_CAPTION;

    /* A start tag whose tag name is "colgroup" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'colgroup'
    ) {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the
        insertion mode to "in column group". */
        $this->insertElement($token);
        $this->mode = self::IN_CGROUP;

    /* A start tag whose tag name is "col" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'col'
    ) {
        /* Act as if a start tag token with the tag name "colgroup" had
        been seen, then reprocess the current token in column group mode. */
        $this->inTable(
            array(
                'name' => 'colgroup',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        $this->inColumnGroup($token);

    /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array('tbody', 'tfoot', 'thead')
    )
    ) {
        /* Clear the stack back to a table context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in table body". */
        $this->insertElement($token);
        $this->mode = self::IN_TBODY;

    /* A start tag whose tag name is one of: "td", "th", "tr" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        in_array($token['name'], array('td', 'th', 'tr'))
    ) {
        /* Act as if a start tag token with the tag name "tbody" had been
        seen, then reprocess the current token. */
        $this->inTable(
            array(
                'name' => 'tbody',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inTableBody($token);

    /* A start tag whose tag name is "table" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        $token['name'] === 'table'
    ) {
        /* Parse error. Act as if an end tag token with the tag name "table"
        had been seen, then, if that token wasn't ignored, reprocess the
        current token. */
        $closed = $this->inTable(
            array(
                'name' => 'table',
                'type' => HTML5::ENDTAG
            )
        );

        /* The end tag branch below returns false when it ignores the
        token. In that case nothing was popped and the insertion mode is
        unchanged, so the start tag must be dropped instead of being
        reprocessed. */
        if ($closed === false) {
            return false;
        }

        return $this->mainPhase($token);

    /* An end tag whose tag name is "table" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table'
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if (!$this->elementInScope($token['name'], true)) {
            return false;

        /* Otherwise: */
        } else {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* If the current node is not a table element at this point it
            is a parse error; nothing is reported for it here. */

            /* Pop elements from this stack until a table element has been
            popped from the stack. */
            while (true) {
                $current = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($current === 'table') {
                    break;
                }
            }

            /* Reset the insertion mode appropriately. */
            $this->resetInsertionMode();
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array(
            'body',
            'caption',
            'col',
            'colgroup',
            'html',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )
    ) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* Parse error. Process the token as if the insertion mode was "in
        body", with the following exception: */

        /* If the current node is a table, tbody, tfoot, thead, or tr
        element, then, whenever a node would be inserted into the current
        node, it must instead be inserted into the foster parent element. */
        if (in_array(
            end($this->stack)->nodeName,
            array('table', 'tbody', 'tfoot', 'thead', 'tr')
        )
        ) {
            /* Find the last (most deeply nested) table element in the
            stack of open elements; $n keeps its index for the fallback
            below. */
            for ($n = count($this->stack) - 1; $n >= 0; $n--) {
                if ($this->stack[$n]->nodeName === 'table') {
                    $table = $this->stack[$n];
                    break;
                }
            }

            if (isset($table) && $table->parentNode !== null) {
                /* The table has a parent: that parent is the foster
                parent. */
                $this->foster_parent = $table->parentNode;

            } elseif (!isset($table)) {
                /* No table element in the stack (innerHTML case): use the
                first element in the stack of open elements. */
                $this->foster_parent = $this->stack[0];

            } elseif (isset($table) && ($table->parentNode === null ||
                    $table->parentNode->nodeType !== XML_ELEMENT_NODE)
            ) {
                /* The table has no parent: use the element just before it
                in the stack of open elements. */
                $this->foster_parent = $this->stack[$n - 1];
            }
        }

        $this->inBody($token);
    }
}
3527\r
/**
 * Processes one token while the insertion mode is "in caption".
 *
 * @param array $token Token array; 'type' is always read and 'name' is
 *                     read for tag tokens.
 * @return mixed The result of inTable() when the token is reprocessed
 *               there after the caption has been closed; otherwise null.
 */
private function inCaption($token)
{
    /* An end tag whose tag name is "caption" */
    if ($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if (!$this->elementInScope($token['name'], true)) {
            // Ignore

        /* Otherwise: */
        } else {
            /* Generate implied end tags. */
            $this->generateImpliedEndTags();

            /* If the current node is not a caption element at this point
            it is a parse error; nothing is reported for it here. */

            /* Pop elements from this stack until a caption element has
            been popped from the stack. */
            while (true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($node === 'caption') {
                    break;
                }
            }

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in table". */
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
    name is "table" */
    } elseif (($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array(
            'caption',
            'col',
            'colgroup',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )) || ($token['type'] === HTML5::ENDTAG &&
        $token['name'] === 'table')
    ) {
        /* Parse error. Act as if an end tag with the tag name "caption"
        had been seen, then reprocess the current token with the "in
        table" handler. */
        $this->inCaption(
            array(
                'name' => 'caption',
                'type' => HTML5::ENDTAG
            )
        );

        return $this->inTable($token);

    /* An end tag whose tag name is one of: "body", "col", "colgroup",
    "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array(
            'body',
            'col',
            'colgroup',
            'html',
            'tbody',
            'td', // was missing, which sent a stray </td> on to inBody()
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )
    ) {
        // Parse error. Ignore the token.

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
3622\r
/**
 * Processes one token while the insertion mode is "in column group".
 *
 * @param array $token Token array; 'type' is always read, 'name' is read
 *                     for tag tokens and 'data' for character/comment tokens.
 * @return mixed The result of inTable() when the token falls through to
 *               the "anything else" case; otherwise null.
 */
private function inColumnGroup($token)
{
    $type = $token['type'];

    /* Whitespace-only character data (tab, LF, VT, FF, space) is appended
    to the current node as a text node. */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $whitespace = $this->dom->createTextNode($token['data']);
        end($this->stack)->appendChild($whitespace);
        return;
    }

    /* A comment token becomes a Comment node under the current node. */
    if ($type === HTML5::COMMENT) {
        $commentNode = $this->dom->createComment($token['data']);
        end($this->stack)->appendChild($commentNode);
        return;
    }

    /* <col>: insert the element and pop it again straight away, so it
    never stays on the stack of open elements. */
    if ($type === HTML5::STARTTAG && $token['name'] === 'col') {
        $this->insertElement($token);
        array_pop($this->stack);
        return;
    }

    /* </colgroup>: ignored when the current node is the root html element
    (innerHTML case); otherwise the current node is popped and the
    insertion mode goes back to "in table". */
    if ($type === HTML5::ENDTAG && $token['name'] === 'colgroup') {
        if (end($this->stack)->nodeName !== 'html') {
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }
        return;
    }

    /* </col>: parse error, the token is ignored. */
    if ($type === HTML5::ENDTAG && $token['name'] === 'col') {
        return;
    }

    /* Anything else: act as if </colgroup> had been seen, then hand the
    current token to the "in table" handler. */
    $impliedEnd = array(
        'name' => 'colgroup',
        'type' => HTML5::ENDTAG
    );
    $this->inColumnGroup($impliedEnd);

    return $this->inTable($token);
}
3684\r
/**
 * Processes one token while the insertion mode is "in table body".
 *
 * @param array $token Token array; 'type' is always read and 'name' is
 *                     read for tag tokens.
 * @return mixed The result of inRow() or mainPhase() when the token is
 *               reprocessed there; otherwise null.
 */
private function inTableBody($token)
{
    $clear = array('tbody', 'tfoot', 'thead', 'html');

    /* A start tag whose tag name is "tr" */
    if ($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
        /* Clear the stack back to a table body context. */
        $this->clearStackToTableContext($clear);

        /* Insert a tr element for the token, then switch the insertion
        mode to "in row". */
        $this->insertElement($token);
        $this->mode = self::IN_ROW;

    /* A start tag whose tag name is one of: "th", "td" */
    } elseif ($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')
    ) {
        /* Parse error. Act as if a start tag with the tag name "tr" had
        been seen, then reprocess the current token. */
        $this->inTableBody(
            array(
                'name' => 'tr',
                'type' => HTML5::STARTTAG,
                'attr' => array()
            )
        );

        return $this->inRow($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if (!$this->elementInScope($token['name'], true)) {
            // Ignore

        /* Otherwise: */
        } else {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node from the stack of open elements. Switch
            the insertion mode to "in table". */
            array_pop($this->stack);
            $this->mode = self::IN_TABLE;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", or an end tag whose tag name is "table".
    (A start tag "table" is not handled here; it reaches the "anything
    else" branch and is dealt with by inTable().) */
    } elseif (($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead')
    )) ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')
    ) {
        /* If the stack of open elements does not have a tbody, thead, or
        tfoot element in table scope, this is a parse error. Ignore the
        token. (innerHTML case) */
        if (!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Clear the stack back to a table body context. */
            $this->clearStackToTableContext($clear);

            /* Act as if an end tag with the same tag name as the current
            node ("tbody", "tfoot", or "thead") had been seen, then
            reprocess the current token. */
            $this->inTableBody(
                array(
                    'name' => end($this->stack)->nodeName,
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->mainPhase($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr')
    )
    ) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
3783\r
/**
 * Processes one token while the insertion mode is "in row".
 *
 * @param array $token Token array; 'type' is always read and 'name' is
 *                     read for tag tokens.
 * @return mixed The result of inTableBody() when the token is reprocessed
 *               there after the row has been closed; otherwise null.
 */
private function inRow($token)
{
    $clear = array('tr', 'html');

    /* A start tag whose tag name is one of: "th", "td" */
    if ($token['type'] === HTML5::STARTTAG &&
        ($token['name'] === 'th' || $token['name'] === 'td')
    ) {
        /* Clear the stack back to a table row context. */
        $this->clearStackToTableContext($clear);

        /* Insert an HTML element for the token, then switch the insertion
        mode to "in cell". */
        $this->insertElement($token);
        $this->mode = self::IN_CELL;

        /* Insert a marker at the end of the list of active formatting
        elements. */
        $this->a_formatting[] = self::MARKER;

    /* An end tag whose tag name is "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. (innerHTML case) */
        if (!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Clear the stack back to a table row context. */
            $this->clearStackToTableContext($clear);

            /* Pop the current node (which will be a tr element) from the
            stack of open elements. Switch the insertion mode to "in table
            body". */
            array_pop($this->stack);
            $this->mode = self::IN_TBODY;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
    } elseif (($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr')
    )) ||
        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')
    ) {
        /* Act as if an end tag with the tag name "tr" had been seen, then
        reprocess the current token. */
        $this->inRow(
            array(
                'name' => 'tr',
                'type' => HTML5::ENDTAG
            )
        );

        /* Closing the row switches to "in table body", so the token is
        reprocessed by that handler. The "in cell" handler would discard
        these start tags because no td/th is in table scope any more. */
        return $this->inTableBody($token);

    /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
    } elseif ($token['type'] === HTML5::ENDTAG &&
        in_array($token['name'], array('tbody', 'tfoot', 'thead'))
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as the token, this is a parse error.
        Ignore the token. */
        if (!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Act as if an end tag with the tag name "tr" had been seen,
            then reprocess the current token with the "in table body"
            handler, which is the one that closes tbody/tfoot/thead. */
            $this->inRow(
                array(
                    'name' => 'tr',
                    'type' => HTML5::ENDTAG
                )
            );

            return $this->inTableBody($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html", "td", "th" ("tr" is handled further up) */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th')
    )
    ) {
        /* Parse error. Ignore the token. */

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in table". */
        $this->inTable($token);
    }
}
3881\r
/**
 * Processes one token while the insertion mode is "in cell".
 *
 * @param array $token Token array; 'type' is always read and 'name' is
 *                     read for tag tokens.
 * @return mixed The result of inRow() when the cell is closed and the
 *               token is reprocessed there; otherwise null.
 */
private function inCell($token)
{
    /* An end tag whose tag name is one of: "td", "th" */
    if ($token['type'] === HTML5::ENDTAG &&
        ($token['name'] === 'td' || $token['name'] === 'th')
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token, then this is a
        parse error and the token must be ignored. */
        if (!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise: */
        } else {
            /* Generate implied end tags, except for elements with the same
            tag name as the token. */
            $this->generateImpliedEndTags(array($token['name']));

            /* If the current node is not an element with the same tag name
            as the token at this point it is a parse error; nothing is
            reported for it here. */

            /* Pop elements from this stack until an element with the same
            tag name as the token has been popped from the stack. */
            while (true) {
                $node = end($this->stack)->nodeName;
                array_pop($this->stack);

                if ($node === $token['name']) {
                    break;
                }
            }

            /* Clear the list of active formatting elements up to the last
            marker. */
            $this->clearTheActiveFormattingElementsUpToTheLastMarker();

            /* Switch the insertion mode to "in row". (The current node
            will be a tr element at this point.) */
            $this->mode = self::IN_ROW;
        }

    /* A start tag whose tag name is one of: "caption", "col", "colgroup",
    "tbody", "td", "tfoot", "th", "thead", "tr" */
    } elseif ($token['type'] === HTML5::STARTTAG && in_array(
        $token['name'],
        array(
            'caption',
            'col',
            'colgroup',
            'tbody',
            'td',
            'tfoot',
            'th',
            'thead',
            'tr'
        )
    )
    ) {
        /* If the stack of open elements does not have a td or th element
        in table scope, then this is a parse error; ignore the token.
        (innerHTML case) */
        if (!$this->elementInScope(array('td', 'th'), true)) {
            // Ignore.

        /* Otherwise, close the cell and reprocess the current token with
        the "in row" handler. */
        } else {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* An end tag whose tag name is one of: "body", "caption", "col",
    "colgroup", "html" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('body', 'caption', 'col', 'colgroup', 'html')
    )
    ) {
        /* Parse error. Ignore the token. */

    /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
    "thead", "tr" */
    } elseif ($token['type'] === HTML5::ENDTAG && in_array(
        $token['name'],
        array('table', 'tbody', 'tfoot', 'thead', 'tr')
    )
    ) {
        /* If the stack of open elements does not have an element in table
        scope with the same tag name as that of the token (which can only
        happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
        then this is a parse error and the token must be ignored. */
        if (!$this->elementInScope($token['name'], true)) {
            // Ignore.

        /* Otherwise, close the cell and reprocess the current token with
        the "in row" handler. */
        } else {
            $this->closeCell();
            return $this->inRow($token);
        }

    /* Anything else */
    } else {
        /* Process the token as if the insertion mode was "in body". */
        $this->inBody($token);
    }
}
4020\r
4021 private function inSelect($token)\r
4022 {\r
4023 /* Handle the token as follows: */\r
4024\r
4025 /* A character token */\r
4026 if ($token['type'] === HTML5::CHARACTR) {\r
4027 /* Append the token's character to the current node. */\r
4028 $this->insertText($token['data']);\r
4029\r
4030 /* A comment token */\r
4031 } elseif ($token['type'] === HTML5::COMMENT) {\r
4032 /* Append a Comment node to the current node with the data\r
4033 attribute set to the data given in the comment token. */\r
4034 $this->insertComment($token['data']);\r
4035\r
4036 /* A start tag token whose tag name is "option" */\r
4037 } elseif ($token['type'] === HTML5::STARTTAG &&\r
4038 $token['name'] === 'option'\r
4039 ) {\r
4040 /* If the current node is an option element, act as if an end tag\r
4041 with the tag name "option" had been seen. */\r
4042 if (end($this->stack)->nodeName === 'option') {\r
4043 $this->inSelect(\r
4044 array(\r
4045 'name' => 'option',\r
4046 'type' => HTML5::ENDTAG\r
4047 )\r
4048 );\r
4049 }\r
4050\r
4051 /* Insert an HTML element for the token. */\r
4052 $this->insertElement($token);\r
4053\r
4054 /* A start tag token whose tag name is "optgroup" */\r
4055 } elseif ($token['type'] === HTML5::STARTTAG &&\r
4056 $token['name'] === 'optgroup'\r
4057 ) {\r
4058 /* If the current node is an option element, act as if an end tag\r
4059 with the tag name "option" had been seen. */\r
4060 if (end($this->stack)->nodeName === 'option') {\r
4061 $this->inSelect(\r
4062 array(\r
4063 'name' => 'option',\r
4064 'type' => HTML5::ENDTAG\r
4065 )\r
4066 );\r
4067 }\r
4068\r
4069 /* If the current node is an optgroup element, act as if an end tag\r
4070 with the tag name "optgroup" had been seen. */\r
4071 if (end($this->stack)->nodeName === 'optgroup') {\r
4072 $this->inSelect(\r
4073 array(\r
4074 'name' => 'optgroup',\r
4075 'type' => HTML5::ENDTAG\r
4076 )\r
4077 );\r
4078 }\r
4079\r
4080 /* Insert an HTML element for the token. */\r
4081 $this->insertElement($token);\r
4082\r
4083 /* An end tag token whose tag name is "optgroup" */\r
4084 } elseif ($token['type'] === HTML5::ENDTAG &&\r
4085 $token['name'] === 'optgroup'\r
4086 ) {\r
4087 /* First, if the current node is an option element, and the node\r
4088 immediately before it in the stack of open elements is an optgroup\r
4089 element, then act as if an end tag with the tag name "option" had\r
4090 been seen. */\r
4091 $elements_in_stack = count($this->stack);\r
4092\r
4093 if ($this->stack[$elements_in_stack - 1]->nodeName === 'option' &&\r
4094 $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup'\r
4095 ) {\r
4096 $this->inSelect(\r
4097 array(\r
4098 'name' => 'option',\r
4099 'type' => HTML5::ENDTAG\r
4100 )\r
4101 );\r
4102 }\r
4103\r
4104 /* If the current node is an optgroup element, then pop that node\r
4105 from the stack of open elements. Otherwise, this is a parse error,\r
4106 ignore the token. */\r
4107 if ($this->stack[$elements_in_stack - 1] === 'optgroup') {\r
4108 array_pop($this->stack);\r
4109 }\r
4110\r
4111 /* An end tag token whose tag name is "option" */\r
4112 } elseif ($token['type'] === HTML5::ENDTAG &&\r
4113 $token['name'] === 'option'\r
4114 ) {\r
4115 /* If the current node is an option element, then pop that node\r
4116 from the stack of open elements. Otherwise, this is a parse error,\r
4117 ignore the token. */\r
4118 if (end($this->stack)->nodeName === 'option') {\r
4119 array_pop($this->stack);\r
4120 }\r
4121\r
4122 /* An end tag whose tag name is "select" */\r
4123 } elseif ($token['type'] === HTML5::ENDTAG &&\r
4124 $token['name'] === 'select'\r
4125 ) {\r
4126 /* If the stack of open elements does not have an element in table\r
4127 scope with the same tag name as the token, this is a parse error.\r
4128 Ignore the token. (innerHTML case) */\r
4129 if (!$this->elementInScope($token['name'], true)) {\r
4130 // w/e\r
4131\r
4132 /* Otherwise: */\r
4133 } else {\r
4134 /* Pop elements from the stack of open elements until a select\r
4135 element has been popped from the stack. */\r
4136 while (true) {\r
4137 $current = end($this->stack)->nodeName;\r
4138 array_pop($this->stack);\r
4139\r
4140 if ($current === 'select') {\r
4141 break;\r
4142 }\r
4143 }\r
4144\r
4145 /* Reset the insertion mode appropriately. */\r
4146 $this->resetInsertionMode();\r
4147 }\r
4148\r
4149 /* A start tag whose tag name is "select" */\r
4150 } elseif ($token['name'] === 'select' &&\r
4151 $token['type'] === HTML5::STARTTAG\r
4152 ) {\r
4153 /* Parse error. Act as if the token had been an end tag with the\r
4154 tag name "select" instead. */\r
4155 $this->inSelect(\r
4156 array(\r
4157 'name' => 'select',\r
4158 'type' => HTML5::ENDTAG\r
4159 )\r
4160 );\r
4161\r
4162 /* An end tag whose tag name is one of: "caption", "table", "tbody",\r
4163 "tfoot", "thead", "tr", "td", "th" */\r
4164 } elseif (in_array(\r
4165 $token['name'],\r
4166 array(\r
4167 'caption',\r
4168 'table',\r
4169 'tbody',\r
4170 'tfoot',\r
4171 'thead',\r
4172 'tr',\r
4173 'td',\r
4174 'th'\r
4175 )\r
4176 ) && $token['type'] === HTML5::ENDTAG\r
4177 ) {\r
4178 /* Parse error. */\r
4179 // w/e\r
4180\r
4181 /* If the stack of open elements has an element in table scope with\r
4182 the same tag name as that of the token, then act as if an end tag\r
4183 with the tag name "select" had been seen, and reprocess the token.\r
4184 Otherwise, ignore the token. */\r
4185 if ($this->elementInScope($token['name'], true)) {\r
4186 $this->inSelect(\r
4187 array(\r
4188 'name' => 'select',\r
4189 'type' => HTML5::ENDTAG\r
4190 )\r
4191 );\r
4192\r
4193 $this->mainPhase($token);\r
4194 }\r
4195\r
4196 /* Anything else */\r
4197 } else {\r
4198 /* Parse error. Ignore the token. */\r
4199 }\r
4200 }\r
4201\r
/**
 * Processes one token while the insertion mode is "after body".
 *
 * @param array $token Token from the tokeniser. 'type' is always read;
 *                     'data' is read for character and comment tokens and
 *                     'name' for end tags.
 * @return mixed The result of inBody() when the token is reprocessed in the
 *               "anything else" case, otherwise null.
 */
private function afterBody($token)
{
    $type = $token['type'];

    /* Character token consisting solely of tab, LF, VT, FF or space:
    process it exactly as the "in body" insertion mode would. */
    if ($type === HTML5::CHARACTR
        && preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        $this->inBody($token);
        return;
    }

    /* Comment token: the Comment node is attached to the first element in
    the stack of open elements rather than to the current node. */
    if ($type === HTML5::COMMENT) {
        $this->stack[0]->appendChild(
            $this->dom->createComment($token['data'])
        );
        return;
    }

    /* End tag "html": move on to the trailing end phase. (The innerHTML
    case, where this would be a parse error, is not distinguished here.) */
    if ($type === HTML5::ENDTAG && $token['name'] === 'html') {
        $this->phase = self::END_PHASE;
        return;
    }

    /* Anything else: parse error. Fall back to "in body" and reprocess
    the very same token there. */
    $this->mode = self::IN_BODY;
    return $this->inBody($token);
}
4242\r
/**
 * Processes one token while the insertion mode is "in frameset".
 *
 * Compared with the previous revision:
 *  - the tag name is only looked at when the token actually has one, so
 *    character and end-of-file tokens no longer trigger an undefined-index
 *    notice;
 *  - after a "frameset" end tag the mode only changes to "after frameset"
 *    when the new current node is no longer a frameset element, so nested
 *    framesets keep being parsed in this mode.
 *
 * @param array $token Token from the tokeniser ('type' is required; 'name'
 *                     and 'data' are read only where present/applicable).
 * @return void
 */
private function inFrameset($token)
{
    $type = $token['type'];
    // Only tag-like tokens carry a name.
    $name = isset($token['name']) ? $token['name'] : null;

    /* A character token made up only of tab, LF, VT, FF or space. */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* A start tag with the tag name "frameset" */
    } elseif ($type === HTML5::STARTTAG && $name === 'frameset') {
        $this->insertElement($token);

    /* An end tag with the tag name "frameset" */
    } elseif ($type === HTML5::ENDTAG && $name === 'frameset') {
        /* If the current node is the root html element, then this is a
        parse error; ignore the token. (innerHTML case) */
        if (end($this->stack)->nodeName !== 'html') {
            /* Otherwise, pop the current node from the stack of open
            elements. */
            array_pop($this->stack);

            /* If the current node is no longer a frameset element, change
            the insertion mode to "after frameset". An outer frameset that
            is still open keeps the parser in this mode. */
            $current = end($this->stack);
            if ($current === false || $current->nodeName !== 'frameset') {
                $this->mode = self::AFTR_FRAME;
            }
        }

    /* A start tag with the tag name "frame" */
    } elseif ($type === HTML5::STARTTAG && $name === 'frame') {
        /* Insert an HTML element for the token. */
        $this->insertElement($token);

        /* Immediately pop the current node off the stack of open elements. */
        array_pop($this->stack);

    /* A start tag with the tag name "noframes" */
    } elseif ($type === HTML5::STARTTAG && $name === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}
4311\r
/**
 * Processes one token while the insertion mode is "after frameset".
 *
 * The token type is tested before the tag name, and the name is fetched
 * defensively, so tokens without a 'name' key (character, end-of-file) no
 * longer raise an undefined-index notice on their way to the "anything
 * else" branch.
 *
 * @param array $token Token from the tokeniser ('type' is required; 'name'
 *                     and 'data' are read only where present/applicable).
 * @return void
 */
private function afterFrameset($token)
{
    $type = $token['type'];
    // Only tag-like tokens carry a name.
    $name = isset($token['name']) ? $token['name'] : null;

    /* A character token made up only of tab, LF, VT, FF or space. */
    if ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Append the character to the current node. */
        $this->insertText($token['data']);

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the current node with the data
        attribute set to the data given in the comment token. */
        $this->insertComment($token['data']);

    /* An end tag with the tag name "html" */
    } elseif ($type === HTML5::ENDTAG && $name === 'html') {
        /* Switch to the trailing end phase. */
        $this->phase = self::END_PHASE;

    /* A start tag with the tag name "noframes" */
    } elseif ($type === HTML5::STARTTAG && $name === 'noframes') {
        /* Process the token as if the insertion mode had been "in body". */
        $this->inBody($token);

    /* Anything else */
    } else {
        /* Parse error. Ignore the token. */
    }
}
4350\r
/**
 * Processes one token emitted after the main phase has ended (the
 * "trailing end" phase).
 *
 * Fix: the branch for character tokens that are NOT pure whitespace used to
 * repeat the whitespace test without negating it. Since whitespace-only
 * tokens are consumed by the branch before it, the branch could never match
 * for character tokens and such text was silently discarded instead of
 * being reprocessed in the main phase.
 *
 * @param array $token Token from the tokeniser ('type' is required; 'data'
 *                     is read for comment and character tokens).
 * @return mixed The result of mainPhase() when the parser switches back to
 *               the main phase, otherwise null.
 */
private function trailingEndPhase($token)
{
    $type = $token['type'];

    /* A DOCTYPE token */
    if ($type === HTML5::DOCTYPE) {
        // Parse error. Ignore the token.

    /* A comment token */
    } elseif ($type === HTML5::COMMENT) {
        /* Append a Comment node to the Document object with the data
        attribute set to the data given in the comment token. */
        $comment = $this->dom->createComment($token['data']);
        $this->dom->appendChild($comment);

    /* A character token made up only of tab, LF, VT, FF or space. */
    } elseif ($type === HTML5::CHARACTR &&
        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])
    ) {
        /* Process the token as it would be processed in the main phase. */
        $this->mainPhase($token);

    /* Any other character token (whitespace-only ones were consumed by the
    branch above, so what arrives here contains something else), or a start
    tag token, or an end tag token. */
    } elseif ($type === HTML5::CHARACTR ||
        $type === HTML5::STARTTAG ||
        $type === HTML5::ENDTAG
    ) {
        /* Parse error. Switch back to the main phase and reprocess the
        token. */
        $this->phase = self::MAIN_PHASE;
        return $this->mainPhase($token);

    /* An end-of-file token */
    } elseif ($type === HTML5::EOF) {
        /* Nothing left to do. */
    }
}
4393\r
/**
 * Creates a DOM element for a tag token, attaches it to the tree and pushes
 * it onto the stack of open elements.
 *
 * @param array $token  Tag token; 'name' and 'attr' (a list of arrays with
 *                      'name' and 'value' keys) are read.
 * @param bool  $append Accepted for callers, but not read by this method.
 * @param bool  $check  When true, the tag name is sanitised first so that
 *                      it is acceptable to libxml2.
 * @return DOMElement The element that was created and inserted.
 */
private function insertElement($token, $append = true, $check = false)
{
    $tag_name = $token['name'];

    // Proprietary workaround for libxml2's limitations with tag names.
    if ($check) {
        // Keep ASCII letters, digits and hyphens only ...
        $tag_name = preg_replace('/[^a-z0-9-]/i', '', $tag_name);
        // ... and make sure the name does not start with a hyphen or digit.
        $tag_name = ltrim($tag_name, '-0..9');

        // Should never be needed in theory; 'span' is an arbitrary
        // generic stand-in for a name that was stripped down to nothing.
        if ($tag_name === '') {
            $tag_name = 'span';
        }
    }

    $element = $this->dom->createElement($tag_name);

    // The first occurrence of an attribute wins; later duplicates are
    // skipped.
    foreach ($token['attr'] as $attribute) {
        if ($element->hasAttribute($attribute['name'])) {
            continue;
        }

        $element->setAttribute($attribute['name'], $attribute['value']);
    }

    $this->appendToRealParent($element);
    $this->stack[] = $element;

    return $element;
}
4422\r
/**
 * Wraps character data in a text node and attaches it through
 * appendToRealParent().
 *
 * @param string $data Text content for the new node.
 * @return void
 */
private function insertText($data)
{
    $this->appendToRealParent($this->dom->createTextNode($data));
}
4428\r
/**
 * Wraps comment data in a comment node and attaches it through
 * appendToRealParent().
 *
 * @param string $data Content of the comment.
 * @return void
 */
private function insertComment($data)
{
    $this->appendToRealParent($this->dom->createComment($data));
}
4434\r
/**
 * Attaches a node either to the current node or, when a foster parent is
 * set, to that foster parent.
 *
 * The foster parent is consumed by the call: it is reset to null once the
 * node has been placed.
 *
 * @param DOMNode $node Node to insert.
 * @return void
 */
private function appendToRealParent($node)
{
    // Ordinary case: no foster parenting requested.
    if ($this->foster_parent === null) {
        end($this->stack)->appendChild($node);
        return;
    }

    // Locate the last table element in the stack of open elements that is
    // attached to a parent.
    $last_table = null;
    $index = count($this->stack);

    while ($index-- > 0) {
        $candidate = $this->stack[$index];

        if ($candidate->nodeName === 'table' && $candidate->parentNode !== null) {
            $last_table = $candidate;
            break;
        }
    }

    /* If the foster parent element is the parent of that table, the new
    node goes immediately before the table; otherwise it is appended to the
    foster parent element. */
    $is_table_parent = $last_table !== null
        && $this->foster_parent->isSameNode($last_table->parentNode);

    if ($is_table_parent) {
        $this->foster_parent->insertBefore($node, $last_table);
    } else {
        $this->foster_parent->appendChild($node);
    }

    $this->foster_parent = null;
}
4465\r
/**
 * Tells whether the stack of open elements has an element "in scope" or
 * "in table scope".
 *
 * Fix: the scope flag was inverted. The extra boundary elements (caption,
 * td, th, button, marquee, object) end the search only in the plain
 * "in scope" variant; the "in table scope" variant stops at table and the
 * root element only. The method now also returns false explicitly when the
 * stack is exhausted without a decision.
 *
 * @param string|string[] $el    Tag name to look for, or a list of tag
 *                               names of which any one may match.
 * @param bool            $table True selects the "in table scope" variant,
 *                               anything else the "in scope" variant.
 * @return bool True when a matching element is found before a scope
 *              boundary is hit, false otherwise.
 */
private function elementInScope($el, $table = false)
{
    if (is_array($el)) {
        foreach ($el as $element) {
            if ($this->elementInScope($element, $table)) {
                return true;
            }
        }

        return false;
    }

    // Elements that terminate the search in the "in scope" variant only.
    $scope_boundaries = array('caption', 'td', 'th', 'button', 'marquee', 'object');

    /* 1. Start at the current node (the bottommost node of the stack) and
    walk towards the first node of the stack. */
    for ($n = count($this->stack) - 1; $n >= 0; $n--) {
        $node = $this->stack[$n];

        /* 2. If node is the target node, terminate in a match state. */
        if ($node->tagName === $el) {
            return true;
        }

        /* 3. Otherwise, if node is a table element, terminate in a failure
        state. */
        if ($node->tagName === 'table') {
            return false;
        }

        /* 4. Otherwise, if the algorithm is the "has an element in scope"
        variant (rather than the "has an element in table scope" variant),
        and node is one of the boundary elements, terminate in a failure
        state. */
        if ($table !== true && in_array($node->tagName, $scope_boundaries, true)) {
            return false;
        }

        /* 5. Otherwise, if node is the root html element, terminate in a
        failure state. */
        if ($node === $node->ownerDocument->documentElement) {
            return false;
        }

        /* Otherwise, continue with the previous entry in the stack. */
    }

    // Stack exhausted (or empty) without finding the target.
    return false;
}
4525\r
/**
 * Re-opens the formatting elements that are listed as active but are no
 * longer on the stack of open elements.
 *
 * Fix: when the backwards search stopped at a marker or at an element that
 * is still open, the "advance to the next entry" step was skipped for the
 * first clone, so the marker / open element itself was cloned. The advance
 * is now skipped only when the search ran off the start of the list.
 *
 * NOTE(review): like the previous revision, this assumes the list of active
 * formatting elements is indexed 0..n-1 without gaps - confirm against the
 * code that removes entries from it.
 *
 * @return false|null False when there is nothing to reconstruct, null after
 *                    elements have been reconstructed.
 */
private function reconstructActiveFormattingElements()
{
    /* 1. If there are no entries in the list of active formatting elements,
    then there is nothing to reconstruct; stop this algorithm. */
    $formatting_elements = count($this->a_formatting);

    if ($formatting_elements === 0) {
        return false;
    }

    /* 3. Let entry be the last (most recently added) element in the list
    of active formatting elements. */
    $a = $formatting_elements - 1;
    $entry = end($this->a_formatting);

    /* 2. If that entry is a marker, or an element that is in the stack of
    open elements, then there is nothing to reconstruct. */
    if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
        return false;
    }

    /* 4-6. Walk backwards until the start of the list is reached or an
    entry is found that is a marker or an element that is still open. */
    $advance = false;

    while ($a > 0) {
        /* 5. Let entry be the entry one earlier in the list. */
        $a--;
        $entry = $this->a_formatting[$a];

        /* 6. A marker or still-open element must not be cloned itself;
        reconstruction starts with the entry after it (step 7). */
        if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
            $advance = true;
            break;
        }
    }

    while (true) {
        /* 7. Let entry be the element one later than entry in the list of
        active formatting elements. Skipped on the first pass when step 4
        jumped straight to step 8. */
        if ($advance) {
            $a++;
            $entry = $this->a_formatting[$a];
        }

        /* 8. Perform a shallow clone of the element entry to obtain clone. */
        $clone = $entry->cloneNode();

        /* 9. Append clone to the current node and push it onto the stack
        of open elements so that it is the new current node. */
        end($this->stack)->appendChild($clone);
        $this->stack[] = $clone;

        /* 10. Replace the entry for entry in the list with an entry for
        clone. */
        $this->a_formatting[$a] = $clone;

        /* 11. If the entry for clone is not the last entry in the list,
        return to step 7. */
        if ($a >= $formatting_elements - 1) {
            break;
        }

        $advance = true;
    }
}
4597\r
/**
 * Removes entries from the end of the list of active formatting elements
 * up to and including the last marker.
 *
 * Fix: the loop now also ends when the list runs empty. Previously a list
 * without any marker made the loop spin forever, because popping an empty
 * array never yields a marker.
 *
 * @return void
 */
private function clearTheActiveFormattingElementsUpToTheLastMarker()
{
    while (!empty($this->a_formatting)) {
        /* 1. + 2. Take the last (most recently added) entry and remove it
        from the list of active formatting elements. */
        $entry = array_pop($this->a_formatting);

        /* 3. If entry was a marker, then stop the algorithm at this point.
        The list has been cleared up to the last marker. */
        if ($entry === self::MARKER) {
            break;
        }
    }
}
4619\r
/**
 * Generates implied end tags: pops the current node for as long as it is a
 * dd, dt, li, p, td, th or tr element that has not been excluded.
 *
 * The unused local of the previous revision is gone, and the loop stops
 * cleanly on an empty stack instead of dereferencing the false that end()
 * returns for it.
 *
 * @param string[] $exclude Tag names that must not be closed implicitly.
 * @return void
 */
private function generateImpliedEndTags($exclude = array())
{
    $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

    while (($node = end($this->stack)) !== false &&
        in_array($node->nodeName, $elements, true)
    ) {
        array_pop($this->stack);
    }
}
4634\r
/**
 * Classifies an element by looking its tag name up in the special, scoping
 * and formatting lists of this object, in that order.
 *
 * @param DOMElement $node Element to classify.
 * @return mixed self::SPECIAL, self::SCOPING or self::FORMATTING for a
 *               listed tag name, self::PHRASING for everything else.
 */
private function getElementCategory($node)
{
    $tag = $node->tagName;

    if (in_array($tag, $this->special)) {
        return self::SPECIAL;
    }

    if (in_array($tag, $this->scoping)) {
        return self::SCOPING;
    }

    if (in_array($tag, $this->formatting)) {
        return self::FORMATTING;
    }

    // Not listed anywhere: plain phrasing content.
    return self::PHRASING;
}
4648\r
/**
 * Clears the stack of open elements back to a table context: pops nodes
 * until the current node is one of the given elements.
 *
 * Fix: popping also stops when the stack runs empty. Previously a stack
 * containing none of the requested elements ended in a dereference of the
 * false that end() returns for an empty array.
 *
 * @param string[] $elements Tag names at which popping stops.
 * @return void
 */
private function clearStackToTableContext($elements)
{
    /* While the current node is not one of the context elements, pop
    elements from the stack of open elements. */
    while (($node = end($this->stack)) !== false &&
        !in_array($node->nodeName, $elements)
    ) {
        array_pop($this->stack);
    }
}
4666\r
/**
 * Resets the insertion mode appropriately: walks the stack of open elements
 * from the current node towards the first node and selects the insertion
 * mode that belongs to the first element with a known meaning.
 *
 * @return void
 */
private function resetInsertionMode()
{
    /* Steps 4-13: elements whose presence decides the insertion mode
    directly. A head element selects "in body" - not "in head" - on
    purpose. */
    $mode_by_element = array(
        'select'   => self::IN_SELECT,
        'td'       => self::IN_CELL,
        'th'       => self::IN_CELL,
        'tr'       => self::IN_ROW,
        'tbody'    => self::IN_TBODY,
        'thead'    => self::IN_TBODY,
        'tfoot'    => self::IN_TBODY,
        'caption'  => self::IN_CAPTION,
        'colgroup' => self::IN_CGROUP,
        'table'    => self::IN_TABLE,
        'head'     => self::IN_BODY,
        'body'     => self::IN_BODY,
        'frameset' => self::IN_FRAME,
    );

    /* 1./2. Begin with the last node in the stack of open elements. */
    for ($index = count($this->stack) - 1; $index >= 0; $index--) {
        $node = $this->stack[$index];
        $name = $node->nodeName;

        /* 3. Remember whether node is the first node in the stack. */
        $last = $this->stack[0]->isSameNode($node);

        if (array_key_exists($name, $mode_by_element)) {
            $this->mode = $mode_by_element[$name];
            return;
        }

        /* 14. html element: "before head" while the head element pointer
        is null, "after head" otherwise. */
        if ($name === 'html') {
            if ($this->head_pointer === null) {
                $this->mode = self::BEFOR_HEAD;
            } else {
                $this->mode = self::AFTER_HEAD;
            }

            return;
        }

        /* 15. Nothing matched and the first node has been reached: fall
        back to "in body". */
        if ($last) {
            $this->mode = self::IN_BODY;
            return;
        }
    }
}
4765\r
/**
 * Closes the currently open table cell: if a td - or failing that a th -
 * element is reported by elementInScope() with the table flag set, an end
 * tag token for it is handed to inCell().
 *
 * @return void
 */
private function closeCell()
{
    // td takes precedence; th is only consulted when no td is found.
    if ($this->elementInScope('td', true)) {
        $open_cell = 'td';
    } elseif ($this->elementInScope('th', true)) {
        $open_cell = 'th';
    } else {
        return;
    }

    /* Act as if an end tag token with that tag name had been seen. */
    $end_tag = array(
        'name' => $open_cell,
        'type' => HTML5::ENDTAG
    );

    $this->inCell($end_tag);
}
4783\r
/**
 * Hands out the document held in the dom property of this object.
 *
 * @return mixed The value of the dom property (the calling lexer treats it
 *               as a DOMDocument).
 */
public function save()
{
    return $this->dom;
}
4788}\r