]>
Commit | Line | Data |
---|---|---|
d4949327 NL |
1 | <?php\r |
2 | \r | |
3 | /**\r | |
4 | * Removes all unrecognized tags from the list of tokens.\r | |
5 | *\r | |
6 | * This strategy iterates through all the tokens and removes unrecognized\r | |
7 | * tokens. If a token is not recognized but a TagTransform is defined for\r | |
8 | * that element, the element will be transformed accordingly.\r | |
9 | */\r | |
10 | \r | |
class HTMLPurifier_Strategy_RemoveForeignElements extends HTMLPurifier_Strategy
{

    /**
     * Filters a token stream down to tags known to the HTML definition, text,
     * and (optionally) comments.
     *
     * Per token, in order:
     *  - while inside a removed hidden element, everything is dropped until a
     *    tag with the same name shows up again;
     *  - tags with a registered tag transform are transformed first;
     *  - known tags are kept, unless a required attribute is missing after
     *    attribute validation (img is only checked when Core.RemoveInvalidImg
     *    is on);
     *  - unknown tags are either turned into text (Core.EscapeInvalidTags) or
     *    dropped; an unknown hidden element also starts content removal;
     *  - comments are turned into text inside an allowed hidden element, kept
     *    (with hyphens cleaned up) when trusted or explicitly allowed, and
     *    dropped otherwise;
     *  - text is kept; any other token type is dropped.
     *
     * The token being processed is exposed to the context as 'CurrentToken'
     * for the duration of the call.
     *
     * @param HTMLPurifier_Token[] $tokens
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return array|HTMLPurifier_Token[]
     */
    public function execute($tokens, $config, $context)
    {
        $htmlDef = $config->getHTMLDefinition();
        $htmlGenerator = new HTMLPurifier_Generator($config, $context);

        $escapeForeign = $config->get('Core.EscapeInvalidTags');
        $removeInvalidImg = $config->get('Core.RemoveInvalidImg');

        // comment policy: trusted mode keeps everything, otherwise an exact
        // lookup and/or a regexp decide
        $trusted = $config->get('HTML.Trusted');
        $allowedComments = $config->get('HTML.AllowedComments');
        $allowedCommentsRegexp = $config->get('HTML.AllowedCommentsRegexp');
        $checkComments = $allowedComments !== array() || $allowedCommentsRegexp !== null;

        // Core.RemoveScriptContents is a legacy switch layered on top of
        // Core.HiddenElements; null leaves the lookup untouched
        $removeScriptContents = $config->get('Core.RemoveScriptContents');
        $hiddenElements = $config->get('Core.HiddenElements');
        if ($removeScriptContents === true) {
            $hiddenElements['script'] = true;
        } elseif ($removeScriptContents === false) {
            // unset() on an absent key is a silent no-op
            unset($hiddenElements['script']);
        }

        $attrValidator = new HTMLPurifier_AttrValidator();

        // name of the hidden element whose contents are being discarded, or false
        $discardUntil = false;

        // name of the allowed hidden element whose comments become text, or false
        $textifyWithin = false;

        // registered by reference: every assignment to $current below is
        // visible through the context
        $current = false;
        $context->register('CurrentToken', $current);

        $errors = false;
        if ($config->get('Core.CollectErrors')) {
            $errors =& $context->get('ErrorCollector');
        }

        $kept = array();

        foreach ($tokens as $current) {
            // swallow everything up to the next tag named like the hidden element
            if ($discardUntil && (empty($current->is_tag) || $current->name !== $discardUntil)) {
                continue;
            }

            if (!empty($current->is_tag)) {
                // give a registered transform the first shot at the element
                $sourceName = $current->name;
                if (isset($htmlDef->info_tag_transform[$sourceName])) {
                    $current = $htmlDef->info_tag_transform[$sourceName]->transform($current, $config, $context);
                    if ($errors) {
                        $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $sourceName);
                    }
                }

                if (isset($htmlDef->info[$current->name])) {
                    // known element: only required attributes can still disqualify it
                    $elementDef = $htmlDef->info[$current->name];
                    $opensElement = $current instanceof HTMLPurifier_Token_Start
                        || $current instanceof HTMLPurifier_Token_Empty;

                    if ($opensElement
                        && $elementDef->required_attr
                        && ($current->name != 'img' || $removeInvalidImg) // ensure config option still works
                    ) {
                        $attrValidator->validateToken($current, $config, $context);

                        $attrMissing = false;
                        $missingName = null;
                        foreach ($elementDef->required_attr as $requiredName) {
                            if (!isset($current->attr[$requiredName])) {
                                $attrMissing = true;
                                $missingName = $requiredName;
                                break;
                            }
                        }

                        if ($attrMissing) {
                            if ($errors) {
                                $errors->send(
                                    E_ERROR,
                                    'Strategy_RemoveForeignElements: Missing required attribute',
                                    $missingName
                                );
                            }
                            continue;
                        }

                        // attributes were validated above; flag the token accordingly
                        $current->armor['ValidateAttributes'] = true;
                    }

                    // track whether we are inside an allowed hidden element
                    if (isset($hiddenElements[$current->name]) && $current instanceof HTMLPurifier_Token_Start) {
                        $textifyWithin = $current->name;
                    } elseif ($current->name === $textifyWithin && $current instanceof HTMLPurifier_Token_End) {
                        $textifyWithin = false;
                    }
                } elseif ($escapeForeign) {
                    // unknown element: keep it, but as its literal HTML text
                    if ($errors) {
                        $errors->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text');
                    }
                    $current = new HTMLPurifier_Token_Text(
                        $htmlGenerator->generateFromToken($current)
                    );
                } else {
                    // unknown element: drop it, and for hidden elements also
                    // start/stop discarding what lies between its tags
                    $isHidden = isset($hiddenElements[$current->name]);
                    if ($isHidden) {
                        if ($current instanceof HTMLPurifier_Token_Start) {
                            $discardUntil = $current->name;
                        } elseif (!($current instanceof HTMLPurifier_Token_Empty)) {
                            // an empty tag changes nothing; anything else ends the removal
                            $discardUntil = false;
                        }
                    }
                    if ($errors) {
                        $errors->send(
                            E_ERROR,
                            $isHidden
                                ? 'Strategy_RemoveForeignElements: Foreign meta element removed'
                                : 'Strategy_RemoveForeignElements: Foreign element removed'
                        );
                    }
                    continue;
                }
            } elseif ($current instanceof HTMLPurifier_Token_Comment) {
                if ($textifyWithin !== false) {
                    // comments inside an allowed hidden element are kept as plain text
                    $current = new HTMLPurifier_Token_Text($current->data);
                } else {
                    $keepComment = false;
                    $hadTrailingHyphen = false;
                    $hadDoubleHyphen = false;

                    if ($trusted || $checkComments) {
                        // the trailing-hyphen check only matters for error reporting
                        $hadTrailingHyphen = $errors && substr($current->data, -1) === '-';

                        // normalise: no trailing hyphens, no hyphen runs
                        $current->data = rtrim($current->data, '-');
                        while (strpos($current->data, '--') !== false) {
                            $hadDoubleHyphen = true;
                            $current->data = str_replace('--', '-', $current->data);
                        }

                        if ($trusted) {
                            $keepComment = true;
                        } else {
                            $needle = trim($current->data);
                            $keepComment = !empty($allowedComments[$needle])
                                || ($allowedCommentsRegexp !== null && preg_match($allowedCommentsRegexp, $needle));
                        }
                    }

                    if (!$keepComment) {
                        if ($errors) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed');
                        }
                        continue;
                    }

                    if ($errors) {
                        if ($hadTrailingHyphen) {
                            $errors->send(
                                E_NOTICE,
                                'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'
                            );
                        }
                        if ($hadDoubleHyphen) {
                            $errors->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed');
                        }
                    }
                }
            } elseif (!($current instanceof HTMLPurifier_Token_Text)) {
                // neither tag, comment nor text: nothing we know how to keep
                continue;
            }

            $kept[] = $current;
        }

        if ($discardUntil && $errors) {
            // the hidden element was never closed, so removal ran to the end
            $errors->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $discardUntil);
        }

        $context->destroy('CurrentToken');
        return $kept;
    }
}
206 | \r | |
207 | // vim: et sw=4 sts=4\r |