]>
Commit | Line | Data |
---|---|---|
d4949327 NL |
1 | <?php\r |
2 | \r | |
/**
 * Keeps track of the known doctypes and HTML modules, activates the modules
 * that belong to the configured doctype and merges their element
 * definitions on request.
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Registry of the doctypes that can be selected.
     * @type HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Instance of current doctype, as produced by the doctype registry's
     * make() in setup(). It is an object (setup() reads its ->modules and
     * ->tidyModules lists), not a string.
     * @type HTMLPurifier_Doctype
     */
    public $doctype;

    /**
     * @type HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active instances of modules for the specified doctype are
     * indexed, by name, in this array.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $modules = array();

    /**
     * Array of recognized HTMLPurifier_HTMLModule instances,
     * indexed by module's name (the instance's ->name property). This array
     * is usually lazy loaded, but a user can overload a module by
     * pre-emptively registering it.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $registeredModules = array();

    /**
     * List of names of extra modules that were added by the user
     * using addModule(). These get unconditionally merged into the current
     * doctype, whatever it may be.
     * @type string[]
     */
    public $userModules = array();

    /**
     * Associative array of element name to list of names of modules that
     * have definitions for the element; this array is dynamically filled
     * by setup().
     * @type array
     */
    public $elementLookup = array();

    /**
     * List of prefixes we should use for registering small names.
     * @type array
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /**
     * @type HTMLPurifier_ContentSets
     */
    public $contentSets;

    /**
     * @type HTMLPurifier_AttrCollections
     */
    public $attrCollections;

    /**
     * If set to true, unsafe elements and attributes will be allowed.
     * @type bool
     */
    public $trusted = false;

    /**
     * Creates the editable helper objects and registers the built-in
     * doctypes together with the module lists each of them uses.
     */
    public function __construct()
    {
        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes  = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // NOTE(review): 'Tidy_Strict' appears twice in the tidy module list
        // below (and in the XHTML 1.1 list); presumably the second entry is
        // redundant -- confirm before removing it.
        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );

    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning.
     * @note This function will not call autoload, you must instantiate
     *       (and thus invoke) autoload outside the method.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Check all prefixes with $name in order they were added
     *            (HTMLPurifier_HTMLModule_$name is the first prefix)
     *          - Check for literal object name
     *          - Trigger an E_USER_ERROR and return without registering
     *       If your object name collides with an internal class, specify
     *       your module manually. All modules must have been included
     *       externally: registerModule will not perform inclusions for you!
     * @note A module without a name is rejected with an error raised at
     *       trigger_error()'s default level, and nothing is registered.
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to load the module
            $original_module = $module;
            $ok = false;
            foreach ($this->prefixes as $prefix) {
                $module = $prefix . $original_module;
                if (class_exists($module)) {
                    $ok = true;
                    break;
                }
            }
            if (!$ok) {
                // no prefixed class matched: fall back to the literal name
                $module = $original_module;
                if (!class_exists($module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
            }
            $module = new $module();
        }
        if (empty($module->name)) {
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            // the module is still replaced below; the warning only reports it
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking it on to the active doctype
     * @param $module Mixed: string module name or module instance, as
     *                accepted by registerModule(). Only the name is
     *                remembered in $userModules.
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param string $prefix Prefix appended to the end of the lookup list
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules are kept even when they are not allowed
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->processModule($module);
            $this->modules[$module]->setup($config);
        }

        // prepare any injectors: replace each module's injector list with
        // a map of injector name => injector instance
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param $module Mixed: string module name, or module instance. An
     *                instance is always (re-)registered and is then
     *                activated under its ->name.
     */
    public function processModule($module)
    {
        if (is_object($module)) {
            // An object cannot be used as an array offset, so it is
            // registered first and indexed by its name afterwards.
            $this->registerModule($module);
            if (empty($module->name)) {
                // registerModule() already reported the missing name and
                // registered nothing, so there is nothing to activate
                return;
            }
            $module = $module->name;
        } elseif (!isset($this->registeredModules[$module])) {
            $this->registerModule($module);
        }
        $this->modules[$module] = $this->registeredModules[$module];
    }

    /**
     * Retrieves merged element definitions.
     * @return HTMLPurifier_ElementDef[] Merged definitions indexed by
     *         element name; elements without a usable definition are
     *         left out.
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;

    }

    /**
     * Retrieves a single merged element definition
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *                      if you want the full version of an element.
     *                      When null, the manager's own $trusted flag is
     *                      used.
     * @return HTMLPurifier_ElementDef|bool Merged HTMLPurifier_ElementDef,
     *         or false when no module defines the element or no standalone
     *         definition is available to build on.
     * @note You may notice that modules are getting iterated over twice (once
     *       in getElements() and once here). This is because getElements()
     *       only walks the modules to collect element names, while this
     *       method walks the modules recorded in $elementLookup for one
     *       name in order to merge their definitions together.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place.  In particular, this might
                // be the case for trusted elements.  WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name != 'del' && $name != 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }
}
458 | \r | |
459 | // vim: et sw=4 sts=4\r |