diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2014-02-21 15:43:14 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2014-02-21 15:43:14 +0100 |
commit | d4949327efa15b492cab1bef3fe074290a328a17 (patch) | |
tree | e89e0322bb1f1b06d663fd10fdded21bac867e5d /inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLModuleManager.php | |
parent | c9bd17a1007bb78e5de0775efca01df0fb515031 (diff) | |
download | wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.gz wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.zst wallabag-d4949327efa15b492cab1bef3fe074290a328a17.zip |
[add] HTML Purifier added to clean code
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLModuleManager.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLModuleManager.php | 459 |
1 files changed, 459 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLModuleManager.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLModuleManager.php new file mode 100644 index 00000000..c684961b --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLModuleManager.php | |||
@@ -0,0 +1,459 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Keeps track of the HTML modules that make up a doctype: registers module
 * instances, activates the ones selected by the configuration, and merges
 * their per-element definitions on request.
 */
class HTMLPurifier_HTMLModuleManager
{

    /**
     * Registry of the doctypes known to this manager; populated with the
     * HTML 4.01 / XHTML 1.x doctypes by the constructor.
     * @type HTMLPurifier_DoctypeRegistry
     */
    public $doctypes;

    /**
     * Instance of the current doctype, as returned by $this->doctypes->make()
     * during setup(). setup() reads its ->modules and ->tidyModules lists.
     * NOTE(review): presumably an HTMLPurifier_Doctype; confirm against
     * HTMLPurifier_DoctypeRegistry::make().
     * @type object
     */
    public $doctype;

    /**
     * @type HTMLPurifier_AttrTypes
     */
    public $attrTypes;

    /**
     * Active instances of modules for the specified doctype are
     * indexed, by name, in this array.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $modules = array();

    /**
     * Array of recognized HTMLPurifier_HTMLModule instances, indexed by the
     * module's name property. This array is usually lazy loaded, but a
     * user can overload a module by pre-emptively registering it.
     * @type HTMLPurifier_HTMLModule[]
     */
    public $registeredModules = array();

    /**
     * List of names of extra modules that were added by the user
     * using addModule(). These get unconditionally merged into the current
     * doctype, whatever it may be.
     * @type string[]
     */
    public $userModules = array();

    /**
     * Associative array of element name to list of names of modules that
     * have definitions for the element; rebuilt by every call to setup().
     * @type array
     */
    public $elementLookup = array();

    /**
     * List of class-name prefixes tried, in order, when registerModule()
     * is given a short module name.
     * @type array
     */
    public $prefixes = array('HTMLPurifier_HTMLModule_');

    /**
     * @type HTMLPurifier_ContentSets
     */
    public $contentSets;

    /**
     * @type HTMLPurifier_AttrCollections
     */
    public $attrCollections;

    /**
     * If set to true, unsafe elements and attributes will be allowed.
     * @type bool
     */
    public $trusted = false;

    /**
     * Creates the editable helper objects and registers the built-in
     * doctypes together with their module and tidy-module lists.
     */
    public function __construct()
    {
        // editable internal objects
        $this->attrTypes = new HTMLPurifier_AttrTypes();
        $this->doctypes = new HTMLPurifier_DoctypeRegistry();

        // setup basic modules
        $common = array(
            'CommonAttributes', 'Text', 'Hypertext', 'List',
            'Presentation', 'Edit', 'Bdo', 'Tables', 'Image',
            'StyleAttribute',
            // Unsafe:
            'Scripting', 'Object', 'Forms',
            // Sorta legacy, but present in strict:
            'Name',
        );
        $transitional = array('Legacy', 'Target', 'Iframe');
        $xml = array('XMLCommonAttributes');
        $non_xml = array('NonXMLCommonAttributes');

        // setup basic doctypes
        $this->doctypes->register(
            'HTML 4.01 Transitional',
            false,
            array_merge($common, $transitional, $non_xml),
            array('Tidy_Transitional', 'Tidy_Proprietary'),
            array(),
            '-//W3C//DTD HTML 4.01 Transitional//EN',
            'http://www.w3.org/TR/html4/loose.dtd'
        );

        $this->doctypes->register(
            'HTML 4.01 Strict',
            false,
            array_merge($common, $non_xml),
            array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD HTML 4.01//EN',
            'http://www.w3.org/TR/html4/strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.0 Transitional',
            true,
            array_merge($common, $transitional, $xml, $non_xml),
            array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Transitional//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd'
        );

        // Each tidy module is listed once: setup() activates every entry of
        // this list, so a repeated name would run that module's setup twice.
        $this->doctypes->register(
            'XHTML 1.0 Strict',
            true,
            array_merge($common, $xml, $non_xml),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'),
            array(),
            '-//W3C//DTD XHTML 1.0 Strict//EN',
            'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
        );

        $this->doctypes->register(
            'XHTML 1.1',
            true,
            // Iframe is a real XHTML 1.1 module, despite being
            // "transitional"!
            array_merge($common, $xml, array('Ruby', 'Iframe')),
            array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), // Tidy_XHTML1_1
            array(),
            '-//W3C//DTD XHTML 1.1//EN',
            'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd'
        );
    }

    /**
     * Registers a module to the recognized module list, useful for
     * overloading pre-existing modules.
     * @param $module Mixed: string module name, with or without
     *                HTMLPurifier_HTMLModule prefix, or instance of
     *                subclass of HTMLPurifier_HTMLModule.
     * @param $overload Boolean whether or not to overload previous modules.
     *                  If this is not set, and you do overload a module,
     *                  HTML Purifier will complain with a warning.
     * @note This method performs no include/require itself. It resolves
     *       string names with class_exists() using that function's default
     *       arguments, so any autoloader that is already registered may be
     *       consulted; classes that cannot be autoloaded must have been
     *       included beforehand.
     * @note If a string is passed as a module name, different variants
     *       will be tested in this order:
     *          - Each prefix in $this->prefixes (HTMLPurifier_HTMLModule_
     *            first, then prefixes in the order they were added)
     *            followed by $name
     *          - The literal class name $name
     *          - Otherwise an E_USER_ERROR is triggered
     *       If your object name collides with an internal class, specify
     *       your module manually by passing an instance.
     */
    public function registerModule($module, $overload = false)
    {
        if (is_string($module)) {
            // attempt to resolve the short name to a concrete class
            $original_module = $module;
            $class = null;
            foreach ($this->prefixes as $prefix) {
                if (class_exists($prefix . $original_module)) {
                    $class = $prefix . $original_module;
                    break;
                }
            }
            if ($class === null) {
                // no prefixed variant exists: fall back to the literal name
                if (!class_exists($original_module)) {
                    trigger_error(
                        $original_module . ' module does not exist',
                        E_USER_ERROR
                    );
                    return;
                }
                $class = $original_module;
            }
            $module = new $class();
        }
        if (empty($module->name)) {
            // Raised at trigger_error()'s default level (E_USER_NOTICE).
            trigger_error('Module instance of ' . get_class($module) . ' must have name');
            return;
        }
        if (!$overload && isset($this->registeredModules[$module->name])) {
            // The replacement still happens; the caller is only warned.
            trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING);
        }
        $this->registeredModules[$module->name] = $module;
    }

    /**
     * Adds a module to the current doctype by first registering it,
     * and then tacking its name on to the list of user modules that
     * setup() merges into the active doctype.
     * @param $module Mixed: module name or module instance, as accepted by
     *                registerModule().
     */
    public function addModule($module)
    {
        $this->registerModule($module);
        if (is_object($module)) {
            $module = $module->name;
        }
        $this->userModules[] = $module;
    }

    /**
     * Adds a class prefix that registerModule() will use to resolve a
     * string name to a concrete class
     * @param string $prefix Class-name prefix, appended to $this->prefixes.
     */
    public function addPrefix($prefix)
    {
        $this->prefixes[] = $prefix;
    }

    /**
     * Performs processing on modules, after being called you may
     * use getElement() and getElements()
     * @param HTMLPurifier_Config $config
     */
    public function setup($config)
    {
        $this->trusted = $config->get('HTML.Trusted');

        // generate
        $this->doctype = $this->doctypes->make($config);
        $modules = $this->doctype->modules;

        // take out the default modules that aren't allowed
        $lookup = $config->get('HTML.AllowedModules');
        $special_cases = $config->get('HTML.CoreModules');

        if (is_array($lookup)) {
            foreach ($modules as $k => $m) {
                // core modules are kept whether or not they are allowed
                if (isset($special_cases[$m])) {
                    continue;
                }
                if (!isset($lookup[$m])) {
                    unset($modules[$k]);
                }
            }
        }

        // custom modules
        if ($config->get('HTML.Proprietary')) {
            $modules[] = 'Proprietary';
        }
        if ($config->get('HTML.SafeObject')) {
            $modules[] = 'SafeObject';
        }
        if ($config->get('HTML.SafeEmbed')) {
            $modules[] = 'SafeEmbed';
        }
        if ($config->get('HTML.SafeScripting') !== array()) {
            $modules[] = 'SafeScripting';
        }
        if ($config->get('HTML.Nofollow')) {
            $modules[] = 'Nofollow';
        }
        if ($config->get('HTML.TargetBlank')) {
            $modules[] = 'TargetBlank';
        }

        // merge in custom modules
        $modules = array_merge($modules, $this->userModules);

        foreach ($modules as $module) {
            $this->activateModule($module, $config);
        }

        foreach ($this->doctype->tidyModules as $module) {
            $this->activateModule($module, $config);
        }

        // prepare any injectors: instantiate the ones given by short name
        // and re-key every module's injector list by injector name
        foreach ($this->modules as $module) {
            $n = array();
            foreach ($module->info_injector as $injector) {
                if (!is_object($injector)) {
                    $class = "HTMLPurifier_Injector_$injector";
                    $injector = new $class;
                }
                $n[$injector->name] = $injector;
            }
            $module->info_injector = $n;
        }

        // setup lookup table based on all valid modules; start from scratch
        // so that a repeated setup() does not list a module twice for the
        // same element (getElement() would then merge it in twice)
        $this->elementLookup = array();
        foreach ($this->modules as $module) {
            foreach ($module->info as $name => $def) {
                if (!isset($this->elementLookup[$name])) {
                    $this->elementLookup[$name] = array();
                }
                $this->elementLookup[$name][] = $module->name;
            }
        }

        // note the different choice
        $this->contentSets = new HTMLPurifier_ContentSets(
            // content set assembly deals with all possible modules,
            // not just ones deemed to be "safe"
            $this->modules
        );
        $this->attrCollections = new HTMLPurifier_AttrCollections(
            $this->attrTypes,
            // there is no way to directly disable a global attribute,
            // but using AllowedAttributes or simply not including
            // the module in your custom doctype should be sufficient
            $this->modules
        );
    }

    /**
     * Takes a module and adds it to the active module collection,
     * registering it if necessary.
     * @param $module Mixed: module name, or module instance. An instance is
     *                always (re-)registered and then activated under its
     *                name property; a name is registered only when it is
     *                not yet known.
     */
    public function processModule($module)
    {
        $name = $this->moduleName($module);
        // Objects cannot be used as array offsets, so the lookup below must
        // only ever see the resolved name.
        if (is_object($module) || $name === null || !isset($this->registeredModules[$name])) {
            $this->registerModule($module);
        }
        if ($name === null || !isset($this->registeredModules[$name])) {
            // registration failed and registerModule() already reported it
            return;
        }
        $this->modules[$name] = $this->registeredModules[$name];
    }

    /**
     * Retrieves merged element definitions.
     * @return Array of HTMLPurifier_ElementDef, indexed by element name
     */
    public function getElements()
    {
        $elements = array();
        foreach ($this->modules as $module) {
            if (!$this->trusted && !$module->safe) {
                continue;
            }
            foreach ($module->info as $name => $v) {
                if (isset($elements[$name])) {
                    continue;
                }
                $elements[$name] = $this->getElement($name);
            }
        }

        // remove dud elements, this happens when an element that
        // appeared to be safe actually wasn't
        foreach ($elements as $n => $v) {
            if ($v === false) {
                unset($elements[$n]);
            }
        }

        return $elements;
    }

    /**
     * Retrieves a single merged element definition
     * @param string $name Name of element
     * @param bool $trusted Boolean trusted overriding parameter: set to true
     *             if you want the full version of an element; null (the
     *             default) falls back to $this->trusted
     * @return HTMLPurifier_ElementDef|bool Merged HTMLPurifier_ElementDef, or
     *             false when no module supplies a usable definition
     * @note Modules are iterated over twice: getElements() walks the active
     *       modules to discover element names, while this method walks only
     *       the modules that $this->elementLookup lists for $name in order
     *       to merge their definitions.
     */
    public function getElement($name, $trusted = null)
    {
        if (!isset($this->elementLookup[$name])) {
            return false;
        }

        // setup global state variables
        $def = false;
        if ($trusted === null) {
            $trusted = $this->trusted;
        }

        // iterate through each module that has registered itself to this
        // element
        foreach ($this->elementLookup[$name] as $module_name) {
            $module = $this->modules[$module_name];

            // refuse to create/merge from a module that is deemed unsafe--
            // pretend the module doesn't exist--when trusted mode is not on.
            if (!$trusted && !$module->safe) {
                continue;
            }

            // clone is used because, ideally speaking, the original
            // definition should not be modified. Usually, this will
            // make no difference, but for consistency's sake
            $new_def = clone $module->info[$name];

            if (!$def && $new_def->standalone) {
                $def = $new_def;
            } elseif ($def) {
                // This will occur even if $new_def is standalone. In practice,
                // this will usually result in a full replacement.
                $def->mergeIn($new_def);
            } else {
                // :TODO:
                // non-standalone definitions that don't have a standalone
                // to merge into could be deferred to the end
                // HOWEVER, it is perfectly valid for a non-standalone
                // definition to lack a standalone definition, even
                // after all processing: this allows us to safely
                // specify extra attributes for elements that may not be
                // enabled all in one place. In particular, this might
                // be the case for trusted elements. WARNING: care must
                // be taken that the /extra/ definitions are all safe.
                continue;
            }

            // attribute value expansions
            $this->attrCollections->performInclusions($def->attr);
            $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes);

            // descendants_are_inline, for ChildDef_Chameleon
            if (is_string($def->content_model) &&
                strpos($def->content_model, 'Inline') !== false) {
                if ($name !== 'del' && $name !== 'ins') {
                    // this is for you, ins/del
                    $def->descendants_are_inline = true;
                }
            }

            $this->contentSets->generateChildDef($def, $module);
        }

        // This can occur if there is a blank definition, but no base to
        // mix it in with
        if (!$def) {
            return false;
        }

        // add information on required attributes
        foreach ($def->attr as $attr_name => $attr_def) {
            if ($attr_def->required) {
                $def->required_attr[] = $attr_name;
            }
        }
        return $def;
    }

    /**
     * Activates a module via processModule() and runs its setup() hook,
     * skipping modules whose registration failed.
     * @param $module Mixed: module name or module instance.
     * @param HTMLPurifier_Config $config
     */
    private function activateModule($module, $config)
    {
        $this->processModule($module);
        $name = $this->moduleName($module);
        if ($name !== null && isset($this->modules[$name])) {
            $this->modules[$name]->setup($config);
        }
    }

    /**
     * Resolves a module argument to the key used in $this->modules and
     * $this->registeredModules.
     * @param $module Mixed: module name or module instance.
     * @return string|null The instance's name property or the name itself;
     *                     null for an instance without a usable name.
     */
    private function moduleName($module)
    {
        if (is_object($module)) {
            return empty($module->name) ? null : $module->name;
        }
        return $module;
    }
}
458 | |||
459 | // vim: et sw=4 sts=4 | ||