diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2014-02-21 15:43:14 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2014-02-21 15:43:14 +0100 |
commit | d4949327efa15b492cab1bef3fe074290a328a17 (patch) | |
tree | e89e0322bb1f1b06d663fd10fdded21bac867e5d /inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php | |
parent | c9bd17a1007bb78e5de0775efca01df0fb515031 (diff) | |
download | wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.gz wallabag-d4949327efa15b492cab1bef3fe074290a328a17.tar.zst wallabag-d4949327efa15b492cab1bef3fe074290a328a17.zip |
[add] HTML Purifier added to clean code
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php | 493 |
1 files changed, 493 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php new file mode 100644 index 00000000..027c85d5 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php | |||
@@ -0,0 +1,493 @@ | |||
1 | <?php | ||
2 | |||
3 | /** | ||
4 | * Definition of the purified HTML that describes allowed children, | ||
5 | * attributes, and many other things. | ||
6 | * | ||
7 | * Conventions: | ||
8 | * | ||
9 | * All member variables that are prefixed with info | ||
10 | * (including the main $info array) are used by HTML Purifier internals | ||
11 | * and should not be directly edited when customizing the HTMLDefinition. | ||
12 | * They can usually be set via configuration directives or custom | ||
13 | * modules. | ||
14 | * | ||
15 | * On the other hand, member variables without the info prefix are used | ||
16 | * internally by the HTMLDefinition and MUST NOT be used by other HTML | ||
17 | * Purifier internals. Many of them, however, are public, and may be | ||
18 | * edited by userspace code to tweak the behavior of HTMLDefinition. | ||
19 | * | ||
20 | * @note This class is inspected by Printer_HTMLDefinition; please | ||
21 | * update that class if things here change. | ||
22 | * | ||
23 | * @warning Directives that change this object's structure must be in | ||
24 | * the HTML or Attr namespace! | ||
25 | */ | ||
26 | class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition | ||
27 | { | ||
28 | |||
29 | // FULLY-PUBLIC VARIABLES --------------------------------------------- | ||
30 | |||
31 | /** | ||
32 | * Associative array of element names to HTMLPurifier_ElementDef. | ||
33 | * @type HTMLPurifier_ElementDef[] | ||
34 | */ | ||
35 | public $info = array(); | ||
36 | |||
37 | /** | ||
38 | * Associative array of global attribute name to attribute definition. | ||
39 | * @type array | ||
40 | */ | ||
41 | public $info_global_attr = array(); | ||
42 | |||
43 | /** | ||
44 | * String name of parent element HTML will be going into. | ||
45 | * @type string | ||
46 | */ | ||
47 | public $info_parent = 'div'; | ||
48 | |||
49 | /** | ||
50 | * Definition for parent element, allows parent element to be a | ||
51 | * tag that's not allowed inside the HTML fragment. | ||
52 | * @type HTMLPurifier_ElementDef | ||
53 | */ | ||
54 | public $info_parent_def; | ||
55 | |||
56 | /** | ||
57 | * String name of element used to wrap inline elements in block context. | ||
58 | * @type string | ||
59 | * @note This is rarely used except for BLOCKQUOTEs in strict mode | ||
60 | */ | ||
61 | public $info_block_wrapper = 'p'; | ||
62 | |||
63 | /** | ||
64 | * Associative array of deprecated tag name to HTMLPurifier_TagTransform. | ||
65 | * @type array | ||
66 | */ | ||
67 | public $info_tag_transform = array(); | ||
68 | |||
69 | /** | ||
70 | * Indexed list of HTMLPurifier_AttrTransform to be performed before validation. | ||
71 | * @type HTMLPurifier_AttrTransform[] | ||
72 | */ | ||
73 | public $info_attr_transform_pre = array(); | ||
74 | |||
75 | /** | ||
76 | * Indexed list of HTMLPurifier_AttrTransform to be performed after validation. | ||
77 | * @type HTMLPurifier_AttrTransform[] | ||
78 | */ | ||
79 | public $info_attr_transform_post = array(); | ||
80 | |||
81 | /** | ||
82 | * Nested lookup array of content set name (Block, Inline) to | ||
83 | * element name to whether or not it belongs in that content set. | ||
84 | * @type array | ||
85 | */ | ||
86 | public $info_content_sets = array(); | ||
87 | |||
88 | /** | ||
89 | * Indexed list of HTMLPurifier_Injector to be used. | ||
90 | * @type HTMLPurifier_Injector[] | ||
91 | */ | ||
92 | public $info_injector = array(); | ||
93 | |||
94 | /** | ||
95 | * Doctype object | ||
96 | * @type HTMLPurifier_Doctype | ||
97 | */ | ||
98 | public $doctype; | ||
99 | |||
100 | |||
101 | |||
102 | // RAW CUSTOMIZATION STUFF -------------------------------------------- | ||
103 | |||
/**
 * Registers a custom attribute on an element through the anonymous module.
 *
 * If the anonymous module has no entry for the element yet, a blank
 * element definition is created for it first.
 *
 * @note This is strictly a convenience and has no counterpart in
 *       HTMLPurifier_HTMLModule
 * @param string $element_name Element name to add attribute to
 * @param string $attr_name Name of attribute
 * @param mixed $def Attribute definition, can be string or object, see
 *             HTMLPurifier_AttrTypes for details
 */
public function addAttribute($element_name, $attr_name, $def)
{
    $anon = $this->getAnonymousModule();
    // reuse the element already registered on the anonymous module,
    // otherwise start from a blank definition
    $target = isset($anon->info[$element_name])
        ? $anon->info[$element_name]
        : $anon->addBlankElement($element_name);
    $target->attr[$attr_name] = $def;
}
123 | |||
/**
 * Registers a custom element on the anonymous module and returns
 * whatever the module's addElement() returns.
 *
 * @see HTMLPurifier_HTMLModule::addElement() for detailed
 *       parameter and return value descriptions.
 */
public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
{
    // The arguments are forwarded untouched: a caller that adds an element
    // is assumed to consider it safe. This may not be a good idea.
    return $this->getAnonymousModule()->addElement(
        $element_name,
        $type,
        $contents,
        $attr_collections,
        $attributes
    );
}
137 | |||
/**
 * Registers a blank element on the anonymous module, for overriding
 * existing behavior.
 *
 * @param string $element_name
 * @return HTMLPurifier_ElementDef
 * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
 *       parameter and return value descriptions.
 */
public function addBlankElement($element_name)
{
    return $this->getAnonymousModule()->addBlankElement($element_name);
}
152 | |||
/**
 * Returns the anonymous module, creating it on first use, so advanced
 * features are available without writing a dedicated module.
 *
 * The module is created lazily, named 'Anonymous', and cached on this
 * object.
 *
 * @return HTMLPurifier_HTMLModule
 */
public function getAnonymousModule()
{
    if ($this->_anonModule) {
        return $this->_anonModule;
    }

    $created = new HTMLPurifier_HTMLModule();
    $created->name = 'Anonymous';
    $this->_anonModule = $created;

    return $created;
}
167 | |||
168 | private $_anonModule = null; | ||
169 | |||
170 | // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- | ||
171 | |||
172 | /** | ||
173 | * @type string | ||
174 | */ | ||
175 | public $type = 'HTML'; | ||
176 | |||
177 | /** | ||
178 | * @type HTMLPurifier_HTMLModuleManager | ||
179 | */ | ||
180 | public $manager; | ||
181 | |||
/**
 * Low-cost preliminary initialization: only a fresh module manager is
 * created here.
 */
public function __construct()
{
    $moduleManager = new HTMLPurifier_HTMLModuleManager();
    $this->manager = $moduleManager;
}
189 | |||
/**
 * Builds the definition: processes the modules, applies the
 * configuration, then discards the manager and strips the content model
 * fields from every element definition.
 *
 * @param HTMLPurifier_Config $config
 */
protected function doSetup($config)
{
    $this->processModules($config);
    $this->setupConfigStuff($config);
    unset($this->manager);

    // cleanup some of the element definitions; each entry is an object
    // handle, so unsetting through the loop variable edits the stored
    // definition itself
    foreach ($this->info as $definition) {
        unset($definition->content_model, $definition->content_model_type);
    }
}
205 | |||
/**
 * Pulls the information gathered by the module manager into this
 * definition.
 *
 * Registers the anonymous module (if one was created), runs the manager
 * setup, copies the doctype, merges the per-module transform and injector
 * tables, and finally takes over the element definitions and the content
 * set lookup.
 *
 * @param HTMLPurifier_Config $config
 */
protected function processModules($config)
{
    if ($this->_anonModule) {
        // user specific changes; added this late so we don't have to
        // deal with PHP4 reference wonky-ness
        $this->manager->addModule($this->_anonModule);
        unset($this->_anonModule);
    }

    $manager = $this->manager;
    $manager->setup($config);
    $this->doctype = $manager->doctype;

    // tables that every module may contribute to, in merge order
    $merged_tables = array(
        'info_tag_transform',
        'info_attr_transform_pre',
        'info_attr_transform_post',
        'info_injector',
    );

    foreach ($manager->modules as $module) {
        foreach ($merged_tables as $table) {
            foreach ($module->{$table} as $key => $entry) {
                if ($entry === false) {
                    // an explicit false removes an earlier module's entry
                    unset($this->{$table}[$key]);
                    continue;
                }
                $this->{$table}[$key] = $entry;
            }
        }
    }

    $this->info = $manager->getElements();
    $this->info_content_sets = $manager->contentSets->lookup;
}
256 | |||
/**
 * Sets up stuff based on config. We need a better way of doing this.
 *
 * In order, this method:
 *  - validates HTML.BlockWrapper and HTML.Parent,
 *  - restricts $info to HTML.AllowedElements / HTML.AllowedAttributes
 *    (or to the parsed HTML.Allowed list when neither is an array),
 *  - removes HTML.ForbiddenElements / HTML.ForbiddenAttributes,
 *  - drops every injector whose checkNeeded() does not return false.
 *
 * Configuration problems are reported through trigger_error(); nothing is
 * thrown or returned.
 *
 * @param HTMLPurifier_Config $config
 */
protected function setupConfigStuff($config)
{
    $block_wrapper = $config->get('HTML.BlockWrapper');
    if (isset($this->info_content_sets['Block'][$block_wrapper])) {
        $this->info_block_wrapper = $block_wrapper;
    } else {
        trigger_error(
            'Cannot use non-block element as block wrapper',
            E_USER_ERROR
        );
    }

    $parent = $config->get('HTML.Parent');
    $def = $this->manager->getElement($parent, true);
    if ($def) {
        $this->info_parent = $parent;
        $this->info_parent_def = $def;
    } else {
        trigger_error(
            'Cannot use unrecognized element as parent',
            E_USER_ERROR
        );
        // only reached when an error handler swallows the error above:
        // fall back to the definition of the current parent name
        $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
    }

    // support template text
    $support = "(for information on implementing this, see the support forums) ";

    // setup allowed elements -----------------------------------------

    $allowed_elements = $config->get('HTML.AllowedElements');
    $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

    if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
        $allowed = $config->get('HTML.Allowed');
        if (is_string($allowed)) {
            list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
        }
    }

    if (is_array($allowed_elements)) {
        foreach ($this->info as $name => $d) {
            if (!isset($allowed_elements[$name])) {
                unset($this->info[$name]);
            }
            // whatever is left in $allowed_elements afterwards was
            // requested but is not a known element
            unset($allowed_elements[$name]);
        }
        // emit errors
        foreach ($allowed_elements as $element => $d) {
            $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
            trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
        }
    }

    // setup allowed attributes ---------------------------------------

    // working copy: every key that matches a known attribute is removed
    // from it, so the leftovers are the unsupported requests
    $allowed_attributes_mutable = $allowed_attributes; // by copy!
    if (is_array($allowed_attributes)) {
        // This actually doesn't do anything, since we went away from
        // global attributes. It's possible that userland code uses
        // it, but HTMLModuleManager doesn't!
        foreach ($this->info_global_attr as $attr => $x) {
            $keys = array($attr, "*@$attr", "*.$attr");
            $delete = true;
            foreach ($keys as $key) {
                if ($delete && isset($allowed_attributes[$key])) {
                    $delete = false;
                }
                if (isset($allowed_attributes_mutable[$key])) {
                    unset($allowed_attributes_mutable[$key]);
                }
            }
            if ($delete) {
                unset($this->info_global_attr[$attr]);
            }
        }

        foreach ($this->info as $tag => $info) {
            foreach ($info->attr as $attr => $x) {
                // every spelling under which this attribute may be allowed
                $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    if ($this->info[$tag]->attr[$attr]->required) {
                        trigger_error(
                            "Required attribute '$attr' in element '$tag' " .
                            "was not allowed, which means '$tag' will not be allowed either",
                            E_USER_WARNING
                        );
                    }
                    unset($this->info[$tag]->attr[$attr]);
                }
            }
        }
        // emit errors
        foreach ($allowed_attributes_mutable as $elattr => $d) {
            // split "element.attr" / "element@attr" into at most two pieces
            $bits = preg_split('/[.@]/', $elattr, 2);
            $c = count($bits);
            switch ($c) {
                case 2:
                    if ($bits[0] !== '*') {
                        $element = htmlspecialchars($bits[0]);
                        $attribute = htmlspecialchars($bits[1]);
                        // look the element up under its raw name; the
                        // escaped copies are only for the messages
                        if (!isset($this->info[$bits[0]])) {
                            // no level is passed here, so this is raised
                            // with trigger_error()'s default level
                            trigger_error(
                                "Cannot allow attribute '$attribute' if element " .
                                "'$element' is not allowed/supported $support"
                            );
                        } else {
                            trigger_error(
                                "Attribute '$attribute' in element '$element' not supported $support",
                                E_USER_WARNING
                            );
                        }
                        break;
                    }
                    // otherwise fall through
                case 1:
                    // the attribute name is the last piece: $bits[0] for a
                    // bare "attr" key, $bits[1] for the wildcard forms
                    // "*.attr" and "*@attr" that fell through from above
                    $attribute = htmlspecialchars($bits[$c - 1]);
                    trigger_error(
                        "Global attribute '$attribute' is not ".
                        "supported in any elements $support",
                        E_USER_WARNING
                    );
                    break;
            }
        }
    }

    // setup forbidden elements ---------------------------------------

    $forbidden_elements = $config->get('HTML.ForbiddenElements');
    $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

    foreach ($this->info as $tag => $info) {
        if (isset($forbidden_elements[$tag])) {
            unset($this->info[$tag]);
            continue;
        }
        foreach ($info->attr as $attr => $x) {
            if (isset($forbidden_attributes["$tag@$attr"]) ||
                isset($forbidden_attributes["*@$attr"]) ||
                isset($forbidden_attributes[$attr])
            ) {
                unset($this->info[$tag]->attr[$attr]);
                continue;
            } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                // $tag.$attr are not user supplied, so no worries!
                trigger_error(
                    "Error with $tag.$attr: tag.attr syntax not supported for " .
                    "HTML.ForbiddenAttributes; use tag@attr instead",
                    E_USER_WARNING
                );
            }
        }
    }
    // warn about "*.attr" keys, which are not a supported spelling here
    foreach ($forbidden_attributes as $key => $v) {
        if (strlen($key) < 2) {
            continue;
        }
        if ($key[0] != '*') {
            continue;
        }
        if ($key[1] == '.') {
            trigger_error(
                "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                E_USER_WARNING
            );
        }
    }

    // setup injectors -----------------------------------------------------
    foreach ($this->info_injector as $i => $injector) {
        if ($injector->checkNeeded($config) !== false) {
            // remove injector that does not have its required
            // elements/attributes present, and is thus not needed.
            unset($this->info_injector[$i]);
        }
    }
}
448 | |||
/**
 * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
 * separate lists for processing. Format is element[attr1|attr2],element2...
 *
 * Spaces and tabs are ignored; entries are separated by commas or line
 * breaks. The element name "*" is not added to the element list, but its
 * attributes are still recorded under "*.attr".
 *
 * @warning Although it's largely drawn from TinyMCE's implementation,
 *      it is different, and you'll probably have to modify your lists
 * @param string $list String list to parse
 * @return array Two lookup arrays: array(0 => element name => true,
 *               1 => "element.attr" => true)
 * @todo Give this its own class, probably static interface
 */
public function parseTinyMCEAllowedList($list)
{
    $list = str_replace(array(' ', "\t"), '', $list);

    $elements = array();
    $attributes = array();

    $chunks = preg_split('/(,|[\n\r]+)/', $list);
    foreach ($chunks as $chunk) {
        if (empty($chunk)) {
            continue;
        }
        // a chunk without '[' (or one that starts with it) is taken as a
        // bare element name with no attribute list
        if (!strpos($chunk, '[')) {
            $element = $chunk;
            $attr = false;
        } else {
            list($element, $attr) = explode('[', $chunk);
        }
        if ($element !== '*') {
            $elements[$element] = true;
        }
        if (!$attr) {
            continue;
        }
        // remove the trailing ], but only when it is really there, so a
        // chunk with a missing bracket does not lose a real character
        if (substr($attr, -1) === ']') {
            $attr = substr($attr, 0, strlen($attr) - 1);
        }
        foreach (explode('|', $attr) as $key) {
            $attributes["$element.$key"] = true;
        }
    }
    return array($elements, $attributes);
}
491 | } | ||
492 | |||
493 | // vim: et sw=4 sts=4 | ||