]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/htmlpurifier/HTMLPurifier/HTMLDefinition.php
remove autoload section in composer.json
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / HTMLDefinition.php
CommitLineData
d4949327
NL
1<?php\r
2\r
/**
 * Definition of the purified HTML that describes allowed children,
 * attributes, and many other things.
 *
 * Conventions:
 *
 * All member variables that are prefixed with info
 * (including the main $info array) are used by HTML Purifier internals
 * and should not be directly edited when customizing the HTMLDefinition.
 * They can usually be set via configuration directives or custom
 * modules.
 *
 * On the other hand, member variables without the info prefix are used
 * internally by the HTMLDefinition and MUST NOT be used by other HTML
 * Purifier internals. Many of them, however, are public, and may be
 * edited by userspace code to tweak the behavior of HTMLDefinition.
 *
 * @note This class is inspected by Printer_HTMLDefinition; please
 *       update that class if things here change.
 *
 * @warning Directives that change this object's structure must be in
 *          the HTML or Attr namespace!
 */
class HTMLPurifier_HTMLDefinition extends HTMLPurifier_Definition
{

    // FULLY-PUBLIC VARIABLES ---------------------------------------------

    /**
     * Associative array of element names to HTMLPurifier_ElementDef.
     * @type HTMLPurifier_ElementDef[]
     */
    public $info = array();

    /**
     * Associative array of global attribute name to attribute definition.
     * @type array
     */
    public $info_global_attr = array();

    /**
     * String name of parent element HTML will be going into.
     * @type string
     */
    public $info_parent = 'div';

    /**
     * Definition for parent element, allows parent element to be a
     * tag that's not allowed inside the HTML fragment.
     * @type HTMLPurifier_ElementDef
     */
    public $info_parent_def;

    /**
     * String name of element used to wrap inline elements in block context.
     * @type string
     * @note This is rarely used except for BLOCKQUOTEs in strict mode
     */
    public $info_block_wrapper = 'p';

    /**
     * Associative array of deprecated tag name to HTMLPurifier_TagTransform.
     * @type array
     */
    public $info_tag_transform = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed before validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_pre = array();

    /**
     * Indexed list of HTMLPurifier_AttrTransform to be performed after validation.
     * @type HTMLPurifier_AttrTransform[]
     */
    public $info_attr_transform_post = array();

    /**
     * Nested lookup array of content set name (Block, Inline) to
     * element name to whether or not it belongs in that content set.
     * @type array
     */
    public $info_content_sets = array();

    /**
     * Indexed list of HTMLPurifier_Injector to be used.
     * @type HTMLPurifier_Injector[]
     */
    public $info_injector = array();

    /**
     * Doctype object
     * @type HTMLPurifier_Doctype
     */
    public $doctype;



    // RAW CUSTOMIZATION STUFF --------------------------------------------

    /**
     * Adds a custom attribute to a pre-existing element
     * @note This is strictly convenience, and does not have a corresponding
     *       method in HTMLPurifier_HTMLModule
     * @param string $element_name Element name to add attribute to
     * @param string $attr_name Name of attribute
     * @param mixed $def Attribute definition, can be string or object, see
     *             HTMLPurifier_AttrTypes for details
     */
    public function addAttribute($element_name, $attr_name, $def)
    {
        $module = $this->getAnonymousModule();
        // Reuse the element already registered on the anonymous module, or
        // register a blank one so there is something to hang the attribute on.
        if (!isset($module->info[$element_name])) {
            $element = $module->addBlankElement($element_name);
        } else {
            $element = $module->info[$element_name];
        }
        $element->attr[$attr_name] = $def;
    }

    /**
     * Adds a custom element to your HTML definition
     *
     * All arguments are forwarded unchanged to the anonymous module's
     * addElement(), and its return value is passed back to the caller.
     *
     * @param mixed $element_name
     * @param mixed $type
     * @param mixed $contents
     * @param mixed $attr_collections
     * @param mixed $attributes Defaults to an empty array
     * @return mixed Whatever HTMLPurifier_HTMLModule::addElement() returns
     * @see HTMLPurifier_HTMLModule::addElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array())
    {
        $module = $this->getAnonymousModule();
        // assume that if the user is calling this, the element
        // is safe. This may not be a good idea
        $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes);
        return $element;
    }

    /**
     * Adds a blank element to your HTML definition, for overriding
     * existing behavior
     * @param string $element_name
     * @return HTMLPurifier_ElementDef
     * @see HTMLPurifier_HTMLModule::addBlankElement() for detailed
     *       parameter and return value descriptions.
     */
    public function addBlankElement($element_name)
    {
        $module = $this->getAnonymousModule();
        $element = $module->addBlankElement($element_name);
        return $element;
    }

    /**
     * Retrieves a reference to the anonymous module, so you can
     * bust out advanced features without having to make your own
     * module. The module is created on first use and named 'Anonymous'.
     * @return HTMLPurifier_HTMLModule
     */
    public function getAnonymousModule()
    {
        if (!$this->_anonModule) {
            $this->_anonModule = new HTMLPurifier_HTMLModule();
            $this->_anonModule->name = 'Anonymous';
        }
        return $this->_anonModule;
    }

    /**
     * Lazily created module holding user customizations; handed over to
     * the manager (and then unset) in processModules().
     */
    private $_anonModule = null;

    // PUBLIC BUT INTERNAL VARIABLES --------------------------------------

    /**
     * @type string
     */
    public $type = 'HTML';

    /**
     * @type HTMLPurifier_HTMLModuleManager
     */
    public $manager;

    /**
     * Performs low-cost, preliminary initialization.
     */
    public function __construct()
    {
        $this->manager = new HTMLPurifier_HTMLModuleManager();
    }

    /**
     * Builds the definition: pulls data out of the module manager, applies
     * configuration, then drops the manager and per-element scratch fields.
     * @param HTMLPurifier_Config $config
     */
    protected function doSetup($config)
    {
        $this->processModules($config);
        $this->setupConfigStuff($config);
        unset($this->manager);

        // cleanup some of the element definitions
        foreach ($this->info as $k => $v) {
            unset($this->info[$k]->content_model);
            unset($this->info[$k]->content_model_type);
        }
    }

    /**
     * Extract out the information from the manager
     * @param HTMLPurifier_Config $config
     */
    protected function processModules($config)
    {
        if ($this->_anonModule) {
            // for user specific changes
            // this is late-loaded so we don't have to deal with PHP4
            // reference wonky-ness
            $this->manager->addModule($this->_anonModule);
            unset($this->_anonModule);
        }

        $this->manager->setup($config);
        $this->doctype = $this->manager->doctype;

        // Each of these module properties is merged into the property of
        // the same name on this object; a value of exactly false removes
        // an entry contributed by an earlier module.
        $merged_properties = array(
            'info_tag_transform',
            'info_attr_transform_pre',
            'info_attr_transform_post',
            'info_injector',
        );

        foreach ($this->manager->modules as $module) {
            foreach ($merged_properties as $property) {
                foreach ($module->$property as $k => $v) {
                    if ($v === false) {
                        unset($this->{$property}[$k]);
                    } else {
                        $this->{$property}[$k] = $v;
                    }
                }
            }
        }
        $this->info = $this->manager->getElements();
        $this->info_content_sets = $this->manager->contentSets->lookup;
    }

    /**
     * Sets up stuff based on config. We need a better way of doing this.
     *
     * Handles, in order: the block wrapper, the parent element, the
     * allowed element/attribute whitelists, the forbidden element/attribute
     * blacklists, and finally pruning of injectors that are not needed.
     *
     * @param HTMLPurifier_Config $config
     */
    protected function setupConfigStuff($config)
    {
        $block_wrapper = $config->get('HTML.BlockWrapper');
        if (isset($this->info_content_sets['Block'][$block_wrapper])) {
            $this->info_block_wrapper = $block_wrapper;
        } else {
            trigger_error(
                'Cannot use non-block element as block wrapper',
                E_USER_ERROR
            );
        }

        $parent = $config->get('HTML.Parent');
        $def = $this->manager->getElement($parent, true);
        if ($def) {
            $this->info_parent = $parent;
            $this->info_parent_def = $def;
        } else {
            trigger_error(
                'Cannot use unrecognized element as parent',
                E_USER_ERROR
            );
            // fall back to the definition of the default parent element
            $this->info_parent_def = $this->manager->getElement($this->info_parent, true);
        }

        // support template text
        $support = "(for information on implementing this, see the support forums) ";

        // setup allowed elements -----------------------------------------

        $allowed_elements = $config->get('HTML.AllowedElements');
        $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early

        // HTML.Allowed is only consulted when neither of the two more
        // specific directives produced an array.
        if (!is_array($allowed_elements) && !is_array($allowed_attributes)) {
            $allowed = $config->get('HTML.Allowed');
            if (is_string($allowed)) {
                list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed);
            }
        }

        if (is_array($allowed_elements)) {
            foreach ($this->info as $name => $d) {
                if (!isset($allowed_elements[$name])) {
                    unset($this->info[$name]);
                }
                // whatever is left in $allowed_elements afterwards was
                // requested but is not a known element
                unset($allowed_elements[$name]);
            }
            // emit errors
            foreach ($allowed_elements as $element => $d) {
                $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful!
                trigger_error("Element '$element' is not supported $support", E_USER_WARNING);
            }
        }

        // setup allowed attributes ---------------------------------------

        // Entries are removed from this copy as they are matched, so that
        // the leftovers can be reported as unsupported further down.
        $allowed_attributes_mutable = $allowed_attributes; // by copy!
        if (is_array($allowed_attributes)) {
            // This actually doesn't do anything, since we went away from
            // global attributes. It's possible that userland code uses
            // it, but HTMLModuleManager doesn't!
            foreach ($this->info_global_attr as $attr => $x) {
                $keys = array($attr, "*@$attr", "*.$attr");
                $delete = true;
                foreach ($keys as $key) {
                    if ($delete && isset($allowed_attributes[$key])) {
                        $delete = false;
                    }
                    if (isset($allowed_attributes_mutable[$key])) {
                        unset($allowed_attributes_mutable[$key]);
                    }
                }
                if ($delete) {
                    unset($this->info_global_attr[$attr]);
                }
            }

            foreach ($this->info as $tag => $info) {
                foreach ($info->attr as $attr => $x) {
                    $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr");
                    $delete = true;
                    foreach ($keys as $key) {
                        if ($delete && isset($allowed_attributes[$key])) {
                            $delete = false;
                        }
                        if (isset($allowed_attributes_mutable[$key])) {
                            unset($allowed_attributes_mutable[$key]);
                        }
                    }
                    if ($delete) {
                        if ($this->info[$tag]->attr[$attr]->required) {
                            trigger_error(
                                "Required attribute '$attr' in element '$tag' " .
                                "was not allowed, which means '$tag' will not be allowed either",
                                E_USER_WARNING
                            );
                        }
                        unset($this->info[$tag]->attr[$attr]);
                    }
                }
            }
            // emit errors
            foreach ($allowed_attributes_mutable as $elattr => $d) {
                $bits = preg_split('/[.@]/', $elattr, 2);
                $c = count($bits);
                switch ($c) {
                    case 2:
                        if ($bits[0] !== '*') {
                            $element = htmlspecialchars($bits[0]);
                            $attribute = htmlspecialchars($bits[1]);
                            if (!isset($this->info[$element])) {
                                // NOTE: no error level is passed here, so this
                                // one is raised at trigger_error()'s default
                                // level (E_USER_NOTICE), unlike its siblings.
                                trigger_error(
                                    "Cannot allow attribute '$attribute' if element " .
                                    "'$element' is not allowed/supported $support"
                                );
                            } else {
                                trigger_error(
                                    "Attribute '$attribute' in element '$element' not supported $support",
                                    E_USER_WARNING
                                );
                            }
                            break;
                        }
                        // otherwise fall through
                    case 1:
                        $attribute = htmlspecialchars($bits[0]);
                        trigger_error(
                            "Global attribute '$attribute' is not ".
                            "supported in any elements $support",
                            E_USER_WARNING
                        );
                        break;
                }
            }
        }

        // setup forbidden elements ---------------------------------------

        $forbidden_elements = $config->get('HTML.ForbiddenElements');
        $forbidden_attributes = $config->get('HTML.ForbiddenAttributes');

        foreach ($this->info as $tag => $info) {
            if (isset($forbidden_elements[$tag])) {
                unset($this->info[$tag]);
                continue;
            }
            foreach ($info->attr as $attr => $x) {
                if (isset($forbidden_attributes["$tag@$attr"]) ||
                    isset($forbidden_attributes["*@$attr"]) ||
                    isset($forbidden_attributes[$attr])
                ) {
                    unset($this->info[$tag]->attr[$attr]);
                    continue;
                } elseif (isset($forbidden_attributes["$tag.$attr"])) { // this segment might get removed eventually
                    // $tag.$attr are not user supplied, so no worries!
                    trigger_error(
                        "Error with $tag.$attr: tag.attr syntax not supported for " .
                        "HTML.ForbiddenAttributes; use tag@attr instead",
                        E_USER_WARNING
                    );
                }
            }
        }
        // warn about "*.attr" keys, which are not a supported way of
        // forbidding an attribute everywhere
        foreach ($forbidden_attributes as $key => $v) {
            if (strlen($key) < 2) {
                continue;
            }
            if ($key[0] != '*') {
                continue;
            }
            if ($key[1] == '.') {
                trigger_error(
                    "Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead",
                    E_USER_WARNING
                );
            }
        }

        // setup injectors -----------------------------------------------------
        foreach ($this->info_injector as $i => $injector) {
            if ($injector->checkNeeded($config) !== false) {
                // remove injector that does not have its required
                // elements/attributes present, and is thus not needed.
                unset($this->info_injector[$i]);
            }
        }
    }

    /**
     * Parses a TinyMCE-flavored Allowed Elements and Attributes list into
     * separate lists for processing.  Format is element[attr1|attr2],element2...
     *
     * Spaces and tabs are discarded; entries are separated by commas or
     * line breaks. An element named '*' is not added to the element list,
     * but its attributes are still recorded (as "*.attr").
     *
     * @warning Although it's largely drawn from TinyMCE's implementation,
     *      it is different, and you'll probably have to modify your lists
     * @param string $list String list to parse
     * @return array Two-item array: lookup of element name => true, and
     *      lookup of "element.attr" => true
     * @todo Give this its own class, probably static interface
     */
    public function parseTinyMCEAllowedList($list)
    {
        $list = str_replace(array(' ', "\t"), '', $list);

        $elements = array();
        $attributes = array();

        $chunks = preg_split('/(,|[\n\r]+)/', $list);
        foreach ($chunks as $chunk) {
            if (empty($chunk)) {
                continue;
            }
            // split "element[attrs]" into its element name and attribute
            // part; a chunk without '[' (or starting with it) is taken as a
            // bare element name
            if (!strpos($chunk, '[')) {
                $element = $chunk;
                $attr = false;
            } else {
                list($element, $attr) = explode('[', $chunk);
            }
            if ($element !== '*') {
                $elements[$element] = true;
            }
            if (!$attr) {
                continue;
            }
            // remove trailing ], but only when it is actually there, so an
            // unterminated list does not lose the last character of its
            // final attribute name
            if (substr($attr, -1) === ']') {
                $attr = substr($attr, 0, -1);
            }
            $attr = explode('|', $attr);
            foreach ($attr as $key) {
                $attributes["$element.$key"] = true;
            }
        }
        return array($elements, $attributes);
    }
}
492\r
493// vim: et sw=4 sts=4\r