]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php
remove autoload section in composer.json
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / EntityParser.php
CommitLineData
d4949327
NL
1<?php\r
2\r
3// If we want to implement error collection here, we'll need to use some sort\r
4// of global data (probably trigger_error) because it's impossible to pass\r
5// $config or $context to the callback functions.\r
6\r
/**
 * Handles referencing and dereferencing character entities.
 *
 * Two passes are offered: one that expands every entity except the
 * "special" ones (quote, ampersand, apostrophe, less-than, greater-than),
 * and one that expands only those special ones.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table. Populated lazily the first time a
     * named, non-special entity has to be resolved.
     * @type HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities. The trailing semicolon is
     * optional in the pattern.
     *
     * Capture groups: 1. hex digits  2. decimal digits (leading zeros
     * stripped)  3. entity name (XML style)
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str =
        array(
            34 => '"',
            38 => '&',
            39 => "'",
            60 => '<',
            62 => '>'
        );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * @type array
     */
    protected $_special_ent2dec =
        array(
            'quot' => 34,
            'amp' => 38,
            'lt' => 60,
            'gt' => 62
        );

    /**
     * Substitutes non-special entities with their parsed equivalents.
     * Running this on every parsed text fragment would be expensive, so it
     * is meant to be run once, before everything else.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        // replaces all but big five
        $entity = $matches[0];
        $is_num = isset($entity[1]) && $entity[1] === '#';
        if ($is_num) {
            $is_hex = isset($entity[2]) && $entity[2] === 'x';
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            // abort for special characters: they are left as written
            if (isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        // special named entities are left as written
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        // unknown names are passed through untouched
        return isset($this->_entity_lookup->table[$name]) ?
            $this->_entity_lookup->table[$name] :
            $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches PCRE-style matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string.
     */
    protected function specialEntityCallback($matches)
    {
        $entity = $matches[0];
        $is_num = isset($entity[1]) && $entity[1] === '#';
        if ($is_num) {
            $is_hex = isset($entity[2]) && $entity[2] === 'x';
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        }

        $name = $matches[3];
        if (!isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        // _special_ent2dec only yields the code point; translate it to the
        // actual character so that e.g. "&quot;" becomes '"' and not "34".
        return $this->_special_dec2str[$this->_special_ent2dec[$name]];
    }
}
152\r
153// vim: et sw=4 sts=4\r