]>
Commit | Line | Data |
---|---|---|
1 | <?php\r | |
2 | \r | |
3 | // If we want to implement error collecting here, we'll need to use some sort\r | |
4 | // of global data (probably trigger_error) because it's impossible to pass\r | |
5 | // $config or $context to the callback functions.\r | |
6 | \r | |
7 | /**\r | |
8 | * Handles referencing and dereferencing character entities\r | |
9 | */\r | |
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table. Lazily populated the first time a
     * named, non-special entity has to be resolved.
     * @type HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities. The trailing semicolon is
     * optional, so unterminated entities are matched as well.
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
        //     1. hex             2. dec      3. string (XML style)

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str =
        array(
            34 => '"',
            38 => '&',
            39 => "'",
            60 => '<',
            62 => '>'
        );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * @type array
     */
    protected $_special_ent2dec =
        array(
            'quot' => 34,
            'amp' => 38,
            'lt' => 60,
            'gt' => 62
        );

    /**
     * Substitutes non-special entities with their parsed equivalents. Doing
     * this for every parsed character section would be wasteful, so it is
     * meant to be run once, before everything else.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * Entities listed in the special tables are returned untouched; numeric
     * entities are converted with HTMLPurifier_Encoder::unichr(); named
     * entities are resolved through the entity lookup table and left as-is
     * when the name is unknown.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                       either index 1, 2 or 3 set with a hex value,
     *                       dec value, or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        // replaces all but big five
        $entity = $matches[0];
        // Every match is "&" followed by at least one more character, and a
        // numeric match is at least "&#" plus one digit, so offsets 1 and 2
        // always exist here and need no error suppression.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            // abort for special characters
            if (isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        return $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches PCRE-style matches array, with 0 the entire match,
     *                       and either index 1, 2 or 3 set with a hex value,
     *                       dec value, or string (respectively).
     * @return string Replacement string: the literal character for a special
     *                entity, otherwise the original entity text.
     */
    protected function specialEntityCallback($matches)
    {
        $entity = $matches[0];
        // See nonSpecialEntityCallback(): offsets 1 and 2 are guaranteed by
        // the regex for the branches that read them.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        }

        $name = $matches[3];
        if (!isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        // _special_ent2dec only yields the decimal code point; map it through
        // _special_dec2str so the caller receives the character itself
        // (e.g. '"') rather than the number (e.g. 34).
        return $this->_special_dec2str[$this->_special_ent2dec[$name]];
    }
}
152 | \r | |
153 | // vim: et sw=4 sts=4\r |