]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php
[add] HTML Purifier added to clean code
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / EntityParser.php
1 <?php
2
3 // If we want to implement error collecting here, we'll need to use some sort
4 // of global data (probably trigger_error), because it's impossible to pass
5 // $config or $context to the callback functions.
6
/**
 * Handles referencing and dereferencing character entities.
 *
 * Entity substitution is split into two passes that share one regex:
 *  - substituteNonSpecialEntities() resolves every entity except the ones
 *    that stand for the markup-significant characters " & ' < >
 *  - substituteSpecialEntities() resolves only those markup-significant
 *    entities and leaves everything else untouched.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table. Loaded lazily the first time a
     * named, non-special entity is encountered.
     * @type HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities. The trailing semicolon is
     * optional, so unterminated entities are matched as well.
     *
     * Capture groups: 1. hex digits  2. decimal digits (leading zeros
     * stripped)  3. entity name (XML style)
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str =
        array(
            34 => '"',
            38 => '&',
            39 => "'",
            60 => '<',
            62 => '>'
        );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * Note that there is no named entry for code point 39 (the apostrophe);
     * it is only treated as special in its numeric forms.
     * @type array
     */
    protected $_special_ent2dec =
        array(
            'quot' => 34,
            'amp' => 38,
            'lt' => 60,
            'gt' => 62
        );

    /**
     * Substitutes non-special entities with their parsed equivalents. Doing
     * this for every parsed chunk would be wasteful, so it is meant to be
     * run once, before everything else.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * Numeric references are converted with HTMLPurifier_Encoder::unichr()
     * and named references through the entity lookup table. Anything that
     * denotes a special character, and any name missing from the lookup
     * table, is returned unchanged.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        // replaces all but big five
        $entity = $matches[0];
        // Every alternative of the regex consumes at least one character
        // after '&', so offset 1 always exists.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            // A numeric match is at least "&#" plus one digit or 'x', so
            // offset 2 always exists here.
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            // abort for special characters
            if (isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        } else {
            if (isset($this->_special_ent2dec[$matches[3]])) {
                return $entity;
            }
            if (!$this->_entity_lookup) {
                $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
            }
            if (isset($this->_entity_lookup->table[$matches[3]])) {
                return $this->_entity_lookup->table[$matches[3]];
            } else {
                return $entity;
            }
        }
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches PCRE-style matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @return string Replacement string: the literal character for a special
     *                  entity, otherwise the original entity text.
     */
    protected function specialEntityCallback($matches)
    {
        $entity = $matches[0];
        // See nonSpecialEntityCallback(): the regex guarantees these offsets.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        } else {
            $name = $matches[3];
            if (!isset($this->_special_ent2dec[$name])) {
                return $entity;
            }
            // Map name -> code point -> character. Returning the code point
            // itself would turn "&amp;" into the text "38".
            return $this->_special_dec2str[$this->_special_ent2dec[$name]];
        }
    }
}
152
153 // vim: et sw=4 sts=4