diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-02-21 15:57:10 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-02-21 15:57:10 +0100 |
commit | 99679d06884120c57f43b44e55e03595f1f87bed (patch) | |
tree | a3f2a1aa1afdaeca1386d0c6e8a75344fd2241fb /inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php | |
parent | 655214ab30ee84884dc408488b85586f36263fcb (diff) | |
parent | d3b47e94705e17b3ba3529cbb1dc6efe69c5d2b7 (diff) | |
download | wallabag-1.5.2.tar.gz wallabag-1.5.2.tar.zst wallabag-1.5.2.zip |
Merge pull request #481 from wallabag/dev1.5.2
1.5.2
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php | 153 |
1 files changed, 153 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php new file mode 100644 index 00000000..432a4f9c --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/EntityParser.php | |||
@@ -0,0 +1,153 @@ | |||
1 | <?php | ||
2 | |||
3 | // if we want to implement error collecting here, we'll need to use some sort | ||
4 | // of global data (probably trigger_error) because it's impossible to pass | ||
5 | // $config or $context to the callback functions. | ||
6 | |||
/**
 * Handles referencing and dereferencing character entities.
 *
 * Entities are split into two groups: the "special" ones (quote, ampersand,
 * apostrophe, less-than, greater-than), which are significant to markup and
 * are only decoded by substituteSpecialEntities(), and every other entity,
 * which is decoded by substituteNonSpecialEntities().
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table. Populated lazily the first time a
     * named, non-special entity has to be resolved.
     * @type HTMLPurifier_EntityLookup
     */
    protected $_entity_lookup;

    /**
     * Callback regex string for parsing entities. The trailing semicolon is
     * optional, so unterminated entities are matched as well.
     * @type string
     */
    protected $_substituteEntitiesRegex =
        '/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
    //     1. hex             2. dec      3. string (XML style)

    /**
     * Decimal to parsed string conversion table for special entities.
     * @type array
     */
    protected $_special_dec2str =
        array(
            34 => '"',
            38 => '&',
            39 => "'",
            60 => '<',
            62 => '>'
        );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * Every value here is also a key of $_special_dec2str.
     * @type array
     */
    protected $_special_ent2dec =
        array(
            'quot' => 34,
            'amp' => 38,
            'lt' => 60,
            'gt' => 62
        );

    /**
     * Substitutes non-special entities with their parsed equivalents.
     * Special entities (see $_special_dec2str / $_special_ent2dec) are left
     * exactly as they appear in the input.
     *
     * @param string $string String to have non-special entities parsed.
     * @return string Parsed string.
     */
    public function substituteNonSpecialEntities($string)
    {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * @param array $matches PCRE matches array, with 0 the entire match, and
     *                       either index 1, 2 or 3 set with a hex value, dec value,
     *                       or string (respectively).
     * @return string Replacement string.
     */
    protected function nonSpecialEntityCallback($matches)
    {
        // replaces all but big five
        $entity = $matches[0];
        $is_num = isset($entity[1]) && $entity[1] === '#';
        if ($is_num) {
            // the regex only accepts a lowercase 'x' as the hex marker
            $is_hex = isset($entity[2]) && $entity[2] === 'x';
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            // abort for special characters
            if (isset($this->_special_dec2str[$code])) {
                return $entity;
            }
            return HTMLPurifier_Encoder::unichr($code);
        }

        $name = $matches[3];
        if (isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        if (!$this->_entity_lookup) {
            $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
        }
        if (isset($this->_entity_lookup->table[$name])) {
            return $this->_entity_lookup->table[$name];
        }
        // unknown entity name: leave the text untouched
        return $entity;
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @param string $string String to have special entities parsed.
     * @return string Parsed string.
     */
    public function substituteSpecialEntities($string)
    {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string
        );
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @param array $matches PCRE-style matches array, with 0 the entire match, and
     *                       either index 1, 2 or 3 set with a hex value, dec value,
     *                       or string (respectively).
     * @return string Replacement string.
     */
    protected function specialEntityCallback($matches)
    {
        $entity = $matches[0];
        $is_num = isset($entity[1]) && $entity[1] === '#';
        if ($is_num) {
            $is_hex = isset($entity[2]) && $entity[2] === 'x';
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        }

        $name = $matches[3];
        if (!isset($this->_special_ent2dec[$name])) {
            return $entity;
        }
        // Translate name -> code point -> character. Returning the code point
        // itself would inject its digits (e.g. "38" for &amp;) into the output.
        return $this->_special_dec2str[$this->_special_ent2dec[$name]];
    }
}
152 | |||
153 | // vim: et sw=4 sts=4 | ||