diff options
author | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-02-21 15:57:10 +0100 |
---|---|---|
committer | Nicolas Lœuillet <nicolas@loeuillet.org> | 2014-02-21 15:57:10 +0100 |
commit | 99679d06884120c57f43b44e55e03595f1f87bed (patch) | |
tree | a3f2a1aa1afdaeca1386d0c6e8a75344fd2241fb /inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php | |
parent | 655214ab30ee84884dc408488b85586f36263fcb (diff) | |
parent | d3b47e94705e17b3ba3529cbb1dc6efe69c5d2b7 (diff) | |
download | wallabag-99679d06884120c57f43b44e55e03595f1f87bed.tar.gz wallabag-99679d06884120c57f43b44e55e03595f1f87bed.tar.zst wallabag-99679d06884120c57f43b44e55e03595f1f87bed.zip |
Merge pull request #481 from wallabag/dev1.5.2
1.5.2
Diffstat (limited to 'inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php')
-rw-r--r-- | inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php b/inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php new file mode 100644 index 00000000..c4256b95 --- /dev/null +++ b/inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php | |||
@@ -0,0 +1,314 @@ | |||
1 | <?php | ||
2 | |||
/**
 * HTML Purifier's internal representation of a URI.
 *
 * The object is a plain bag of the seven URI components; a component that is
 * absent from the URI is stored as null (toString() relies on this to decide
 * which delimiters to emit).
 *
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component; lower-cased by the constructor, null when absent.
     * @type string
     */
    public $scheme;

    /**
     * Userinfo component (the part before "@" in the authority), null when absent.
     * @type string
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority"; an empty string means an
     * empty authority (rendered as "//" by toString()).
     * @type string
     */
    public $host;

    /**
     * Port component; cast to int by the constructor, null when absent.
     * @type int
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"), null when absent.
     * @type string
     */
    public $query;

    /**
     * Fragment component (without the leading "#"), null when absent.
     * @type string
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lower-case;
        // the is_null() test short-circuits so ctype_lower() never sees null.
        $this->scheme = (is_null($scheme) || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // Keep null as "no port"; anything else is coerced to an integer.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false when the URI's scheme is not
     *         available from the registry or the default scheme could not
     *         be loaded (the latter also raises an E_USER_WARNING).
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     *
     * Invalid components are repaired in place rather than rejected: a host
     * that fails validation and a port outside 1..65535 are reset to null,
     * and userinfo, path, query and fragment are passed through a percent
     * encoder.
     *
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (this implementation has no failure path and always returns true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is parenthesised explicitly: "&&" binds tighter than
        // "||", so without the parentheses an empty host with no scheme would
        // enter this branch and perform a needless definition lookup.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out,
                    // leaving a path that would be re-read as an authority:
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded with a set that omits ':' so
                // it cannot be mistaken for a scheme delimiter.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            // No authority at all: the configuration is not consulted.
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        return $uri_def->host === $this->host;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     *        (a non-secure scheme is rejected when the default scheme is secure)
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // conservative approach
            return false;
        }

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() can come back empty (getSchemeObj() handles the
        // same case), so check it before reading ->secure. An unavailable
        // default scheme is treated as "not secure", which is what the
        // unguarded property read used to evaluate to, minus the warning.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
313 | |||
314 | // vim: et sw=4 sts=4 | ||