]> git.immae.eu Git - github/wallabag/wallabag.git/blame - inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php
remove autoload section in composer.json
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / URI.php
CommitLineData
d4949327
NL
1<?php\r
2\r
/**
 * HTML Purifier's internal representation of a URI.
 *
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component; lower-cased by the constructor, null when absent.
     * @type string
     */
    public $scheme;

    /**
     * Userinfo component (the part before "@" in the authority); null when
     * absent. Percent-encoded by validate().
     * @type string
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority at all", whereas an empty
     * string means "empty authority"; toString() renders the two differently.
     * @type string
     */
    public $host;

    /**
     * Port component; cast to int by the constructor, null when absent.
     * validate() resets it to null when it is outside 1..65535.
     * @type int
     */
    public $port;

    /**
     * Path component. validate() percent-encodes it and normalizes an
     * empty path to ''.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading "?"); null when absent.
     * @type string
     */
    public $query;

    /**
     * Fragment component (without the leading "#"); null when absent.
     * @type string
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lower-case.
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // Keep null as "no port"; anything else is coerced to an integer.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false when no usable scheme object
     *         could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (this generic implementation always returns true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                // a rejected host is treated as "no host"
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The grouping is spelled out explicitly: "has a scheme AND
        // (host is missing OR host is empty)". Relying on && binding
        // tighter than || made the block run for an empty host with no
        // scheme, where there is nothing to drop.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // without a host, a leading "//" would be re-read
                    // as the start of an authority, so drop the path.
                    // http://my/path
                    // //my/path
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded without ':' in the allowed
                // set, so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // The default scheme object may be unavailable. Check it before
        // reading ->secure so that no property is read from a non-object;
        // an unavailable default scheme is not treated as secure, so no
        // security downgrade is reported (same outcome as before, minus
        // the notice/warning).
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
313\r
// vim: et sw=4 sts=4