]>
Commit | Line | Data |
---|---|---|
d4949327 NL |
1 | <?php\r |
2 | \r | |
/**
 * HTML Purifier's internal representation of a URI.
 *
 * Holds the seven URI components as public properties, normalises them via
 * validate(), and reassembles them with toString().
 *
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Lower-cased scheme, or null when the URI has no scheme.
     * @type string|null
     */
    public $scheme;

    /**
     * User information part of the authority, or null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host; null means "no authority", '' means "empty authority".
     * @type string|null
     */
    public $host;

    /**
     * Port number, or null when absent.
     * @type int|null
     */
    public $port;

    /**
     * Path component; '' when the URI has no path.
     * @type string
     */
    public $path;

    /**
     * Query string without the leading '?', or null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment without the leading '#', or null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only pay for strtolower() when the scheme is not already lower-case.
        $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // A null port means "not specified" and must stay distinguishable from 0.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false when no usable scheme object
     *         could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The grouping is explicit: there must be a scheme to drop, and the
        // host must be either absent or empty. Without the parentheses the
        // `&&` binds tighter than `||`, which triggered a pointless
        // definition lookup for scheme-less URIs with an empty host.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        // The constructor accepts a null path; treat it as the empty path so
        // the offset checks below never index into null.
        if (is_null($this->path)) {
            $this->path = '';
        }
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // A leading '//' without a host would be re-parsed as an
                    // authority, so the path is discarded entirely.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment is encoded with a set that omits ':',
                // so it cannot be mistaken for a scheme.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // conservative approach
            return false;
        }

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getSchemeObj() treats getDefaultScheme() as able to fail, so check
        // its result here too before reading ->secure. The URI is rejected
        // only when the default scheme is known to be secure and this URI's
        // scheme is not.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
313 | \r | |
314 | // vim: et sw=4 sts=4\r |