]> git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/htmlpurifier/HTMLPurifier/URI.php
[add] HTML Purifier added to clean code
[github/wallabag/wallabag.git] / inc / 3rdparty / htmlpurifier / HTMLPurifier / URI.php
1 <?php
2
/**
 * HTML Purifier's internal representation of a URI.
 * @note
 *      Internal data-structures are completely escaped. If the data needs
 *      to be used in a non-URI context (which is very unlikely), be sure
 *      to decode it first. The URI may not necessarily be well-formed until
 *      validate() is called.
 */
class HTMLPurifier_URI
{
    /**
     * Scheme component, lowercased by the constructor; null when absent.
     * @type string|null
     */
    public $scheme;

    /**
     * Userinfo component; null when absent.
     * @type string|null
     */
    public $userinfo;

    /**
     * Host component; null when absent (validate() also resets it to null
     * when host validation fails).
     * @type string|null
     */
    public $host;

    /**
     * Port, cast to int by the constructor; null when absent (validate()
     * also resets it to null when it is outside 1..65535).
     * @type int|null
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading '?'); null when absent.
     * @type string|null
     */
    public $query;

    /**
     * Fragment component (without the leading '#'); null when absent.
     * @type string|null
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // Only call strtolower() when the scheme is neither null nor
        // already all-lowercase.
        $this->scheme = (is_null($scheme) || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // A null port means "no port given" and must stay null, not become 0.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false if none could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();
        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                return false;
            } // invalid scheme, clean it out
        } else {
            // no scheme: retrieve the default one
            $def = $config->getDefinition('URI');
            $scheme_obj = $def->getDefaultScheme($config, $context);
            if (!$scheme_obj) {
                // something funky happened to the default scheme object
                trigger_error(
                    'Default scheme object "' . $def->defaultScheme . '" was not readable',
                    E_USER_WARNING
                );
                return false;
            }
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (as written, every path through this method returns true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986. These character sets are handed to
        // HTMLPurifier_PercentEncoder below (presumably as the characters it
        // leaves unescaped -- confirm against the encoder).
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is parenthesized explicitly: '&&' binds tighter
        // than '||', so without the parentheses an empty host would enter
        // this branch even when there is no scheme to drop.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is discarded
        if (!is_null($this->port)) {
            if ($this->port < 1 || $this->port > 65535) {
                $this->port = null;
            }
        }

        // validate path
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            $this->path = $segments_encoder->encode($this->path);
        } elseif ($this->path !== '') {
            if ($this->path[0] === '/') {
                // path-absolute (hier and relative)
                // http:/my/path
                // /my/path
                if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                    // This could happen if the host gets stripped out:
                    // http://my/path
                    // //my/path
                    // With no host, a leading '//' would be re-read as an
                    // authority, so the path is dropped entirely.
                    $this->path = '';
                } else {
                    $this->path = $segments_encoder->encode($this->path);
                }
            } elseif (!is_null($this->scheme)) {
                // path-rootless (hier)
                // http:my/path
                // Short circuit evaluation means we don't need to check nz
                $this->path = $segments_encoder->encode($this->path);
            } else {
                // path-noscheme (relative)
                // my/path
                // (once again, not checking nz)
                // The first segment gets its own encoder whose character
                // set omits ':' (and '/'); the remainder, from the first
                // '/' onwards, uses the regular segments encoder.
                $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
                $c = strpos($this->path, '/');
                if ($c !== false) {
                    $this->path =
                        $segment_nc_encoder->encode(substr($this->path, 0, $c)) .
                        $segments_encoder->encode(substr($this->path, $c));
                } else {
                    $this->path = $segment_nc_encoder->encode($this->path);
                }
            }
        } else {
            // path-empty (hier and relative)
            $this->path = ''; // just to be safe
        }

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        if ($uri_def->host === $this->host) {
            return true;
        }
        return false;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has a equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            return false;
        } // conservative approach

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() may hand back a falsy value (getSchemeObj()
        // handles that case too). Guard before reading ->secure so that an
        // unreadable default scheme does not raise a "property of
        // non-object" notice/warning; the outcome (no security downgrade
        // detected) is the same as before.
        if ($current_scheme_obj && $current_scheme_obj->secure) {
            if (!$scheme_obj->secure) {
                return false;
            }
        }
        return true;
    }
}
313
314 // vim: et sw=4 sts=4