diff options
author | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
---|---|---|
committer | Nicolas Lœuillet <nicolas.loeuillet@gmail.com> | 2013-12-23 02:28:56 -0800 |
commit | 60fc4f4b1ab37fbfe9021f3fa1395d66a4424ed2 (patch) | |
tree | 515c4b9c8286ae363f77722c91acb878151dc386 /inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php | |
parent | cbfd5a1019f47fadefd8490dae9f039ae894298d (diff) | |
parent | da5fc42f615eeb45a702604970f94967507fb432 (diff) | |
download | wallabag-1.3.0.tar.gz wallabag-1.3.0.tar.zst wallabag-1.3.0.zip |
Merge pull request #363 from inthepoche/dev1.3.0
poche 1.3.0
Diffstat (limited to 'inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php')
-rw-r--r-- | inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php | 1238 |
1 files changed, 1238 insertions, 0 deletions
diff --git a/inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php b/inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php new file mode 100644 index 00000000..d3198c04 --- /dev/null +++ b/inc/3rdparty/libraries/simplepie/library/SimplePie/IRI.php | |||
@@ -0,0 +1,1238 @@ | |||
1 | <?php | ||
2 | /** | ||
3 | * SimplePie | ||
4 | * | ||
5 | * A PHP-Based RSS and Atom Feed Framework. | ||
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | ||
7 | * | ||
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | ||
9 | * All rights reserved. | ||
10 | * | ||
11 | * Redistribution and use in source and binary forms, with or without modification, are | ||
12 | * permitted provided that the following conditions are met: | ||
13 | * | ||
14 | * * Redistributions of source code must retain the above copyright notice, this list of | ||
15 | * conditions and the following disclaimer. | ||
16 | * | ||
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | ||
18 | * of conditions and the following disclaimer in the documentation and/or other materials | ||
19 | * provided with the distribution. | ||
20 | * | ||
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | ||
22 | * to endorse or promote products derived from this software without specific prior | ||
23 | * written permission. | ||
24 | * | ||
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | ||
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | ||
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | ||
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | ||
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | ||
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | ||
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | ||
33 | * POSSIBILITY OF SUCH DAMAGE. | ||
34 | * | ||
35 | * @package SimplePie | ||
36 | * @version 1.3.1 | ||
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | ||
38 | * @author Ryan Parman | ||
39 | * @author Geoffrey Sneddon | ||
40 | * @author Ryan McCue | ||
41 | * @link http://simplepie.org/ SimplePie | ||
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | ||
43 | */ | ||
44 | |||
45 | /** | ||
46 | * IRI parser/serialiser/normaliser | ||
47 | * | ||
48 | * @package SimplePie | ||
49 | * @subpackage HTTP | ||
50 | * @author Geoffrey Sneddon | ||
51 | * @author Steve Minutillo | ||
52 | * @author Ryan McCue | ||
53 | * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue | ||
54 | * @license http://www.opensource.org/licenses/bsd-license.php | ||
55 | */ | ||
56 | class SimplePie_IRI | ||
57 | { | ||
/**
 * Scheme component, stored lower-cased by set_scheme(); null when absent
 *
 * @var string|null
 */
protected $scheme = null;

/**
 * User information part of the authority; null when absent
 *
 * @var string|null
 */
protected $iuserinfo = null;

/**
 * Host part of the authority; null when absent
 *
 * @var string|null
 */
protected $ihost = null;

/**
 * Port part of the authority; null when absent
 *
 * NOTE(review): scheme_normalization() compares this strictly against the
 * integer defaults below, so it is presumably stored as an int - confirm
 * against set_port().
 *
 * @var int|string|null
 */
protected $port = null;

/**
 * Path component; the empty string when absent (never null)
 *
 * @var string
 */
protected $ipath = '';

/**
 * Query component; null when absent
 *
 * @var string|null
 */
protected $iquery = null;

/**
 * Fragment component; null when absent
 *
 * @var string|null
 */
protected $ifragment = null;

/**
 * Normalization database
 *
 * Keyed by scheme. Each entry maps an IRI part (property name) to the
 * default value of that part for the scheme; a part equal to its default
 * is dropped by scheme_normalization() and re-supplied by __get().
 *
 * @var array
 */
protected $normalization = array(
	'acap'  => array('port' => 674),
	'dict'  => array('port' => 2628),
	'file'  => array('ihost' => 'localhost'),
	'http'  => array('port' => 80, 'ipath' => '/'),
	'https' => array('port' => 443, 'ipath' => '/'),
);
132 | |||
/**
 * String conversion: yields whatever get_iri() produces for this object
 *
 * @return string
 */
public function __toString()
{
	$serialised = $this->get_iri();

	return $serialised;
}
142 | |||
/**
 * Route property writes through the matching set_*() method
 *
 * A write to $name calls set_$name() when that method exists. Otherwise the
 * "i"-prefixed component names (ihost, ipath, ...) are mapped onto the
 * unprefixed setter (set_host(), set_path(), ...). Any other name is
 * silently ignored.
 *
 * @param string $name Property name
 * @param mixed $value Property value
 */
public function __set($name, $value)
{
	$setter = 'set_' . $name;
	if (method_exists($this, $setter))
	{
		$this->$setter($value);
		return;
	}

	$prefixed = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
	if (in_array($name, $prefixed, true))
	{
		// Drop the leading "i" to find the setter: ihost -> set_host
		$setter = 'set_' . substr($name, 1);
		$this->$setter($value);
	}
}
167 | |||
/**
 * Route property reads to getters, stored components, or scheme defaults
 *
 * Lookup order: the computed values (iri, uri, iauthority, authority) via
 * their get_*() method; then a property of that exact name; then the name
 * with an "i" prepended (host -> ihost); then the name with its first
 * character removed (ischeme -> scheme). An unknown name raises an
 * E_USER_NOTICE and reads as null.
 *
 * When the value found is null and the normalization table has a default
 * for the current scheme and the resolved name, that default is returned.
 *
 * @param string $name Property name
 * @return mixed
 */
public function __get($name)
{
	// array_key_exists() on the property map is used rather than isset()
	// because isset() reports false for properties that hold null.
	$props = get_object_vars($this);
	$computed = array('iri', 'uri', 'iauthority', 'authority');

	if (in_array($name, $computed, true))
	{
		$getter = 'get_' . $name;
		$return = $this->$getter();
	}
	elseif (array_key_exists($name, $props))
	{
		$return = $this->$name;
	}
	// host -> ihost
	elseif (array_key_exists('i' . $name, $props))
	{
		$name = 'i' . $name;
		$return = $this->$name;
	}
	// ischeme -> scheme
	elseif (($stripped = substr($name, 1)) && array_key_exists($stripped, $props))
	{
		$name = $stripped;
		$return = $this->$name;
	}
	else
	{
		trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
		$return = null;
	}

	$use_default = $return === null && isset($this->normalization[$this->scheme][$name]);

	return $use_default ? $this->normalization[$this->scheme][$name] : $return;
}
220 | |||
/**
 * Report whether a virtual property is available
 *
 * True when a get_$name() method exists or when the property of that name
 * is set to a non-null value.
 *
 * @param string $name Property name
 * @return bool
 */
public function __isset($name)
{
	return method_exists($this, 'get_' . $name) || isset($this->$name);
}
238 | |||
/**
 * Route unset() through the matching set_*() method
 *
 * Calls set_$name('') when such a setter exists; does nothing otherwise.
 *
 * @param string $name Property name
 */
public function __unset($name)
{
	$setter = 'set_' . $name;
	if (method_exists($this, $setter))
	{
		$this->$setter('');
	}
}
251 | |||
/**
 * Build an IRI object from a string
 *
 * The string is handed to set_iri(); its boolean result is discarded here,
 * so use is_valid() or call set_iri() directly to detect bad input.
 *
 * @param string|null $iri IRI to parse, or null for an empty object
 */
public function __construct($iri = null)
{
	$this->set_iri($iri);
}
261 | |||
/**
 * Create a new IRI object by resolving a relative IRI against a base
 *
 * Returns false when $relative is not valid, or when $relative has no
 * scheme and $base either has no scheme or is not valid. A $relative that
 * already carries a scheme is returned as a clone, untouched by $base.
 *
 * @param SimplePie_IRI|string $base (Absolute) Base IRI
 * @param SimplePie_IRI|string $relative Relative IRI
 * @return SimplePie_IRI|false
 */
public static function absolutize($base, $relative)
{
	if (!($relative instanceof SimplePie_IRI))
	{
		$relative = new SimplePie_IRI($relative);
	}
	if (!$relative->is_valid())
	{
		return false;
	}
	if ($relative->scheme !== null)
	{
		// Already absolute: the base plays no part
		return clone $relative;
	}

	if (!($base instanceof SimplePie_IRI))
	{
		$base = new SimplePie_IRI($base);
	}
	if ($base->scheme === null || !$base->is_valid())
	{
		return false;
	}

	if ($relative->get_iri() === '')
	{
		// Empty reference: the base itself, minus its fragment
		$target = clone $base;
		$target->ifragment = null;
	}
	elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
	{
		// Network-path reference: only the scheme comes from the base
		$target = clone $relative;
		$target->scheme = $base->scheme;
	}
	else
	{
		$target = new SimplePie_IRI;
		$target->scheme = $base->scheme;
		$target->iuserinfo = $base->iuserinfo;
		$target->ihost = $base->ihost;
		$target->port = $base->port;

		if ($relative->ipath === '')
		{
			// Query-only or fragment-only reference: keep the base path
			$target->ipath = $base->ipath;
			if ($relative->iquery !== null)
			{
				$target->iquery = $relative->iquery;
			}
			elseif ($base->iquery !== null)
			{
				$target->iquery = $base->iquery;
			}
		}
		else
		{
			$base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;

			if ($relative->ipath[0] === '/')
			{
				$merged = $relative->ipath;
			}
			elseif ($base_has_authority && $base->ipath === '')
			{
				$merged = '/' . $relative->ipath;
			}
			elseif (($last_slash = strrpos($base->ipath, '/')) !== false)
			{
				// Replace everything after the last "/" of the base path
				$merged = substr($base->ipath, 0, $last_slash + 1) . $relative->ipath;
			}
			else
			{
				$merged = $relative->ipath;
			}

			$target->ipath = $target->remove_dot_segments($merged);
			$target->iquery = $relative->iquery;
		}

		$target->ifragment = $relative->ifragment;
	}

	$target->scheme_normalization();
	return $target;
}
357 | |||
/**
 * Split an IRI into scheme/authority/path/query/fragment segments
 *
 * Surrounding space, tab, LF, FF and CR characters are trimmed first. In the
 * returned array the keys 'scheme', 'authority', 'query' and 'fragment' are
 * null when the component (including its delimiter) is absent, and 'path'
 * is always a string. The numeric PCRE capture entries are left in place.
 *
 * @param string $iri
 * @return array|false Match array, or false when the pattern does not match
 */
protected function parse_iri($iri)
{
	$iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
	$pattern = '/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/';

	if (!preg_match($pattern, $iri, $match))
	{
		// This can occur when a paragraph is accidentally parsed as a URI
		return false;
	}

	// Groups 1, 3, 6 and 8 include the delimiter, so an empty or missing
	// group means the whole component is absent (as opposed to empty).
	if ($match[1] === '')
	{
		$match['scheme'] = null;
	}
	if (!isset($match[3]) || $match[3] === '')
	{
		$match['authority'] = null;
	}
	if (!isset($match[5]))
	{
		$match['path'] = '';
	}
	if (!isset($match[6]) || $match[6] === '')
	{
		$match['query'] = null;
	}
	if (!isset($match[8]) || $match[8] === '')
	{
		$match['fragment'] = null;
	}

	return $match;
}
397 | |||
/**
 * Remove "." and ".." segments from a path
 *
 * Works through the steps A-E described in the inline comments, moving
 * segments from the input buffer to a result buffer until no dot segment
 * is left in the input.
 *
 * @param string $input
 * @return string
 */
protected function remove_dot_segments($input)
{
	$result = '';

	while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
	{
		// A: a leading "../" or "./" is simply dropped
		if (strpos($input, '../') === 0 || strpos($input, './') === 0)
		{
			$input = substr($input, strpos($input, '../') === 0 ? 3 : 2);
		}
		// B: a leading "/./", or an input of exactly "/.", collapses to "/"
		elseif (strpos($input, '/./') === 0 || $input === '/.')
		{
			$input = ($input === '/.') ? '/' : substr($input, 2);
		}
		// C: a leading "/../", or an input of exactly "/..", collapses to
		// "/" and the last segment already emitted (with its "/") is removed
		elseif (strpos($input, '/../') === 0 || $input === '/..')
		{
			$input = ($input === '/..') ? '/' : substr($input, 3);
			$result = substr_replace($result, '', strrpos($result, '/'));
		}
		// D: an input of only "." or ".." is discarded
		elseif ($input === '.' || $input === '..')
		{
			$input = '';
		}
		// E: move the first segment (with its leading "/", if any) across,
		// stopping before the next "/" or at the end of the input
		elseif (($next_slash = strpos($input, '/', 1)) !== false)
		{
			$result .= substr($input, 0, $next_slash);
			$input = substr_replace($input, '', 0, $next_slash);
		}
		else
		{
			$result .= $input;
			$input = '';
		}
	}

	return $result . $input;
}
457 | |||
/**
 * Percent-encode every byte that is not allowed in the given component
 *
 * Existing pct-encoded runs are normalized first (through
 * remove_iunreserved_percent_encoded()) and stray "%" signs become "%25".
 * The string is then scanned as UTF-8: bytes listed in $extra_chars or in
 * the ASCII unreserved set are kept, well-formed multi-byte characters in
 * the accepted codepoint ranges are kept, and everything else is replaced
 * byte by byte with "%XX".
 *
 * @param string $string Input string
 * @param string $extra_chars Valid characters not in iunreserved or
 *                            iprivate (this is ASCII-only)
 * @param bool $iprivate Also keep codepoints U+E000 through U+10FFFD
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
{
	// Normalize as many pct-encoded sections as possible
	$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

	// A "%" that does not introduce two hex digits is itself encoded
	$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

	// Unreserved ASCII and "%" are always allowed from here on (every
	// remaining "%" now starts a valid pct-encoded triplet).
	$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

	$pos = 0;
	$total = strlen($string);

	// Each pass skips the run of allowed bytes and lands on the next suspect
	while (($pos += strspn($string, $extra_chars, $pos)) < $total)
	{
		$byte = ord($string[$pos]);
		$seq_start = $pos;
		$well_formed = true;

		// Classify the lead byte. A one-byte sequence cannot get here as a
		// valid character: the strspn() above already skipped those.
		if (($byte & 0xE0) === 0xC0)
		{
			// Two byte sequence
			$codepoint = ($byte & 0x1F) << 6;
			$seq_len = 2;
			$pending = 1;
		}
		elseif (($byte & 0xF0) === 0xE0)
		{
			// Three byte sequence
			$codepoint = ($byte & 0x0F) << 12;
			$seq_len = 3;
			$pending = 2;
		}
		elseif (($byte & 0xF8) === 0xF0)
		{
			// Four byte sequence
			$codepoint = ($byte & 0x07) << 18;
			$seq_len = 4;
			$pending = 3;
		}
		else
		{
			// Not a usable lead byte
			$well_formed = false;
			$seq_len = 1;
			$pending = 0;
		}

		if ($pending)
		{
			if ($pos + $seq_len <= $total)
			{
				for ($pos++; $pending; $pos++)
				{
					$byte = ord($string[$pos]);

					if (($byte & 0xC0) === 0x80)
					{
						// Continuation byte: fold its six payload bits in
						$codepoint |= ($byte & 0x3F) << (--$pending * 6);
					}
					else
					{
						// Broken sequence: step back so that this byte is
						// examined again as the start of a new sequence
						$well_formed = false;
						$pos--;
						break;
					}
				}
			}
			else
			{
				// Sequence is cut off by the end of the string
				$pos = $total - 1;
				$well_formed = false;
			}
		}

		// Percent encode anything invalid or not in ucschar
		$must_encode =
			// Invalid sequences
			!$well_formed
			// Non-shortest form sequences are invalid
			|| $seq_len > 1 && $codepoint <= 0x7F
			|| $seq_len > 2 && $codepoint <= 0x7FF
			|| $seq_len > 3 && $codepoint <= 0xFFFF
			// Noncharacters
			|| ($codepoint & 0xFFFE) === 0xFFFE
			|| $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF
			|| (
				// Everything else not in ucschar
				$codepoint > 0xD7FF && $codepoint < 0xF900
				|| $codepoint < 0xA0
				|| $codepoint > 0xEFFFD
			)
			&& (
				// Everything not in iprivate, if it applies
				!$iprivate
				|| $codepoint < 0xE000
				|| $codepoint > 0x10FFFD
			);

		if ($must_encode)
		{
			// After a complete character $pos is one past its last byte;
			// pull it back so the loop below covers exactly that character.
			if ($well_formed)
			{
				$pos--;
			}

			// Each byte grows into three ("%XX"), so every index moves by 2
			for ($k = $seq_start; $k <= $pos; $k++)
			{
				$string = substr_replace($string, sprintf('%%%02X', ord($string[$k])), $k, 1);
				$k += 2;
				$pos += 2;
				$total += 2;
			}
		}
	}

	return $string;
}
593 | |||
/**
 * Callback function for preg_replace_callback.
 *
 * Receives one run of consecutive pct-encoded bytes, reads it as UTF-8, and
 * decodes every character that is in iunreserved back to its raw bytes.
 * Everything else (invalid sequences, reserved characters, noncharacters)
 * stays pct-encoded, with its hex digits upper-cased.
 *
 * @param array $match PCRE match
 * @return string Replacement
 */
protected function remove_iunreserved_percent_encoded($match)
{
	// The match consists solely of "%XX" triplets, so splitting on "%"
	// gives an empty first element followed by one hex pair per byte.
	$hex = explode('%', $match[0]);

	$out = '';
	$pending = 0;

	for ($i = 1, $count = count($hex); $i < $count; $i++)
	{
		$byte = hexdec($hex[$i]);

		if (!$pending)
		{
			// First byte of a sequence
			$seq_start = $i;
			$well_formed = true;

			if ($byte <= 0x7F)
			{
				// One byte sequence
				$codepoint = $byte;
				$seq_len = 1;
			}
			elseif (($byte & 0xE0) === 0xC0)
			{
				// Two byte sequence
				$codepoint = ($byte & 0x1F) << 6;
				$seq_len = 2;
				$pending = 1;
			}
			elseif (($byte & 0xF0) === 0xE0)
			{
				// Three byte sequence
				$codepoint = ($byte & 0x0F) << 12;
				$seq_len = 3;
				$pending = 2;
			}
			elseif (($byte & 0xF8) === 0xF0)
			{
				// Four byte sequence
				$codepoint = ($byte & 0x07) << 18;
				$seq_len = 4;
				$pending = 3;
			}
			else
			{
				// Not a usable lead byte
				$well_formed = false;
				$pending = 0;
			}
		}
		elseif (($byte & 0xC0) === 0x80)
		{
			// Continuation byte: fold its six payload bits in
			$pending--;
			$codepoint |= ($byte & 0x3F) << ($pending * 6);
		}
		else
		{
			// Broken sequence: close it as invalid and step back so this
			// byte is examined again as the start of a new sequence
			$well_formed = false;
			$pending = 0;
			$i--;
		}

		// A sequence has just ended: emit it, decoded or still encoded
		if (!$pending)
		{
			$keep_encoded =
				// Invalid sequences
				!$well_formed
				// Non-shortest form sequences are invalid
				|| $seq_len > 1 && $codepoint <= 0x7F
				|| $seq_len > 2 && $codepoint <= 0x7FF
				|| $seq_len > 3 && $codepoint <= 0xFFFF
				// Outside of range of iunreserved codepoints
				|| $codepoint < 0x2D
				|| $codepoint > 0xEFFFD
				// Noncharacters
				|| ($codepoint & 0xFFFE) === 0xFFFE
				|| $codepoint >= 0xFDD0 && $codepoint <= 0xFDEF
				// Everything else not in iunreserved (this is all BMP)
				|| $codepoint === 0x2F
				|| $codepoint > 0x39 && $codepoint < 0x41
				|| $codepoint > 0x5A && $codepoint < 0x61
				|| $codepoint > 0x7A && $codepoint < 0x7E
				|| $codepoint > 0x7E && $codepoint < 0xA0
				|| $codepoint > 0xD7FF && $codepoint < 0xF900;

			for ($k = $seq_start; $k <= $i; $k++)
			{
				if ($keep_encoded)
				{
					$out .= '%' . strtoupper($hex[$k]);
				}
				else
				{
					$out .= chr(hexdec($hex[$k]));
				}
			}
		}
	}

	// Bytes left over belong to a multi-byte sequence that was cut short;
	// they are invalid and stay encoded.
	if ($pending)
	{
		for ($k = $seq_start; $k < $count; $k++)
		{
			$out .= '%' . strtoupper($hex[$k]);
		}
	}

	return $out;
}
734 | |||
/**
 * Drop components that merely repeat the default for the current scheme
 *
 * For every component that has an entry in the normalization table under
 * the current scheme and is strictly equal to it, the component is reset:
 * ipath to the empty string, every other component to null.
 */
protected function scheme_normalization()
{
	$parts = array('iuserinfo', 'ihost', 'port', 'ipath', 'iquery', 'ifragment');

	foreach ($parts as $part)
	{
		if (isset($this->normalization[$this->scheme][$part]) && $this->$part === $this->normalization[$this->scheme][$part])
		{
			// The path is never null; its "absent" value is ''
			$this->$part = ($part === 'ipath') ? '' : null;
		}
	}
}
762 | |||
/**
 * Check if the object represents a valid IRI. This needs to be done on each
 * call as some things change depending on another part of the IRI.
 *
 * Rules applied:
 *  - an empty path is always acceptable;
 *  - with an authority (userinfo, host or port present) the path must begin
 *    with "/". Any further slashes are fine: RFC 3986 section 3.3 only
 *    forbids a leading "//" when there is NO authority, so an IRI such as
 *    "http://example.com//feed" is valid;
 *  - with neither scheme nor authority, the first path segment must not
 *    contain ":" (it would read as a scheme).
 *
 * @return bool
 */
public function is_valid()
{
	if ($this->ipath === '')
	{
		return true;
	}

	$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
	if ($isauthority)
	{
		return $this->ipath[0] === '/';
	}

	if ($this->scheme === null)
	{
		$colon = strpos($this->ipath, ':');
		if ($colon !== false)
		{
			$slash = strpos($this->ipath, '/');
			// A colon before the first slash (or with no slash at all)
			// sits in the first segment
			if ($slash === false || $colon < $slash)
			{
				return false;
			}
		}
	}

	return true;
}
792 | |||
/**
 * Set the entire IRI. Returns true on success, false on failure (if there
 * are any invalid characters).
 *
 * Passing null leaves the object untouched and returns true. Results are
 * memoized per IRI string in a function-static cache shared by all
 * instances, so repeated IRIs are not parsed again.
 *
 * @param string|null $iri
 * @return bool
 */
public function set_iri($iri)
{
	static $cache;
	if (!$cache)
	{
		$cache = array();
	}

	if ($iri === null)
	{
		return true;
	}

	// Convert once, up front: the value is used as an array key below, and
	// a stringable object (or any other non-scalar) is not a legal array
	// offset even though the parser would accept its string form.
	$iri = (string) $iri;

	if (isset($cache[$iri]))
	{
		list($this->scheme,
			 $this->iuserinfo,
			 $this->ihost,
			 $this->port,
			 $this->ipath,
			 $this->iquery,
			 $this->ifragment,
			 $return) = $cache[$iri];
		return $return;
	}

	$parsed = $this->parse_iri($iri);
	if (!$parsed)
	{
		return false;
	}

	// Setters run in order and stop at the first failure
	$return = $this->set_scheme($parsed['scheme'])
		&& $this->set_authority($parsed['authority'])
		&& $this->set_path($parsed['path'])
		&& $this->set_query($parsed['query'])
		&& $this->set_fragment($parsed['fragment']);

	$cache[$iri] = array($this->scheme,
						 $this->iuserinfo,
						 $this->ihost,
						 $this->port,
						 $this->ipath,
						 $this->iquery,
						 $this->ifragment,
						 $return);
	return $return;
}
849 | |||
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A valid scheme is a letter followed by letters, digits, "+", "-" or ".";
 * it is stored lower-cased. Passing null clears the scheme. On failure the
 * scheme is cleared as well.
 *
 * @param string|null $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
	if ($scheme === null)
	{
		$this->scheme = null;
		return true;
	}

	// The D modifier makes "$" match only at the very end of the subject.
	// Without it "$" also matches just before a trailing newline, and a
	// value such as "http\n" would be accepted and stored.
	if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/D', $scheme))
	{
		$this->scheme = null;
		return false;
	}

	$this->scheme = strtolower($scheme);
	return true;
}
874 | |||
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The authority is split into userinfo (everything before the last "@"),
 * host, and port (everything after the first ":" that follows any "]", so
 * colons inside a bracketed IP literal are not mistaken for the port
 * separator). An empty port ("example.com:") is treated as no port at all.
 * Passing null clears all three parts. Results are memoized per authority
 * string in a function-static cache shared by all instances.
 *
 * @param string|null $authority
 * @return bool
 */
public function set_authority($authority)
{
	static $cache;
	if (!$cache)
		$cache = array();

	if ($authority === null)
	{
		$this->iuserinfo = null;
		$this->ihost = null;
		$this->port = null;
		return true;
	}
	elseif (isset($cache[$authority]))
	{
		list($this->iuserinfo,
			 $this->ihost,
			 $this->port,
			 $return) = $cache[$authority];

		return $return;
	}

	$remaining = $authority;

	if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
	{
		$iuserinfo = substr($remaining, 0, $iuserinfo_end);
		$remaining = substr($remaining, $iuserinfo_end + 1);
	}
	else
	{
		$iuserinfo = null;
	}

	// Search for ":" from the closing bracket onwards (from 0 if none)
	$port_start = strpos($remaining, ':', (int) strpos($remaining, ']'));
	if ($port_start !== false)
	{
		$port = substr($remaining, $port_start + 1);
		// substr() yields '' here on PHP 7+ and false on older versions;
		// both mean that nothing follows the colon.
		if ($port === false || $port === '')
		{
			$port = null;
		}
		$remaining = substr($remaining, 0, $port_start);
	}
	else
	{
		$port = null;
	}

	// Setters run in order and stop at the first failure
	$return = $this->set_userinfo($iuserinfo) &&
			  $this->set_host($remaining) &&
			  $this->set_port($port);

	$cache[$authority] = array($this->iuserinfo,
							   $this->ihost,
							   $this->port,
							   $return);

	return $return;
}
941 | |||
/**
 * Set the iuserinfo.
 *
 * Null clears the component. Any other value is stored after its
 * disallowed bytes have been percent-encoded, and the scheme defaults are
 * then re-applied. Always returns true.
 *
 * @param string|null $iuserinfo
 * @return bool
 */
public function set_userinfo($iuserinfo)
{
	if ($iuserinfo !== null)
	{
		$this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
		$this->scheme_normalization();
		return true;
	}

	$this->iuserinfo = null;
	return true;
}
962 | |||
/**
 * Set the ihost. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * Null clears the host. A value wrapped in "[" and "]" is checked with
 * SimplePie_Net_IPv6::check_ipv6() and stored in the form returned by
 * SimplePie_Net_IPv6::compress(); if the check fails the host is cleared
 * and false is returned. Any other value is percent-encoded where needed
 * and lower-cased outside its pct-encoded triplets.
 *
 * @param string|null $ihost
 * @return bool
 */
public function set_host($ihost)
{
	if ($ihost === null)
	{
		$this->ihost = null;
		return true;
	}

	$bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';

	if ($bracketed)
	{
		$literal = substr($ihost, 1, -1);
		if (!SimplePie_Net_IPv6::check_ipv6($literal))
		{
			$this->ihost = null;
			return false;
		}
		$this->ihost = '[' . SimplePie_Net_IPv6::compress($literal) . ']';
	}
	else
	{
		$ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

		// Lower-case the host while leaving pct-encoded triplets alone
		// (their hex digits stay upper-case). This has to follow the
		// encoding step, which can introduce unescaped characters.
		$offset = 0;
		$size = strlen($ihost);
		while (($offset += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $offset)) < $size)
		{
			if ($ihost[$offset] === '%')
			{
				// Jump over the whole "%XX" triplet
				$offset += 3;
				continue;
			}

			$ihost[$offset] = strtolower($ihost[$offset]);
			$offset++;
		}

		$this->ihost = $ihost;
	}

	$this->scheme_normalization();

	return true;
}
1018 | |||
/**
 * Set the port. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * null and the empty string both mean "no port": the port is cleared and
 * true is returned. A string made up solely of ASCII digits is stored as an
 * integer and scheme_normalization() is run. Any other value clears the port
 * and returns false.
 *
 * @param string|null $port
 * @return bool
 */
public function set_port($port)
{
    // An authority such as "example.com:" carries an empty port. RFC 3986
    // allows this (port = *DIGIT) and says normalisers should drop it.
    // Without this check the empty string passes the digits-only test below
    // (0 === 0) and is stored as (int) '' === 0, so the IRI would be
    // re-serialised with a bogus ":0".
    if ($port === null || $port === '')
    {
        $this->port = null;
        return true;
    }

    if (strspn($port, '0123456789') === strlen($port))
    {
        $this->port = (int) $port;
        $this->scheme_normalization();
        return true;
    }

    // Contains something other than digits.
    $this->port = null;
    return false;
}
1045 | |||
/**
 * Assign the ipath component.
 *
 * The input is cast to string. Two variants are derived from it and kept in
 * a function-static cache keyed by the input string: index 0 is the result
 * of replace_invalid_with_pct_encoding(), index 1 is that result after
 * remove_dot_segments(). The dot-segment-free variant is used when a scheme
 * is set, the other one when it is not. scheme_normalization() runs last.
 *
 * @param string $ipath
 * @return bool Always true
 */
public function set_path($ipath)
{
    // Shared across all calls for the lifetime of the request.
    static $cache = array();

    $ipath = (string) $ipath;

    if (!isset($cache[$ipath]))
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
        $without_dots = $this->remove_dot_segments($encoded);

        $cache[$ipath] = array($encoded, $without_dots);
    }

    // false => 0 (encoded only), true => 1 (dot segments removed).
    $variant = (int) ($this->scheme !== null);
    $this->ipath = $cache[$ipath][$variant];

    $this->scheme_normalization();

    return true;
}
1078 | |||
/**
 * Assign the iquery component.
 *
 * A null value clears the component. Any other value is passed through
 * replace_invalid_with_pct_encoding() (called with its third argument set to
 * true) and stored, after which scheme_normalization() is run.
 *
 * @param string|null $iquery
 * @return bool Always true
 */
public function set_query($iquery)
{
    if ($iquery !== null)
    {
        $encoded = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
        $this->iquery = $encoded;
        $this->scheme_normalization();

        return true;
    }

    $this->iquery = null;

    return true;
}
1098 | |||
/**
 * Assign the ifragment component.
 *
 * A null value clears the component. Any other value is passed through
 * replace_invalid_with_pct_encoding() and stored, after which
 * scheme_normalization() is run.
 *
 * @param string|null $ifragment
 * @return bool Always true
 */
public function set_fragment($ifragment)
{
    if ($ifragment !== null)
    {
        $encoded = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
        $this->ifragment = $encoded;
        $this->scheme_normalization();

        return true;
    }

    $this->ifragment = null;

    return true;
}
1118 | |||
/**
 * Convert an IRI to a URI (or parts thereof)
 *
 * Every byte in the range 0x80-0xFF is replaced by its percent-encoded form
 * ("%" followed by two upper-case hex digits). All other bytes are left as
 * they are. Input that contains no such byte is returned unchanged.
 *
 * @param string $string IRI, or a fragment of one
 * @return string
 */
public function to_uri($string)
{
    // Built once: a mask containing every byte from 0x80 to 0xFF.
    static $high_bytes;
    if (!$high_bytes)
    {
        $high_bytes = implode('', range("\x80", "\xFF"));
    }

    $length = strlen($string);

    for ($offset = strcspn($string, $high_bytes, 0); $offset < $length; $offset += strcspn($string, $high_bytes, $offset))
    {
        $escaped = '%' . strtoupper(bin2hex($string[$offset]));
        $string = substr_replace($string, $escaped, $offset, 1);

        // One byte became three: step past the escape and account for the
        // two extra bytes in the running length.
        $offset += 3;
        $length += 2;
    }

    return $string;
}
1143 | |||
/**
 * Get the complete IRI
 *
 * Concatenates, in order: "scheme:" when a scheme is set, "//authority"
 * when get_iauthority() is not null, the path, "?query" when a query is set
 * and "#fragment" when a fragment is set. When the path is the empty string
 * and the authority is a non-empty string, the scheme's entry in the
 * normalization table supplies the path, provided that entry is non-empty.
 *
 * @return string|false The assembled IRI, or false when is_valid() fails
 */
public function get_iri()
{
    if (!$this->is_valid())
    {
        return false;
    }

    $iauthority = $this->get_iauthority();
    $pieces = array();

    if ($this->scheme !== null)
    {
        $pieces[] = $this->scheme . ':';
    }

    if ($iauthority !== null)
    {
        $pieces[] = '//' . $iauthority;
    }

    if ($this->ipath !== '')
    {
        $pieces[] = $this->ipath;
    }
    elseif ($iauthority !== null && $iauthority !== '' && !empty($this->normalization[$this->scheme]['ipath']))
    {
        // Empty path but a real authority: use the scheme's default path.
        $pieces[] = $this->normalization[$this->scheme]['ipath'];
    }

    if ($this->iquery !== null)
    {
        $pieces[] = '?' . $this->iquery;
    }

    if ($this->ifragment !== null)
    {
        $pieces[] = '#' . $this->ifragment;
    }

    return implode('', $pieces);
}
1184 | |||
/**
 * Get the complete URI
 *
 * Runs the value from get_iri() through to_uri(). get_iri() returns false
 * for an invalid IRI, and that value is handed to to_uri() as it is.
 *
 * @return string|false
 */
public function get_uri()
{
    $iri = $this->get_iri();

    return $this->to_uri($iri);
}
1194 | |||
/**
 * Get the complete iauthority
 *
 * Built as "userinfo@" + host + ":port", where each part is included only
 * if the matching property is not null.
 *
 * @return string|null Null when userinfo, host and port are all null
 */
protected function get_iauthority()
{
    if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
    {
        return null;
    }

    $userinfo_part = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
    $host_part = ($this->ihost !== null) ? $this->ihost : '';
    $port_part = ($this->port !== null) ? ':' . $this->port : '';

    return $userinfo_part . $host_part . $port_part;
}
1224 | |||
/**
 * Get the complete authority
 *
 * Takes the result of get_iauthority() and, when it is a string, converts
 * it with to_uri(). A non-string result is returned untouched.
 *
 * @return string|null
 */
protected function get_authority()
{
    $iauthority = $this->get_iauthority();

    return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
}
1238 | } | ||