]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | /** | |
3 | * SimplePie | |
4 | * | |
5 | * A PHP-Based RSS and Atom Feed Framework. | |
6 | * Takes the hard work out of managing a complete RSS/Atom solution. | |
7 | * | |
8 | * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors | |
9 | * All rights reserved. | |
10 | * | |
11 | * Redistribution and use in source and binary forms, with or without modification, are | |
12 | * permitted provided that the following conditions are met: | |
13 | * | |
14 | * * Redistributions of source code must retain the above copyright notice, this list of | |
15 | * conditions and the following disclaimer. | |
16 | * | |
17 | * * Redistributions in binary form must reproduce the above copyright notice, this list | |
18 | * of conditions and the following disclaimer in the documentation and/or other materials | |
19 | * provided with the distribution. | |
20 | * | |
21 | * * Neither the name of the SimplePie Team nor the names of its contributors may be used | |
22 | * to endorse or promote products derived from this software without specific prior | |
23 | * written permission. | |
24 | * | |
25 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | |
26 | * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
27 | * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | |
28 | * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
29 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
30 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
31 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
32 | * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
33 | * POSSIBILITY OF SUCH DAMAGE. | |
34 | * | |
35 | * @package SimplePie | |
36 | * @version 1.3.1 | |
37 | * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue | |
38 | * @author Ryan Parman | |
39 | * @author Geoffrey Sneddon | |
40 | * @author Ryan McCue | |
41 | * @link http://simplepie.org/ SimplePie | |
42 | * @license http://www.opensource.org/licenses/bsd-license.php BSD License | |
43 | */ | |
44 | ||
45 | /** | |
46 | * IRI parser/serialiser/normaliser | |
47 | * | |
48 | * @package SimplePie | |
49 | * @subpackage HTTP | |
50 | * @author Geoffrey Sneddon | |
51 | * @author Steve Minutillo | |
52 | * @author Ryan McCue | |
53 | * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue | |
54 | * @license http://www.opensource.org/licenses/bsd-license.php | |
55 | */ | |
56 | class SimplePie_IRI | |
57 | { | |
/**
 * Scheme component, stored lower-cased by set_scheme(); null when absent
 *
 * @var string|null
 */
protected $scheme = null;

/**
 * User information component (percent-encoded); null when absent
 *
 * @var string|null
 */
protected $iuserinfo = null;

/**
 * Host component; null when absent
 *
 * @var string|null
 */
protected $ihost = null;

/**
 * Port component, stored as an integer by set_port(); null when absent
 *
 * @var int|null
 */
protected $port = null;

/**
 * Path component; the empty string (never null) when absent
 *
 * @var string
 */
protected $ipath = '';

/**
 * Query component (without the leading "?"); null when absent
 *
 * @var string|null
 */
protected $iquery = null;

/**
 * Fragment component (without the leading "#"); null when absent
 *
 * @var string|null
 */
protected $ifragment = null;
106 | ||
/**
 * Per-scheme default values
 *
 * Keyed by scheme name. Each entry maps an IRI part (using the property
 * name, e.g. "port" or "ipath") to the value that part defaults to for
 * that scheme.
 *
 * @var array
 */
protected $normalization = array(
    'acap'  => array('port' => 674),
    'dict'  => array('port' => 2628),
    'file'  => array('ihost' => 'localhost'),
    'http'  => array('port' => 80, 'ipath' => '/'),
    'https' => array('port' => 443, 'ipath' => '/'),
);
132 | ||
/**
 * String conversion: yields whatever get_iri() returns for this object
 *
 * @return string
 */
public function __toString()
{
    $serialised = $this->get_iri();

    return $serialised;
}
142 | ||
/**
 * Property-style write access, routed through the set_*() methods
 *
 * A name with a matching set_<name>() method is passed straight to it.
 * The "i"-prefixed aliases (iauthority, iuserinfo, ihost, ipath, iquery,
 * ifragment) are mapped to the setter of the unprefixed name. Any other
 * name is silently ignored.
 *
 * @param string $name Property name
 * @param mixed $value Property value
 */
public function __set($name, $value)
{
    $setter = 'set_' . $name;

    if (!method_exists($this, $setter))
    {
        $aliases = array('iauthority', 'iuserinfo', 'ihost', 'ipath', 'iquery', 'ifragment');
        if (!in_array($name, $aliases, true))
        {
            return;
        }

        // ihost -> set_host, ipath -> set_path, ...
        $setter = 'set_' . substr($name, 1);
    }

    call_user_func(array($this, $setter), $value);
}
167 | ||
/**
 * Property-style read access
 *
 * Resolution order:
 *  1. iri, uri, iauthority and authority call the matching get_*() method;
 *  2. an exact property name;
 *  3. the name with an "i" prepended (host -> ihost);
 *  4. the name with its first character dropped (ischeme -> scheme).
 * Anything else raises an E_USER_NOTICE and yields null.
 *
 * When the resolved value is null and the normalization table holds a
 * default for the current scheme and the resolved name, that default is
 * returned instead.
 *
 * @param string $name Property name
 * @return mixed
 */
public function __get($name)
{
    // array_key_exists() rather than isset(): properties holding null
    // must still count as existing.
    $known = get_object_vars($this);
    $computed = array('iri', 'uri', 'iauthority', 'authority');

    if (in_array($name, $computed, true))
    {
        $value = $this->{"get_$name"}();
    }
    elseif (array_key_exists($name, $known))
    {
        $value = $this->$name;
    }
    elseif (array_key_exists('i' . $name, $known))
    {
        // host -> ihost
        $name = 'i' . $name;
        $value = $this->$name;
    }
    elseif (($stripped = substr($name, 1)) && array_key_exists($stripped, $known))
    {
        // ischeme -> scheme
        $name = $stripped;
        $value = $this->$name;
    }
    else
    {
        trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
        $value = null;
    }

    if ($value === null && isset($this->normalization[$this->scheme][$name]))
    {
        return $this->normalization[$this->scheme][$name];
    }

    return $value;
}
220 | ||
/**
 * Property-style isset() support
 *
 * A name counts as set when a get_<name>() method exists or when the
 * property of that name is set (and therefore not null).
 *
 * @param string $name Property name
 * @return bool
 */
public function __isset($name)
{
    return method_exists($this, 'get_' . $name) || isset($this->$name);
}
238 | ||
/**
 * Property-style unset() support
 *
 * Calls set_<name>() with an empty string when such a setter exists;
 * names without a setter are ignored.
 *
 * @param string $name Property name
 */
public function __unset($name)
{
    $setter = 'set_' . $name;

    if (method_exists($this, $setter))
    {
        call_user_func(array($this, $setter), '');
    }
}
251 | ||
/**
 * Build an IRI object, optionally initialised from a string
 *
 * The argument is handed to set_iri(); its return value is not checked
 * here.
 *
 * @param string|null $iri IRI to parse, or null to start empty
 */
public function __construct($iri = null)
{
    $this->set_iri($iri);
}
261 | ||
/**
 * Resolve a (possibly relative) IRI against a base IRI
 *
 * Strings are converted to SimplePie_IRI objects first. Returns false
 * when $relative is not valid, or when $base has no scheme or is not
 * valid. A $relative that already has a scheme is returned as a clone,
 * without consulting $base.
 *
 * @param SimplePie_IRI|string $base (Absolute) Base IRI
 * @param SimplePie_IRI|string $relative Relative IRI
 * @return SimplePie_IRI|false
 */
public static function absolutize($base, $relative)
{
    if (!($relative instanceof SimplePie_IRI))
    {
        $relative = new SimplePie_IRI($relative);
    }
    if (!$relative->is_valid())
    {
        return false;
    }
    if ($relative->scheme !== null)
    {
        // Already absolute: nothing to resolve.
        return clone $relative;
    }

    if (!($base instanceof SimplePie_IRI))
    {
        $base = new SimplePie_IRI($base);
    }
    if ($base->scheme === null || !$base->is_valid())
    {
        return false;
    }

    if ($relative->get_iri() === '')
    {
        // Empty reference: the base itself, minus its fragment.
        $target = clone $base;
        $target->ifragment = null;
    }
    elseif ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
    {
        // Reference carries its own authority: only the scheme is inherited.
        $target = clone $relative;
        $target->scheme = $base->scheme;
    }
    else
    {
        $target = new SimplePie_IRI;
        $target->scheme = $base->scheme;
        $target->iuserinfo = $base->iuserinfo;
        $target->ihost = $base->ihost;
        $target->port = $base->port;

        if ($relative->ipath === '')
        {
            // No path in the reference: keep the base path, and the base
            // query unless the reference supplies one.
            $target->ipath = $base->ipath;
            if ($relative->iquery !== null)
            {
                $target->iquery = $relative->iquery;
            }
            elseif ($base->iquery !== null)
            {
                $target->iquery = $base->iquery;
            }
        }
        else
        {
            $base_has_authority = $base->iuserinfo !== null || $base->ihost !== null || $base->port !== null;

            if ($relative->ipath[0] === '/')
            {
                $merged = $relative->ipath;
            }
            elseif ($base_has_authority && $base->ipath === '')
            {
                $merged = '/' . $relative->ipath;
            }
            elseif (($cut = strrpos($base->ipath, '/')) !== false)
            {
                // Replace everything after the last "/" of the base path.
                $merged = substr($base->ipath, 0, $cut + 1) . $relative->ipath;
            }
            else
            {
                $merged = $relative->ipath;
            }

            $target->ipath = $target->remove_dot_segments($merged);
            $target->iquery = $relative->iquery;
        }

        $target->ifragment = $relative->ifragment;
    }

    $target->scheme_normalization();
    return $target;
}
357 | ||
/**
 * Split an IRI into scheme/authority/path/query/fragment
 *
 * Leading and trailing space, tab, LF, FF and CR are trimmed first. In
 * the returned preg_match() array the named entries "scheme",
 * "authority", "query" and "fragment" are null when the component is
 * absent, and "path" is always a string.
 *
 * @param string $iri
 * @return array|false Match array, or false when the pattern does not match
 */
protected function parse_iri($iri)
{
    $pattern = '/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/';
    $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");

    if (!preg_match($pattern, $iri, $match))
    {
        // This can occur when a paragraph is accidentally parsed as a URI
        return false;
    }

    if ($match[1] === '')
    {
        $match['scheme'] = null;
    }
    if (!isset($match[3]) || $match[3] === '')
    {
        $match['authority'] = null;
    }
    if (!isset($match[5]))
    {
        $match['path'] = '';
    }

    // Groups 6 and 8 are the "?query" and "#fragment" wrappers; they are
    // missing or empty when the component is not present at all.
    foreach (array(6 => 'query', 8 => 'fragment') as $group => $part)
    {
        if (!isset($match[$group]) || $match[$group] === '')
        {
            $match[$part] = null;
        }
    }

    return $match;
}
397 | ||
/**
 * Strip "." and ".." segments from a path
 *
 * Works through the input from the left, moving finished segments to an
 * output buffer, for as long as the remaining input still contains "./"
 * or "/." or is exactly "." or "..".
 *
 * @param string $input
 * @return string
 */
protected function remove_dot_segments($input)
{
    $output = '';

    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        // A: drop a leading "../" or "./"
        if (strncmp($input, '../', 3) === 0)
        {
            $input = substr($input, 3);
            continue;
        }
        if (strncmp($input, './', 2) === 0)
        {
            $input = substr($input, 2);
            continue;
        }

        // B: a leading "/./", or an input of exactly "/.", collapses to "/"
        if (strncmp($input, '/./', 3) === 0)
        {
            $input = substr($input, 2);
            continue;
        }
        if ($input === '/.')
        {
            $input = '/';
            continue;
        }

        // C: a leading "/../", or an input of exactly "/..", collapses to
        // "/" and removes the last segment already written to the output
        if (strncmp($input, '/../', 4) === 0)
        {
            $input = substr($input, 3);
            $cut = strrpos($output, '/');
            $output = substr_replace($output, '', $cut);
            continue;
        }
        if ($input === '/..')
        {
            $input = '/';
            $cut = strrpos($output, '/');
            $output = substr_replace($output, '', $cut);
            continue;
        }

        // D: nothing but "." or ".." left
        if ($input === '.' || $input === '..')
        {
            $input = '';
            continue;
        }

        // E: move the first segment (with its leading "/", if any) across
        $next = strpos($input, '/', 1);
        if ($next !== false)
        {
            $output .= substr($input, 0, $next);
            $input = substr($input, $next);
        }
        else
        {
            $output .= $input;
            $input = '';
        }
    }

    return $output . $input;
}
457 | ||
/**
 * Percent-encode every byte that is not allowed in the component
 *
 * Existing percent-encoded runs are first normalised through
 * remove_iunreserved_percent_encoded(), and any "%" that is not followed
 * by two hex digits becomes "%25". After that the string is scanned:
 * ASCII letters, digits, "-._~%" and $extra_chars pass through, and
 * well-formed UTF-8 sequences for code points in ucschar (or iprivate,
 * when $iprivate is true) pass through. Everything else is replaced
 * byte by byte with %XX.
 *
 * @param string $string Input string
 * @param string $extra_chars Valid characters not in iunreserved or
 *                            iprivate (this is ASCII-only)
 * @param bool $iprivate Allow iprivate
 * @return string
 */
protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
{
    // Normalize as many pct-encoded sections as possible
    $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);

    // Replace invalid percent characters
    $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

    // Unreserved characters and "%" are always allowed (every remaining
    // "%" now starts a valid pct-encoded triplet).
    $allowed = $extra_chars . 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

    $pos = 0;
    $total = strlen($string);

    // Each pass skips the run of allowed bytes, then deals with the
    // first byte that is not allowed.
    while (($pos += strspn($string, $allowed, $pos)) < $total)
    {
        $lead = ord($string[$pos]);
        $start = $pos;
        $valid = true;

        // Decode the lead byte. Plain ASCII never gets here as a valid
        // character: anything allowed was skipped above.
        if (($lead & 0xE0) === 0xC0)
        {
            // Two byte sequence
            $codepoint = ($lead & 0x1F) << 6;
            $length = 2;
            $remaining = 1;
        }
        elseif (($lead & 0xF0) === 0xE0)
        {
            // Three byte sequence
            $codepoint = ($lead & 0x0F) << 12;
            $length = 3;
            $remaining = 2;
        }
        elseif (($lead & 0xF8) === 0xF0)
        {
            // Four byte sequence
            $codepoint = ($lead & 0x07) << 18;
            $length = 4;
            $remaining = 3;
        }
        else
        {
            // Not a multi-byte lead byte
            $valid = false;
            $length = 1;
            $remaining = 0;
        }

        if ($remaining)
        {
            if ($pos + $length <= $total)
            {
                $pos++;
                while ($remaining)
                {
                    $byte = ord($string[$pos]);
                    if (($byte & 0xC0) !== 0x80)
                    {
                        // Not a continuation byte: the sequence is broken.
                        // Step back so this byte is looked at again later.
                        $valid = false;
                        $pos--;
                        break;
                    }

                    $remaining--;
                    $codepoint |= ($byte & 0x3F) << ($remaining * 6);
                    $pos++;
                }
            }
            else
            {
                // Sequence runs past the end of the string
                $pos = $total - 1;
                $valid = false;
            }
        }

        // Percent encode anything invalid or not in ucschar
        $encode = !$valid;
        if (!$encode)
        {
            // Non-shortest form sequences are invalid
            $overlong = ($length > 1 && $codepoint <= 0x7F)
                || ($length > 2 && $codepoint <= 0x7FF)
                || ($length > 3 && $codepoint <= 0xFFFF);

            // Noncharacters
            $nonchar = ($codepoint & 0xFFFE) === 0xFFFE
                || ($codepoint >= 0xFDD0 && $codepoint <= 0xFDEF);

            // Outside ucschar ...
            $not_ucschar = ($codepoint > 0xD7FF && $codepoint < 0xF900)
                || $codepoint < 0xA0
                || $codepoint > 0xEFFFD;

            // ... and not rescued by iprivate
            $not_iprivate = !$iprivate
                || $codepoint < 0xE000
                || $codepoint > 0x10FFFD;

            $encode = $overlong || $nonchar || ($not_ucschar && $not_iprivate);
        }

        if ($encode)
        {
            // A complete sequence left $pos one past its last byte
            if ($valid)
            {
                $pos--;
            }

            // Every encoded byte grows the string by two, so the cursor,
            // the end marker and the total length move along with it.
            for ($i = $start; $i <= $pos; $i += 3)
            {
                $string = substr_replace($string, sprintf('%%%02X', ord($string[$i])), $i, 1);
                $pos += 2;
                $total += 2;
            }
        }
    }

    return $string;
}
593 | ||
/**
 * Callback function for preg_replace_callback.
 *
 * Receives a run of percent-encoded bytes and decodes those that form
 * UTF-8 encoded characters in iunreserved (ASCII letters, digits, "-",
 * ".", "_", "~" and ucschar). Every other byte is kept percent-encoded,
 * with its hex digits upper-cased.
 *
 * @param array $match PCRE match
 * @return string Replacement
 */
protected function remove_iunreserved_percent_encoded($match)
{
    // As we just have valid percent encoded sequences we can just explode
    // and ignore the first member of the returned array (an empty string).
    $bytes = explode('%', $match[0]);

    // Initialize the new string (this is what will be returned) and that
    // there are no bytes remaining in the current sequence (unsurprising
    // at the first byte!).
    $string = '';
    $remaining = 0;

    // Loop over each and every byte, and set $value to its value
    for ($i = 1, $len = count($bytes); $i < $len; $i++)
    {
        $value = hexdec($bytes[$i]);

        // If we're the first byte of sequence:
        if (!$remaining)
        {
            // Start position
            $start = $i;

            // By default we are valid
            $valid = true;

            // One byte sequence:
            if ($value <= 0x7F)
            {
                $character = $value;
                $length = 1;
            }
            // Two byte sequence:
            elseif (($value & 0xE0) === 0xC0)
            {
                $character = ($value & 0x1F) << 6;
                $length = 2;
                $remaining = 1;
            }
            // Three byte sequence:
            elseif (($value & 0xF0) === 0xE0)
            {
                $character = ($value & 0x0F) << 12;
                $length = 3;
                $remaining = 2;
            }
            // Four byte sequence:
            elseif (($value & 0xF8) === 0xF0)
            {
                $character = ($value & 0x07) << 18;
                $length = 4;
                $remaining = 3;
            }
            // Invalid byte:
            else
            {
                $valid = false;
                $remaining = 0;
            }
        }
        // Continuation byte:
        else
        {
            // Check that the byte is valid, then add it to the character:
            if (($value & 0xC0) === 0x80)
            {
                $remaining--;
                $character |= ($value & 0x3F) << ($remaining * 6);
            }
            // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
            else
            {
                $valid = false;
                $remaining = 0;
                $i--;
            }
        }

        // If we've reached the end of the current byte sequence, append it to the result
        if (!$remaining)
        {
            // Percent encode anything invalid or not in iunreserved
            if (
                // Invalid sequences
                !$valid
                // Non-shortest form sequences are invalid
                || $length > 1 && $character <= 0x7F
                || $length > 2 && $character <= 0x7FF
                || $length > 3 && $character <= 0xFFFF
                // Outside of range of iunreserved codepoints
                || $character < 0x2D
                || $character > 0xEFFFD
                // Noncharacters
                || ($character & 0xFFFE) === 0xFFFE
                || $character >= 0xFDD0 && $character <= 0xFDEF
                // Everything else not in iunreserved (this is all BMP)
                || $character === 0x2F
                || $character > 0x39 && $character < 0x41
                // "[" to "`", except "_" (0x5F), which is unreserved and
                // therefore has to be decoded like the other unreserved
                // punctuation ("-", ".", "~")
                || $character > 0x5A && $character < 0x61 && $character !== 0x5F
                || $character > 0x7A && $character < 0x7E
                || $character > 0x7E && $character < 0xA0
                || $character > 0xD7FF && $character < 0xF900
            )
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= '%' . strtoupper($bytes[$j]);
                }
            }
            else
            {
                for ($j = $start; $j <= $i; $j++)
                {
                    $string .= chr(hexdec($bytes[$j]));
                }
            }
        }
    }

    // If we have any bytes left over they are invalid (i.e., we are
    // mid-way through a multi-byte sequence)
    if ($remaining)
    {
        for ($j = $start; $j < $len; $j++)
        {
            $string .= '%' . strtoupper($bytes[$j]);
        }
    }

    return $string;
}
734 | ||
/**
 * Drop every part that merely repeats the default for the current scheme
 *
 * For each of iuserinfo, ihost, port, ipath, iquery and ifragment: when
 * the normalization table has a default for the current scheme and the
 * part is identical (===) to it, the part is reset to its "absent"
 * value, which is the empty string for ipath and null for the others.
 */
protected function scheme_normalization()
{
    // part => value meaning "absent"
    $absent = array(
        'iuserinfo' => null,
        'ihost'     => null,
        'port'      => null,
        'ipath'     => '',
        'iquery'    => null,
        'ifragment' => null,
    );

    foreach ($absent as $part => $blank)
    {
        if (isset($this->normalization[$this->scheme][$part]) && $this->$part === $this->normalization[$this->scheme][$part])
        {
            $this->$part = $blank;
        }
    }
}
762 | ||
/**
 * Check whether the current parts form a valid IRI
 *
 * Evaluated on every call because the rules depend on how the parts
 * combine:
 *  - an empty path is always accepted;
 *  - with an authority (userinfo, host or port present) the path must
 *    begin with "/" but not with "//";
 *  - without scheme and authority, the path must not contain a ":"
 *    before its first "/", so its first segment cannot be mistaken for
 *    a scheme.
 *
 * @return bool
 */
public function is_valid()
{
    if ($this->ipath === '')
    {
        return true;
    }

    $has_authority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
    if ($has_authority)
    {
        return $this->ipath[0] === '/' && substr($this->ipath, 0, 2) !== '//';
    }

    if ($this->scheme !== null)
    {
        return true;
    }

    $colon = strpos($this->ipath, ':');
    if ($colon === false)
    {
        return true;
    }

    // A colon is only acceptable once a "/" has been seen before it.
    $slash = strpos($this->ipath, '/');
    return $slash !== false && $colon > $slash;
}
792 | ||
/**
 * Set the entire IRI. Returns true on success, false on failure (if there
 * are any invalid characters).
 *
 * Passing null leaves the object untouched and returns true. Results are
 * memoised in a function-level static cache, so they are shared by all
 * instances for the lifetime of the process.
 *
 * @param string|null $iri
 * @return bool
 */
public function set_iri($iri)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($iri === null)
    {
        return true;
    }

    // The value is used as an array key below. Cast it once so that a
    // stringable object is a legal offset and always maps to the same
    // entry as its string form.
    $iri = (string) $iri;

    if (isset($cache[$iri]))
    {
        list($this->scheme,
             $this->iuserinfo,
             $this->ihost,
             $this->port,
             $this->ipath,
             $this->iquery,
             $this->ifragment,
             $return) = $cache[$iri];
        return $return;
    }

    $parsed = $this->parse_iri($iri);
    if (!$parsed)
    {
        return false;
    }

    // The chain stops at the first setter that fails, leaving the
    // remaining parts as they were.
    $return = $this->set_scheme($parsed['scheme'])
        && $this->set_authority($parsed['authority'])
        && $this->set_path($parsed['path'])
        && $this->set_query($parsed['query'])
        && $this->set_fragment($parsed['fragment']);

    $cache[$iri] = array($this->scheme,
                         $this->iuserinfo,
                         $this->ihost,
                         $this->port,
                         $this->ipath,
                         $this->iquery,
                         $this->ifragment,
                         $return);
    return $return;
}
849 | ||
/**
 * Set the scheme
 *
 * null clears the scheme. Any other value must start with an ASCII
 * letter followed by letters, digits, "+", "-" or "."; it is stored
 * lower-cased. A value that does not match clears the scheme and makes
 * the method return false.
 *
 * @param string|null $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme !== null && !preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme))
    {
        $this->scheme = null;
        return false;
    }

    $this->scheme = ($scheme === null) ? null : strtolower($scheme);
    return true;
}
874 | ||
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The authority is split at the last "@" into userinfo and the rest, and
 * the rest at the first ":" following a "]" (if there is one) into host
 * and port. null clears all three parts. An empty port ("host:") is
 * treated as no port.
 *
 * Results are memoised in a function-level static cache shared by all
 * instances. The individual setters run scheme_normalization(), so the
 * stored parts depend on the current scheme as well as on the authority
 * string; the cache is therefore keyed by both.
 *
 * @param string|null $authority
 * @return bool
 */
public function set_authority($authority)
{
    static $cache;
    if (!$cache)
    {
        $cache = array();
    }

    if ($authority === null)
    {
        $this->iuserinfo = null;
        $this->ihost = null;
        $this->port = null;
        return true;
    }

    // Without the scheme in the key, parts normalised away for one scheme
    // (e.g. host "localhost" for file:, port 80 for http:) would be
    // replayed as missing for every other scheme.
    $scheme_key = (string) $this->scheme;

    if (isset($cache[$scheme_key][$authority]))
    {
        list($this->iuserinfo,
             $this->ihost,
             $this->port,
             $return) = $cache[$scheme_key][$authority];

        return $return;
    }

    $remaining = $authority;
    if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
    {
        $iuserinfo = substr($remaining, 0, $iuserinfo_end);
        $remaining = substr($remaining, $iuserinfo_end + 1);
    }
    else
    {
        $iuserinfo = null;
    }

    // Start looking for the port separator after a closing "]" so that the
    // colons inside a bracketed IPv6 literal are skipped.
    $port_start = strpos($remaining, ':', (int) strpos($remaining, ']'));
    if ($port_start !== false)
    {
        $port = substr($remaining, $port_start + 1);

        // Nothing after the colon: substr() yields false on older PHP
        // versions and '' on PHP 8. Both mean "no port".
        if ($port === false || $port === '')
        {
            $port = null;
        }
        $remaining = substr($remaining, 0, $port_start);
    }
    else
    {
        $port = null;
    }

    $return = $this->set_userinfo($iuserinfo) &&
              $this->set_host($remaining) &&
              $this->set_port($port);

    $cache[$scheme_key][$authority] = array($this->iuserinfo,
                                            $this->ihost,
                                            $this->port,
                                            $return);

    return $return;
}
941 | ||
/**
 * Set the iuserinfo
 *
 * null clears the part. Any other value is stored after percent-encoding
 * everything that is not unreserved, a sub-delimiter or ":"; scheme
 * normalisation is then re-applied. Always returns true.
 *
 * @param string|null $iuserinfo
 * @return bool
 */
public function set_userinfo($iuserinfo)
{
    if ($iuserinfo === null)
    {
        $this->iuserinfo = null;
        return true;
    }

    $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
    $this->scheme_normalization();

    return true;
}
962 | ||
/**
 * Set the ihost. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * null clears the host. A value wrapped in "[" and "]" is checked with
 * SimplePie_Net_IPv6::check_ipv6() and stored in the form returned by
 * SimplePie_Net_IPv6::compress(); if the check fails the host is cleared
 * and false is returned. Any other value is percent-encoded where needed
 * and lower-cased, leaving pct-encoded triplets untouched.
 *
 * @param string|null $ihost
 * @return bool
 */
public function set_host($ihost)
{
    if ($ihost === null)
    {
        $this->ihost = null;
        return true;
    }

    $bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';

    if ($bracketed)
    {
        $address = substr($ihost, 1, -1);
        if (!SimplePie_Net_IPv6::check_ipv6($address))
        {
            $this->ihost = null;
            return false;
        }

        $this->ihost = '[' . SimplePie_Net_IPv6::compress($address) . ']';
    }
    else
    {
        $ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

        // Lower-case only after encoding: the step above can turn
        // pct-encoded bytes back into literal (possibly upper-case)
        // characters, while the triplets that remain must keep their
        // upper-case hex digits.
        $stoppers = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%';
        $length = strlen($ihost);
        $offset = strcspn($ihost, $stoppers);

        while ($offset < $length)
        {
            if ($ihost[$offset] === '%')
            {
                // Skip the whole %XX triplet
                $offset += 3;
            }
            else
            {
                $ihost[$offset] = strtolower($ihost[$offset]);
                $offset++;
            }

            $offset += strcspn($ihost, $stoppers, $offset);
        }

        $this->ihost = $ihost;
    }

    $this->scheme_normalization();

    return true;
}
1018 | ||
/**
 * Set the port. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * null and the empty string both clear the port. An empty string would
 * otherwise pass the all-digits check below and be stored as port 0. A
 * value made up only of ASCII digits is stored as an integer, after which
 * scheme normalisation is re-applied. Any other value clears the port and
 * returns false.
 *
 * @param string|null $port
 * @return bool
 */
public function set_port($port)
{
    if ($port === null || $port === '')
    {
        $this->port = null;
        return true;
    }
    elseif (strspn($port, '0123456789') === strlen($port))
    {
        $this->port = (int) $port;
        $this->scheme_normalization();
        return true;
    }
    else
    {
        $this->port = null;
        return false;
    }
}
1045 | ||
/**
 * Set the ipath.
 *
 * The path is escaped and, when a scheme is set, also has its dot
 * segments removed. Both variants are memoised per input string in a
 * function-static cache.
 *
 * @param string $ipath
 * @return bool Always true
 */
public function set_path($ipath)
{
    // Input path => array(0 => escaped, 1 => escaped with dot segments removed).
    static $cache = array();

    $key = (string) $ipath;

    if (!isset($cache[$key]))
    {
        $escaped = $this->replace_invalid_with_pct_encoding($key, '!$&\'()*+,;=@:/');
        $cache[$key] = array($escaped, $this->remove_dot_segments($escaped));
    }

    // Dot segments are only removed when a scheme is present.
    $variants = $cache[$key];
    if ($this->scheme !== null)
    {
        $this->ipath = $variants[1];
    }
    else
    {
        $this->ipath = $variants[0];
    }

    $this->scheme_normalization();
    return true;
}
1078 | ||
/**
 * Set the iquery.
 *
 * A null value clears the query. Any other value is escaped and then
 * the scheme normalisation pass is run.
 *
 * @param string|null $iquery
 * @return bool Always true
 */
public function set_query($iquery)
{
    if ($iquery !== null)
    {
        // NOTE(review): the trailing `true` is passed only for queries; it
        // presumably widens the set of characters the helper leaves
        // unescaped - confirm against replace_invalid_with_pct_encoding().
        $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
        $this->scheme_normalization();
        return true;
    }

    $this->iquery = null;
    return true;
}
1098 | ||
/**
 * Set the ifragment.
 *
 * A null value clears the fragment. Any other value is escaped and then
 * the scheme normalisation pass is run.
 *
 * @param string|null $ifragment
 * @return bool Always true
 */
public function set_fragment($ifragment)
{
    if ($ifragment !== null)
    {
        $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
        $this->scheme_normalization();
        return true;
    }

    $this->ifragment = null;
    return true;
}
1118 | ||
/**
 * Convert an IRI to a URI (or parts thereof)
 *
 * Each byte in the range 0x80-0xFF is replaced by its uppercase
 * percent-encoded form ("%XX"); every other byte is left as it is.
 *
 * @param string $string IRI, or a part of one, to convert
 * @return string
 */
public function to_uri($string)
{
    // Mask of all bytes 0x80-0xFF, built once and reused on later calls.
    static $high_bytes;
    if (!$high_bytes)
    {
        $high_bytes = implode('', range("\x80", "\xFF"));
    }

    $length = strlen($string);
    $offset = strcspn($string, $high_bytes);
    while ($offset < $length)
    {
        $escaped = sprintf('%%%02X', ord($string[$offset]));
        $string = substr_replace($string, $escaped, $offset, 1);

        // One byte became three: step past the escape and grow the length.
        $offset += 3;
        $length += 2;

        // Jump ahead to the next byte that needs escaping.
        $offset += strcspn($string, $high_bytes, $offset);
    }

    return $string;
}
1143 | ||
/**
 * Get the complete IRI
 *
 * Components are joined in the order scheme, authority, path, query,
 * fragment. When the path is empty, the scheme's default path from the
 * normalization table is used instead, but only if there is a non-empty
 * authority.
 *
 * @return string|false The assembled IRI, or false if is_valid() fails
 */
public function get_iri()
{
    if (!$this->is_valid())
    {
        return false;
    }

    $iauthority = $this->get_iauthority();
    $parts = array();

    if ($this->scheme !== null)
    {
        $parts[] = $this->scheme . ':';
    }

    if ($iauthority !== null)
    {
        $parts[] = '//' . $iauthority;
    }

    if ($this->ipath !== '')
    {
        $parts[] = $this->ipath;
    }
    elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '')
    {
        // Empty path: substitute the scheme's default path.
        $parts[] = $this->normalization[$this->scheme]['ipath'];
    }

    if ($this->iquery !== null)
    {
        $parts[] = '?' . $this->iquery;
    }

    if ($this->ifragment !== null)
    {
        $parts[] = '#' . $this->ifragment;
    }

    return implode('', $parts);
}
1184 | ||
/**
 * Get the complete URI
 *
 * This is the result of get_iri() passed through to_uri().
 *
 * @return string|false get_iri() returns false for an invalid IRI, and
 *                      that value is handed to to_uri() as it is
 */
public function get_uri()
{
    $iri = $this->get_iri();

    return $this->to_uri($iri);
}
1194 | ||
/**
 * Get the complete iauthority
 *
 * Built as "[iuserinfo@][ihost][:port]" from whichever of the three
 * components are set.
 *
 * @return string|null The authority, or null if no component is set
 */
protected function get_iauthority()
{
    // With no userinfo, host or port there is no authority at all.
    if ($this->iuserinfo === null && $this->ihost === null && $this->port === null)
    {
        return null;
    }

    $userinfo = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
    $host = ($this->ihost !== null) ? $this->ihost : '';
    $port = ($this->port !== null) ? ':' . $this->port : '';

    return $userinfo . $host . $port;
}
1224 | ||
/**
 * Get the complete authority
 *
 * A string iauthority is converted with to_uri(); any other value from
 * get_iauthority() (it returns null when there is no authority) is
 * returned unchanged.
 *
 * @return string|null
 */
protected function get_authority()
{
    $iauthority = $this->get_iauthority();

    return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
}
1238 | } |