]>
git.immae.eu Git - github/wallabag/wallabag.git/blob - inc/3rdparty/simplepie/SimplePie/IRI.php
5 * A PHP-Based RSS and Atom Feed Framework.
6 * Takes the hard work out of managing a complete RSS/Atom solution.
8 * Copyright (c) 2004-2009, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
11 * Redistribution and use in source and binary forms, with or without modification, are
12 * permitted provided that the following conditions are met:
14 * * Redistributions of source code must retain the above copyright notice, this list of
15 * conditions and the following disclaimer.
17 * * Redistributions in binary form must reproduce the above copyright notice, this list
18 * of conditions and the following disclaimer in the documentation and/or other materials
19 * provided with the distribution.
21 * * Neither the name of the SimplePie Team nor the names of its contributors may be used
22 * to endorse or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
26 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
27 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
28 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
30 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
31 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
37 * @copyright 2004-2010 Ryan Parman, Geoffrey Sneddon, Ryan McCue
39 * @author Geoffrey Sneddon
41 * @link http://simplepie.org/ SimplePie
42 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
43 * @todo phpDoc comments
47 * IRI parser/serialiser
110 * Whether the object represents a valid IRI
115 var $valid = array ();
/**
 * Return the entire IRI when the object is read as a string.
 *
 * get_iri() may yield null (absolutize() explicitly compares its result
 * against null), whereas PHP requires __toString() to return a string.
 * The result is therefore cast, so an IRI without a serialisation is
 * rendered as the empty string instead of raising an error.
 *
 * @return string
 */
public function __toString()
{
    return (string) $this->get_iri();
}
129 * Create a new IRI object, from a specified string
133 * @return SimplePie_IRI
135 public function __construct ( $iri )
137 $iri = ( string ) $iri ;
140 $parsed = $this- > parse_iri ( $iri );
141 $this- > set_scheme ( $parsed [ 'scheme' ]);
142 $this- > set_authority ( $parsed [ 'authority' ]);
143 $this- > set_path ( $parsed [ 'path' ]);
144 $this- > set_query ( $parsed [ 'query' ]);
145 $this- > set_fragment ( $parsed [ 'fragment' ]);
150 * Create a new IRI object by resolving a relative IRI
154 * @param SimplePie_IRI $base Base IRI
155 * @param string $relative Relative IRI
156 * @return SimplePie_IRI
158 public static function absolutize ( $base , $relative )
160 $relative = ( string ) $relative ;
161 if ( $relative !== '' )
163 $relative = new SimplePie_IRI ( $relative );
164 if ( $relative- > get_scheme () !== null )
168 elseif ( $base- > get_iri () !== null )
170 if ( $relative- > get_authority () !== null )
173 $target- > set_scheme ( $base- > get_scheme ());
177 $target = new SimplePie_IRI ( '' );
178 $target- > set_scheme ( $base- > get_scheme ());
179 $target- > set_userinfo ( $base- > get_userinfo ());
180 $target- > set_host ( $base- > get_host ());
181 $target- > set_port ( $base- > get_port ());
182 if ( $relative- > get_path () !== null )
184 if ( strpos ( $relative- > get_path (), '/' ) === 0 )
186 $target- > set_path ( $relative- > get_path ());
188 elseif (( $base- > get_userinfo () !== null || $base- > get_host () !== null || $base- > get_port () !== null ) && $base- > get_path () === null )
190 $target- > set_path ( '/' . $relative- > get_path ());
192 elseif (( $last_segment = strrpos ( $base- > get_path (), '/' )) !== false )
194 $target- > set_path ( substr ( $base- > get_path (), 0 , $last_segment +
1 ) . $relative- > get_path ());
198 $target- > set_path ( $relative- > get_path ());
200 $target- > set_query ( $relative- > get_query ());
204 $target- > set_path ( $base- > get_path ());
205 if ( $relative- > get_query () !== null )
207 $target- > set_query ( $relative- > get_query ());
209 elseif ( $base- > get_query () !== null )
211 $target- > set_query ( $base- > get_query ());
215 $target- > set_fragment ( $relative- > get_fragment ());
219 // No base URL, just return the relative URL
231 * Parse an IRI into scheme/authority/path/query/fragment segments
237 public function parse_iri ( $iri )
239 preg_match ( '/^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)( \? ([^#]*))?(#(.*))?$/' , $iri , $match );
240 for ( $i = count ( $match ); $i <= 9 ; $i ++
)
244 return array ( 'scheme' => $match [ 2 ], 'authority' => $match [ 4 ], 'path' => $match [ 5 ], 'query' => $match [ 7 ], 'fragment' => $match [ 9 ]);
/**
 * Remove dot segments from a path
 *
 * Works through the input buffer following steps A-E below (the
 * "remove dot segments" routine of RFC 3986, section 5.2.4), moving
 * finished segments to an output buffer. The loop stops as soon as the
 * input can no longer contain a "." or ".." segment; whatever is left
 * is appended to the output unchanged.
 *
 * @param string $input
 * @return string
 */
public function remove_dot_segments($input)
{
    $output = '';
    while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
    {
        // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
        if (strncmp($input, '../', 3) === 0)
        {
            $input = substr($input, 3);
        }
        elseif (strncmp($input, './', 2) === 0)
        {
            $input = substr($input, 2);
        }
        // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
        elseif (strncmp($input, '/./', 3) === 0)
        {
            $input = '/' . substr($input, 3);
        }
        elseif ($input === '/.')
        {
            $input = '/';
        }
        // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
        elseif (strncmp($input, '/../', 4) === 0 || $input === '/..')
        {
            $input = ($input === '/..') ? '/' : '/' . substr($input, 4);

            // Drop everything from the last "/" onwards; with no "/" at
            // all the whole output buffer is discarded.
            $cut = strrpos($output, '/');
            $output = ($cut === false) ? '' : substr($output, 0, $cut);
        }
        // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
        elseif ($input === '.' || $input === '..')
        {
            $input = '';
        }
        // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
        elseif (($pos = strpos($input, '/', 1)) !== false)
        {
            $output .= substr($input, 0, $pos);
            $input = substr_replace($input, '', 0, $pos);
        }
        else
        {
            $output .= $input;
            $input = '';
        }
    }
    return $output . $input;
}
309 * Replace invalid character with percent encoding
311 * @param string $string Input string
312 * @param string $valid_chars Valid characters not in iunreserved or iprivate (this is ASCII-only)
313 * @param int $case Normalise case
314 * @param bool $iprivate Allow iprivate
317 protected function replace_invalid_with_pct_encoding ( $string , $valid_chars , $case = SIMPLEPIE_SAME_CASE
, $iprivate = false )
319 // Normalize as many pct-encoded sections as possible
320 $string = preg_replace_callback ( '/(?:%[A-Fa-f0-9] {2} )+/' , array (& $this , 'remove_iunreserved_percent_encoded' ), $string );
322 // Replace invalid percent characters
323 $string = preg_replace ( '/%(?![A-Fa-f0-9] {2} )/' , '%25' , $string );
325 // Add unreserved and % to $valid_chars (the latter is safe because all
326 // pct-encoded sections are now valid).
327 $valid_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%' ;
329 // Now replace any bytes that aren't allowed with their pct-encoded versions
331 $strlen = strlen ( $string );
332 while (( $position +
= strspn ( $string , $valid_chars , $position )) < $strlen )
334 $value = ord ( $string [ $position ]);
339 // By default we are valid
342 // No one byte sequences are valid due to the while.
343 // Two byte sequence:
344 if (( $value & 0xE0 ) === 0xC0 )
346 $character = ( $value & 0x1F ) << 6 ;
350 // Three byte sequence:
351 elseif (( $value & 0xF0 ) === 0xE0 )
353 $character = ( $value & 0x0F ) << 12 ;
357 // Four byte sequence:
358 elseif (( $value & 0xF8 ) === 0xF0 )
360 $character = ( $value & 0x07 ) << 18 ;
374 if ( $position +
$length <= $strlen )
376 for ( $position ++
; $remaining ; $position ++
)
378 $value = ord ( $string [ $position ]);
380 // Check that the byte is valid, then add it to the character:
381 if (( $value & 0xC0 ) === 0x80 )
383 $character |= ( $value & 0x3F ) << (-- $remaining * 6 );
385 // If it is invalid, count the sequence as invalid and reprocess the current byte:
396 $position = $strlen - 1 ;
401 // Percent encode anything invalid or not in ucschar
405 // Non-shortest form sequences are invalid
406 || $length > 1 && $character <= 0x7F
407 || $length > 2 && $character <= 0x7FF
408 || $length > 3 && $character <= 0xFFFF
409 // Outside of range of ucschar codepoints
411 || ( $character & 0xFFFE ) === 0xFFFE
412 || $character >= 0xFDD0 && $character <= 0xFDEF
414 // Everything else not in ucschar
415 $character > 0xD7FF && $character < 0xF900
417 || $character > 0xEFFFD
420 // Everything not in iprivate, if it applies
422 || $character < 0xE000
423 || $character > 0x10FFFD
427 // If we were a character, pretend we weren't, but rather an error.
431 for ( $j = $start ; $j <= $position ; $j ++
)
433 $string = substr_replace ( $string , sprintf ( '%%%02X' , ord ( $string [ $j ])), $j , 1 );
442 if ( $case & SIMPLEPIE_LOWERCASE
)
444 $string = strtolower ( $string );
446 elseif ( $case & SIMPLEPIE_UPPERCASE
)
448 $string = strtoupper ( $string );
455 * Callback function for preg_replace_callback.
457 * Removes sequences of percent encoded bytes that represent UTF-8
458 * encoded characters in iunreserved
460 * @param array $match PCRE match
461 * @return string Replacement
463 protected function remove_iunreserved_percent_encoded ( $match )
465 // As we just have valid percent encoded sequences we can just explode
466 // and ignore the first member of the returned array (an empty string).
467 $bytes = explode ( '%' , $match [ 0 ]);
469 // Initialize the new string (this is what will be returned) and that
470 // there are no bytes remaining in the current sequence (unsurprising
471 // at the first byte!).
475 // Loop over each and every byte, and set $value to its value
476 for ( $i = 1 , $len = count ( $bytes ); $i < $len ; $i ++
)
478 $value = hexdec ( $bytes [ $i ]);
480 // If we're the first byte of sequence:
486 // By default we are valid
489 // One byte sequence:
495 // Two byte sequence:
496 elseif (( $value & 0xE0 ) === 0xC0 )
498 $character = ( $value & 0x1F ) << 6 ;
502 // Three byte sequence:
503 elseif (( $value & 0xF0 ) === 0xE0 )
505 $character = ( $value & 0x0F ) << 12 ;
509 // Four byte sequence:
510 elseif (( $value & 0xF8 ) === 0xF0 )
512 $character = ( $value & 0x07 ) << 18 ;
523 // Continuation byte:
526 // Check that the byte is valid, then add it to the character:
527 if (( $value & 0xC0 ) === 0x80 )
530 $character |= ( $value & 0x3F ) << ( $remaining * 6 );
532 // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
541 // If we've reached the end of the current byte sequence, append it to Unicode::$data
544 // Percent encode anything invalid or not in iunreserved
548 // Non-shortest form sequences are invalid
549 || $length > 1 && $character <= 0x7F
550 || $length > 2 && $character <= 0x7FF
551 || $length > 3 && $character <= 0xFFFF
552 // Outside of range of iunreserved codepoints
554 || $character > 0xEFFFD
556 || ( $character & 0xFFFE ) === 0xFFFE
557 || $character >= 0xFDD0 && $character <= 0xFDEF
558 // Everything else not in iunreserved (this is all BMP)
559 || $character === 0x2F
560 || $character > 0x39 && $character < 0x41
561 || $character > 0x5A && $character < 0x61
562 || $character > 0x7A && $character < 0x7E
563 || $character > 0x7E && $character < 0xA0
564 || $character > 0xD7FF && $character < 0xF900
567 for ( $j = $start ; $j <= $i ; $j ++
)
569 $string .= '%' . strtoupper ( $bytes [ $j ]);
574 for ( $j = $start ; $j <= $i ; $j ++
)
576 $string .= chr ( hexdec ( $bytes [ $j ]));
582 // If we have any bytes left over they are invalid (i.e., we are
583 // mid-way through a multi-byte sequence)
586 for ( $j = $start ; $j < $len ; $j ++
)
588 $string .= '%' . strtoupper ( $bytes [ $j ]);
/**
 * Check if the object represents a valid IRI
 *
 * Each setter stores its outcome in $this->valid under its own name.
 * The IRI is valid when the sum of those entries equals their count,
 * i.e. when no recorded outcome is false (an empty record is valid).
 *
 * @return bool
 */
public function is_valid()
{
    $recorded = count($this->valid);
    $passed = array_sum($this->valid);

    return $passed === $recorded;
}
/**
 * Set the scheme. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * A null or empty scheme clears the component and counts as success.
 * Otherwise the scheme must start with an ASCII letter and continue with
 * ASCII letters, digits, "+", "-" or "." only; it is stored lowercased.
 * On failure the scheme is cleared and the failure is recorded in
 * $this->valid.
 *
 * @param string $scheme
 * @return bool
 */
public function set_scheme($scheme)
{
    if ($scheme === null || $scheme === '')
    {
        $this->scheme = null;
    }
    else
    {
        $len = strlen($scheme);
        $alpha = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz';

        // strspn() returns the length of the leading run of allowed bytes.
        // Comparing that length with the expected one rejects an invalid
        // character anywhere in the scheme, not only in second position.
        if (strspn($scheme, $alpha, 0, 1) !== 1
            || strspn($scheme, $alpha . '0123456789+-.', 1) !== $len - 1)
        {
            $this->scheme = null;
            $this->valid[__FUNCTION__] = false;
            return false;
        }

        $this->scheme = strtolower($scheme);
    }

    $this->valid[__FUNCTION__] = true;
    return true;
}
/**
 * Set the authority. Returns true on success, false on failure (if there are
 * any invalid characters).
 *
 * The authority is split into userinfo (everything before the last "@"),
 * host and port, which are handed to set_userinfo(), set_host() and
 * set_port() in that order. The setters are chained with &&, so a failing
 * setter prevents the following ones from running.
 *
 * @param string $authority
 * @return bool
 */
public function set_authority($authority)
{
    $authority = (string) $authority;

    $userinfo = null;
    if (($userinfo_end = strrpos($authority, '@')) !== false)
    {
        $userinfo = substr($authority, 0, $userinfo_end);
        // Cast because older PHP versions return false from substr() when
        // nothing follows the "@".
        $authority = (string) substr($authority, $userinfo_end + 1);
    }

    // A bracketed IP literal such as "[2001:db8::1]" contains colons of its
    // own (set_host() validates such hosts as IPv6), so the port separator
    // may only be looked for after the closing bracket.
    $search_from = 0;
    if ($authority !== '' && $authority[0] === '[' && ($bracket_end = strpos($authority, ']')) !== false)
    {
        $search_from = $bracket_end;
    }

    $port = null;
    if (($port_start = strpos($authority, ':', $search_from)) !== false)
    {
        $port = substr($authority, $port_start + 1);
        // "host:" has no port; substr() yields false or '' depending on the
        // PHP version, both of which mean "no port".
        if ($port === false || $port === '')
        {
            $port = null;
        }
        $authority = substr($authority, 0, $port_start);
    }

    return $this->set_userinfo($userinfo) && $this->set_host($authority) && $this->set_port($port);
}
/**
 * Set the userinfo.
 *
 * A null or empty value clears the component; any other value is stored
 * after passing through replace_invalid_with_pct_encoding() with the
 * characters allowed in userinfo. The call is always recorded as
 * successful.
 *
 * @param string $userinfo
 * @return bool
 */
public function set_userinfo($userinfo)
{
    $is_blank = ($userinfo === null || $userinfo === '');

    $this->userinfo = $is_blank
        ? null
        : $this->replace_invalid_with_pct_encoding($userinfo, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:');

    $this->valid[__FUNCTION__] = true;
    return true;
}
706 * Set the host. Returns true on success, false on failure (if there are
707 * any invalid characters).
710 * @param string $host
713 public function set_host ( $host )
715 if ( $host === null || $host === '' )
718 $this- > valid
[ __FUNCTION__
] = true ;
721 elseif ( $host [ 0 ] === '[' && substr ( $host , - 1 ) === ']' )
723 if ( SimplePie_Net_IPv6
:: checkIPv6 ( substr ( $host , 1 , - 1 )))
726 $this- > valid
[ __FUNCTION__
] = true ;
732 $this- > valid
[ __FUNCTION__
] = false ;
738 $this- > host
= $this- > replace_invalid_with_pct_encoding ( $host , 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$& \' ()*+,;=' , SIMPLEPIE_LOWERCASE
);
739 $this- > valid
[ __FUNCTION__
] = true ;
745 * Set the port. Returns true on success, false on failure (if there are
746 * any invalid characters).
749 * @param string $port
752 public function set_port ( $port )
754 if ( $port === null || $port === '' )
757 $this- > valid
[ __FUNCTION__
] = true ;
760 elseif ( strspn ( $port , '0123456789' ) === strlen ( $port ))
762 $this- > port
= ( int ) $port ;
763 $this- > valid
[ __FUNCTION__
] = true ;
769 $this- > valid
[ __FUNCTION__
] = false ;
778 * @param string $path
781 public function set_path ( $path )
783 if ( $path === null || $path === '' )
786 $this- > valid
[ __FUNCTION__
] = true ;
789 elseif ( substr ( $path , 0 , 2 ) === '//' && $this- > userinfo
=== null && $this- > host
=== null && $this- > port
=== null )
792 $this- > valid
[ __FUNCTION__
] = false ;
797 $this- > path
= $this- > replace_invalid_with_pct_encoding ( $path , 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$& \' ()*+,;=@/' );
798 if ( $this- > scheme
!== null )
800 $this- > path
= $this- > remove_dot_segments ( $this- > path
);
802 $this- > valid
[ __FUNCTION__
] = true ;
811 * @param string $query
814 public function set_query ( $query )
816 if ( $query === null || $query === '' )
822 $this- > query
= $this- > replace_invalid_with_pct_encoding ( $query , 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$ \' ()*+,;:@/?&=' );
824 $this- > valid
[ __FUNCTION__
] = true ;
/**
 * Set the fragment.
 *
 * A null or empty value clears the component; any other value is stored
 * after passing through replace_invalid_with_pct_encoding() with the
 * characters allowed in a fragment. The call is always recorded as
 * successful.
 *
 * @param string $fragment
 * @return bool
 */
public function set_fragment($fragment)
{
    $is_blank = ($fragment === null || $fragment === '');

    $this->fragment = $is_blank
        ? null
        : $this->replace_invalid_with_pct_encoding($fragment, 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~!$&\'()*+,;=:@/?');

    $this->valid[__FUNCTION__] = true;
    return true;
}
/**
 * Get the complete IRI
 *
 * Joins the components that are set, in order: "scheme:", "//authority",
 * path, "?query" and "#fragment".
 *
 * @return string|null The serialised IRI, or null when no component is set
 */
public function get_iri()
{
    $pieces = array();

    if ($this->scheme !== null)
    {
        $pieces[] = $this->scheme . ':';
    }

    $authority = $this->get_authority();
    if ($authority !== null)
    {
        $pieces[] = '//' . $authority;
    }

    if ($this->path !== null)
    {
        $pieces[] = $this->path;
    }

    if ($this->query !== null)
    {
        $pieces[] = '?' . $this->query;
    }

    if ($this->fragment !== null)
    {
        $pieces[] = '#' . $this->fragment;
    }

    $iri = implode('', $pieces);

    return ($iri !== '') ? $iri : null;
}
/**
 * Get the scheme
 *
 * @return string|null The scheme as stored by set_scheme() (lowercased), or null when unset
 */
public function get_scheme()
{
    $scheme = $this->scheme;

    return $scheme;
}
/**
 * Get the complete authority
 *
 * Joins the components that are set as "userinfo@", host and ":port".
 *
 * @return string|null The authority, or null when none of its parts is set
 */
public function get_authority()
{
    $prefix = ($this->userinfo !== null) ? $this->userinfo . '@' : '';
    $host = ($this->host !== null) ? $this->host : '';
    $suffix = ($this->port !== null) ? ':' . $this->port : '';

    $authority = $prefix . $host . $suffix;

    return ($authority !== '') ? $authority : null;
}
/**
 * Get the user information
 *
 * @return string|null The userinfo as stored by set_userinfo(), or null when unset
 */
public function get_userinfo()
{
    $userinfo = $this->userinfo;

    return $userinfo;
}
949 public function get_host ()
960 public function get_port ()
971 public function get_path ()
982 public function get_query ()
/**
 * Get the fragment
 *
 * @return string|null The fragment as stored by set_fragment(), or null when unset
 */
public function get_fragment()
{
    $fragment = $this->fragment;

    return $fragment;
}