X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FUrl.php;h=6b9870f0c1bb468ebafd67e2d71123411f598a0c;hb=d37348efe280f0b72807ea6f62fca63e2ad28991;hp=d80c9c582f32e5e8b8053b291d354a112e21b6c0;hpb=938d9cce77ed5098dd69643795cb4014f3688b35;p=github%2Fshaarli%2FShaarli.git diff --git a/application/Url.php b/application/Url.php index d80c9c58..6b9870f0 100644 --- a/application/Url.php +++ b/application/Url.php @@ -63,6 +63,30 @@ function add_trailing_slash($url) return $url . (!endsWith($url, '/') ? '/' : ''); } +/** + * Replace not whitelisted protocols by 'http://' from given URL. + * + * @param string $url URL to clean + * @param array $protocols List of allowed protocols (aside from http(s)). + * + * @return string URL with allowed protocol + */ +function whitelist_protocols($url, $protocols) +{ + if (startsWith($url, '?') || startsWith($url, '/')) { + return $url; + } + $protocols = array_merge(['http', 'https'], $protocols); + $protocol = preg_match('#^(\w+):/?/?#', $url, $match); + // Protocol not allowed: we remove it and replace it with http + if ($protocol === 1 && ! in_array($match[1], $protocols)) { + $url = str_replace($match[0], 'http://', $url); + } elseif ($protocol !== 1) { + $url = 'http://' . $url; + } + return $url; +} + /** * URL representation and cleanup utilities * @@ -85,6 +109,7 @@ class Url 'action_type_map=', 'fb_', 'fb=', + 'PHPSESSID=', // Scoop.it '__scoop', @@ -93,7 +118,10 @@ class Url 'utm_', // ATInternet - 'xtor=' + 'xtor=', + + // Other + 'campaign_' ); private static $annoyingFragments = array( @@ -118,6 +146,7 @@ class Url */ public function __construct($url) { + $url = self::cleanupUnparsedUrl(trim($url)); $this->parts = parse_url($url); if (!empty($url) && empty($this->parts['scheme'])) { @@ -125,6 +154,35 @@ class Url } } + /** + * Clean up URL before it's parsed. + * ie. handle urlencode, url prefixes, etc. + * + * @param string $url URL to clean. + * + * @return string cleaned URL. + */ + protected static function cleanupUnparsedUrl($url) + { + return self::removeFirefoxAboutReader($url); + } + + /** + * Remove Firefox Reader prefix if it's present. + * + * @param string $input url + * + * @return string cleaned url + */ + protected static function removeFirefoxAboutReader($input) + { + $firefoxPrefix = 'about://reader?url='; + if (startsWith($input, $firefoxPrefix)) { + return urldecode(ltrim($input, $firefoxPrefix)); + } + return $input; + } + /** * Returns a string representation of this URL */ @@ -190,6 +248,22 @@ class Url return $this->toString(); } + /** + * Converts an URL with an International Domain Name host to a ASCII one. + * This requires PHP-intl. If it's not available, just returns this->cleanup(). + * + * @return string converted cleaned up URL. + */ + public function idnToAscii() + { + $out = $this->cleanup(); + if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { + return $out; + } + $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); + return str_replace($this->parts['host'], $asciiHost, $out); + } + /** * Get URL scheme. * @@ -201,4 +275,25 @@ class Url } return $this->parts['scheme']; } + + /** + * Get URL host. + * + * @return string the URL host or false if none is provided. + */ + public function getHost() { + if (empty($this->parts['host'])) { + return false; + } + return $this->parts['host']; + } + + /** + * Test if the Url is an HTTP one. + * + * @return true is HTTP, false otherwise. + */ + public function isHttp() { + return strpos(strtolower($this->parts['scheme']), 'http') !== false; + } }