X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FUrl.php;h=3b7f19c207770679d2fff77a709210529ffa2afa;hb=1004742f09b55ff781c13745781b9a7e90986faa;hp=a4ac2e73cad2537ab28cd6bb604356bd311d14d7;hpb=92ba7b573f2833bd35c7eb2fc7fdbeb1a0ac7b44;p=github%2Fshaarli%2FShaarli.git diff --git a/application/Url.php b/application/Url.php old mode 100755 new mode 100644 index a4ac2e73..3b7f19c2 --- a/application/Url.php +++ b/application/Url.php @@ -34,8 +34,8 @@ function unparse_url($parsedUrl) */ function cleanup_url($url) { - $obj_url = new Url($url); - return $obj_url->cleanup(); + $obj_url = new Url($url); + return $obj_url->cleanup(); } /** @@ -47,8 +47,8 @@ function cleanup_url($url) */ function get_url_scheme($url) { - $obj_url = new Url($url); - return $obj_url->getScheme(); + $obj_url = new Url($url); + return $obj_url->getScheme(); } /** @@ -63,6 +63,30 @@ function add_trailing_slash($url) return $url . (!endsWith($url, '/') ? '/' : ''); } +/** + * Replace not whitelisted protocols by 'http://' from given URL. + * + * @param string $url URL to clean + * @param array $protocols List of allowed protocols (aside from http(s)). + * + * @return string URL with allowed protocol + */ +function whitelist_protocols($url, $protocols) +{ + if (startsWith($url, '?') || startsWith($url, '/')) { + return $url; + } + $protocols = array_merge(['http', 'https'], $protocols); + $protocol = preg_match('#^(\w+):/?/?#', $url, $match); + // Protocol not allowed: we remove it and replace it with http + if ($protocol === 1 && ! in_array($match[1], $protocols)) { + $url = str_replace($match[0], 'http://', $url); + } elseif ($protocol !== 1) { + $url = 'http://' . $url; + } + return $url; +} + /** * URL representation and cleanup utilities * @@ -85,6 +109,7 @@ class Url 'action_type_map=', 'fb_', 'fb=', + 'PHPSESSID=', // Scoop.it '__scoop', @@ -93,7 +118,10 @@ class Url 'utm_', // ATInternet - 'xtor=' + 'xtor=', + + // Other + 'campaign_' ); private static $annoyingFragments = array( @@ -118,13 +146,43 @@ class Url */ public function __construct($url) { - $this->parts = parse_url(trim($url)); + $url = self::cleanupUnparsedUrl(trim($url)); + $this->parts = parse_url($url); if (!empty($url) && empty($this->parts['scheme'])) { $this->parts['scheme'] = 'http'; } } + /** + * Clean up URL before it's parsed. + * ie. handle urlencode, url prefixes, etc. + * + * @param string $url URL to clean. + * + * @return string cleaned URL. + */ + protected static function cleanupUnparsedUrl($url) + { + return self::removeFirefoxAboutReader($url); + } + + /** + * Remove Firefox Reader prefix if it's present. + * + * @param string $input url + * + * @return string cleaned url + */ + protected static function removeFirefoxAboutReader($input) + { + $firefoxPrefix = 'about://reader?url='; + if (startsWith($input, $firefoxPrefix)) { + return urldecode(ltrim($input, $firefoxPrefix)); + } + return $input; + } + /** * Returns a string representation of this URL */ @@ -159,7 +217,7 @@ class Url } $this->parts['query'] = implode('&', $queryParams); - } + } /** * Removes undesired fragments @@ -190,24 +248,55 @@ class Url return $this->toString(); } + /** + * Converts an URL with an International Domain Name host to a ASCII one. + * This requires PHP-intl. If it's not available, just returns this->cleanup(). + * + * @return string converted cleaned up URL. + */ + public function idnToAscii() + { + $out = $this->cleanup(); + if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { + return $out; + } + $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); + return str_replace($this->parts['host'], $asciiHost, $out); + } + /** * Get URL scheme. * * @return string the URL scheme or false if none is provided. */ - public function getScheme() { + public function getScheme() + { if (!isset($this->parts['scheme'])) { return false; } return $this->parts['scheme']; } + /** + * Get URL host. + * + * @return string the URL host or false if none is provided. + */ + public function getHost() + { + if (empty($this->parts['host'])) { + return false; + } + return $this->parts['host']; + } + /** * Test if the Url is an HTTP one. * * @return true is HTTP, false otherwise. */ - public function isHttp() { + public function isHttp() + { return strpos(strtolower($this->parts['scheme']), 'http') !== false; } }