X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FUrl.php;h=3b7f19c207770679d2fff77a709210529ffa2afa;hb=f211e417bf637b8a83988175c29ee072c69f7642;hp=02a4395d01c8fd75d46921439ba44ef42249da8f;hpb=9e1724f1922bf9e38299eecedfce2dcdbd416749;p=github%2Fshaarli%2FShaarli.git diff --git a/application/Url.php b/application/Url.php old mode 100755 new mode 100644 index 02a4395d..3b7f19c2 --- a/application/Url.php +++ b/application/Url.php @@ -25,6 +25,68 @@ function unparse_url($parsedUrl) return "$scheme$user$pass$host$port$path$query$fragment"; } +/** + * Removes undesired query parameters and fragments + * + * @param string url Url to be cleaned + * + * @return string the string representation of this URL after cleanup + */ +function cleanup_url($url) +{ + $obj_url = new Url($url); + return $obj_url->cleanup(); +} + +/** + * Get URL scheme. + * + * @param string url Url for which the scheme is requested + * + * @return mixed the URL scheme or false if none is provided. + */ +function get_url_scheme($url) +{ + $obj_url = new Url($url); + return $obj_url->getScheme(); +} + +/** + * Adds a trailing slash at the end of URL if necessary. + * + * @param string $url URL to check/edit. + * + * @return string $url URL with a end trailing slash. + */ +function add_trailing_slash($url) +{ + return $url . (!endsWith($url, '/') ? '/' : ''); +} + +/** + * Replace not whitelisted protocols by 'http://' from given URL. + * + * @param string $url URL to clean + * @param array $protocols List of allowed protocols (aside from http(s)). + * + * @return string URL with allowed protocol + */ +function whitelist_protocols($url, $protocols) +{ + if (startsWith($url, '?') || startsWith($url, '/')) { + return $url; + } + $protocols = array_merge(['http', 'https'], $protocols); + $protocol = preg_match('#^(\w+):/?/?#', $url, $match); + // Protocol not allowed: we remove it and replace it with http + if ($protocol === 1 && ! in_array($match[1], $protocols)) { + $url = str_replace($match[0], 'http://', $url); + } elseif ($protocol !== 1) { + $url = 'http://' . $url; + } + return $url; +} + /** * URL representation and cleanup utilities * @@ -47,6 +109,7 @@ class Url 'action_type_map=', 'fb_', 'fb=', + 'PHPSESSID=', // Scoop.it '__scoop', @@ -55,7 +118,10 @@ class Url 'utm_', // ATInternet - 'xtor=' + 'xtor=', + + // Other + 'campaign_' ); private static $annoyingFragments = array( @@ -80,6 +146,7 @@ class Url */ public function __construct($url) { + $url = self::cleanupUnparsedUrl(trim($url)); $this->parts = parse_url($url); if (!empty($url) && empty($this->parts['scheme'])) { @@ -87,10 +154,39 @@ class Url } } + /** + * Clean up URL before it's parsed. + * ie. handle urlencode, url prefixes, etc. + * + * @param string $url URL to clean. + * + * @return string cleaned URL. + */ + protected static function cleanupUnparsedUrl($url) + { + return self::removeFirefoxAboutReader($url); + } + + /** + * Remove Firefox Reader prefix if it's present. + * + * @param string $input url + * + * @return string cleaned url + */ + protected static function removeFirefoxAboutReader($input) + { + $firefoxPrefix = 'about://reader?url='; + if (startsWith($input, $firefoxPrefix)) { + return urldecode(ltrim($input, $firefoxPrefix)); + } + return $input; + } + /** * Returns a string representation of this URL */ - public function __toString() + public function toString() { return unparse_url($this->parts); } @@ -121,7 +217,7 @@ class Url } $this->parts['query'] = implode('&', $queryParams); - } + } /** * Removes undesired fragments @@ -149,7 +245,23 @@ class Url { $this->cleanupQuery(); $this->cleanupFragment(); - return $this->__toString(); + return $this->toString(); + } + + /** + * Converts an URL with an International Domain Name host to a ASCII one. + * This requires PHP-intl. If it's not available, just returns this->cleanup(). + * + * @return string converted cleaned up URL. + */ + public function idnToAscii() + { + $out = $this->cleanup(); + if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { + return $out; + } + $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); + return str_replace($this->parts['host'], $asciiHost, $out); } /** @@ -157,10 +269,34 @@ class Url * * @return string the URL scheme or false if none is provided. */ - public function getScheme() { + public function getScheme() + { if (!isset($this->parts['scheme'])) { return false; } return $this->parts['scheme']; } + + /** + * Get URL host. + * + * @return string the URL host or false if none is provided. + */ + public function getHost() + { + if (empty($this->parts['host'])) { + return false; + } + return $this->parts['host']; + } + + /** + * Test if the Url is an HTTP one. + * + * @return true is HTTP, false otherwise. + */ + public function isHttp() + { + return strpos(strtolower($this->parts['scheme']), 'http') !== false; + } }