X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FUrl.php;h=81f72fb06f46fdbccffce0e41bd302b1db45b8cd;hb=fb1b182fbf0ee5afed586f77eec84d7a906831ef;hp=c166ff6ef03f6023c2bf895279f8eb8940736ba9;hpb=bb9ca54838e2f877635197541e8439171c83d5dc;p=github%2Fshaarli%2FShaarli.git diff --git a/application/Url.php b/application/Url.php index c166ff6e..81f72fb0 100644 --- a/application/Url.php +++ b/application/Url.php @@ -34,8 +34,8 @@ function unparse_url($parsedUrl) */ function cleanup_url($url) { - $obj_url = new Url($url); - return $obj_url->cleanup(); + $obj_url = new \Shaarli\Http\Url($url); + return $obj_url->cleanup(); } /** @@ -47,8 +47,8 @@ function cleanup_url($url) */ function get_url_scheme($url) { - $obj_url = new Url($url); - return $obj_url->getScheme(); + $obj_url = new \Shaarli\Http\Url($url); + return $obj_url->getScheme(); } /** @@ -62,225 +62,27 @@ function add_trailing_slash($url) { return $url . (!endsWith($url, '/') ? '/' : ''); } -/** - * Converts an URL with an IDN host to a ASCII one. - * - * @param string $url Input URL. - * - * @return string converted URL. - */ -function url_with_idn_to_ascii($url) -{ - $parts = parse_url($url); - $parts['host'] = idn_to_ascii($parts['host']); - $httpUrl = new \http\Url($parts); - return $httpUrl->toString(); -} /** - * URL representation and cleanup utilities - * - * Form - * scheme://[username:password@]host[:port][/path][?query][#fragment] + * Replace not whitelisted protocols by 'http://' from given URL. * - * Examples - * http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor - * https://host.name.tld - * https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer + * @param string $url URL to clean + * @param array $protocols List of allowed protocols (aside from http(s)). * - * @see http://www.faqs.org/rfcs/rfc3986.html + * @return string URL with allowed protocol */ -class Url +function whitelist_protocols($url, $protocols) { - private static $annoyingQueryParams = array( - // Facebook - 'action_object_map=', - 'action_ref_map=', - 'action_type_map=', - 'fb_', - 'fb=', - 'PHPSESSID=', - - // Scoop.it - '__scoop', - - // Google Analytics & FeedProxy - 'utm_', - - // ATInternet - 'xtor=' - ); - - private static $annoyingFragments = array( - // ATInternet - 'xtor=RSS-', - - // Misc. - 'tk.rss_all' - ); - - /* - * URL parts represented as an array - * - * @see http://php.net/parse_url - */ - protected $parts; - - /** - * Parses a string containing a URL - * - * @param string $url a string containing a URL - */ - public function __construct($url) - { - $url = self::cleanupUnparsedUrl(trim($url)); - $this->parts = parse_url($url); - - if (!empty($url) && empty($this->parts['scheme'])) { - $this->parts['scheme'] = 'http'; - } - } - - /** - * Clean up URL before it's parsed. - * ie. handle urlencode, url prefixes, etc. - * - * @param string $url URL to clean. - * - * @return string cleaned URL. - */ - protected static function cleanupUnparsedUrl($url) - { - return self::removeFirefoxAboutReader($url); - } - - /** - * Remove Firefox Reader prefix if it's present. - * - * @param string $input url - * - * @return string cleaned url - */ - protected static function removeFirefoxAboutReader($input) - { - $firefoxPrefix = 'about://reader?url='; - if (startsWith($input, $firefoxPrefix)) { - return urldecode(ltrim($input, $firefoxPrefix)); - } - return $input; + if (startsWith($url, '?') || startsWith($url, '/')) { + return $url; } - - /** - * Returns a string representation of this URL - */ - public function toString() - { - return unparse_url($this->parts); - } - - /** - * Removes undesired query parameters - */ - protected function cleanupQuery() - { - if (! isset($this->parts['query'])) { - return; - } - - $queryParams = explode('&', $this->parts['query']); - - foreach (self::$annoyingQueryParams as $annoying) { - foreach ($queryParams as $param) { - if (startsWith($param, $annoying)) { - $queryParams = array_diff($queryParams, array($param)); - continue; - } - } - } - - if (count($queryParams) == 0) { - unset($this->parts['query']); - return; - } - - $this->parts['query'] = implode('&', $queryParams); - } - - /** - * Removes undesired fragments - */ - protected function cleanupFragment() - { - if (! isset($this->parts['fragment'])) { - return; - } - - foreach (self::$annoyingFragments as $annoying) { - if (startsWith($this->parts['fragment'], $annoying)) { - unset($this->parts['fragment']); - break; - } - } - } - - /** - * Removes undesired query parameters and fragments - * - * @return string the string representation of this URL after cleanup - */ - public function cleanup() - { - $this->cleanupQuery(); - $this->cleanupFragment(); - return $this->toString(); - } - - /** - * Converts an URL with an International Domain Name host to a ASCII one. - * This requires PHP-intl. If it's not available, just returns this->cleanup(). - * - * @return string converted cleaned up URL. - */ - public function idnToAscii() - { - $out = $this->cleanup(); - if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { - return $out; - } - $asciiHost = idn_to_ascii($this->parts['host']); - return str_replace($this->parts['host'], $asciiHost, $out); - } - - /** - * Get URL scheme. - * - * @return string the URL scheme or false if none is provided. - */ - public function getScheme() { - if (!isset($this->parts['scheme'])) { - return false; - } - return $this->parts['scheme']; - } - - /** - * Get URL host. - * - * @return string the URL host or false if none is provided. - */ - public function getHost() { - if (empty($this->parts['host'])) { - return false; - } - return $this->parts['host']; - } - - /** - * Test if the Url is an HTTP one. - * - * @return true is HTTP, false otherwise. - */ - public function isHttp() { - return strpos(strtolower($this->parts['scheme']), 'http') !== false; + $protocols = array_merge(['http', 'https'], $protocols); + $protocol = preg_match('#^(\w+):/?/?#', $url, $match); + // Protocol not allowed: we remove it and replace it with http + if ($protocol === 1 && ! in_array($match[1], $protocols)) { + $url = str_replace($match[0], 'http://', $url); + } elseif ($protocol !== 1) { + $url = 'http://' . $url; } + return $url; }