X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FUrl.php;h=3b7f19c207770679d2fff77a709210529ffa2afa;hb=1004742f09b55ff781c13745781b9a7e90986faa;hp=61a30a7800972d8d8c22fdad98a4023e1c561b23;hpb=86deafe0ff5a22a37255546cf82325e89bf272b1;p=github%2Fshaarli%2FShaarli.git diff --git a/application/Url.php b/application/Url.php index 61a30a78..3b7f19c2 100644 --- a/application/Url.php +++ b/application/Url.php @@ -34,8 +34,8 @@ function unparse_url($parsedUrl) */ function cleanup_url($url) { - $obj_url = new Url($url); - return $obj_url->cleanup(); + $obj_url = new Url($url); + return $obj_url->cleanup(); } /** @@ -47,8 +47,8 @@ function cleanup_url($url) */ function get_url_scheme($url) { - $obj_url = new Url($url); - return $obj_url->getScheme(); + $obj_url = new Url($url); + return $obj_url->getScheme(); } /** @@ -62,21 +62,31 @@ function add_trailing_slash($url) { return $url . (!endsWith($url, '/') ? '/' : ''); } + /** - * Converts an URL with an IDN host to a ASCII one. + * Replace not whitelisted protocols by 'http://' from given URL. * - * @param string $url Input URL. + * @param string $url URL to clean + * @param array $protocols List of allowed protocols (aside from http(s)). * - * @return string converted URL. + * @return string URL with allowed protocol */ -function url_with_idn_to_ascii($url) +function whitelist_protocols($url, $protocols) { - $parts = parse_url($url); - $parts['host'] = idn_to_ascii($parts['host']); - - $httpUrl = new \http\Url($parts); - return $httpUrl->toString(); + if (startsWith($url, '?') || startsWith($url, '/')) { + return $url; + } + $protocols = array_merge(['http', 'https'], $protocols); + $protocol = preg_match('#^(\w+):/?/?#', $url, $match); + // Protocol not allowed: we remove it and replace it with http + if ($protocol === 1 && ! in_array($match[1], $protocols)) { + $url = str_replace($match[0], 'http://', $url); + } elseif ($protocol !== 1) { + $url = 'http://' . $url; + } + return $url; } + /** * URL representation and cleanup utilities * @@ -99,6 +109,7 @@ class Url 'action_type_map=', 'fb_', 'fb=', + 'PHPSESSID=', // Scoop.it '__scoop', @@ -107,7 +118,10 @@ class Url 'utm_', // ATInternet - 'xtor=' + 'xtor=', + + // Other + 'campaign_' ); private static $annoyingFragments = array( @@ -203,7 +217,7 @@ class Url } $this->parts['query'] = implode('&', $queryParams); - } + } /** * Removes undesired fragments @@ -240,13 +254,13 @@ class Url * * @return string converted cleaned up URL. */ - public function indToAscii() + public function idnToAscii() { $out = $this->cleanup(); if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { return $out; } - $asciiHost = idn_to_ascii($this->parts['host']); + $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); return str_replace($this->parts['host'], $asciiHost, $out); } @@ -255,7 +269,8 @@ class Url * * @return string the URL scheme or false if none is provided. */ - public function getScheme() { + public function getScheme() + { if (!isset($this->parts['scheme'])) { return false; } @@ -267,7 +282,8 @@ class Url * * @return string the URL host or false if none is provided. */ - public function getHost() { + public function getHost() + { if (empty($this->parts['host'])) { return false; } @@ -279,7 +295,8 @@ class Url * * @return true is HTTP, false otherwise. */ - public function isHttp() { + public function isHttp() + { return strpos(strtolower($this->parts['scheme']), 'http') !== false; } }