From fb1b182fbf0ee5afed586f77eec84d7a906831ef Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Mon, 3 Dec 2018 00:23:35 +0100 Subject: [PATCH] namespacing: \Shaarli\Http\Url Signed-off-by: VirtualTam --- application/HttpUtils.php | 3 + application/Url.php | 218 +------------------------------- application/http/Url.php | 217 +++++++++++++++++++++++++++++++ tests/{Url => http}/UrlTest.php | 23 ++-- 4 files changed, 234 insertions(+), 227 deletions(-) create mode 100644 application/http/Url.php rename tests/{Url => http}/UrlTest.php (88%) diff --git a/application/HttpUtils.php b/application/HttpUtils.php index 9c438160..51af5d0d 100644 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php @@ -1,4 +1,7 @@ cleanup(); } @@ -47,7 +47,7 @@ function cleanup_url($url) */ function get_url_scheme($url) { - $obj_url = new Url($url); + $obj_url = new \Shaarli\Http\Url($url); return $obj_url->getScheme(); } @@ -86,217 +86,3 @@ function whitelist_protocols($url, $protocols) } return $url; } - -/** - * URL representation and cleanup utilities - * - * Form - * scheme://[username:password@]host[:port][/path][?query][#fragment] - * - * Examples - * http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor - * https://host.name.tld - * https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer - * - * @see http://www.faqs.org/rfcs/rfc3986.html - */ -class Url -{ - private static $annoyingQueryParams = array( - // Facebook - 'action_object_map=', - 'action_ref_map=', - 'action_type_map=', - 'fb_', - 'fb=', - 'PHPSESSID=', - - // Scoop.it - '__scoop', - - // Google Analytics & FeedProxy - 'utm_', - - // ATInternet - 'xtor=', - - // Other - 'campaign_' - ); - - private static $annoyingFragments = array( - // ATInternet - 'xtor=RSS-', - - // Misc. 
- 'tk.rss_all' - ); - - /* - * URL parts represented as an array - * - * @see http://php.net/parse_url - */ - protected $parts; - - /** - * Parses a string containing a URL - * - * @param string $url a string containing a URL - */ - public function __construct($url) - { - $url = self::cleanupUnparsedUrl(trim($url)); - $this->parts = parse_url($url); - - if (!empty($url) && empty($this->parts['scheme'])) { - $this->parts['scheme'] = 'http'; - } - } - - /** - * Clean up URL before it's parsed. - * ie. handle urlencode, url prefixes, etc. - * - * @param string $url URL to clean. - * - * @return string cleaned URL. - */ - protected static function cleanupUnparsedUrl($url) - { - return self::removeFirefoxAboutReader($url); - } - - /** - * Remove Firefox Reader prefix if it's present. - * - * @param string $input url - * - * @return string cleaned url - */ - protected static function removeFirefoxAboutReader($input) - { - $firefoxPrefix = 'about://reader?url='; - if (startsWith($input, $firefoxPrefix)) { - return urldecode(ltrim($input, $firefoxPrefix)); - } - return $input; - } - - /** - * Returns a string representation of this URL - */ - public function toString() - { - return unparse_url($this->parts); - } - - /** - * Removes undesired query parameters - */ - protected function cleanupQuery() - { - if (! isset($this->parts['query'])) { - return; - } - - $queryParams = explode('&', $this->parts['query']); - - foreach (self::$annoyingQueryParams as $annoying) { - foreach ($queryParams as $param) { - if (startsWith($param, $annoying)) { - $queryParams = array_diff($queryParams, array($param)); - continue; - } - } - } - - if (count($queryParams) == 0) { - unset($this->parts['query']); - return; - } - - $this->parts['query'] = implode('&', $queryParams); - } - - /** - * Removes undesired fragments - */ - protected function cleanupFragment() - { - if (! 
isset($this->parts['fragment'])) { - return; - } - - foreach (self::$annoyingFragments as $annoying) { - if (startsWith($this->parts['fragment'], $annoying)) { - unset($this->parts['fragment']); - break; - } - } - } - - /** - * Removes undesired query parameters and fragments - * - * @return string the string representation of this URL after cleanup - */ - public function cleanup() - { - $this->cleanupQuery(); - $this->cleanupFragment(); - return $this->toString(); - } - - /** - * Converts an URL with an International Domain Name host to a ASCII one. - * This requires PHP-intl. If it's not available, just returns this->cleanup(). - * - * @return string converted cleaned up URL. - */ - public function idnToAscii() - { - $out = $this->cleanup(); - if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { - return $out; - } - $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); - return str_replace($this->parts['host'], $asciiHost, $out); - } - - /** - * Get URL scheme. - * - * @return string the URL scheme or false if none is provided. - */ - public function getScheme() - { - if (!isset($this->parts['scheme'])) { - return false; - } - return $this->parts['scheme']; - } - - /** - * Get URL host. - * - * @return string the URL host or false if none is provided. - */ - public function getHost() - { - if (empty($this->parts['host'])) { - return false; - } - return $this->parts['host']; - } - - /** - * Test if the Url is an HTTP one. - * - * @return true is HTTP, false otherwise. - */ - public function isHttp() - { - return strpos(strtolower($this->parts['scheme']), 'http') !== false; - } -} diff --git a/application/http/Url.php b/application/http/Url.php new file mode 100644 index 00000000..260231c6 --- /dev/null +++ b/application/http/Url.php @@ -0,0 +1,217 @@ +parts = parse_url($url); + + if (!empty($url) && empty($this->parts['scheme'])) { + $this->parts['scheme'] = 'http'; + } + } + + /** + * Clean up URL before it's parsed. 
+ * ie. handle urlencode, url prefixes, etc. + * + * @param string $url URL to clean. + * + * @return string cleaned URL. + */ + protected static function cleanupUnparsedUrl($url) + { + return self::removeFirefoxAboutReader($url); + } + + /** + * Remove the Firefox Reader prefix, if present, and URL-decode the remainder. + * + * @param string $input url + * + * @return string cleaned url + */ + protected static function removeFirefoxAboutReader($input) + { + $firefoxPrefix = 'about://reader?url='; + if (startsWith($input, $firefoxPrefix)) { + return urldecode(substr($input, strlen($firefoxPrefix))); + } + return $input; + } + + /** + * Returns a string representation of this URL + */ + public function toString() + { + return unparse_url($this->parts); + } + + /** + * Removes undesired query parameters + */ + protected function cleanupQuery() + { + if (!isset($this->parts['query'])) { + return; + } + + $queryParams = explode('&', $this->parts['query']); + + foreach (self::$annoyingQueryParams as $annoying) { + foreach ($queryParams as $param) { + if (startsWith($param, $annoying)) { + $queryParams = array_diff($queryParams, array($param)); + continue; + } + } + } + + if (count($queryParams) == 0) { + unset($this->parts['query']); + return; + } + + $this->parts['query'] = implode('&', $queryParams); + } + + /** + * Removes undesired fragments + */ + protected function cleanupFragment() + { + if (!isset($this->parts['fragment'])) { + return; + } + + foreach (self::$annoyingFragments as $annoying) { + if (startsWith($this->parts['fragment'], $annoying)) { + unset($this->parts['fragment']); + break; + } + } + } + + /** + * Removes undesired query parameters and fragments + * + * @return string the string representation of this URL after cleanup + */ + public function cleanup() + { + $this->cleanupQuery(); + $this->cleanupFragment(); + return $this->toString(); + } + + /** + * Converts a URL with an International Domain Name host to an ASCII one. + * This requires PHP-intl. 
If it's not available, just returns this->cleanup(). + * + * @return string converted cleaned up URL. + */ + public function idnToAscii() + { + $out = $this->cleanup(); + if (!function_exists('idn_to_ascii') || !isset($this->parts['host'])) { + return $out; + } + $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); + return $asciiHost === false ? $out : str_replace($this->parts['host'], $asciiHost, $out); + } + + /** + * Get URL scheme. + * + * @return string|bool the URL scheme, or false if none is provided. + */ + public function getScheme() + { + if (!isset($this->parts['scheme'])) { + return false; + } + return $this->parts['scheme']; + } + + /** + * Get URL host. + * + * @return string|bool the URL host, or false if none is provided. + */ + public function getHost() + { + if (empty($this->parts['host'])) { + return false; + } + return $this->parts['host']; + } + + /** + * Test if the Url is an HTTP one. + * + * @return bool true if the scheme contains 'http', false otherwise (including when no scheme is set). + */ + public function isHttp() + { + return isset($this->parts['scheme']) && strpos(strtolower($this->parts['scheme']), 'http') !== false; + } +} diff --git a/tests/Url/UrlTest.php b/tests/http/UrlTest.php similarity index 88% rename from tests/Url/UrlTest.php rename to tests/http/UrlTest.php index db229ce0..011b416d 100644 --- a/tests/Url/UrlTest.php +++ b/tests/http/UrlTest.php @@ -3,12 +3,13 @@ * Url's tests */ -require_once 'application/Url.php'; +namespace Shaarli\Http; + /** * Unitary tests for URL utilities */ -class UrlTest extends PHPUnit_Framework_TestCase +class UrlTest extends \PHPUnit\Framework\TestCase { // base URL for tests protected static $baseUrl = 'http://domain.tld:3000'; @@ -18,7 +19,7 @@ class UrlTest extends PHPUnit_Framework_TestCase */ private function assertUrlIsCleaned($query = '', $fragment = '') { - $url = new Url(self::$baseUrl.$query.$fragment); + $url = new Url(self::$baseUrl . $query . 
$fragment); $url->cleanup(); $this->assertEquals(self::$baseUrl, $url->toString()); } @@ -38,7 +39,7 @@ class UrlTest extends PHPUnit_Framework_TestCase public function testConstruct() { $ref = 'http://username:password@hostname:9090/path' - .'?arg1=value1&arg2=value2#anchor'; + . '?arg1=value1&arg2=value2#anchor'; $url = new Url($ref); $this->assertEquals($ref, $url->toString()); } @@ -52,7 +53,7 @@ class UrlTest extends PHPUnit_Framework_TestCase $this->assertUrlIsCleaned(); // URL with no annoying elements - $ref = self::$baseUrl.'?p1=val1&p2=1234#edit'; + $ref = self::$baseUrl . '?p1=val1&p2=1234#edit'; $url = new Url($ref); $this->assertEquals($ref, $url->cleanup()); } @@ -115,26 +116,26 @@ class UrlTest extends PHPUnit_Framework_TestCase // ditch annoying query params and fragment, keep useful params $url = new Url( self::$baseUrl - .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' + . '?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' ); - $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); + $this->assertEquals(self::$baseUrl . '?my=stuff&is=kept', $url->cleanup()); // ditch annoying query params, keep useful params and fragment $url = new Url( self::$baseUrl - .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' + . '?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' ); $this->assertEquals( - self::$baseUrl.'?my=stuff&is=kept#again', + self::$baseUrl . '?my=stuff&is=kept#again', $url->cleanup() ); // test firefox reader url $url = new Url( - 'about://reader?url=' . urlencode(self::$baseUrl .'?my=stuff&is=kept') + 'about://reader?url=' . urlencode(self::$baseUrl . '?my=stuff&is=kept') ); - $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); + $this->assertEquals(self::$baseUrl . '?my=stuff&is=kept', $url->cleanup()); } /** -- 2.41.0