From fb1b182fbf0ee5afed586f77eec84d7a906831ef Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Mon, 3 Dec 2018 00:23:35 +0100 Subject: namespacing: \Shaarli\Http\Url Signed-off-by: VirtualTam --- application/HttpUtils.php | 3 + application/Url.php | 218 +--------------------------------------------- application/http/Url.php | 217 +++++++++++++++++++++++++++++++++++++++++++++ tests/Url/UrlTest.php | 200 ------------------------------------------ tests/http/UrlTest.php | 201 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 423 insertions(+), 416 deletions(-) create mode 100644 application/http/Url.php delete mode 100644 tests/Url/UrlTest.php create mode 100644 tests/http/UrlTest.php diff --git a/application/HttpUtils.php b/application/HttpUtils.php index 9c438160..51af5d0d 100644 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php @@ -1,4 +1,7 @@ cleanup(); } @@ -47,7 +47,7 @@ function cleanup_url($url) */ function get_url_scheme($url) { - $obj_url = new Url($url); + $obj_url = new \Shaarli\Http\Url($url); return $obj_url->getScheme(); } @@ -86,217 +86,3 @@ function whitelist_protocols($url, $protocols) } return $url; } - -/** - * URL representation and cleanup utilities - * - * Form - * scheme://[username:password@]host[:port][/path][?query][#fragment] - * - * Examples - * http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor - * https://host.name.tld - * https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer - * - * @see http://www.faqs.org/rfcs/rfc3986.html - */ -class Url -{ - private static $annoyingQueryParams = array( - // Facebook - 'action_object_map=', - 'action_ref_map=', - 'action_type_map=', - 'fb_', - 'fb=', - 'PHPSESSID=', - - // Scoop.it - '__scoop', - - // Google Analytics & FeedProxy - 'utm_', - - // ATInternet - 'xtor=', - - // Other - 'campaign_' - ); - - private static $annoyingFragments = array( - // ATInternet - 'xtor=RSS-', - - // Misc. - 'tk.rss_all' - ); - - /* - * URL parts represented as an array - * - * @see http://php.net/parse_url - */ - protected $parts; - - /** - * Parses a string containing a URL - * - * @param string $url a string containing a URL - */ - public function __construct($url) - { - $url = self::cleanupUnparsedUrl(trim($url)); - $this->parts = parse_url($url); - - if (!empty($url) && empty($this->parts['scheme'])) { - $this->parts['scheme'] = 'http'; - } - } - - /** - * Clean up URL before it's parsed. - * ie. handle urlencode, url prefixes, etc. - * - * @param string $url URL to clean. - * - * @return string cleaned URL. - */ - protected static function cleanupUnparsedUrl($url) - { - return self::removeFirefoxAboutReader($url); - } - - /** - * Remove Firefox Reader prefix if it's present. - * - * @param string $input url - * - * @return string cleaned url - */ - protected static function removeFirefoxAboutReader($input) - { - $firefoxPrefix = 'about://reader?url='; - if (startsWith($input, $firefoxPrefix)) { - return urldecode(ltrim($input, $firefoxPrefix)); - } - return $input; - } - - /** - * Returns a string representation of this URL - */ - public function toString() - { - return unparse_url($this->parts); - } - - /** - * Removes undesired query parameters - */ - protected function cleanupQuery() - { - if (! isset($this->parts['query'])) { - return; - } - - $queryParams = explode('&', $this->parts['query']); - - foreach (self::$annoyingQueryParams as $annoying) { - foreach ($queryParams as $param) { - if (startsWith($param, $annoying)) { - $queryParams = array_diff($queryParams, array($param)); - continue; - } - } - } - - if (count($queryParams) == 0) { - unset($this->parts['query']); - return; - } - - $this->parts['query'] = implode('&', $queryParams); - } - - /** - * Removes undesired fragments - */ - protected function cleanupFragment() - { - if (! isset($this->parts['fragment'])) { - return; - } - - foreach (self::$annoyingFragments as $annoying) { - if (startsWith($this->parts['fragment'], $annoying)) { - unset($this->parts['fragment']); - break; - } - } - } - - /** - * Removes undesired query parameters and fragments - * - * @return string the string representation of this URL after cleanup - */ - public function cleanup() - { - $this->cleanupQuery(); - $this->cleanupFragment(); - return $this->toString(); - } - - /** - * Converts an URL with an International Domain Name host to a ASCII one. - * This requires PHP-intl. If it's not available, just returns this->cleanup(). - * - * @return string converted cleaned up URL. - */ - public function idnToAscii() - { - $out = $this->cleanup(); - if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { - return $out; - } - $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); - return str_replace($this->parts['host'], $asciiHost, $out); - } - - /** - * Get URL scheme. - * - * @return string the URL scheme or false if none is provided. - */ - public function getScheme() - { - if (!isset($this->parts['scheme'])) { - return false; - } - return $this->parts['scheme']; - } - - /** - * Get URL host. - * - * @return string the URL host or false if none is provided. - */ - public function getHost() - { - if (empty($this->parts['host'])) { - return false; - } - return $this->parts['host']; - } - - /** - * Test if the Url is an HTTP one. - * - * @return true is HTTP, false otherwise. - */ - public function isHttp() - { - return strpos(strtolower($this->parts['scheme']), 'http') !== false; - } -} diff --git a/application/http/Url.php b/application/http/Url.php new file mode 100644 index 00000000..260231c6 --- /dev/null +++ b/application/http/Url.php @@ -0,0 +1,217 @@ +parts = parse_url($url); + + if (!empty($url) && empty($this->parts['scheme'])) { + $this->parts['scheme'] = 'http'; + } + } + + /** + * Clean up URL before it's parsed. + * ie. handle urlencode, url prefixes, etc. + * + * @param string $url URL to clean. + * + * @return string cleaned URL. + */ + protected static function cleanupUnparsedUrl($url) + { + return self::removeFirefoxAboutReader($url); + } + + /** + * Remove Firefox Reader prefix if it's present. + * + * @param string $input url + * + * @return string cleaned url + */ + protected static function removeFirefoxAboutReader($input) + { + $firefoxPrefix = 'about://reader?url='; + if (startsWith($input, $firefoxPrefix)) { + return urldecode(ltrim($input, $firefoxPrefix)); + } + return $input; + } + + /** + * Returns a string representation of this URL + */ + public function toString() + { + return unparse_url($this->parts); + } + + /** + * Removes undesired query parameters + */ + protected function cleanupQuery() + { + if (!isset($this->parts['query'])) { + return; + } + + $queryParams = explode('&', $this->parts['query']); + + foreach (self::$annoyingQueryParams as $annoying) { + foreach ($queryParams as $param) { + if (startsWith($param, $annoying)) { + $queryParams = array_diff($queryParams, array($param)); + continue; + } + } + } + + if (count($queryParams) == 0) { + unset($this->parts['query']); + return; + } + + $this->parts['query'] = implode('&', $queryParams); + } + + /** + * Removes undesired fragments + */ + protected function cleanupFragment() + { + if (!isset($this->parts['fragment'])) { + return; + } + + foreach (self::$annoyingFragments as $annoying) { + if (startsWith($this->parts['fragment'], $annoying)) { + unset($this->parts['fragment']); + break; + } + } + } + + /** + * Removes undesired query parameters and fragments + * + * @return string the string representation of this URL after cleanup + */ + public function cleanup() + { + $this->cleanupQuery(); + $this->cleanupFragment(); + return $this->toString(); + } + + /** + * Converts an URL with an International Domain Name host to a ASCII one. + * This requires PHP-intl. If it's not available, just returns this->cleanup(). + * + * @return string converted cleaned up URL. + */ + public function idnToAscii() + { + $out = $this->cleanup(); + if (!function_exists('idn_to_ascii') || !isset($this->parts['host'])) { + return $out; + } + $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); + return str_replace($this->parts['host'], $asciiHost, $out); + } + + /** + * Get URL scheme. + * + * @return string the URL scheme or false if none is provided. + */ + public function getScheme() + { + if (!isset($this->parts['scheme'])) { + return false; + } + return $this->parts['scheme']; + } + + /** + * Get URL host. + * + * @return string the URL host or false if none is provided. + */ + public function getHost() + { + if (empty($this->parts['host'])) { + return false; + } + return $this->parts['host']; + } + + /** + * Test if the Url is an HTTP one. + * + * @return true is HTTP, false otherwise. + */ + public function isHttp() + { + return strpos(strtolower($this->parts['scheme']), 'http') !== false; + } +} diff --git a/tests/Url/UrlTest.php b/tests/Url/UrlTest.php deleted file mode 100644 index db229ce0..00000000 --- a/tests/Url/UrlTest.php +++ /dev/null @@ -1,200 +0,0 @@ -cleanup(); - $this->assertEquals(self::$baseUrl, $url->toString()); - } - - /** - * Instantiate an empty URL - */ - public function testEmptyConstruct() - { - $url = new Url(''); - $this->assertEquals('', $url->toString()); - } - - /** - * Instantiate a URL - */ - public function testConstruct() - { - $ref = 'http://username:password@hostname:9090/path' - .'?arg1=value1&arg2=value2#anchor'; - $url = new Url($ref); - $this->assertEquals($ref, $url->toString()); - } - - /** - * URL cleanup - nothing to do - */ - public function testNoCleanup() - { - // URL with no query nor fragment - $this->assertUrlIsCleaned(); - - // URL with no annoying elements - $ref = self::$baseUrl.'?p1=val1&p2=1234#edit'; - $url = new Url($ref); - $this->assertEquals($ref, $url->cleanup()); - } - - /** - * URL cleanup - annoying fragment - */ - public function testCleanupFragment() - { - $this->assertUrlIsCleaned('', '#tk.rss_all'); - $this->assertUrlIsCleaned('', '#xtor=RSS-'); - $this->assertUrlIsCleaned('', '#xtor=RSS-U3ht0tkc4b'); - } - - /** - * URL cleanup - single annoying query parameter - */ - public function testCleanupSingleQueryParam() - { - $this->assertUrlIsCleaned('?action_object_map=junk'); - $this->assertUrlIsCleaned('?action_ref_map=Cr4p!'); - $this->assertUrlIsCleaned('?action_type_map=g4R84g3'); - - $this->assertUrlIsCleaned('?fb_stuff=v41u3'); - $this->assertUrlIsCleaned('?fb=71m3w4573'); - - $this->assertUrlIsCleaned('?utm_campaign=zomg'); - $this->assertUrlIsCleaned('?utm_medium=numnum'); - $this->assertUrlIsCleaned('?utm_source=c0d3'); - $this->assertUrlIsCleaned('?utm_term=1n4l'); - - $this->assertUrlIsCleaned('?xtor=some-url'); - $this->assertUrlIsCleaned('?PHPSESSID=012345678910111213'); - } - - /** - * URL cleanup - multiple annoying query parameters - */ - public function testCleanupMultipleQueryParams() - { - $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng'); - $this->assertUrlIsCleaned( - '?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' - ); - } - - /** - * URL cleanup - multiple annoying query parameters, annoying fragment - */ - public function testCleanupMultipleQueryParamsAndFragment() - { - $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng', '#tk.rss_all'); - } - - /** - * Nominal case - the URL contains both useful and annoying parameters - */ - public function testCleanupMixedContent() - { - // ditch annoying query params and fragment, keep useful params - $url = new Url( - self::$baseUrl - .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' - ); - $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); - - - // ditch annoying query params, keep useful params and fragment - $url = new Url( - self::$baseUrl - .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' - ); - $this->assertEquals( - self::$baseUrl.'?my=stuff&is=kept#again', - $url->cleanup() - ); - - // test firefox reader url - $url = new Url( - 'about://reader?url=' . urlencode(self::$baseUrl .'?my=stuff&is=kept') - ); - $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); - } - - /** - * Test default http scheme. - */ - public function testDefaultScheme() - { - $url = new Url(self::$baseUrl); - $this->assertEquals('http', $url->getScheme()); - $url = new Url('domain.tld'); - $this->assertEquals('http', $url->getScheme()); - $url = new Url('ssh://domain.tld'); - $this->assertEquals('ssh', $url->getScheme()); - $url = new Url('ftp://domain.tld'); - $this->assertEquals('ftp', $url->getScheme()); - $url = new Url('git://domain.tld/push?pull=clone#checkout'); - $this->assertEquals('git', $url->getScheme()); - } - - /** - * Test add trailing slash. - */ - public function testAddTrailingSlash() - { - $strOn = 'http://randomstr.com/test/'; - $strOff = 'http://randomstr.com/test'; - $this->assertEquals($strOn, add_trailing_slash($strOn)); - $this->assertEquals($strOn, add_trailing_slash($strOff)); - } - - /** - * Test valid HTTP url. - */ - public function testUrlIsHttp() - { - $url = new Url(self::$baseUrl); - $this->assertTrue($url->isHttp()); - } - - /** - * Test non HTTP url. - */ - public function testUrlIsNotHttp() - { - $url = new Url('ftp://save.tld/mysave'); - $this->assertFalse($url->isHttp()); - } - - /** - * Test International Domain Name to ASCII conversion - */ - public function testIdnToAscii() - { - $ind = 'http://www.académie-française.fr/'; - $expected = 'http://www.xn--acadmie-franaise-npb1a.fr/'; - $url = new Url($ind); - $this->assertEquals($expected, $url->idnToAscii()); - - $notInd = 'http://www.academie-francaise.fr/'; - $url = new Url($notInd); - $this->assertEquals($notInd, $url->idnToAscii()); - } -} diff --git a/tests/http/UrlTest.php b/tests/http/UrlTest.php new file mode 100644 index 00000000..011b416d --- /dev/null +++ b/tests/http/UrlTest.php @@ -0,0 +1,201 @@ +cleanup(); + $this->assertEquals(self::$baseUrl, $url->toString()); + } + + /** + * Instantiate an empty URL + */ + public function testEmptyConstruct() + { + $url = new Url(''); + $this->assertEquals('', $url->toString()); + } + + /** + * Instantiate a URL + */ + public function testConstruct() + { + $ref = 'http://username:password@hostname:9090/path' + . '?arg1=value1&arg2=value2#anchor'; + $url = new Url($ref); + $this->assertEquals($ref, $url->toString()); + } + + /** + * URL cleanup - nothing to do + */ + public function testNoCleanup() + { + // URL with no query nor fragment + $this->assertUrlIsCleaned(); + + // URL with no annoying elements + $ref = self::$baseUrl . '?p1=val1&p2=1234#edit'; + $url = new Url($ref); + $this->assertEquals($ref, $url->cleanup()); + } + + /** + * URL cleanup - annoying fragment + */ + public function testCleanupFragment() + { + $this->assertUrlIsCleaned('', '#tk.rss_all'); + $this->assertUrlIsCleaned('', '#xtor=RSS-'); + $this->assertUrlIsCleaned('', '#xtor=RSS-U3ht0tkc4b'); + } + + /** + * URL cleanup - single annoying query parameter + */ + public function testCleanupSingleQueryParam() + { + $this->assertUrlIsCleaned('?action_object_map=junk'); + $this->assertUrlIsCleaned('?action_ref_map=Cr4p!'); + $this->assertUrlIsCleaned('?action_type_map=g4R84g3'); + + $this->assertUrlIsCleaned('?fb_stuff=v41u3'); + $this->assertUrlIsCleaned('?fb=71m3w4573'); + + $this->assertUrlIsCleaned('?utm_campaign=zomg'); + $this->assertUrlIsCleaned('?utm_medium=numnum'); + $this->assertUrlIsCleaned('?utm_source=c0d3'); + $this->assertUrlIsCleaned('?utm_term=1n4l'); + + $this->assertUrlIsCleaned('?xtor=some-url'); + $this->assertUrlIsCleaned('?PHPSESSID=012345678910111213'); + } + + /** + * URL cleanup - multiple annoying query parameters + */ + public function testCleanupMultipleQueryParams() + { + $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng'); + $this->assertUrlIsCleaned( + '?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' + ); + } + + /** + * URL cleanup - multiple annoying query parameters, annoying fragment + */ + public function testCleanupMultipleQueryParamsAndFragment() + { + $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng', '#tk.rss_all'); + } + + /** + * Nominal case - the URL contains both useful and annoying parameters + */ + public function testCleanupMixedContent() + { + // ditch annoying query params and fragment, keep useful params + $url = new Url( + self::$baseUrl + . '?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' + ); + $this->assertEquals(self::$baseUrl . '?my=stuff&is=kept', $url->cleanup()); + + + // ditch annoying query params, keep useful params and fragment + $url = new Url( + self::$baseUrl + . '?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' + ); + $this->assertEquals( + self::$baseUrl . '?my=stuff&is=kept#again', + $url->cleanup() + ); + + // test firefox reader url + $url = new Url( + 'about://reader?url=' . urlencode(self::$baseUrl . '?my=stuff&is=kept') + ); + $this->assertEquals(self::$baseUrl . '?my=stuff&is=kept', $url->cleanup()); + } + + /** + * Test default http scheme. + */ + public function testDefaultScheme() + { + $url = new Url(self::$baseUrl); + $this->assertEquals('http', $url->getScheme()); + $url = new Url('domain.tld'); + $this->assertEquals('http', $url->getScheme()); + $url = new Url('ssh://domain.tld'); + $this->assertEquals('ssh', $url->getScheme()); + $url = new Url('ftp://domain.tld'); + $this->assertEquals('ftp', $url->getScheme()); + $url = new Url('git://domain.tld/push?pull=clone#checkout'); + $this->assertEquals('git', $url->getScheme()); + } + + /** + * Test add trailing slash. + */ + public function testAddTrailingSlash() + { + $strOn = 'http://randomstr.com/test/'; + $strOff = 'http://randomstr.com/test'; + $this->assertEquals($strOn, add_trailing_slash($strOn)); + $this->assertEquals($strOn, add_trailing_slash($strOff)); + } + + /** + * Test valid HTTP url. + */ + public function testUrlIsHttp() + { + $url = new Url(self::$baseUrl); + $this->assertTrue($url->isHttp()); + } + + /** + * Test non HTTP url. + */ + public function testUrlIsNotHttp() + { + $url = new Url('ftp://save.tld/mysave'); + $this->assertFalse($url->isHttp()); + } + + /** + * Test International Domain Name to ASCII conversion + */ + public function testIdnToAscii() + { + $ind = 'http://www.académie-française.fr/'; + $expected = 'http://www.xn--acadmie-franaise-npb1a.fr/'; + $url = new Url($ind); + $this->assertEquals($expected, $url->idnToAscii()); + + $notInd = 'http://www.academie-francaise.fr/'; + $url = new Url($notInd); + $this->assertEquals($notInd, $url->idnToAscii()); + } +} -- cgit v1.2.3