From ef591e7ee21435da9314c5f7f6ea983c6f423898 Mon Sep 17 00:00:00 2001 From: Guillaume Virlet Date: Wed, 2 Sep 2015 13:55:39 +0200 Subject: Url: introduce global helper functions for cleanup and scheme detection Relates to #314 & #326 Additions: - add global `cleanup_url()` and `get_url_scheme()` functions Modifications: - replace `Url` usage in `index.php` by calls to global functions - fix `Url` tests not being run: PHPUnit expects a single test class per file - move classes to separate files --- application/LinkDB.php | 4 + application/Url.php | 30 +++++++- index.php | 9 +-- tests/Url/CleanupUrlTest.php | 76 ++++++++++++++++++ tests/Url/GetUrlSchemeTest.php | 31 ++++++++ tests/Url/UnparseUrlTest.php | 31 ++++++++ tests/Url/UrlTest.php | 148 +++++++++++++++++++++++++++++++++++ tests/UrlTest.php | 170 ----------------------------------------- 8 files changed, 322 insertions(+), 177 deletions(-) create mode 100644 tests/Url/CleanupUrlTest.php create mode 100644 tests/Url/GetUrlSchemeTest.php create mode 100644 tests/Url/UnparseUrlTest.php create mode 100755 tests/Url/UrlTest.php delete mode 100755 tests/UrlTest.php diff --git a/application/LinkDB.php b/application/LinkDB.php index 463aa47e..84733505 100644 --- a/application/LinkDB.php +++ b/application/LinkDB.php @@ -287,6 +287,10 @@ You use the community supported version of the original Shaarli project, by Seba /** * Returns the link for a given URL, or False if it does not exist. + * + * @param string $url URL to search for + * + * @return mixed the existing link if it exists, else 'false' */ public function getLinkFromUrl($url) { diff --git a/application/Url.php b/application/Url.php index 02a4395d..af43b457 100755 --- a/application/Url.php +++ b/application/Url.php @@ -25,6 +25,32 @@ function unparse_url($parsedUrl) return "$scheme$user$pass$host$port$path$query$fragment"; } +/** + * Removes undesired query parameters and fragments + * + * @param string url Url to be cleaned + * + * @return string the string representation of this URL after cleanup + */ +function cleanup_url($url) +{ + $obj_url = new Url($url); + return $obj_url->cleanup(); +} + +/** + * Get URL scheme. + * + * @param string url Url for which the scheme is requested + * + * @return mixed the URL scheme or false if none is provided. + */ +function get_url_scheme($url) +{ + $obj_url = new Url($url); + return $obj_url->getScheme(); +} + /** * URL representation and cleanup utilities * @@ -90,7 +116,7 @@ class Url /** * Returns a string representation of this URL */ - public function __toString() + public function toString() { return unparse_url($this->parts); } @@ -149,7 +175,7 @@ class Url { $this->cleanupQuery(); $this->cleanupFragment(); - return $this->__toString(); + return $this->toString(); } /** diff --git a/index.php b/index.php index e39cff38..61d92f04 100755 --- a/index.php +++ b/index.php @@ -1454,12 +1454,11 @@ function renderPage() // -------- User want to post a new link: Display link edit form. if (isset($_GET['post'])) { - $url = new Url($_GET['post']); - $url->cleanup(); + $url = cleanup_url($_GET['post']); $link_is_new = false; // Check if URL is not already in database (in this case, we will edit the existing link) - $link = $LINKSDB->getLinkFromUrl((string)$url); + $link = $LINKSDB->getLinkFromUrl($url); if (!$link) { $link_is_new = true; @@ -1471,7 +1470,7 @@ function renderPage() $tags = (empty($_GET['tags']) ? '' : $_GET['tags'] ); $private = (!empty($_GET['private']) && $_GET['private'] === "1" ? 1 : 0); // If this is an HTTP(S) link, we try go get the page to extract the title (otherwise we will to straight to the edit form.) - if (empty($title) && strpos($url->getScheme(), 'http') !== false) { + if (empty($title) && strpos(get_url_scheme($url), 'http') !== false) { // Short timeout to keep the application responsive list($headers, $data) = get_http_url($url, 4); // FIXME: Decode charset according to specified in either 1) HTTP response headers or 2) in html @@ -1505,7 +1504,7 @@ function renderPage() $link = array( 'linkdate' => $linkdate, 'title' => $title, - 'url' => (string)$url, + 'url' => $url, 'description' => $description, 'tags' => $tags, 'private' => $private diff --git a/tests/Url/CleanupUrlTest.php b/tests/Url/CleanupUrlTest.php new file mode 100644 index 00000000..ba9a0437 --- /dev/null +++ b/tests/Url/CleanupUrlTest.php @@ -0,0 +1,76 @@ +assertEquals('', cleanup_url('')); + } + + /** + * Clean an already cleaned Url + */ + public function testCleanupUrlAlreadyClean() + { + $ref = 'http://domain.tld:3000'; + $this->assertEquals($ref, cleanup_url($ref)); + $ref = $ref.'/path/to/dir/'; + $this->assertEquals($ref, cleanup_url($ref)); + } + + /** + * Clean Url needing cleaning + */ + public function testCleanupUrlNeedClean() + { + $ref = 'http://domain.tld:3000'; + $this->assertEquals($ref, cleanup_url($ref.'#tk.rss_all')); + $this->assertEquals($ref, cleanup_url($ref.'#xtor=RSS-')); + $this->assertEquals($ref, cleanup_url($ref.'#xtor=RSS-U3ht0tkc4b')); + $this->assertEquals($ref, cleanup_url($ref.'?action_object_map=junk')); + $this->assertEquals($ref, cleanup_url($ref.'?action_ref_map=Cr4p!')); + $this->assertEquals($ref, cleanup_url($ref.'?action_type_map=g4R84g3')); + + $this->assertEquals($ref, cleanup_url($ref.'?fb_stuff=v41u3')); + $this->assertEquals($ref, cleanup_url($ref.'?fb=71m3w4573')); + + $this->assertEquals($ref, cleanup_url($ref.'?utm_campaign=zomg')); + $this->assertEquals($ref, cleanup_url($ref.'?utm_medium=numnum')); + $this->assertEquals($ref, cleanup_url($ref.'?utm_source=c0d3')); + $this->assertEquals($ref, cleanup_url($ref.'?utm_term=1n4l')); + + $this->assertEquals($ref, cleanup_url($ref.'?xtor=some-url')); + $this->assertEquals($ref, cleanup_url($ref.'?xtor=some-url&fb=som3th1ng')); + $this->assertEquals($ref, cleanup_url( + $ref.'?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' + )); + $this->assertEquals($ref, cleanup_url( + $ref.'?xtor=some-url&fb=som3th1ng#tk.rss_all' + )); + + // ditch annoying query params and fragment, keep useful params + $this->assertEquals( + $ref.'?my=stuff&is=kept', + cleanup_url( + $ref.'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' + ) + ); + + // ditch annoying query params, keep useful params and fragment + $this->assertEquals( + $ref.'?my=stuff&is=kept#again', + cleanup_url( + $ref.'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' + ) + ); + } +} + diff --git a/tests/Url/GetUrlSchemeTest.php b/tests/Url/GetUrlSchemeTest.php new file mode 100644 index 00000000..72d80b30 --- /dev/null +++ b/tests/Url/GetUrlSchemeTest.php @@ -0,0 +1,31 @@ +assertEquals('', get_url_scheme('')); + } + + /** + * Get normal scheme of Url + */ + public function testGetUrlScheme() + { + $this->assertEquals('http', get_url_scheme('http://domain.tld:3000')); + $this->assertEquals('https', get_url_scheme('https://domain.tld:3000')); + $this->assertEquals('http', get_url_scheme('domain.tld')); + $this->assertEquals('ssh', get_url_scheme('ssh://domain.tld')); + $this->assertEquals('ftp', get_url_scheme('ftp://domain.tld')); + $this->assertEquals('git', get_url_scheme('git://domain.tld/push?pull=clone#checkout')); + } +} + diff --git a/tests/Url/UnparseUrlTest.php b/tests/Url/UnparseUrlTest.php new file mode 100644 index 00000000..edde73e4 --- /dev/null +++ b/tests/Url/UnparseUrlTest.php @@ -0,0 +1,31 @@ +assertEquals('', unparse_url(array())); + } + + /** + * Rebuild a full-featured URL + */ + public function testUnparseFull() + { + $ref = 'http://username:password@hostname:9090/path' + .'?arg1=value1&arg2=value2#anchor'; + $this->assertEquals($ref, unparse_url(parse_url($ref))); + } +} + diff --git a/tests/Url/UrlTest.php b/tests/Url/UrlTest.php new file mode 100755 index 00000000..e498d79e --- /dev/null +++ b/tests/Url/UrlTest.php @@ -0,0 +1,148 @@ +cleanup(); + $this->assertEquals(self::$baseUrl, $url->toString()); + } + + /** + * Instantiate an empty URL + */ + public function testEmptyConstruct() + { + $url = new Url(''); + $this->assertEquals('', $url->toString()); + } + + /** + * Instantiate a URL + */ + public function testConstruct() + { + $ref = 'http://username:password@hostname:9090/path' + .'?arg1=value1&arg2=value2#anchor'; + $url = new Url($ref); + $this->assertEquals($ref, $url->toString()); + } + + /** + * URL cleanup - nothing to do + */ + public function testNoCleanup() + { + // URL with no query nor fragment + $this->assertUrlIsCleaned(); + + // URL with no annoying elements + $ref = self::$baseUrl.'?p1=val1&p2=1234#edit'; + $url = new Url($ref); + $this->assertEquals($ref, $url->cleanup()); + } + + /** + * URL cleanup - annoying fragment + */ + public function testCleanupFragment() + { + $this->assertUrlIsCleaned('', '#tk.rss_all'); + $this->assertUrlIsCleaned('', '#xtor=RSS-'); + $this->assertUrlIsCleaned('', '#xtor=RSS-U3ht0tkc4b'); + } + + /** + * URL cleanup - single annoying query parameter + */ + public function testCleanupSingleQueryParam() + { + $this->assertUrlIsCleaned('?action_object_map=junk'); + $this->assertUrlIsCleaned('?action_ref_map=Cr4p!'); + $this->assertUrlIsCleaned('?action_type_map=g4R84g3'); + + $this->assertUrlIsCleaned('?fb_stuff=v41u3'); + $this->assertUrlIsCleaned('?fb=71m3w4573'); + + $this->assertUrlIsCleaned('?utm_campaign=zomg'); + $this->assertUrlIsCleaned('?utm_medium=numnum'); + $this->assertUrlIsCleaned('?utm_source=c0d3'); + $this->assertUrlIsCleaned('?utm_term=1n4l'); + + $this->assertUrlIsCleaned('?xtor=some-url'); + } + + /** + * URL cleanup - multiple annoying query parameters + */ + public function testCleanupMultipleQueryParams() + { + $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng'); + $this->assertUrlIsCleaned( + '?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' + ); + } + + /** + * URL cleanup - multiple annoying query parameters, annoying fragment + */ + public function testCleanupMultipleQueryParamsAndFragment() + { + $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng', '#tk.rss_all'); + } + + /** + * Nominal case - the URL contains both useful and annoying parameters + */ + public function testCleanupMixedContent() + { + // ditch annoying query params and fragment, keep useful params + $url = new Url( + self::$baseUrl + .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' + ); + $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); + + + // ditch annoying query params, keep useful params and fragment + $url = new Url( + self::$baseUrl + .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' + ); + $this->assertEquals( + self::$baseUrl.'?my=stuff&is=kept#again', + $url->cleanup() + ); + } + + /** + * Test default http scheme. + */ + public function testDefaultScheme() { + $url = new Url(self::$baseUrl); + $this->assertEquals('http', $url->getScheme()); + $url = new Url('domain.tld'); + $this->assertEquals('http', $url->getScheme()); + $url = new Url('ssh://domain.tld'); + $this->assertEquals('ssh', $url->getScheme()); + $url = new Url('ftp://domain.tld'); + $this->assertEquals('ftp', $url->getScheme()); + $url = new Url('git://domain.tld/push?pull=clone#checkout'); + $this->assertEquals('git', $url->getScheme()); + } +} diff --git a/tests/UrlTest.php b/tests/UrlTest.php deleted file mode 100755 index c848e88e..00000000 --- a/tests/UrlTest.php +++ /dev/null @@ -1,170 +0,0 @@ -assertEquals('', unparse_url(array())); - } - - /** - * Rebuild a full-featured URL - */ - public function testUnparseFull() - { - $ref = 'http://username:password@hostname:9090/path' - .'?arg1=value1&arg2=value2#anchor'; - $this->assertEquals($ref, unparse_url(parse_url($ref))); - } -} - -/** - * Unitary tests for URL utilities - */ -class UrlTest extends PHPUnit_Framework_TestCase -{ - // base URL for tests - protected static $baseUrl = 'http://domain.tld:3000'; - - /** - * Helper method - */ - private function assertUrlIsCleaned($query='', $fragment='') - { - $url = new Url(self::$baseUrl.$query.$fragment); - $url->cleanup(); - $this->assertEquals(self::$baseUrl, $url->__toString()); - } - - /** - * Instantiate an empty URL - */ - public function testEmptyConstruct() - { - $this->assertEquals('', new Url('')); - } - - /** - * Instantiate a URL - */ - public function testConstruct() - { - $ref = 'http://username:password@hostname:9090/path' - .'?arg1=value1&arg2=value2#anchor'; - $this->assertEquals($ref, new Url($ref)); - } - - /** - * URL cleanup - nothing to do - */ - public function testNoCleanup() - { - // URL with no query nor fragment - $this->assertUrlIsCleaned(); - - // URL with no annoying elements - $ref = self::$baseUrl.'?p1=val1&p2=1234#edit'; - $url = new Url($ref); - $this->assertEquals($ref, $url->cleanup()); - } - - /** - * URL cleanup - annoying fragment - */ - public function testCleanupFragment() - { - $this->assertUrlIsCleaned('', '#tk.rss_all'); - $this->assertUrlIsCleaned('', '#xtor=RSS-'); - $this->assertUrlIsCleaned('', '#xtor=RSS-U3ht0tkc4b'); - } - - /** - * URL cleanup - single annoying query parameter - */ - public function testCleanupSingleQueryParam() - { - $this->assertUrlIsCleaned('?action_object_map=junk'); - $this->assertUrlIsCleaned('?action_ref_map=Cr4p!'); - $this->assertUrlIsCleaned('?action_type_map=g4R84g3'); - - $this->assertUrlIsCleaned('?fb_stuff=v41u3'); - $this->assertUrlIsCleaned('?fb=71m3w4573'); - - $this->assertUrlIsCleaned('?utm_campaign=zomg'); - $this->assertUrlIsCleaned('?utm_medium=numnum'); - $this->assertUrlIsCleaned('?utm_source=c0d3'); - $this->assertUrlIsCleaned('?utm_term=1n4l'); - - $this->assertUrlIsCleaned('?xtor=some-url'); - } - - /** - * URL cleanup - multiple annoying query parameters - */ - public function testCleanupMultipleQueryParams() - { - $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng'); - $this->assertUrlIsCleaned( - '?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3' - ); - } - - /** - * URL cleanup - multiple annoying query parameters, annoying fragment - */ - public function testCleanupMultipleQueryParamsAndFragment() - { - $this->assertUrlIsCleaned('?xtor=some-url&fb=som3th1ng', '#tk.rss_all'); - } - - /** - * Nominal case - the URL contains both useful and annoying parameters - */ - public function testCleanupMixedContent() - { - // ditch annoying query params and fragment, keep useful params - $url = new Url( - self::$baseUrl - .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all' - ); - $this->assertEquals(self::$baseUrl.'?my=stuff&is=kept', $url->cleanup()); - - - // ditch annoying query params, keep useful params and fragment - $url = new Url( - self::$baseUrl - .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again' - ); - $this->assertEquals( - self::$baseUrl.'?my=stuff&is=kept#again', - $url->cleanup() - ); - } - - /** - * Test default http scheme. - */ - public function testDefaultScheme() { - $url = new Url(self::$baseUrl); - $this->assertEquals('http', $url->getScheme()); - $url = new Url('domain.tld'); - $this->assertEquals('http', $url->getScheme()); - $url = new Url('ssh://domain.tld'); - $this->assertEquals('ssh', $url->getScheme()); - $url = new Url('ftp://domain.tld'); - $this->assertEquals('ftp', $url->getScheme()); - $url = new Url('git://domain.tld/push?pull=clone#checkout'); - $this->assertEquals('git', $url->getScheme()); - } -} -- cgit v1.2.3