aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author VirtualTam <virtualtam@flibidi.net> 2015-08-16 23:01:54 +0200
committer VirtualTam <virtualtam@flibidi.net> 2015-08-16 23:01:54 +0200
commit f8bf8d8e595a9128ebdf00091a648d186ba9a628 (patch)
tree 8a53db5956e81bb8856333caa711ae202b84937e
parent c622d32820685566c7c0228ae9cdc6c26f10fa29 (diff)
parent d9d776af19fd0a191f82525991dafbb56e1bcfcb (diff)
download Shaarli-f8bf8d8e595a9128ebdf00091a648d186ba9a628.tar.gz
Shaarli-f8bf8d8e595a9128ebdf00091a648d186ba9a628.tar.zst
Shaarli-f8bf8d8e595a9128ebdf00091a648d186ba9a628.zip
Merge pull request #314 from shaarli/clean-utm_term
clean utm_term url parameter
-rw-r--r-- .gitignore 1
-rw-r--r-- application/Url.php 150
-rwxr-xr-x index.php 27
-rw-r--r-- tests/UrlTest.php 154
4 files changed, 309 insertions, 23 deletions
diff --git a/.gitignore b/.gitignore
index 6fd0ccd8..3ffedb31 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,4 +19,5 @@ composer.lock
19# Ignore test data & output 19# Ignore test data & output
20coverage 20coverage
21tests/datastore.php 21tests/datastore.php
22tests/dummycache/
22phpmd.html 23phpmd.html
diff --git a/application/Url.php b/application/Url.php
new file mode 100644
index 00000000..23356f39
--- /dev/null
+++ b/application/Url.php
@@ -0,0 +1,150 @@
1<?php
/**
 * Converts an array-represented URL to a string
 *
 * Components missing from the array are left out of the result, so an empty
 * array yields an empty string.
 *
 * Source: http://php.net/manual/en/function.parse-url.php#106731
 *
 * @see http://php.net/manual/en/function.parse-url.php
 *
 * @param array $parsedUrl an array-represented URL, using the keys produced
 *                         by parse_url(): scheme, host, port, user, pass,
 *                         path, query, fragment
 *
 * @return string the string representation of the URL
 */
function unparse_url($parsedUrl)
{
    $scheme   = isset($parsedUrl['scheme']) ? $parsedUrl['scheme'].'://' : '';
    $host     = isset($parsedUrl['host']) ? $parsedUrl['host'] : '';
    $port     = isset($parsedUrl['port']) ? ':'.$parsedUrl['port'] : '';
    $user     = isset($parsedUrl['user']) ? (string) $parsedUrl['user'] : '';
    $pass     = isset($parsedUrl['pass']) ? ':'.$parsedUrl['pass'] : '';
    $path     = isset($parsedUrl['path']) ? $parsedUrl['path'] : '';
    $query    = isset($parsedUrl['query']) ? '?'.$parsedUrl['query'] : '';
    $fragment = isset($parsedUrl['fragment']) ? '#'.$parsedUrl['fragment'] : '';

    // Compare against the empty string instead of relying on truthiness:
    // "0" is a legal user name, yet it evaluates to false in a boolean
    // context, which would drop the '@' separator and fuse it with the host.
    $userInfo = ($user !== '' || $pass !== '') ? $user.$pass.'@' : '';

    return $scheme.$userInfo.$host.$port.$path.$query.$fragment;
}
27
/**
 * URL representation and cleanup utilities
 *
 * Form
 *   scheme://[username:password@]host[:port][/path][?query][#fragment]
 *
 * Examples
 *   http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor
 *   https://host.name.tld
 *   https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer
 *
 * @see http://www.faqs.org/rfcs/rfc3986.html
 */
class Url
{
    /**
     * Prefixes of the query parameters removed by cleanupQuery()
     *
     * An entry ending with '=' targets one parameter name; the other
     * entries match every parameter whose name starts with the prefix.
     */
    private static $annoyingQueryParams = array(
        // Facebook
        'action_object_map=',
        'action_ref_map=',
        'action_type_map=',
        'fb_',
        'fb=',

        // Scoop.it
        '__scoop',

        // Google Analytics & FeedProxy
        'utm_',

        // ATInternet
        'xtor='
    );

    /**
     * Prefixes of the fragments removed by cleanupFragment()
     */
    private static $annoyingFragments = array(
        // ATInternet
        'xtor=RSS-',

        // Misc.
        'tk.rss_all'
    );

    /**
     * URL parts represented as an array
     *
     * @see http://php.net/parse_url
     */
    protected $parts;

    /**
     * Parses a string containing a URL
     *
     * @param string $url a string containing a URL
     */
    public function __construct($url)
    {
        $parts = parse_url($url);

        // parse_url() returns false for seriously malformed URLs; keep
        // $parts an array so the rest of the class never handles a boolean
        $this->parts = is_array($parts) ? $parts : array();
    }

    /**
     * Returns a string representation of this URL
     *
     * @return string the URL rebuilt from its parts
     */
    public function __toString()
    {
        return unparse_url($this->parts);
    }

    /**
     * Tells whether a query parameter starts with an undesired prefix
     *
     * @param string $param a raw "name=value" query parameter
     *
     * @return bool true if the parameter must be removed
     */
    private static function isAnnoyingQueryParam($param)
    {
        foreach (self::$annoyingQueryParams as $annoying) {
            if (startsWith($param, $annoying)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Removes undesired query parameters
     *
     * The remaining parameters keep their original order; the query part is
     * dropped altogether once nothing is left.
     */
    protected function cleanupQuery()
    {
        if (! isset($this->parts['query'])) {
            return;
        }

        // Single pass over the parameters, collecting the ones to keep
        $keptParams = array();
        foreach (explode('&', $this->parts['query']) as $param) {
            if (! self::isAnnoyingQueryParam($param)) {
                $keptParams[] = $param;
            }
        }

        if (count($keptParams) === 0) {
            unset($this->parts['query']);
            return;
        }

        $this->parts['query'] = implode('&', $keptParams);
    }

    /**
     * Removes undesired fragments
     *
     * The whole fragment is dropped as soon as it starts with one of the
     * undesired prefixes.
     */
    protected function cleanupFragment()
    {
        if (! isset($this->parts['fragment'])) {
            return;
        }

        foreach (self::$annoyingFragments as $annoying) {
            if (startsWith($this->parts['fragment'], $annoying)) {
                unset($this->parts['fragment']);
                break;
            }
        }
    }

    /**
     * Removes undesired query parameters and fragments
     *
     * @return string the string representation of this URL after cleanup
     */
    public function cleanup()
    {
        $this->cleanupQuery();
        $this->cleanupFragment();
        return $this->__toString();
    }
}
diff --git a/index.php b/index.php
index 84b8f015..74f95497 100755
--- a/index.php
+++ b/index.php
@@ -74,6 +74,7 @@ require_once 'application/Cache.php';
74require_once 'application/CachedPage.php'; 74require_once 'application/CachedPage.php';
75require_once 'application/LinkDB.php'; 75require_once 'application/LinkDB.php';
76require_once 'application/TimeZone.php'; 76require_once 'application/TimeZone.php';
77require_once 'application/Url.php';
77require_once 'application/Utils.php'; 78require_once 'application/Utils.php';
78require_once 'application/Config.php'; 79require_once 'application/Config.php';
79 80
@@ -1479,29 +1480,9 @@ function renderPage()
1479 } 1480 }
1480 1481
1481 // -------- User want to post a new link: Display link edit form. 1482 // -------- User want to post a new link: Display link edit form.
1482 if (isset($_GET['post'])) 1483 if (isset($_GET['post'])) {
1483 { 1484 $url = new Url($_GET['post']);
1484 $url=$_GET['post']; 1485 $url->cleanup();
1485
1486 // We remove the annoying parameters added by FeedBurner, GoogleFeedProxy, Facebook...
1487 $annoyingpatterns = array('/[\?&]utm_source=[^&]*/',
1488 '/[\?&]utm_campaign=[^&]*/',
1489 '/[\?&]utm_medium=[^&]*/',
1490 '/#xtor=RSS-[^&]*/',
1491 '/[\?&]fb_[^&]*/',
1492 '/[\?&]__scoop[^&]*/',
1493 '/#tk\.rss_all\?/',
1494 '/[\?&]action_ref_map=[^&]*/',
1495 '/[\?&]action_type_map=[^&]*/',
1496 '/[\?&]action_object_map=[^&]*/',
1497 '/[\?&]utm_content=[^&]*/',
1498 '/[\?&]fb=[^&]*/',
1499 '/[\?&]xtor=[^&]*/'
1500 );
1501 foreach($annoyingpatterns as $pattern)
1502 {
1503 $url = preg_replace($pattern, "", $url);
1504 }
1505 1486
1506 $link_is_new = false; 1487 $link_is_new = false;
1507 $link = $LINKSDB->getLinkFromUrl($url); // Check if URL is not already in database (in this case, we will edit the existing link) 1488 $link = $LINKSDB->getLinkFromUrl($url); // Check if URL is not already in database (in this case, we will edit the existing link)
diff --git a/tests/UrlTest.php b/tests/UrlTest.php
new file mode 100644
index 00000000..a39630f1
--- /dev/null
+++ b/tests/UrlTest.php
@@ -0,0 +1,154 @@
1<?php
2/**
3 * Url's tests
4 */
5
6require_once 'application/Url.php';
7
/**
 * Unit tests covering the unparse_url() helper
 */
class UnparseUrlTest extends PHPUnit_Framework_TestCase
{
    /**
     * An array holding no URL part is rebuilt as an empty string
     */
    public function testUnparseEmptyArray()
    {
        $noParts = array();
        $rebuilt = unparse_url($noParts);

        $this->assertEquals('', $rebuilt);
    }

    /**
     * A URL using every component survives a parse/unparse round trip
     */
    public function testUnparseFull()
    {
        $expected = 'http://username:password@hostname:9090/path'
            .'?arg1=value1&arg2=value2#anchor';

        $parts = parse_url($expected);

        $this->assertEquals($expected, unparse_url($parts));
    }
}
31
/**
 * Unit tests covering the Url class: construction and cleanup
 */
class UrlTest extends PHPUnit_Framework_TestCase
{
    // URL every cleanup test starts from: no query, no fragment
    protected static $baseUrl = 'http://domain.tld:3000';

    /**
     * Asserts that cleaning up the base URL extended with the given query
     * and fragment gives back the bare base URL
     *
     * @param string $query    query string, including the leading '?'
     * @param string $fragment fragment, including the leading '#'
     */
    private function assertUrlIsCleaned($query='', $fragment='')
    {
        $dirty = self::$baseUrl.$query.$fragment;

        $url = new Url($dirty);
        $url->cleanup();

        $this->assertEquals(self::$baseUrl, $url->__toString());
    }

    /**
     * An empty string gives an empty URL
     */
    public function testEmptyConstruct()
    {
        $emptyUrl = new Url('');

        $this->assertEquals('', $emptyUrl);
    }

    /**
     * A full-featured URL is rendered back unchanged
     */
    public function testConstruct()
    {
        $ref = 'http://username:password@hostname:9090/path'
            .'?arg1=value1&arg2=value2#anchor';

        $built = new Url($ref);

        $this->assertEquals($ref, $built);
    }

    /**
     * URL cleanup - nothing to do
     */
    public function testNoCleanup()
    {
        // neither query nor fragment
        $this->assertUrlIsCleaned();

        // query and fragment present, none of them annoying
        $ref = self::$baseUrl.'?p1=val1&p2=1234#edit';
        $url = new Url($ref);
        $cleaned = $url->cleanup();

        $this->assertEquals($ref, $cleaned);
    }

    /**
     * URL cleanup - annoying fragment
     */
    public function testCleanupFragment()
    {
        $fragments = array(
            '#tk.rss_all',
            '#xtor=RSS-',
            '#xtor=RSS-U3ht0tkc4b',
        );

        foreach ($fragments as $fragment) {
            $this->assertUrlIsCleaned('', $fragment);
        }
    }

    /**
     * URL cleanup - single annoying query parameter
     */
    public function testCleanupSingleQueryParam()
    {
        $queries = array(
            // Facebook
            '?action_object_map=junk',
            '?action_ref_map=Cr4p!',
            '?action_type_map=g4R84g3',
            '?fb_stuff=v41u3',
            '?fb=71m3w4573',

            // Google Analytics & FeedProxy
            '?utm_campaign=zomg',
            '?utm_medium=numnum',
            '?utm_source=c0d3',
            '?utm_term=1n4l',

            // ATInternet
            '?xtor=some-url',
        );

        foreach ($queries as $query) {
            $this->assertUrlIsCleaned($query);
        }
    }

    /**
     * URL cleanup - multiple annoying query parameters
     */
    public function testCleanupMultipleQueryParams()
    {
        $queries = array(
            '?xtor=some-url&fb=som3th1ng',
            '?fb=stuff&utm_campaign=zomg&utm_medium=numnum&utm_source=c0d3',
        );

        foreach ($queries as $query) {
            $this->assertUrlIsCleaned($query);
        }
    }

    /**
     * URL cleanup - multiple annoying query parameters, annoying fragment
     */
    public function testCleanupMultipleQueryParamsAndFragment()
    {
        $query = '?xtor=some-url&fb=som3th1ng';
        $fragment = '#tk.rss_all';

        $this->assertUrlIsCleaned($query, $fragment);
    }

    /**
     * Nominal case - the URL contains both useful and annoying parameters
     */
    public function testCleanupMixedContent()
    {
        // annoying query params and fragment are dropped, useful params stay
        $withAnnoyingFragment = new Url(
            self::$baseUrl
            .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#tk.rss_all'
        );
        $this->assertEquals(
            self::$baseUrl.'?my=stuff&is=kept',
            $withAnnoyingFragment->cleanup()
        );

        // annoying query params are dropped, useful params and fragment stay
        $withUsefulFragment = new Url(
            self::$baseUrl
            .'?fb=zomg&my=stuff&utm_medium=numnum&is=kept#again'
        );
        $this->assertEquals(
            self::$baseUrl.'?my=stuff&is=kept#again',
            $withUsefulFragment->cleanup()
        );
    }
}