]> git.immae.eu Git - github/shaarli/Shaarli.git/blame - application/Url.php
Links: refactor & improve URL cleanup
[github/shaarli/Shaarli.git] / application / Url.php
CommitLineData
d9d776af
V
1<?php
/**
 * Converts an array-represented URL to a string
 *
 * This is the reverse operation of parse_url(): every component present in
 * the array is emitted together with its delimiter, missing components are
 * skipped.
 *
 * Source: http://php.net/manual/en/function.parse-url.php#106731
 *
 * @see http://php.net/manual/en/function.parse-url.php
 *
 * @param array $parsedUrl an array-represented URL, using the same keys as
 *                         parse_url(): scheme, host, port, user, pass, path,
 *                         query, fragment
 *
 * @return string the string representation of the URL
 */
function unparse_url($parsedUrl)
{
    $scheme   = isset($parsedUrl['scheme']) ? $parsedUrl['scheme'].'://' : '';
    $host     = isset($parsedUrl['host']) ? $parsedUrl['host'] : '';
    $port     = isset($parsedUrl['port']) ? ':'.$parsedUrl['port'] : '';
    $user     = isset($parsedUrl['user']) ? (string) $parsedUrl['user'] : '';
    $pass     = isset($parsedUrl['pass']) ? ':'.$parsedUrl['pass'] : '';
    $path     = isset($parsedUrl['path']) ? $parsedUrl['path'] : '';
    $query    = isset($parsedUrl['query']) ? '?'.$parsedUrl['query'] : '';
    $fragment = isset($parsedUrl['fragment']) ? '#'.$parsedUrl['fragment'] : '';

    // Compare against the empty string instead of relying on truthiness:
    // "0" is a legal user name but evaluates to false, which would drop the
    // '@' separator and glue the user name to the host.
    $userinfo = ($user !== '' || $pass !== '') ? "$user$pass@" : '';

    return "$scheme$userinfo$host$port$path$query$fragment";
}
27
/**
 * URL representation and cleanup utilities
 *
 * Wraps the component array produced by parse_url() and offers a cleanup
 * step that drops tracking-related query parameters and fragments.
 *
 * Form
 *   scheme://[username:password@]host[:port][/path][?query][#fragment]
 *
 * Examples
 *   http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor
 *   https://host.name.tld
 *   https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer
 *
 * @see http://www.faqs.org/rfcs/rfc3986.html
 */
class Url
{
    /**
     * Patterns of query parameters to discard; each one is matched against
     * a "name=value" pair with startsWith().
     */
    private static $annoyingQueryParams = array(
        // Facebook
        'action_object_map=',
        'action_ref_map=',
        'action_type_map=',
        'fb_',
        'fb=',

        // Scoop.it
        '__scoop',

        // Google Analytics & FeedProxy
        'utm_',

        // ATInternet
        'xtor='
    );

    /**
     * Patterns of fragments to discard; each one is matched against the
     * whole fragment with startsWith().
     */
    private static $annoyingFragments = array(
        // ATInternet
        'xtor=RSS-',

        // Misc.
        'tk.rss_all'
    );

    /**
     * URL parts represented as an array, as returned by parse_url()
     *
     * @see http://php.net/parse_url
     */
    protected $parts;

    /**
     * Parses a string containing a URL
     *
     * The result of parse_url() is stored as-is, without validation.
     *
     * @param string $url a string containing a URL
     */
    public function __construct($url)
    {
        $this->parts = parse_url($url);
    }

    /**
     * Returns a string representation of this URL
     *
     * @return string the URL rebuilt from its parts by unparse_url()
     */
    public function __toString()
    {
        return unparse_url($this->parts);
    }

    /**
     * Removes undesired query parameters
     *
     * The query string is split on '&'; every pair matching one of the
     * $annoyingQueryParams patterns is dropped and the remaining pairs are
     * joined back in their original order. When nothing remains, the query
     * part is removed altogether.
     */
    protected function cleanupQuery()
    {
        if (! isset($this->parts['query'])) {
            return;
        }

        $kept = array();

        foreach (explode('&', $this->parts['query']) as $pair) {
            $unwanted = false;

            foreach (self::$annoyingQueryParams as $pattern) {
                if (startsWith($pair, $pattern)) {
                    // One match is enough to discard the pair
                    $unwanted = true;
                    break;
                }
            }

            if (! $unwanted) {
                $kept[] = $pair;
            }
        }

        if (count($kept) == 0) {
            unset($this->parts['query']);
            return;
        }

        $this->parts['query'] = implode('&', $kept);
    }

    /**
     * Removes undesired fragments
     *
     * The fragment is removed entirely as soon as it matches one of the
     * $annoyingFragments patterns; otherwise it is left untouched.
     */
    protected function cleanupFragment()
    {
        if (isset($this->parts['fragment'])) {
            $fragment = $this->parts['fragment'];

            foreach (self::$annoyingFragments as $pattern) {
                if (startsWith($fragment, $pattern)) {
                    unset($this->parts['fragment']);
                    return;
                }
            }
        }
    }

    /**
     * Removes undesired query parameters and fragments
     *
     * The stored URL parts are modified in place.
     *
     * @return string the string representation of this URL after cleanup
     */
    public function cleanup()
    {
        $this->cleanupQuery();
        $this->cleanupFragment();

        return $this->__toString();
    }
}