aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/http/Url.php
diff options
context:
space:
mode:
Diffstat (limited to 'application/http/Url.php')
-rw-r--r--application/http/Url.php217
1 files changed, 217 insertions, 0 deletions
diff --git a/application/http/Url.php b/application/http/Url.php
new file mode 100644
index 00000000..260231c6
--- /dev/null
+++ b/application/http/Url.php
@@ -0,0 +1,217 @@
1<?php
2
3namespace Shaarli\Http;
4
/**
 * URL representation and cleanup utilities
 *
 * Form
 *   scheme://[username:password@]host[:port][/path][?query][#fragment]
 *
 * Examples
 *   http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor
 *   https://host.name.tld
 *   https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer
 *
 * @see http://www.faqs.org/rfcs/rfc3986.html
 */
class Url
{
    /**
     * @var string[] Prefixes of query parameters that are stripped by cleanupQuery().
     *               A parameter is dropped when it starts with one of these strings.
     */
    private static $annoyingQueryParams = array(
        // Facebook
        'action_object_map=',
        'action_ref_map=',
        'action_type_map=',
        'fb_',
        'fb=',
        'PHPSESSID=',

        // Scoop.it
        '__scoop',

        // Google Analytics & FeedProxy
        'utm_',

        // ATInternet
        'xtor=',

        // Other
        'campaign_'
    );

    /**
     * @var string[] Prefixes of fragments that are stripped by cleanupFragment().
     */
    private static $annoyingFragments = array(
        // ATInternet
        'xtor=RSS-',

        // Misc.
        'tk.rss_all'
    );

    /**
     * URL parts represented as an array
     *
     * @var array
     *
     * @see http://php.net/parse_url
     */
    protected $parts;

    /**
     * Parses a string containing a URL
     *
     * The string is trimmed and pre-cleaned before parsing. When the
     * (non-empty) URL carries no scheme, 'http' is assumed.
     *
     * @param string $url a string containing a URL
     */
    public function __construct($url)
    {
        $url = self::cleanupUnparsedUrl(trim($url));
        $parsed = parse_url($url);

        // parse_url() returns false for seriously malformed URLs; keep
        // $this->parts an array so later array accesses stay well-defined.
        $this->parts = is_array($parsed) ? $parsed : array();

        if (!empty($url) && empty($this->parts['scheme'])) {
            $this->parts['scheme'] = 'http';
        }
    }

    /**
     * Clean up URL before it's parsed.
     * ie. handle urlencode, url prefixes, etc.
     *
     * @param string $url URL to clean.
     *
     * @return string cleaned URL.
     */
    protected static function cleanupUnparsedUrl($url)
    {
        return self::removeFirefoxAboutReader($url);
    }

    /**
     * Remove Firefox Reader prefix if it's present.
     *
     * When the prefix is found, the remainder is URL-decoded before being
     * returned; otherwise the input is returned untouched.
     *
     * @param string $input url
     *
     * @return string cleaned url
     */
    protected static function removeFirefoxAboutReader($input)
    {
        $firefoxPrefix = 'about://reader?url=';
        if (startsWith($input, $firefoxPrefix)) {
            // Cut by length: ltrim() treats its second argument as a set of
            // characters and would also eat the beginning of the wrapped URL.
            return urldecode((string) substr($input, strlen($firefoxPrefix)));
        }
        return $input;
    }

    /**
     * Returns a string representation of this URL
     *
     * @return string the URL rebuilt from its parts
     */
    public function toString()
    {
        return unparse_url($this->parts);
    }

    /**
     * Removes undesired query parameters
     *
     * Every '&'-separated parameter starting with one of the
     * $annoyingQueryParams prefixes is dropped. If nothing is left, the
     * query part is removed altogether.
     */
    protected function cleanupQuery()
    {
        if (!isset($this->parts['query'])) {
            return;
        }

        $kept = array();
        foreach (explode('&', $this->parts['query']) as $param) {
            foreach (self::$annoyingQueryParams as $annoying) {
                if (startsWith($param, $annoying)) {
                    // Undesired parameter: skip it and move on to the next one.
                    continue 2;
                }
            }
            $kept[] = $param;
        }

        if (count($kept) === 0) {
            unset($this->parts['query']);
            return;
        }

        $this->parts['query'] = implode('&', $kept);
    }

    /**
     * Removes undesired fragments
     *
     * The fragment is dropped when it starts with one of the
     * $annoyingFragments prefixes.
     */
    protected function cleanupFragment()
    {
        if (!isset($this->parts['fragment'])) {
            return;
        }

        foreach (self::$annoyingFragments as $annoying) {
            if (startsWith($this->parts['fragment'], $annoying)) {
                unset($this->parts['fragment']);
                break;
            }
        }
    }

    /**
     * Removes undesired query parameters and fragments
     *
     * Note: this modifies the stored URL parts.
     *
     * @return string the string representation of this URL after cleanup
     */
    public function cleanup()
    {
        $this->cleanupQuery();
        $this->cleanupFragment();
        return $this->toString();
    }

    /**
     * Converts an URL with an International Domain Name host to a ASCII one.
     * This requires PHP-intl. If it's not available, or if the host cannot
     * be converted, just returns this->cleanup().
     *
     * @return string converted cleaned up URL.
     */
    public function idnToAscii()
    {
        $out = $this->cleanup();
        if (!function_exists('idn_to_ascii') || !isset($this->parts['host'])) {
            return $out;
        }

        $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46);
        if ($asciiHost === false) {
            // Conversion failed: keep the cleaned URL rather than blanking the host.
            return $out;
        }

        return str_replace($this->parts['host'], $asciiHost, $out);
    }

    /**
     * Get URL scheme.
     *
     * @return string|false the URL scheme or false if none is provided.
     */
    public function getScheme()
    {
        if (!isset($this->parts['scheme'])) {
            return false;
        }
        return $this->parts['scheme'];
    }

    /**
     * Get URL host.
     *
     * @return string|false the URL host or false if none is provided.
     */
    public function getHost()
    {
        if (empty($this->parts['host'])) {
            return false;
        }
        return $this->parts['host'];
    }

    /**
     * Test if the Url is an HTTP one.
     *
     * The check is a case-insensitive substring match on the scheme, so
     * 'https' matches as well.
     *
     * @return bool true is HTTP, false otherwise (including when no scheme is set).
     */
    public function isHttp()
    {
        // An empty URL has no scheme at all.
        if (empty($this->parts['scheme'])) {
            return false;
        }
        return strpos(strtolower($this->parts['scheme']), 'http') !== false;
    }
}