diff options
Diffstat (limited to 'application/Url.php')
-rw-r--r-- | application/Url.php | 218 |
1 files changed, 2 insertions, 216 deletions
diff --git a/application/Url.php b/application/Url.php index 3b7f19c2..81f72fb0 100644 --- a/application/Url.php +++ b/application/Url.php | |||
@@ -34,7 +34,7 @@ function unparse_url($parsedUrl) | |||
34 | */ | 34 | */ |
function cleanup_url($url)
{
    // Build the URL object and hand back its cleaned-up string form in one go.
    return (new \Shaarli\Http\Url($url))->cleanup();
}
40 | 40 | ||
@@ -47,7 +47,7 @@ function cleanup_url($url) | |||
47 | */ | 47 | */ |
function get_url_scheme($url)
{
    // Build the URL object and return whatever scheme it reports.
    return (new \Shaarli\Http\Url($url))->getScheme();
}
53 | 53 | ||
@@ -86,217 +86,3 @@ function whitelist_protocols($url, $protocols) | |||
86 | } | 86 | } |
87 | return $url; | 87 | return $url; |
88 | } | 88 | } |
89 | |||
/**
 * URL representation and cleanup utilities
 *
 * Form
 *   scheme://[username:password@]host[:port][/path][?query][#fragment]
 *
 * Examples
 *   http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor
 *   https://host.name.tld
 *   https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer
 *
 * @see http://www.faqs.org/rfcs/rfc3986.html
 */
class Url
{
    /**
     * @var string[] Prefixes identifying query parameters removed by cleanupQuery().
     *               A parameter is dropped when it starts with any of these strings.
     */
    private static $annoyingQueryParams = array(

        'action_object_map=',
        'action_ref_map=',
        'action_type_map=',
        'fb_',
        'fb=',
        'PHPSESSID=',

        // Scoop.it
        '__scoop',

        // Google Analytics & FeedProxy
        'utm_',

        // ATInternet
        'xtor=',

        // Other
        'campaign_'
    );

    /**
     * @var string[] Prefixes identifying fragments removed by cleanupFragment().
     */
    private static $annoyingFragments = array(
        // ATInternet
        'xtor=RSS-',

        // Misc.
        'tk.rss_all'
    );

    /**
     * URL parts represented as an array
     *
     * @var array
     *
     * @see http://php.net/parse_url
     */
    protected $parts;

    /**
     * Parses a string containing a URL
     *
     * The input is trimmed and pre-cleaned before being parsed. A non-empty URL
     * without a scheme is given the 'http' scheme.
     *
     * @param string $url a string containing a URL
     */
    public function __construct($url)
    {
        $url = self::cleanupUnparsedUrl(trim($url));

        // parse_url() returns false for seriously malformed URLs; always keep
        // $parts an array so later index reads and writes stay well-defined.
        $parsed = parse_url($url);
        $this->parts = is_array($parsed) ? $parsed : array();

        if (!empty($url) && empty($this->parts['scheme'])) {
            $this->parts['scheme'] = 'http';
        }
    }

    /**
     * Clean up URL before it's parsed.
     *
     * Currently this only strips the Firefox Reader prefix.
     *
     * @param string $url URL to clean.
     *
     * @return string cleaned URL.
     */
    protected static function cleanupUnparsedUrl($url)
    {
        return self::removeFirefoxAboutReader($url);
    }

    /**
     * Remove Firefox Reader prefix if it's present.
     *
     * When the prefix is found, the remainder of the input is URL-decoded.
     *
     * @param string $input url
     *
     * @return string cleaned url
     */
    protected static function removeFirefoxAboutReader($input)
    {
        $firefoxPrefix = 'about://reader?url=';
        if (startsWith($input, $firefoxPrefix)) {
            // Cut the prefix by length. ltrim() must not be used here: its second
            // argument is a set of characters, so it would also eat leading
            // characters of the wrapped URL (e.g. the 'e' of 'example.com').
            // The cast covers PHP versions where substr() yields false when
            // nothing follows the prefix.
            return urldecode((string) substr($input, strlen($firefoxPrefix)));
        }
        return $input;
    }

    /**
     * Returns a string representation of this URL
     *
     * @return string the URL rebuilt from its parts by unparse_url()
     */
    public function toString()
    {
        return unparse_url($this->parts);
    }

    /**
     * Tells whether a single query parameter matches one of the unwanted prefixes.
     *
     * @param string $param one 'name=value' chunk of the query string
     *
     * @return bool true if the parameter starts with an unwanted prefix
     */
    private static function isAnnoyingQueryParam($param)
    {
        foreach (self::$annoyingQueryParams as $annoying) {
            if (startsWith($param, $annoying)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Removes undesired query parameters
     *
     * The remaining parameters keep their original order. If nothing remains,
     * the query part is removed altogether.
     */
    protected function cleanupQuery()
    {
        if (! isset($this->parts['query'])) {
            return;
        }

        $keptParams = array();
        foreach (explode('&', $this->parts['query']) as $param) {
            if (! self::isAnnoyingQueryParam($param)) {
                $keptParams[] = $param;
            }
        }

        if (empty($keptParams)) {
            unset($this->parts['query']);
            return;
        }

        $this->parts['query'] = implode('&', $keptParams);
    }

    /**
     * Removes undesired fragments
     *
     * The whole fragment is dropped when it starts with an unwanted prefix.
     */
    protected function cleanupFragment()
    {
        if (! isset($this->parts['fragment'])) {
            return;
        }

        foreach (self::$annoyingFragments as $annoying) {
            if (startsWith($this->parts['fragment'], $annoying)) {
                unset($this->parts['fragment']);
                break;
            }
        }
    }

    /**
     * Removes undesired query parameters and fragments
     *
     * @return string the string representation of this URL after cleanup
     */
    public function cleanup()
    {
        $this->cleanupQuery();
        $this->cleanupFragment();
        return $this->toString();
    }

    /**
     * Converts an URL with an International Domain Name host to a ASCII one.
     * This requires PHP-intl. If it's not available, just returns this->cleanup().
     *
     * The cleaned-up URL is also returned unchanged when there is no host or
     * when the host cannot be converted.
     *
     * @return string converted cleaned up URL.
     */
    public function idnToAscii()
    {
        $out = $this->cleanup();
        if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) {
            return $out;
        }

        $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46);
        if ($asciiHost === false) {
            // Conversion failed: keep the host as it is rather than blanking it.
            return $out;
        }

        // Rebuild from a copy of the parts so only the host component changes;
        // a string replace on the whole URL could also rewrite identical text
        // in the path, query or credentials. $this->parts is left untouched.
        $asciiParts = $this->parts;
        $asciiParts['host'] = $asciiHost;
        return unparse_url($asciiParts);
    }

    /**
     * Get URL scheme.
     *
     * @return string|bool the URL scheme or false if none is provided.
     */
    public function getScheme()
    {
        if (!isset($this->parts['scheme'])) {
            return false;
        }
        return $this->parts['scheme'];
    }

    /**
     * Get URL host.
     *
     * @return string|bool the URL host or false if none is provided.
     */
    public function getHost()
    {
        if (empty($this->parts['host'])) {
            return false;
        }
        return $this->parts['host'];
    }

    /**
     * Test if the Url is an HTTP one.
     *
     * The check is case-insensitive and succeeds when the scheme contains
     * 'http' (so 'https' matches too).
     *
     * @return bool true if HTTP, false otherwise (including when there is no scheme).
     */
    public function isHttp()
    {
        // An empty URL has no scheme; answer false without reading a missing index.
        if (! isset($this->parts['scheme'])) {
            return false;
        }
        return strpos(strtolower($this->parts['scheme']), 'http') !== false;
    }
}