diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:07:13 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:07:13 +0200 |
commit | d9f6275ebca035fec8331652c677981056793ccc (patch) | |
tree | 37a64baf4f0eba6b781040605965383d8aded2cc /application/Url.php | |
parent | 38672ba0d1c722e5d6d33a58255ceb55e9410e46 (diff) | |
parent | d63ff87a009313141ae684ec447b902562ff6ee7 (diff) | |
download | Shaarli-stable.tar.gz Shaarli-stable.tar.zst Shaarli-stable.zip |
Merge branch 'v0.11' into stable
Diffstat (limited to 'application/Url.php')
-rw-r--r-- | application/Url.php | 302 |
1 files changed, 0 insertions, 302 deletions
diff --git a/application/Url.php b/application/Url.php deleted file mode 100644 index 3b7f19c2..00000000 --- a/application/Url.php +++ /dev/null | |||
@@ -1,302 +0,0 @@ | |||
1 | <?php | ||
/**
 * Converts an array-represented URL to a string.
 *
 * Source: http://php.net/manual/en/function.parse-url.php#106731
 *
 * @see http://php.net/manual/en/function.parse-url.php
 *
 * @param array $parsedUrl URL components keyed like the result of parse_url()
 *                         (scheme, host, port, user, pass, path, query, fragment).
 *                         Every key is optional; missing ones contribute nothing.
 *
 * @return string the string representation of the URL
 */
function unparse_url($parsedUrl)
{
    $scheme   = isset($parsedUrl['scheme']) ? $parsedUrl['scheme'].'://' : '';
    $host     = isset($parsedUrl['host']) ? $parsedUrl['host'] : '';
    $port     = isset($parsedUrl['port']) ? ':'.$parsedUrl['port'] : '';
    $user     = isset($parsedUrl['user']) ? (string) $parsedUrl['user'] : '';
    $pass     = isset($parsedUrl['pass']) ? ':'.$parsedUrl['pass'] : '';
    // Compare against '' explicitly: a truthiness test treats the user name "0"
    // as absent and would drop the "@" separator before the host.
    $userInfo = ($user !== '' || $pass !== '') ? "$user$pass@" : '';
    $path     = isset($parsedUrl['path']) ? $parsedUrl['path'] : '';
    $query    = isset($parsedUrl['query']) ? '?'.$parsedUrl['query'] : '';
    $fragment = isset($parsedUrl['fragment']) ? '#'.$parsedUrl['fragment'] : '';

    return "$scheme$userInfo$host$port$path$query$fragment";
}
27 | |||
/**
 * Strips undesired query parameters and fragments from a URL.
 *
 * Convenience wrapper: builds a Url object and delegates to Url::cleanup().
 *
 * @param string $url URL to be cleaned
 *
 * @return string the string representation of the URL after cleanup
 */
function cleanup_url($url)
{
    return (new Url($url))->cleanup();
}
40 | |||
/**
 * Returns the scheme of a URL.
 *
 * Convenience wrapper: builds a Url object and delegates to Url::getScheme().
 *
 * @param string $url URL for which the scheme is requested
 *
 * @return mixed the URL scheme, or false if none is available
 */
function get_url_scheme($url)
{
    return (new Url($url))->getScheme();
}
53 | |||
/**
 * Appends a slash to a URL unless it already ends with one.
 *
 * @param string $url URL to check/edit.
 *
 * @return string the URL, guaranteed to end with a trailing slash.
 */
function add_trailing_slash($url)
{
    $suffix = endsWith($url, '/') ? '' : '/';

    return $url . $suffix;
}
65 | |||
/**
 * Replace not whitelisted protocols by 'http://' from given URL.
 *
 * URLs starting with '?' or '/' are returned untouched. URLs without any
 * protocol get 'http://' prepended.
 *
 * @param string $url       URL to clean
 * @param array  $protocols List of allowed protocols (aside from http(s)).
 *
 * @return string URL with allowed protocol
 */
function whitelist_protocols($url, $protocols)
{
    if (startsWith($url, '?') || startsWith($url, '/')) {
        return $url;
    }

    $protocols = array_merge(['http', 'https'], $protocols);
    $hasProtocol = preg_match('#^(\w+):/?/?#', $url, $match) === 1;

    // No protocol at all: assume http.
    if (! $hasProtocol) {
        return 'http://' . $url;
    }

    // Protocol not allowed: swap only the leading prefix for http://.
    // str_replace() would also rewrite any later occurrence of the same
    // text inside the path or query string.
    if (! in_array($match[1], $protocols, true)) {
        return 'http://' . substr($url, strlen($match[0]));
    }

    return $url;
}
89 | |||
/**
 * URL representation and cleanup utilities
 *
 * Form
 *   scheme://[username:password@]host[:port][/path][?query][#fragment]
 *
 * Examples
 *   http://username:password@hostname:9090/path?arg1=value1&arg2=value2#anchor
 *   https://host.name.tld
 *   https://h2.g2/faq/?vendor=hitchhiker&item=guide&dest=galaxy#answer
 *
 * @see http://www.faqs.org/rfcs/rfc3986.html
 */
class Url
{
    /**
     * @var array Prefixes of query parameters that cleanupQuery() removes.
     */
    private static $annoyingQueryParams = [
        'action_object_map=',
        'action_ref_map=',
        'action_type_map=',
        'fb_',
        'fb=',
        'PHPSESSID=',

        // Scoop.it
        '__scoop',

        // Google Analytics & FeedProxy
        'utm_',

        // ATInternet
        'xtor=',

        // Other
        'campaign_',
    ];

    /**
     * @var array Prefixes of fragments that cleanupFragment() removes.
     */
    private static $annoyingFragments = [
        // ATInternet
        'xtor=RSS-',

        // Misc.
        'tk.rss_all',
    ];

    /**
     * URL parts represented as an array
     *
     * @see http://php.net/parse_url
     *
     * @var array
     */
    protected $parts;

    /**
     * Parses a string containing a URL
     *
     * The scheme defaults to 'http' when the (non-empty) URL has none.
     *
     * @param string $url a string containing a URL
     */
    public function __construct($url)
    {
        $url = self::cleanupUnparsedUrl(trim($url));
        $parsed = parse_url($url);
        // parse_url() returns false on seriously malformed URLs; always keep
        // an array so the key accesses below and in other methods stay valid.
        $this->parts = is_array($parsed) ? $parsed : [];

        if (!empty($url) && empty($this->parts['scheme'])) {
            $this->parts['scheme'] = 'http';
        }
    }

    /**
     * Clean up URL before it's parsed.
     * ie. handle urlencode, url prefixes, etc.
     *
     * @param string $url URL to clean.
     *
     * @return string cleaned URL.
     */
    protected static function cleanupUnparsedUrl($url)
    {
        return self::removeFirefoxAboutReader($url);
    }

    /**
     * Remove Firefox Reader prefix if it's present.
     *
     * @param string $input url
     *
     * @return string cleaned (and URL-decoded) url, or the input unchanged
     *                when the prefix is absent
     */
    protected static function removeFirefoxAboutReader($input)
    {
        $firefoxPrefix = 'about://reader?url=';
        if (startsWith($input, $firefoxPrefix)) {
            // Cut the prefix by length. ltrim() takes a *set of characters*,
            // so it would keep eating leading characters of the wrapped URL
            // that happen to belong to the prefix (e.g. "rtsp" -> "sp").
            return urldecode((string) substr($input, strlen($firefoxPrefix)));
        }
        return $input;
    }

    /**
     * Returns a string representation of this URL
     *
     * @return string
     */
    public function toString()
    {
        return unparse_url($this->parts);
    }

    /**
     * Removes undesired query parameters
     *
     * A parameter is dropped when it starts with one of the prefixes listed
     * in self::$annoyingQueryParams. The query part is removed entirely when
     * nothing is left.
     */
    protected function cleanupQuery()
    {
        if (! isset($this->parts['query'])) {
            return;
        }

        $kept = [];
        foreach (explode('&', $this->parts['query']) as $param) {
            foreach (self::$annoyingQueryParams as $annoying) {
                if (startsWith($param, $annoying)) {
                    // Unwanted parameter: skip to the next one.
                    continue 2;
                }
            }
            $kept[] = $param;
        }

        if (count($kept) === 0) {
            unset($this->parts['query']);
            return;
        }

        $this->parts['query'] = implode('&', $kept);
    }

    /**
     * Removes undesired fragments
     *
     * The fragment is dropped when it starts with one of the prefixes listed
     * in self::$annoyingFragments.
     */
    protected function cleanupFragment()
    {
        if (! isset($this->parts['fragment'])) {
            return;
        }

        foreach (self::$annoyingFragments as $annoying) {
            if (startsWith($this->parts['fragment'], $annoying)) {
                unset($this->parts['fragment']);
                break;
            }
        }
    }

    /**
     * Removes undesired query parameters and fragments
     *
     * @return string the string representation of this URL after cleanup
     */
    public function cleanup()
    {
        $this->cleanupQuery();
        $this->cleanupFragment();
        return $this->toString();
    }

    /**
     * Converts an URL with an International Domain Name host to a ASCII one.
     * This requires PHP-intl. If it's not available, just returns this->cleanup().
     *
     * @return string converted cleaned up URL.
     */
    public function idnToAscii()
    {
        $out = $this->cleanup();
        if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) {
            return $out;
        }

        $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46);
        if ($asciiHost === false) {
            // Conversion failed: keep the cleaned URL rather than blanking the host.
            return $out;
        }

        // Rebuild from the parts so only the host component changes; a plain
        // str_replace() on the full URL would also rewrite any occurrence of
        // the host name inside the path, query or fragment.
        $asciiParts = $this->parts;
        $asciiParts['host'] = $asciiHost;
        return unparse_url($asciiParts);
    }

    /**
     * Get URL scheme.
     *
     * @return string|bool the URL scheme or false if none is provided.
     */
    public function getScheme()
    {
        if (!isset($this->parts['scheme'])) {
            return false;
        }
        return $this->parts['scheme'];
    }

    /**
     * Get URL host.
     *
     * @return string|bool the URL host or false if none is provided.
     */
    public function getHost()
    {
        if (empty($this->parts['host'])) {
            return false;
        }
        return $this->parts['host'];
    }

    /**
     * Test if the Url is an HTTP one.
     *
     * The check is a case-insensitive search for 'http' in the scheme, so
     * 'https' matches as well.
     *
     * @return bool true is HTTP, false otherwise (including when the URL has no scheme).
     */
    public function isHttp()
    {
        // An empty URL is parsed without a scheme; avoid reading a missing key.
        if (empty($this->parts['scheme'])) {
            return false;
        }
        return strpos(strtolower($this->parts['scheme']), 'http') !== false;
    }
}