diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:07:13 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:07:13 +0200 |
commit | d9f6275ebca035fec8331652c677981056793ccc (patch) | |
tree | 37a64baf4f0eba6b781040605965383d8aded2cc /application/http/HttpUtils.php | |
parent | 38672ba0d1c722e5d6d33a58255ceb55e9410e46 (diff) | |
parent | d63ff87a009313141ae684ec447b902562ff6ee7 (diff) | |
download | Shaarli-d9f6275ebca035fec8331652c677981056793ccc.tar.gz Shaarli-d9f6275ebca035fec8331652c677981056793ccc.tar.zst Shaarli-d9f6275ebca035fec8331652c677981056793ccc.zip |
Merge branch 'v0.11' into stable
Diffstat (limited to 'application/http/HttpUtils.php')
-rw-r--r-- | application/http/HttpUtils.php | 479 |
1 files changed, 479 insertions, 0 deletions
diff --git a/application/http/HttpUtils.php b/application/http/HttpUtils.php new file mode 100644 index 00000000..2ea9195d --- /dev/null +++ b/application/http/HttpUtils.php | |||
@@ -0,0 +1,479 @@ | |||
1 | <?php | ||
2 | |||
3 | use Shaarli\Http\Url; | ||
4 | |||
/**
 * Download a resource with an HTTP GET request.
 *
 * The cURL extension is used when it is available; otherwise the request is
 * delegated to get_http_response_fallback().
 *
 * @param string          $url               URL to get (http://...)
 * @param int             $timeout           network timeout (in seconds)
 * @param int             $maxBytes          maximum downloaded bytes (default: 4 MiB)
 * @param callable|string $curlWriteFunction Optional callback registered as cURL CURLOPT_WRITEFUNCTION.
 *                                           Only used on the cURL code path; it is not forwarded
 *                                           to the fallback method.
 *
 * @return array HTTP response headers, downloaded content
 *
 * Output format:
 *  [0] = associative array containing HTTP response headers
 *        (lines without a "Name: value" shape, such as the status line, get numeric keys;
 *        a header sent several times is stored as an array of values)
 *  [1] = URL content (downloaded data), or false on error
 *
 * Special cases:
 *  - invalid or non-HTTP URL:      [[0 => 'Invalid HTTP UrlUtils'], false]
 *  - cURL could not resolve host:  [false, false]
 *  - any other cURL failure:       [[0 => 'curl_exec() error: ...'], false]
 *
 * Example:
 *  list($headers, $data) = get_http_response('http://sebauvage.net/');
 *  if (strpos($headers[0], '200 OK') !== false) {
 *      echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
 *  } else {
 *      echo 'There was an error: '.htmlspecialchars($headers[0]);
 *  }
 *
 * @see https://secure.php.net/manual/en/ref.curl.php
 * @see https://secure.php.net/manual/en/functions.anonymous.php
 * @see https://secure.php.net/manual/en/function.preg-split.php
 * @see https://secure.php.net/manual/en/function.explode.php
 * @see http://stackoverflow.com/q/17641073
 * @see http://stackoverflow.com/q/9183178
 * @see http://stackoverflow.com/q/1462720
 */
function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
{
    $urlObj = new Url($url);
    $cleanUrl = $urlObj->idnToAscii();

    $isUsable = filter_var($cleanUrl, FILTER_VALIDATE_URL) && $urlObj->isHttp();
    if (!$isUsable) {
        return array(array('Invalid HTTP UrlUtils'), false);
    }

    $userAgent = 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)' . ' Gecko/20100101 Firefox/45.0';
    // The two first characters of the current collation locale are used as preferred language
    $acceptLanguage = substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3';
    $maxRedirs = 3;

    // No cURL extension: use the stream-based implementation instead
    if (!function_exists('curl_init')) {
        return get_http_response_fallback($cleanUrl, $timeout, $maxBytes, $userAgent, $acceptLanguage, $maxRedirs);
    }

    $handle = curl_init($cleanUrl);
    if ($handle === false) {
        return array(array('curl_init() error'), false);
    }

    // General cURL settings
    curl_setopt($handle, CURLOPT_AUTOREFERER, true);
    curl_setopt($handle, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($handle, CURLOPT_HEADER, true);
    curl_setopt($handle, CURLOPT_HTTPHEADER, array('Accept-Language: ' . $acceptLanguage));
    curl_setopt($handle, CURLOPT_MAXREDIRS, $maxRedirs);
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_TIMEOUT, $timeout);
    curl_setopt($handle, CURLOPT_USERAGENT, $userAgent);

    if (is_callable($curlWriteFunction)) {
        curl_setopt($handle, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
    }

    // Max download size management: the progress callback aborts the transfer
    // as soon as more than $maxBytes have been downloaded.
    // Before PHP 5.5 the callback receives 4 arguments (downloaded size is the 2nd),
    // afterwards it receives 5 (downloaded size is the 3rd).
    $legacyCallback = version_compare(phpversion(), '5.5', '<');
    $sizeGuard = function ($arg0, $arg1, $arg2, $arg3, $arg4 = 0) use ($maxBytes, $legacyCallback) {
        $downloaded = $legacyCallback ? $arg1 : $arg2;
        // Non-zero return stops downloading
        return ($downloaded > $maxBytes) ? 1 : 0;
    };
    curl_setopt($handle, CURLOPT_BUFFERSIZE, 1024*16);
    curl_setopt($handle, CURLOPT_NOPROGRESS, false);
    curl_setopt($handle, CURLOPT_PROGRESSFUNCTION, $sizeGuard);

    // Collect everything needed from the handle before closing it
    $response = curl_exec($handle);
    $errorNo = curl_errno($handle);
    $errorStr = curl_error($handle);
    $headSize = curl_getinfo($handle, CURLINFO_HEADER_SIZE);
    curl_close($handle);

    if ($response === false) {
        if ($errorNo == CURLE_COULDNT_RESOLVE_HOST) {
            /*
             * Workaround to match fallback method behaviour
             * Removing this would require updating
             * GetHttpUrlTest::testGetInvalidRemoteUrl()
             */
            return array(false, false);
        }
        return array(array('curl_exec() error: ' . $errorStr), false);
    }

    // Formatting output like the fallback method:
    // the response starts with $headSize bytes of headers, followed by the body
    $rawHeaders = substr($response, 0, $headSize);
    $content = substr($response, $headSize);

    // One header section per followed redirection: keep only the latest one
    $headerSections = explode("\r\n\r\n", trim($rawHeaders));
    $lastSection = end($headerSections);

    $headers = array();
    foreach (preg_split('~[\r\n]+~', $lastSection) as $headerLine) {
        if (empty($headerLine) || ctype_space($headerLine)) {
            continue;
        }

        $pair = explode(': ', $headerLine, 2);
        if (count($pair) < 2) {
            // Not a "Name: value" line (e.g. the status line): numeric key
            $headers[] = $pair[0];
            continue;
        }

        list($name, $value) = $pair;
        if (!array_key_exists($name, $headers)) {
            $headers[$name] = $value;
            continue;
        }

        // Repeated header: gather all values in an array
        if (!is_array($headers[$name])) {
            $headers[$name] = array($headers[$name]);
        }
        $headers[$name][] = $value;
    }

    return array($headers, $content);
}
158 | |||
/**
 * GET an HTTP URL to retrieve its content (fallback method, without cURL)
 *
 * Headers are first retrieved with get_redirected_headers(). If they are missing
 * or the status is not "200 OK", a second attempt is made with the
 * "request_fulluri" HTTP context option enabled.
 *
 * Side effect: the default stream context is replaced through
 * stream_context_set_default().
 *
 * @param string $cleanUrl       URL to get (http://... valid and in ASCII form)
 * @param int    $timeout        network timeout (in seconds)
 * @param int    $maxBytes       maximum downloaded bytes
 * @param string $userAgent      "User-Agent" header
 * @param string $acceptLanguage "Accept-Language" header
 * @param int    $maxRedr        maximum amount of redirections followed
 *
 * @return array HTTP response headers, downloaded content
 *
 * Output format:
 *  [0] = associative array containing HTTP response headers (false if none could be retrieved)
 *  [1] = URL content (downloaded data), or false on failure
 *
 * @see http://php.net/manual/en/function.file-get-contents.php
 * @see http://php.net/manual/en/function.stream-context-create.php
 * @see http://php.net/manual/en/function.get-headers.php
 */
function get_http_response_fallback(
    $cleanUrl,
    $timeout,
    $maxBytes,
    $userAgent,
    $acceptLanguage,
    $maxRedr
) {
    $options = array(
        'http' => array(
            'method' => 'GET',
            'timeout' => $timeout,
            'user_agent' => $userAgent,
            'header' => "Accept: */*\r\n"
                . 'Accept-Language: ' . $acceptLanguage
        )
    );

    // get_headers() relies on the default stream context
    stream_context_set_default($options);
    list($headers, $finalUrl) = get_redirected_headers($cleanUrl, $maxRedr);
    if (! $headers || strpos($headers[0], '200 OK') === false) {
        // Second chance: send the full URI in the request line
        $options['http']['request_fulluri'] = true;
        stream_context_set_default($options);
        list($headers, $finalUrl) = get_redirected_headers($cleanUrl, $maxRedr);
    }

    if (! $headers) {
        return array($headers, false);
    }

    try {
        // TODO: catch Exception in calling code (thumbnailer)
        $context = stream_context_create($options);
        // Read from the start of the stream (offset 0). A negative offset means
        // "seek from the end" since PHP 7.1, which remote streams do not support.
        $content = file_get_contents($finalUrl, false, $context, 0, $maxBytes);
    } catch (Exception $exc) {
        return array(array(0 => 'HTTP Error'), $exc->getMessage());
    }

    return array($headers, $content);
}
219 | |||
/**
 * Retrieve HTTP headers, following n redirections (temporary and permanent ones).
 *
 * A response is treated as a redirection when its status code is
 * 301, 302, 303, 307 or 308 and it carries a "Location" header.
 *
 * @param string $url              initial URL to reach.
 * @param int    $redirectionLimit max redirection follow.
 *
 * @return array [0] = HTTP headers as returned by get_headers() (false if the request failed),
 *               [1] = URL the headers were retrieved from (final URL after redirections).
 */
function get_redirected_headers($url, $redirectionLimit = 3)
{
    $headers = get_headers($url, 1);
    // Header names are kept as sent by the server: accept the lowercase variant too
    if (!empty($headers['location']) && empty($headers['Location'])) {
        $headers['Location'] = $headers['location'];
    }

    // Headers found, redirection found, and limit not reached.
    if ($redirectionLimit-- > 0
        && !empty($headers)
        && !empty($headers['Location'])
        && isset($headers[0])
        && preg_match('~^HTTP/\S+\s+30[12378]\b~i', $headers[0]) === 1
    ) {
        // Several "Location" headers may be present: the last one is the most recent
        $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location'];
        // Do not loop on a URL redirecting to itself
        if ($redirection != $url) {
            $redirection = getAbsoluteUrl($url, $redirection);
            return get_redirected_headers($redirection, $redirectionLimit);
        }
    }

    return array($headers, $url);
}
249 | |||
/**
 * Get an absolute URL from a complete one, and another absolute/relative URL.
 *
 * @param string $originalUrl The original complete URL.
 * @param string $newUrl      The new one, absolute or relative.
 *
 * @return string Final URL:
 *   - $newUrl if it was already an absolute URL (it has a scheme),
 *     or if $originalUrl can not be parsed into a scheme and a host.
 *   - if it starts with "//" (scheme-relative), $newUrl prefixed with the scheme of $originalUrl.
 *   - if it starts with "/", $newUrl appended to the scheme://host[:port] of $originalUrl.
 *   - otherwise, $newUrl appended to the directory part of $originalUrl path.
 */
function getAbsoluteUrl($originalUrl, $newUrl)
{
    $newScheme = parse_url($newUrl, PHP_URL_SCHEME);
    // Already an absolute URL.
    if (!empty($newScheme)) {
        return $newUrl;
    }

    $parts = parse_url($originalUrl);
    // Nothing to build an absolute URL from: leave the new URL untouched
    if (!is_array($parts) || empty($parts['scheme']) || empty($parts['host'])) {
        return $newUrl;
    }

    // Scheme-relative URL (//host/path): only the scheme is inherited
    if (strncmp($newUrl, '//', 2) === 0) {
        return $parts['scheme'] . ':' . $newUrl;
    }

    $final = $parts['scheme'] . '://' . $parts['host'];
    if (!empty($parts['port'])) {
        $final .= ':' . $parts['port'];
    }
    $final .= '/';

    // Path-relative URL: keep the directory part of the original path
    if (strncmp($newUrl, '/', 1) !== 0) {
        $path = isset($parts['path']) ? $parts['path'] : '';
        $lastSlash = strrpos($path, '/');
        if ($lastSlash !== false) {
            $final .= ltrim(substr($path, 0, $lastSlash + 1), '/');
        }
    }

    return $final . ltrim($newUrl, '/');
}
278 | |||
/**
 * Returns the server's base URL: scheme://domain.tld[:port]
 *
 * When the "X-Forwarded-Proto" header is present, Shaarli is considered to be
 * served behind a proxy: scheme, port and host are then taken from the
 * "X-Forwarded-*" headers (first entry of comma-separated lists), with
 * SERVER_NAME used as host when "X-Forwarded-Host" is missing.
 * Otherwise HTTPS, SERVER_PORT and SERVER_NAME are used.
 *
 * Standard ports (80 for http, 443 for https) are never appended, and no port
 * is appended when none is known.
 *
 * @param array $server the $_SERVER array
 *
 * @return string the server's base URL
 *
 * @see http://www.ietf.org/rfc/rfc7239.txt
 * @see http://www.ietf.org/rfc/rfc6648.txt
 * @see http://stackoverflow.com/a/3561399
 * @see http://stackoverflow.com/q/452375
 */
function server_url($server)
{
    $scheme = 'http';
    $port = '';

    // Forwarded headers may hold a comma-separated list: only the first entry is kept
    $firstEntry = function ($headerValue) {
        $entries = explode(',', $headerValue);
        return trim($entries[0]);
    };

    // Shaarli is served behind a proxy
    if (isset($server['HTTP_X_FORWARDED_PROTO'])) {
        // Keep forwarded scheme
        $scheme = $firstEntry($server['HTTP_X_FORWARDED_PROTO']);

        if (isset($server['HTTP_X_FORWARDED_PORT'])) {
            // Keep forwarded port
            $port = $firstEntry($server['HTTP_X_FORWARDED_PORT']);

            // This is a workaround for proxies that don't forward the scheme properly.
            // Connecting over port 443 has to be in HTTPS.
            // See https://github.com/shaarli/Shaarli/issues/1022
            if ($port == '443') {
                $scheme = 'https';
            }

            // Do not append standard port values, nor an empty port
            if ($port !== ''
                && (($scheme == 'http' && $port != '80')
                    || ($scheme == 'https' && $port != '443'))
            ) {
                $port = ':' . $port;
            } else {
                $port = '';
            }
        }

        if (isset($server['HTTP_X_FORWARDED_HOST'])) {
            // Keep forwarded host
            $host = $firstEntry($server['HTTP_X_FORWARDED_HOST']);
        } else {
            $host = $server['SERVER_NAME'];
        }

        return $scheme . '://' . $host . $port;
    }

    // SERVER_PORT may be missing (e.g. CLI or incomplete $_SERVER): treat it as unknown
    $serverPort = isset($server['SERVER_PORT']) ? (string) $server['SERVER_PORT'] : '';

    // SSL detection
    if ((! empty($server['HTTPS']) && strtolower($server['HTTPS']) == 'on')
        || $serverPort == '443'
    ) {
        $scheme = 'https';
    }

    // Do not append standard port values, nor an unknown port
    if ($serverPort !== ''
        && (($scheme == 'http' && $serverPort != '80')
            || ($scheme == 'https' && $serverPort != '443'))
    ) {
        $port = ':' . $serverPort;
    }

    return $scheme . '://' . $server['SERVER_NAME'] . $port;
}
360 | |||
/**
 * Builds the absolute URL of the running script, query string excluded.
 *
 * A trailing "index.php" is stripped from the script name (for better-looking URLs).
 *
 * @param array $server the $_SERVER array
 *
 * @return string server base URL followed by the (possibly shortened) script name
 */
function index_url($server)
{
    $script = $server['SCRIPT_NAME'];
    // 9 is the length of "index.php"
    $path = endsWith($script, 'index.php') ? substr($script, 0, -9) : $script;

    return server_url($server) . $path;
}
378 | |||
/**
 * Builds the absolute URL of the running script, query string included.
 *
 * The base part comes from index_url(), so a trailing "index.php" is removed
 * (for better-looking URLs). The query string is only appended when
 * QUERY_STRING is set and not empty.
 *
 * @param array $server the $_SERVER array
 *
 * @return string the absolute URL of the current script, with the query
 */
function page_url($server)
{
    $base = index_url($server);

    if (empty($server['QUERY_STRING'])) {
        return $base;
    }

    return $base . '?' . $server['QUERY_STRING'];
}
395 | |||
/**
 * Extract the client IP advertised by the reverse proxy.
 *
 * The "X-Forwarded-For" header is split on commas, trusted IPs are removed
 * from the list, and the last remaining entry is returned.
 *
 * Inspired from: https://github.com/zendframework/zend-http/blob/master/src/PhpEnvironment/RemoteAddress.php
 *
 * @param array $server     $_SERVER array which contains HTTP headers.
 * @param array $trustedIps List of trusted IP from the configuration.
 *
 * @return string|bool The forwarded IP, or false if none could be extracted.
 */
function getIpAddressFromProxy($server, $trustedIps)
{
    if (empty($server['HTTP_X_FORWARDED_FOR'])) {
        return false;
    }

    $forwarded = preg_split('/\s*,\s*/', $server['HTTP_X_FORWARDED_FOR']);
    $untrusted = array_diff($forwarded, $trustedIps);

    // Last untrusted entry of the chain, if any
    return empty($untrusted) ? false : end($untrusted);
}
421 | |||
422 | |||
/**
 * Build an identifier from the advertised client IP address(es)
 *
 * The identifier is REMOTE_ADDR, followed by the "X-Forwarded-For" and
 * "Client-IP" header values when they are set, joined with underscores.
 *
 * This aims at preventing session hijacking from users behind the same proxy
 * by relying on HTTP headers.
 *
 * See:
 * - https://secure.php.net/manual/en/reserved.variables.server.php
 * - https://stackoverflow.com/questions/3003145/how-to-get-the-client-ip-address-in-php
 * - https://stackoverflow.com/questions/12233406/preventing-session-hijacking
 * - https://stackoverflow.com/questions/21354859/trusting-x-forwarded-for-to-identify-a-visitor
 *
 * @param array $server The $_SERVER array
 *
 * @return string An identifier based on client IP address information
 */
function client_ip_id($server)
{
    $identifier = $server['REMOTE_ADDR'];

    // Order matters: forwarded addresses first, then the client IP header
    foreach (array('HTTP_X_FORWARDED_FOR', 'HTTP_CLIENT_IP') as $header) {
        if (isset($server[$header])) {
            $identifier = $identifier . '_' . $server[$header];
        }
    }

    return $identifier;
}
451 | |||
452 | |||
/**
 * Returns true if Shaarli's currently browsed in HTTPS.
 * Supports reverse proxies (if the headers are correctly set).
 *
 * HTTPS is detected when any of the following holds:
 *  - the first "X-Forwarded-Proto" entry is "https" (same header as used by server_url()),
 *  - the first "X-Forwarded-Port" entry is 443,
 *  - $server['HTTPS'] is set to a non-empty value other than "off"
 *    (some servers set it to "off" for plain HTTP requests).
 *
 * @param array $server $_SERVER.
 *
 * @return bool true if HTTPS, false otherwise.
 */
function is_https($server)
{
    if (isset($server['HTTP_X_FORWARDED_PROTO'])) {
        // Keep the first forwarded scheme
        $schemes = explode(',', $server['HTTP_X_FORWARDED_PROTO']);
        if (strtolower(trim($schemes[0])) === 'https') {
            return true;
        }
    }

    if (isset($server['HTTP_X_FORWARDED_PORT'])) {
        // Keep the first forwarded port
        $ports = explode(',', $server['HTTP_X_FORWARDED_PORT']);
        if (trim($ports[0]) == '443') {
            return true;
        }
    }

    return ! empty($server['HTTPS'])
        && strtolower((string) $server['HTTPS']) !== 'off';
}