aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/HttpUtils.php
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2016-04-06 22:00:52 +0200
committerArthurHoaro <arthur@hoa.ro>2016-05-03 19:51:29 +0200
commitce7b0b6480aa854ee6893f5c889277b0e3b13efc (patch)
tree8d8beb4ea5568d9989a5ebf52e2adc542e17f74e /application/HttpUtils.php
parent11609d9fd8ba53f049e6c913d8e3affab6cfc9ce (diff)
downloadShaarli-ce7b0b6480aa854ee6893f5c889277b0e3b13efc.tar.gz
Shaarli-ce7b0b6480aa854ee6893f5c889277b0e3b13efc.tar.zst
Shaarli-ce7b0b6480aa854ee6893f5c889277b0e3b13efc.zip
Fixes #531 - Title retrieval fails in multiple use cases
see https://github.com/shaarli/Shaarli/issues/531 for details
Diffstat (limited to 'application/HttpUtils.php')
-rw-r--r--application/HttpUtils.php60
1 files changed, 50 insertions, 10 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php
index af7cb371..0e1ce879 100644
--- a/application/HttpUtils.php
+++ b/application/HttpUtils.php
@@ -27,7 +27,9 @@
27function get_http_response($url, $timeout = 30, $maxBytes = 4194304) 27function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
28{ 28{
29 $urlObj = new Url($url); 29 $urlObj = new Url($url);
30 if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) { 30 $cleanUrl = $urlObj->indToAscii();
31
32 if (! filter_var($cleanUrl, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) {
31 return array(array(0 => 'Invalid HTTP Url'), false); 33 return array(array(0 => 'Invalid HTTP Url'), false);
32 } 34 }
33 35
@@ -35,22 +37,27 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
35 'http' => array( 37 'http' => array(
36 'method' => 'GET', 38 'method' => 'GET',
37 'timeout' => $timeout, 39 'timeout' => $timeout,
38 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' 40 'user_agent' => 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)'
39 .' Gecko/20100101 Firefox/23.0', 41 .' Gecko/20100101 Firefox/45.0',
40 'request_fulluri' => true, 42 'accept_language' => substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3',
41 ) 43 )
42 ); 44 );
43 45
44 $context = stream_context_create($options);
45 stream_context_set_default($options); 46 stream_context_set_default($options);
47 list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
48 if (! $headers || strpos($headers[0], '200 OK') === false) {
49 $options['http']['request_fulluri'] = true;
50 stream_context_set_default($options);
51 list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
52 }
46 53
47 list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup());
48 if (! $headers || strpos($headers[0], '200 OK') === false) { 54 if (! $headers || strpos($headers[0], '200 OK') === false) {
49 return array($headers, false); 55 return array($headers, false);
50 } 56 }
51 57
52 try { 58 try {
53 // TODO: catch Exception in calling code (thumbnailer) 59 // TODO: catch Exception in calling code (thumbnailer)
60 $context = stream_context_create($options);
54 $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); 61 $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes);
55 } catch (Exception $exc) { 62 } catch (Exception $exc) {
56 return array(array(0 => 'HTTP Error'), $exc->getMessage()); 63 return array(array(0 => 'HTTP Error'), $exc->getMessage());
@@ -60,16 +67,19 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
60} 67}
61 68
62/** 69/**
63 * Retrieve HTTP headers, following n redirections (temporary and permanent). 70 * Retrieve HTTP headers, following n redirections (temporary and permanent ones).
64 * 71 *
65 * @param string $url initial URL to reach. 72 * @param string $url initial URL to reach.
66 * @param int $redirectionLimit maximum number of redirections to follow. 73 * @param int $redirectionLimit maximum number of redirections to follow.
67 * 74 *
68 * @return array 75 * @return array HTTP headers, or false if it failed.
69 */ 76 */
70function get_redirected_headers($url, $redirectionLimit = 3) 77function get_redirected_headers($url, $redirectionLimit = 3)
71{ 78{
72 $headers = get_headers($url, 1); 79 $headers = get_headers($url, 1);
80 if (!empty($headers['location']) && empty($headers['Location'])) {
81 $headers['Location'] = $headers['location'];
82 }
73 83
74 // Headers found, redirection found, and limit not reached. 84 // Headers found, redirection found, and limit not reached.
75 if ($redirectionLimit-- > 0 85 if ($redirectionLimit-- > 0
@@ -79,6 +89,7 @@ function get_redirected_headers($url, $redirectionLimit = 3)
79 89
80 $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; 90 $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location'];
81 if ($redirection != $url) { 91 if ($redirection != $url) {
92 $redirection = getAbsoluteUrl($url, $redirection);
82 return get_redirected_headers($redirection, $redirectionLimit); 93 return get_redirected_headers($redirection, $redirectionLimit);
83 } 94 }
84 } 95 }
@@ -87,6 +98,35 @@ function get_redirected_headers($url, $redirectionLimit = 3)
87} 98}
88 99
89/** 100/**
101 * Get an absolute URL from a complete one, and another absolute/relative URL.
102 *
103 * @param string $originalUrl The original complete URL.
104 * @param string $newUrl The new one, absolute or relative.
105 *
106 * @return string Final URL:
107 * - $newUrl if it was already an absolute URL.
108 * - if it was relative, absolute URL from $originalUrl path.
109 */
function getAbsoluteUrl($originalUrl, $newUrl)
{
    // Resolve $newUrl (typically the value of a "Location" header) against
    // $originalUrl, the complete URL that was requested.
    //
    // Returns:
    //   - $newUrl unchanged when it already carries a scheme (absolute URL);
    //   - $originalUrl when $newUrl is empty (nothing to resolve);
    //   - scheme of $originalUrl + $newUrl for protocol-relative URLs ("//host/x");
    //   - scheme://host[:port] + $newUrl for host-relative URLs ("/x");
    //   - scheme://host[:port] + directory of the original path + $newUrl otherwise.
    $newUrl = (string) $newUrl;
    if ($newUrl === '') {
        // Avoids reading $newUrl[0] on an empty string further down.
        return $originalUrl;
    }

    $newScheme = parse_url($newUrl, PHP_URL_SCHEME);
    // Already an absolute URL.
    if (!empty($newScheme)) {
        return $newUrl;
    }

    $parts = parse_url($originalUrl);
    if (empty($parts['scheme']) || empty($parts['host'])) {
        // The original URL is not complete enough to serve as a base:
        // hand back the new URL untouched rather than build garbage.
        return $newUrl;
    }

    // Protocol-relative URL: only the scheme is inherited from the original.
    if (substr($newUrl, 0, 2) === '//') {
        return $parts['scheme'] . ':' . $newUrl;
    }

    $final = $parts['scheme'] . '://' . $parts['host'];
    if (!empty($parts['port'])) {
        // The ':' separator is required, otherwise the port is glued to the host.
        $final .= ':' . $parts['port'];
    }
    $final .= '/';

    if ($newUrl[0] != '/') {
        // Relative to the directory of the original path. The path component
        // is absent for URLs such as "http://host", hence the default.
        $path = isset($parts['path']) ? $parts['path'] : '/';
        $lastSlash = strrpos($path, '/');
        $directory = ($lastSlash === false) ? '' : substr($path, 0, $lastSlash + 1);
        $final .= ltrim($directory, '/');
    }

    $final .= ltrim($newUrl, '/');
    return $final;
}
128
129/**
90 * Returns the server's base URL: scheme://domain.tld[:port] 130 * Returns the server's base URL: scheme://domain.tld[:port]
91 * 131 *
92 * @param array $server the $_SERVER array 132 * @param array $server the $_SERVER array