diff options
author | ArthurHoaro <arthur@hoa.ro> | 2016-04-06 22:00:52 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2016-05-03 19:51:29 +0200 |
commit | ce7b0b6480aa854ee6893f5c889277b0e3b13efc (patch) | |
tree | 8d8beb4ea5568d9989a5ebf52e2adc542e17f74e /application/HttpUtils.php | |
parent | 11609d9fd8ba53f049e6c913d8e3affab6cfc9ce (diff) | |
download | Shaarli-ce7b0b6480aa854ee6893f5c889277b0e3b13efc.tar.gz Shaarli-ce7b0b6480aa854ee6893f5c889277b0e3b13efc.tar.zst Shaarli-ce7b0b6480aa854ee6893f5c889277b0e3b13efc.zip |
Fixes #531 - Title retrieving is failing with multiple use case
see https://github.com/shaarli/Shaarli/issues/531 for details
Diffstat (limited to 'application/HttpUtils.php')
-rw-r--r-- | application/HttpUtils.php | 60 |
1 files changed, 50 insertions, 10 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php index af7cb371..0e1ce879 100644 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php | |||
@@ -27,7 +27,9 @@ | |||
27 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | 27 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304) |
28 | { | 28 | { |
29 | $urlObj = new Url($url); | 29 | $urlObj = new Url($url); |
30 | if (! filter_var($url, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) { | 30 | $cleanUrl = $urlObj->indToAscii(); |
31 | |||
32 | if (! filter_var($cleanUrl, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) { | ||
31 | return array(array(0 => 'Invalid HTTP Url'), false); | 33 | return array(array(0 => 'Invalid HTTP Url'), false); |
32 | } | 34 | } |
33 | 35 | ||
@@ -35,22 +37,27 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | |||
35 | 'http' => array( | 37 | 'http' => array( |
36 | 'method' => 'GET', | 38 | 'method' => 'GET', |
37 | 'timeout' => $timeout, | 39 | 'timeout' => $timeout, |
38 | 'user_agent' => 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:23.0)' | 40 | 'user_agent' => 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)' |
39 | .' Gecko/20100101 Firefox/23.0', | 41 | .' Gecko/20100101 Firefox/45.0', |
40 | 'request_fulluri' => true, | 42 | 'accept_language' => substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3', |
41 | ) | 43 | ) |
42 | ); | 44 | ); |
43 | 45 | ||
44 | $context = stream_context_create($options); | ||
45 | stream_context_set_default($options); | 46 | stream_context_set_default($options); |
47 | list($headers, $finalUrl) = get_redirected_headers($cleanUrl); | ||
48 | if (! $headers || strpos($headers[0], '200 OK') === false) { | ||
49 | $options['http']['request_fulluri'] = true; | ||
50 | stream_context_set_default($options); | ||
51 | list($headers, $finalUrl) = get_redirected_headers($cleanUrl); | ||
52 | } | ||
46 | 53 | ||
47 | list($headers, $finalUrl) = get_redirected_headers($urlObj->cleanup()); | ||
48 | if (! $headers || strpos($headers[0], '200 OK') === false) { | 54 | if (! $headers || strpos($headers[0], '200 OK') === false) { |
49 | return array($headers, false); | 55 | return array($headers, false); |
50 | } | 56 | } |
51 | 57 | ||
52 | try { | 58 | try { |
53 | // TODO: catch Exception in calling code (thumbnailer) | 59 | // TODO: catch Exception in calling code (thumbnailer) |
60 | $context = stream_context_create($options); | ||
54 | $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); | 61 | $content = file_get_contents($finalUrl, false, $context, -1, $maxBytes); |
55 | } catch (Exception $exc) { | 62 | } catch (Exception $exc) { |
56 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); | 63 | return array(array(0 => 'HTTP Error'), $exc->getMessage()); |
@@ -60,16 +67,19 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | |||
60 | } | 67 | } |
61 | 68 | ||
62 | /** | 69 | /** |
63 | * Retrieve HTTP headers, following n redirections (temporary and permanent). | 70 | * Retrieve HTTP headers, following n redirections (temporary and permanent ones). |
64 | * | 71 | * |
65 | * @param string $url initial URL to reach. | 72 | * @param string $url initial URL to reach. |
66 | * @param int $redirectionLimit max redirection follow.. | 73 | * @param int $redirectionLimit max redirection follow.. |
67 | * | 74 | * |
68 | * @return array | 75 | * @return array HTTP headers, or false if it failed. |
69 | */ | 76 | */ |
70 | function get_redirected_headers($url, $redirectionLimit = 3) | 77 | function get_redirected_headers($url, $redirectionLimit = 3) |
71 | { | 78 | { |
72 | $headers = get_headers($url, 1); | 79 | $headers = get_headers($url, 1); |
80 | if (!empty($headers['location']) && empty($headers['Location'])) { | ||
81 | $headers['Location'] = $headers['location']; | ||
82 | } | ||
73 | 83 | ||
74 | // Headers found, redirection found, and limit not reached. | 84 | // Headers found, redirection found, and limit not reached. |
75 | if ($redirectionLimit-- > 0 | 85 | if ($redirectionLimit-- > 0 |
@@ -79,6 +89,7 @@ function get_redirected_headers($url, $redirectionLimit = 3) | |||
79 | 89 | ||
80 | $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; | 90 | $redirection = is_array($headers['Location']) ? end($headers['Location']) : $headers['Location']; |
81 | if ($redirection != $url) { | 91 | if ($redirection != $url) { |
92 | $redirection = getAbsoluteUrl($url, $redirection); | ||
82 | return get_redirected_headers($redirection, $redirectionLimit); | 93 | return get_redirected_headers($redirection, $redirectionLimit); |
83 | } | 94 | } |
84 | } | 95 | } |
@@ -87,6 +98,35 @@ function get_redirected_headers($url, $redirectionLimit = 3) | |||
87 | } | 98 | } |
88 | 99 | ||
89 | /** | 100 | /** |
101 | * Get an absolute URL from a complete one, and another absolute/relative URL. | ||
102 | * | ||
103 | * @param string $originalUrl The original complete URL. | ||
104 | * @param string $newUrl The new one, absolute or relative. | ||
105 | * | ||
106 | * @return string Final URL: | ||
107 | * - $newUrl if it was already an absolute URL. | ||
108 | * - if it was relative, absolute URL from $originalUrl path. | ||
109 | */ | ||
110 | function getAbsoluteUrl($originalUrl, $newUrl) | ||
111 | { | ||
112 | $newScheme = parse_url($newUrl, PHP_URL_SCHEME); | ||
113 | // Already an absolute URL. | ||
114 | if (!empty($newScheme)) { | ||
115 | return $newUrl; | ||
116 | } | ||
117 | |||
118 | $parts = parse_url($originalUrl); | ||
119 | $final = $parts['scheme'] .'://'. $parts['host']; | ||
120 | $final .= (!empty($parts['port'])) ? $parts['port'] : ''; | ||
121 | $final .= '/'; | ||
122 | if ($newUrl[0] != '/') { | ||
123 | $final .= substr(ltrim($parts['path'], '/'), 0, strrpos($parts['path'], '/')); | ||
124 | } | ||
125 | $final .= ltrim($newUrl, '/'); | ||
126 | return $final; | ||
127 | } | ||
128 | |||
129 | /** | ||
90 | * Returns the server's base URL: scheme://domain.tld[:port] | 130 | * Returns the server's base URL: scheme://domain.tld[:port] |
91 | * | 131 | * |
92 | * @param array $server the $_SERVER array | 132 | * @param array $server the $_SERVER array |