]>
Commit | Line | Data |
---|---|---|
451314eb V |
1 | <?php |
/**
 * GET an HTTP URL to retrieve its content
 *
 * The URL is first probed with get_redirected_headers(); the body is only
 * downloaded when the final status line contains "200 OK".
 *
 * @param string $url      URL to get (http://...)
 * @param int    $timeout  network timeout (in seconds)
 * @param int    $maxBytes maximum downloaded bytes (default: 4 MiB)
 *
 * @return array HTTP response headers, downloaded content
 *
 * Output format:
 *  [0] = associative array containing HTTP response headers
 *  [1] = URL content (downloaded data)
 *
 * Failure cases:
 *  - invalid or non-HTTP URL:  array(array(0 => 'Invalid HTTP Url'), false)
 *  - final status not 200 OK:  array($headers, false)
 *  - exception while reading:  array(array(0 => 'HTTP Error'), <exception message>)
 *
 * Side effect: the default stream context is replaced through
 * stream_context_set_default(), which affects later stream calls made
 * without an explicit context.
 *
 * Example:
 *  list($headers, $data) = get_http_response('http://sebauvage.net/');
 *  if (strpos($headers[0], '200 OK') !== false) {
 *      echo 'Data type: '.htmlspecialchars($headers['Content-Type']);
 *  } else {
 *      echo 'There was an error: '.htmlspecialchars($headers[0]);
 *  }
 *
 * @see http://php.net/manual/en/function.file-get-contents.php
 * @see http://php.net/manual/en/function.stream-context-create.php
 * @see http://php.net/manual/en/function.get-headers.php
 */
function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
{
    $urlObj = new Url($url);
    $cleanUrl = $urlObj->idnToAscii();

    if (! filter_var($cleanUrl, FILTER_VALIDATE_URL) || ! $urlObj->isHttp()) {
        return array(array(0 => 'Invalid HTTP Url'), false);
    }

    $options = array(
        'http' => array(
            'method' => 'GET',
            'timeout' => $timeout,
            'user_agent' => 'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:45.0)'
                         .' Gecko/20100101 Firefox/45.0',
            // First two characters of the current collation locale, then English fallbacks.
            'accept_language' => substr(setlocale(LC_COLLATE, 0), 0, 2) . ',en-US;q=0.7,en;q=0.3',
        )
    );

    // get_headers() relies on the default context, hence the global override.
    stream_context_set_default($options);
    list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
    if (! $headers || strpos($headers[0], '200 OK') === false) {
        // Second attempt with the full URI in the request line,
        // a non-standard format that some proxy servers require.
        $options['http']['request_fulluri'] = true;
        stream_context_set_default($options);
        list($headers, $finalUrl) = get_redirected_headers($cleanUrl);
    }

    if (! $headers || strpos($headers[0], '200 OK') === false) {
        return array($headers, false);
    }

    try {
        // TODO: catch Exception in calling code (thumbnailer)
        $context = stream_context_create($options);
        // Offset must be 0: since PHP 7.1 a negative offset means "seek from
        // the end of the stream", which fails on non-seekable HTTP streams.
        $content = file_get_contents($finalUrl, false, $context, 0, $maxBytes);
    } catch (Exception $exc) {
        // NOTE(review): file_get_contents() normally signals failure by returning
        // false; this branch presumably only runs when an error handler converts
        // warnings into exceptions.
        return array(array(0 => 'HTTP Error'), $exc->getMessage());
    }

    return array($headers, $content);
}
68 | ||
/**
 * Fetch the HTTP headers of a URL, chasing 301/302 redirections up to a limit.
 *
 * @param string $url              initial URL to reach.
 * @param int    $redirectionLimit maximum number of redirections to follow.
 *
 * @return array [0] = headers returned by get_headers() for the last URL
 *               requested (false if the request failed),
 *               [1] = that last requested URL.
 */
function get_redirected_headers($url, $redirectionLimit = 3)
{
    $responseHeaders = get_headers($url, 1);

    // Expose a lowercase "location" header under its usual capitalised name.
    if (empty($responseHeaders['Location']) && !empty($responseHeaders['location'])) {
        $responseHeaders['Location'] = $responseHeaders['location'];
    }

    // Every call consumes one unit of the redirection budget.
    $mayFollow = $redirectionLimit > 0;
    $redirectionLimit--;

    // Budget exhausted, or nothing was retrieved: stop here.
    if (!$mayFollow || empty($responseHeaders)) {
        return array($responseHeaders, $url);
    }

    // Only 301 and 302 responses carrying a Location header are followed.
    $statusLine = $responseHeaders[0];
    $isRedirection = strpos($statusLine, '301') !== false || strpos($statusLine, '302') !== false;
    if (!$isRedirection || empty($responseHeaders['Location'])) {
        return array($responseHeaders, $url);
    }

    // With several Location headers, the last one wins.
    if (is_array($responseHeaders['Location'])) {
        $target = end($responseHeaders['Location']);
    } else {
        $target = $responseHeaders['Location'];
    }

    // A redirection onto the very same URL would loop: stop here.
    if ($target == $url) {
        return array($responseHeaders, $url);
    }

    return get_redirected_headers(getAbsoluteUrl($url, $target), $redirectionLimit);
}
482d67bd | 99 | |
ce7b0b64 A |
/**
 * Get an absolute URL from a complete one, and another absolute/relative URL.
 *
 * @param string $originalUrl The original complete URL.
 * @param string $newUrl      The new one, absolute or relative.
 *
 * @return string Final URL:
 *   - $newUrl if it was already an absolute URL (it has a scheme);
 *   - $newUrl prefixed with the original scheme if it is a network-path
 *     reference ("//host/path");
 *   - if it starts with a single "/", scheme://host[:port] of $originalUrl
 *     followed by $newUrl;
 *   - otherwise, $newUrl appended to the directory of $originalUrl's path;
 *   - $newUrl unchanged if $originalUrl has no usable scheme and host.
 */
function getAbsoluteUrl($originalUrl, $newUrl)
{
    $newScheme = parse_url($newUrl, PHP_URL_SCHEME);
    // Already an absolute URL.
    if (!empty($newScheme)) {
        return $newUrl;
    }

    $parts = parse_url($originalUrl);
    // Without a scheme and a host there is no base to resolve against.
    if (empty($parts['scheme']) || empty($parts['host'])) {
        return $newUrl;
    }

    // Network-path reference: only the scheme is inherited.
    if (substr($newUrl, 0, 2) === '//') {
        return $parts['scheme'] . ':' . $newUrl;
    }

    $final = $parts['scheme'] . '://' . $parts['host'];
    if (!empty($parts['port'])) {
        // The port needs its ":" separator.
        $final .= ':' . $parts['port'];
    }
    $final .= '/';

    // substr() instead of $newUrl[0]: safe on an empty string.
    if (substr($newUrl, 0, 1) !== '/') {
        // Relative to the directory of the original path (which may be absent).
        $path = isset($parts['path']) ? $parts['path'] : '';
        $lastSlash = strrpos($path, '/');
        if ($lastSlash !== false) {
            // Keep everything up to and including the last "/".
            $final .= ltrim(substr($path, 0, $lastSlash + 1), '/');
        }
    }

    $final .= ltrim($newUrl, '/');
    return $final;
}
128 | ||
482d67bd V |
/**
 * Returns the server's base URL: scheme://domain.tld[:port]
 *
 * Behind a proxy (X-Forwarded-Proto present), the forwarded scheme and port
 * are used; when a header holds a comma-separated list, its first value is
 * kept. Otherwise the scheme and port come from HTTPS / SERVER_PORT.
 * In both cases the default port of the scheme (80 for http, 443 for https)
 * is omitted, and a missing port simply yields no port suffix.
 *
 * @param array $server the $_SERVER array
 *
 * @return string the server's base URL
 *
 * @see http://www.ietf.org/rfc/rfc7239.txt
 * @see http://www.ietf.org/rfc/rfc6648.txt
 * @see http://stackoverflow.com/a/3561399
 * @see http://stackoverflow.com/q/452375
 */
function server_url($server)
{
    $scheme = 'http';
    $port = '';

    // Shaarli is served behind a proxy
    if (isset($server['HTTP_X_FORWARDED_PROTO'])) {
        // Keep forwarded scheme (first value of a comma-separated list)
        $schemes = explode(',', $server['HTTP_X_FORWARDED_PROTO']);
        $scheme = trim($schemes[0]);

        if (isset($server['HTTP_X_FORWARDED_PORT'])) {
            // Keep forwarded port (first value of a comma-separated list)
            $ports = explode(',', $server['HTTP_X_FORWARDED_PORT']);
            $forwardedPort = trim($ports[0]);

            // Do not append standard port values
            $isStandardPort = ($scheme == 'http' && $forwardedPort == '80')
                || ($scheme == 'https' && $forwardedPort == '443');
            if ($forwardedPort !== '' && ! $isStandardPort) {
                $port = ':' . $forwardedPort;
            }
        }

        return $scheme.'://'.$server['SERVER_NAME'].$port;
    }

    // SERVER_PORT may be absent (e.g. CLI or partial $_SERVER arrays)
    $serverPort = isset($server['SERVER_PORT']) ? $server['SERVER_PORT'] : null;

    // SSL detection
    if ((! empty($server['HTTPS']) && strtolower($server['HTTPS']) == 'on')
        || ($serverPort !== null && $serverPort == '443')) {
        $scheme = 'https';
    }

    // Do not append standard port values, nor a missing/empty port
    if ($serverPort !== null && $serverPort !== ''
        && (($scheme == 'http' && $serverPort != '80')
            || ($scheme == 'https' && $serverPort != '443'))) {
        $port = ':'.$serverPort;
    }

    return $scheme.'://'.$server['SERVER_NAME'].$port;
}
183 | ||
/**
 * Builds the absolute URL of the running script, query string excluded.
 *
 * A trailing "index.php" is dropped from the script name so that the
 * resulting URL looks cleaner.
 *
 * @param array $server the $_SERVER array
 *
 * @return string the server base URL followed by the (shortened) script name
 */
function index_url($server)
{
    $indexFile = 'index.php';
    $path = $server['SCRIPT_NAME'];

    if (endsWith($path, 'index.php')) {
        // Cut the trailing file name off, keeping the directory part.
        $path = substr($path, 0, -strlen($indexFile));
    }

    return server_url($server) . $path;
}
201 | ||
/**
 * Builds the absolute URL of the running script, query string included.
 *
 * The script part comes from index_url(), so a trailing "index.php" is
 * removed there; "?<query>" is appended only when QUERY_STRING is non-empty.
 *
 * @param array $server the $_SERVER array
 *
 * @return string the absolute URL of the current script, with the query
 */
function page_url($server)
{
    $scriptUrl = index_url($server);

    if (empty($server['QUERY_STRING'])) {
        return $scriptUrl;
    }

    return $scriptUrl.'?'.$server['QUERY_STRING'];
}