diff options
Diffstat (limited to 'application')
-rw-r--r-- | application/HttpUtils.php | 21 | ||||
-rw-r--r-- | application/Languages.php | 2 | ||||
-rw-r--r-- | application/LinkUtils.php | 89 | ||||
-rw-r--r-- | application/Url.php | 2 | ||||
-rw-r--r-- | application/config/ConfigPhp.php | 6 |
5 files changed, 78 insertions, 42 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php index ec54dcd4..83a4c5e2 100644 --- a/application/HttpUtils.php +++ b/application/HttpUtils.php | |||
@@ -3,9 +3,11 @@ | |||
3 | * GET an HTTP URL to retrieve its content | 3 | * GET an HTTP URL to retrieve its content |
4 | * Uses the cURL library or a fallback method | 4 | * Uses the cURL library or a fallback method |
5 | * | 5 | * |
6 | * @param string $url URL to get (http://...) | 6 | * @param string $url URL to get (http://...) |
7 | * @param int $timeout network timeout (in seconds) | 7 | * @param int $timeout network timeout (in seconds) |
8 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) | 8 | * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) |
9 | * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION). | ||
10 | * Can be used to add download conditions on the headers (response code, content type, etc.). | ||
9 | * | 11 | * |
10 | * @return array HTTP response headers, downloaded content | 12 | * @return array HTTP response headers, downloaded content |
11 | * | 13 | * |
@@ -29,7 +31,7 @@ | |||
29 | * @see http://stackoverflow.com/q/9183178 | 31 | * @see http://stackoverflow.com/q/9183178 |
30 | * @see http://stackoverflow.com/q/1462720 | 32 | * @see http://stackoverflow.com/q/1462720 |
31 | */ | 33 | */ |
32 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | 34 | function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null) |
33 | { | 35 | { |
34 | $urlObj = new Url($url); | 36 | $urlObj = new Url($url); |
35 | $cleanUrl = $urlObj->idnToAscii(); | 37 | $cleanUrl = $urlObj->idnToAscii(); |
@@ -75,6 +77,10 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304) | |||
75 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); | 77 | curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); |
76 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); | 78 | curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); |
77 | 79 | ||
80 | if (is_callable($curlWriteFunction)) { | ||
81 | curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction); | ||
82 | } | ||
83 | |||
78 | // Max download size management | 84 | // Max download size management |
79 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); | 85 | curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); |
80 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); | 86 | curl_setopt($ch, CURLOPT_NOPROGRESS, false); |
@@ -302,6 +308,13 @@ function server_url($server) | |||
302 | $port = $server['HTTP_X_FORWARDED_PORT']; | 308 | $port = $server['HTTP_X_FORWARDED_PORT']; |
303 | } | 309 | } |
304 | 310 | ||
311 | // This is a workaround for proxies that don't forward the scheme properly. | ||
312 | // Connecting over port 443 has to be in HTTPS. | ||
313 | // See https://github.com/shaarli/Shaarli/issues/1022 | ||
314 | if ($port == '443') { | ||
315 | $scheme = 'https'; | ||
316 | } | ||
317 | |||
305 | if (($scheme == 'http' && $port != '80') | 318 | if (($scheme == 'http' && $port != '80') |
306 | || ($scheme == 'https' && $port != '443') | 319 | || ($scheme == 'https' && $port != '443') |
307 | ) { | 320 | ) { |
diff --git a/application/Languages.php b/application/Languages.php index 357c7524..3eb3388f 100644 --- a/application/Languages.php +++ b/application/Languages.php | |||
@@ -69,6 +69,8 @@ class Languages | |||
69 | { | 69 | { |
70 | $this->conf = $conf; | 70 | $this->conf = $conf; |
71 | $confLanguage = $this->conf->get('translation.language', 'auto'); | 71 | $confLanguage = $this->conf->get('translation.language', 'auto'); |
72 | // Auto mode or invalid parameter, use the detected language. | ||
73 | // If the detected language is invalid, it doesn't matter, it will use English. | ||
72 | if ($confLanguage === 'auto' || ! $this->isValidLanguage($confLanguage)) { | 74 | if ($confLanguage === 'auto' || ! $this->isValidLanguage($confLanguage)) { |
73 | $this->language = substr($language, 0, 5); | 75 | $this->language = substr($language, 0, 5); |
74 | } else { | 76 | } else { |
diff --git a/application/LinkUtils.php b/application/LinkUtils.php index e3d95d08..3705f7e9 100644 --- a/application/LinkUtils.php +++ b/application/LinkUtils.php | |||
@@ -1,60 +1,81 @@ | |||
1 | <?php | 1 | <?php |
2 | 2 | ||
3 | /** | 3 | /** |
4 | * Extract title from an HTML document. | 4 | * Get cURL callback function for CURLOPT_WRITEFUNCTION |
5 | * | 5 | * |
6 | * @param string $html HTML content where to look for a title. | 6 | * @param string $charset to extract from the downloaded page (reference) |
7 | * @param string $title to extract from the downloaded page (reference) | ||
8 | * @param string $curlGetInfo Optionally overrides curl_getinfo function | ||
7 | * | 9 | * |
8 | * @return bool|string Extracted title if found, false otherwise. | 10 | * @return Closure |
9 | */ | 11 | */ |
10 | function html_extract_title($html) | 12 | function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo') |
11 | { | 13 | { |
12 | if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) { | 14 | /** |
13 | return trim(str_replace("\n", '', $matches[1])); | 15 | * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download). |
14 | } | 16 | * |
15 | return false; | 17 | * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
18 | * Then we extract the title and the charset and stop the download when it's done. | ||
19 | * | ||
20 | * @param resource $ch cURL resource | ||
21 | * @param string $data chunk of data being downloaded | ||
22 | * | ||
23 | * @return int|bool length of $data or false if we need to stop the download | ||
24 | */ | ||
25 | return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) { | ||
26 | $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); | ||
27 | if (!empty($responseCode) && $responseCode != 200) { | ||
28 | return false; | ||
29 | } | ||
30 | $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); | ||
31 | if (!empty($contentType) && strpos($contentType, 'text/html') === false) { | ||
32 | return false; | ||
33 | } | ||
34 | if (empty($charset)) { | ||
35 | $charset = header_extract_charset($contentType); | ||
36 | } | ||
37 | if (empty($charset)) { | ||
38 | $charset = html_extract_charset($data); | ||
39 | } | ||
40 | if (empty($title)) { | ||
41 | $title = html_extract_title($data); | ||
42 | } | ||
43 | // We got everything we want, stop the download. | ||
44 | if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) { | ||
45 | return false; | ||
46 | } | ||
47 | |||
48 | return strlen($data); | ||
49 | }; | ||
16 | } | 50 | } |
17 | 51 | ||
18 | /** | 52 | /** |
19 | * Determine charset from downloaded page. | 53 | * Extract title from an HTML document. |
20 | * Priority: | ||
21 | * 1. HTTP headers (Content type). | ||
22 | * 2. HTML content page (tag <meta charset>). | ||
23 | * 3. Use a default charset (default: UTF-8). | ||
24 | * | 54 | * |
25 | * @param array $headers HTTP headers array. | 55 | * @param string $html HTML content where to look for a title. |
26 | * @param string $htmlContent HTML content where to look for charset. | ||
27 | * @param string $defaultCharset Default charset to apply if other methods failed. | ||
28 | * | 56 | * |
29 | * @return string Determined charset. | 57 | * @return bool|string Extracted title if found, false otherwise. |
30 | */ | 58 | */ |
31 | function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8') | 59 | function html_extract_title($html) |
32 | { | 60 | { |
33 | if ($charset = headers_extract_charset($headers)) { | 61 | if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) { |
34 | return $charset; | 62 | return trim(str_replace("\n", '', $matches[1])); |
35 | } | ||
36 | |||
37 | if ($charset = html_extract_charset($htmlContent)) { | ||
38 | return $charset; | ||
39 | } | 63 | } |
40 | 64 | return false; | |
41 | return $defaultCharset; | ||
42 | } | 65 | } |
43 | 66 | ||
44 | /** | 67 | /** |
45 | * Extract charset from HTTP headers if it's defined. | 68 | * Extract charset from HTTP header if it's defined. |
46 | * | 69 | * |
47 | * @param array $headers HTTP headers array. | 70 | * @param string $header HTTP header Content-Type line. |
48 | * | 71 | * |
49 | * @return bool|string Charset string if found (lowercase), false otherwise. | 72 | * @return bool|string Charset string if found (lowercase), false otherwise. |
50 | */ | 73 | */ |
51 | function headers_extract_charset($headers) | 74 | function header_extract_charset($header) |
52 | { | 75 | { |
53 | if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) { | 76 | preg_match('/charset="?([^; ]+)/i', $header, $match); |
54 | preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match); | 77 | if (! empty($match[1])) { |
55 | if (! empty($match[1])) { | 78 | return strtolower(trim($match[1])); |
56 | return strtolower(trim($match[1])); | ||
57 | } | ||
58 | } | 79 | } |
59 | 80 | ||
60 | return false; | 81 | return false; |
diff --git a/application/Url.php b/application/Url.php index b3759377..21c17ecc 100644 --- a/application/Url.php +++ b/application/Url.php | |||
@@ -260,7 +260,7 @@ class Url | |||
260 | if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { | 260 | if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { |
261 | return $out; | 261 | return $out; |
262 | } | 262 | } |
263 | $asciiHost = idn_to_ascii($this->parts['host']); | 263 | $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46); |
264 | return str_replace($this->parts['host'], $asciiHost, $out); | 264 | return str_replace($this->parts['host'], $asciiHost, $out); |
265 | } | 265 | } |
266 | 266 | ||
diff --git a/application/config/ConfigPhp.php b/application/config/ConfigPhp.php index 2f66e8e0..8add8bcd 100644 --- a/application/config/ConfigPhp.php +++ b/application/config/ConfigPhp.php | |||
@@ -83,10 +83,10 @@ class ConfigPhp implements ConfigIO | |||
83 | 83 | ||
84 | $out = array(); | 84 | $out = array(); |
85 | foreach (self::$ROOT_KEYS as $key) { | 85 | foreach (self::$ROOT_KEYS as $key) { |
86 | $out[$key] = $GLOBALS[$key]; | 86 | $out[$key] = isset($GLOBALS[$key]) ? $GLOBALS[$key] : ''; |
87 | } | 87 | } |
88 | $out['config'] = $GLOBALS['config']; | 88 | $out['config'] = isset($GLOBALS['config']) ? $GLOBALS['config'] : []; |
89 | $out['plugins'] = !empty($GLOBALS['plugins']) ? $GLOBALS['plugins'] : array(); | 89 | $out['plugins'] = isset($GLOBALS['plugins']) ? $GLOBALS['plugins'] : []; |
90 | return $out; | 90 | return $out; |
91 | } | 91 | } |
92 | 92 | ||