aboutsummaryrefslogtreecommitdiffhomepage
path: root/application
diff options
context:
space:
mode:
Diffstat (limited to 'application')
-rw-r--r--application/HttpUtils.php21
-rw-r--r--application/Languages.php2
-rw-r--r--application/LinkUtils.php89
-rw-r--r--application/Url.php2
-rw-r--r--application/config/ConfigPhp.php6
5 files changed, 78 insertions, 42 deletions
diff --git a/application/HttpUtils.php b/application/HttpUtils.php
index ec54dcd4..83a4c5e2 100644
--- a/application/HttpUtils.php
+++ b/application/HttpUtils.php
@@ -3,9 +3,11 @@
3 * GET an HTTP URL to retrieve its content 3 * GET an HTTP URL to retrieve its content
4 * Uses the cURL library or a fallback method 4 * Uses the cURL library or a fallback method
5 * 5 *
6 * @param string $url URL to get (http://...) 6 * @param string $url URL to get (http://...)
7 * @param int $timeout network timeout (in seconds) 7 * @param int $timeout network timeout (in seconds)
8 * @param int $maxBytes maximum downloaded bytes (default: 4 MiB) 8 * @param int $maxBytes maximum downloaded bytes (default: 4 MiB)
9 * @param callable|string $curlWriteFunction Optional callback called during the download (cURL CURLOPT_WRITEFUNCTION).
10 * Can be used to add download conditions on the headers (response code, content type, etc.).
9 * 11 *
10 * @return array HTTP response headers, downloaded content 12 * @return array HTTP response headers, downloaded content
11 * 13 *
@@ -29,7 +31,7 @@
29 * @see http://stackoverflow.com/q/9183178 31 * @see http://stackoverflow.com/q/9183178
30 * @see http://stackoverflow.com/q/1462720 32 * @see http://stackoverflow.com/q/1462720
31 */ 33 */
32function get_http_response($url, $timeout = 30, $maxBytes = 4194304) 34function get_http_response($url, $timeout = 30, $maxBytes = 4194304, $curlWriteFunction = null)
33{ 35{
34 $urlObj = new Url($url); 36 $urlObj = new Url($url);
35 $cleanUrl = $urlObj->idnToAscii(); 37 $cleanUrl = $urlObj->idnToAscii();
@@ -75,6 +77,10 @@ function get_http_response($url, $timeout = 30, $maxBytes = 4194304)
75 curl_setopt($ch, CURLOPT_TIMEOUT, $timeout); 77 curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);
76 curl_setopt($ch, CURLOPT_USERAGENT, $userAgent); 78 curl_setopt($ch, CURLOPT_USERAGENT, $userAgent);
77 79
80 if (is_callable($curlWriteFunction)) {
81 curl_setopt($ch, CURLOPT_WRITEFUNCTION, $curlWriteFunction);
82 }
83
78 // Max download size management 84 // Max download size management
79 curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16); 85 curl_setopt($ch, CURLOPT_BUFFERSIZE, 1024*16);
80 curl_setopt($ch, CURLOPT_NOPROGRESS, false); 86 curl_setopt($ch, CURLOPT_NOPROGRESS, false);
@@ -302,6 +308,13 @@ function server_url($server)
302 $port = $server['HTTP_X_FORWARDED_PORT']; 308 $port = $server['HTTP_X_FORWARDED_PORT'];
303 } 309 }
304 310
311 // This is a workaround for proxies that don't forward the scheme properly.
312 // Connecting over port 443 has to be in HTTPS.
313 // See https://github.com/shaarli/Shaarli/issues/1022
314 if ($port == '443') {
315 $scheme = 'https';
316 }
317
305 if (($scheme == 'http' && $port != '80') 318 if (($scheme == 'http' && $port != '80')
306 || ($scheme == 'https' && $port != '443') 319 || ($scheme == 'https' && $port != '443')
307 ) { 320 ) {
diff --git a/application/Languages.php b/application/Languages.php
index 357c7524..3eb3388f 100644
--- a/application/Languages.php
+++ b/application/Languages.php
@@ -69,6 +69,8 @@ class Languages
69 { 69 {
70 $this->conf = $conf; 70 $this->conf = $conf;
71 $confLanguage = $this->conf->get('translation.language', 'auto'); 71 $confLanguage = $this->conf->get('translation.language', 'auto');
72 // Auto mode or invalid parameter, use the detected language.
73 // If the detected language is invalid, it doesn't matter, it will use English.
72 if ($confLanguage === 'auto' || ! $this->isValidLanguage($confLanguage)) { 74 if ($confLanguage === 'auto' || ! $this->isValidLanguage($confLanguage)) {
73 $this->language = substr($language, 0, 5); 75 $this->language = substr($language, 0, 5);
74 } else { 76 } else {
diff --git a/application/LinkUtils.php b/application/LinkUtils.php
index e3d95d08..3705f7e9 100644
--- a/application/LinkUtils.php
+++ b/application/LinkUtils.php
@@ -1,60 +1,81 @@
1<?php 1<?php
2 2
3/** 3/**
4 * Extract title from an HTML document. 4 * Get cURL callback function for CURLOPT_WRITEFUNCTION
5 * 5 *
6 * @param string $html HTML content where to look for a title. 6 * @param string $charset to extract from the downloaded page (reference)
7 * @param string $title to extract from the downloaded page (reference)
8 * @param string $curlGetInfo Optionally overrides curl_getinfo function
7 * 9 *
8 * @return bool|string Extracted title if found, false otherwise. 10 * @return Closure
9 */ 11 */
10function html_extract_title($html) 12function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
11{ 13{
12 if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) { 14 /**
13 return trim(str_replace("\n", '', $matches[1])); 15 * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
14 } 16 *
15 return false; 17 * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
18 * Then we extract the title and the charset and stop the download when it's done.
19 *
20 * @param resource $ch cURL resource
21 * @param string $data chunk of data being downloaded
22 *
23 * @return int|bool length of $data or false if we need to stop the download
24 */
25 return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) {
26 $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
27 if (!empty($responseCode) && $responseCode != 200) {
28 return false;
29 }
30 $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
31 if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
32 return false;
33 }
34 if (empty($charset)) {
35 $charset = header_extract_charset($contentType);
36 }
37 if (empty($charset)) {
38 $charset = html_extract_charset($data);
39 }
40 if (empty($title)) {
41 $title = html_extract_title($data);
42 }
43 // We got everything we want, stop the download.
44 if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
45 return false;
46 }
47
48 return strlen($data);
49 };
16} 50}
17 51
18/** 52/**
19 * Determine charset from downloaded page. 53 * Extract title from an HTML document.
20 * Priority:
21 * 1. HTTP headers (Content type).
22 * 2. HTML content page (tag <meta charset>).
23 * 3. Use a default charset (default: UTF-8).
24 * 54 *
25 * @param array $headers HTTP headers array. 55 * @param string $html HTML content where to look for a title.
26 * @param string $htmlContent HTML content where to look for charset.
27 * @param string $defaultCharset Default charset to apply if other methods failed.
28 * 56 *
29 * @return string Determined charset. 57 * @return bool|string Extracted title if found, false otherwise.
30 */ 58 */
31function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8') 59function html_extract_title($html)
32{ 60{
33 if ($charset = headers_extract_charset($headers)) { 61 if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
34 return $charset; 62 return trim(str_replace("\n", '', $matches[1]));
35 }
36
37 if ($charset = html_extract_charset($htmlContent)) {
38 return $charset;
39 } 63 }
40 64 return false;
41 return $defaultCharset;
42} 65}
43 66
44/** 67/**
45 * Extract charset from HTTP headers if it's defined. 68 * Extract charset from HTTP header if it's defined.
46 * 69 *
47 * @param array $headers HTTP headers array. 70 * @param string $header HTTP header Content-Type line.
48 * 71 *
49 * @return bool|string Charset string if found (lowercase), false otherwise. 72 * @return bool|string Charset string if found (lowercase), false otherwise.
50 */ 73 */
51function headers_extract_charset($headers) 74function header_extract_charset($header)
52{ 75{
53 if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) { 76 preg_match('/charset="?([^; ]+)/i', $header, $match);
54 preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match); 77 if (! empty($match[1])) {
55 if (! empty($match[1])) { 78 return strtolower(trim($match[1]));
56 return strtolower(trim($match[1]));
57 }
58 } 79 }
59 80
60 return false; 81 return false;
diff --git a/application/Url.php b/application/Url.php
index b3759377..21c17ecc 100644
--- a/application/Url.php
+++ b/application/Url.php
@@ -260,7 +260,7 @@ class Url
260 if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) { 260 if (! function_exists('idn_to_ascii') || ! isset($this->parts['host'])) {
261 return $out; 261 return $out;
262 } 262 }
263 $asciiHost = idn_to_ascii($this->parts['host']); 263 $asciiHost = idn_to_ascii($this->parts['host'], 0, INTL_IDNA_VARIANT_UTS46);
264 return str_replace($this->parts['host'], $asciiHost, $out); 264 return str_replace($this->parts['host'], $asciiHost, $out);
265 } 265 }
266 266
diff --git a/application/config/ConfigPhp.php b/application/config/ConfigPhp.php
index 2f66e8e0..8add8bcd 100644
--- a/application/config/ConfigPhp.php
+++ b/application/config/ConfigPhp.php
@@ -83,10 +83,10 @@ class ConfigPhp implements ConfigIO
83 83
84 $out = array(); 84 $out = array();
85 foreach (self::$ROOT_KEYS as $key) { 85 foreach (self::$ROOT_KEYS as $key) {
86 $out[$key] = $GLOBALS[$key]; 86 $out[$key] = isset($GLOBALS[$key]) ? $GLOBALS[$key] : '';
87 } 87 }
88 $out['config'] = $GLOBALS['config']; 88 $out['config'] = isset($GLOBALS['config']) ? $GLOBALS['config'] : [];
89 $out['plugins'] = !empty($GLOBALS['plugins']) ? $GLOBALS['plugins'] : array(); 89 $out['plugins'] = isset($GLOBALS['plugins']) ? $GLOBALS['plugins'] : [];
90 return $out; 90 return $out;
91 } 91 }
92 92