1 files changed, 68 insertions, 42 deletions
diff --git a/application/LinkUtils.php b/application/LinkUtils.php
index cf58f808..3705f7e9 100644
--- a/application/LinkUtils.php
+++ b/application/LinkUtils.php
@@ -1,60 +1,81 @@
 <?php
 /**
- * Extract title from an HTML document.
+ * Get cURL callback function for CURLOPT_WRITEFUNCTION
 *
- * @param string $html HTML content where to look for a title.
+ * @param string $charset     to extract from the downloaded page (reference)
+ * @param string $title       to extract from the downloaded page (reference)
+ * @param string $curlGetInfo Optionnaly overrides curl_getinfo function
 *
- * @return bool|string Extracted title if found, false otherwise.
+ * @return Closure
 */
-function html_extract_title($html)
+function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
 {
-    if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
+    /**
-        return trim(str_replace("\n", '', $matches[1]));
+     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
-    }
+     *
-    return false;
+     * While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
+     * Then we extract the title and the charset and stop the download when it's done.
+     *
+     * @param resource $ch   cURL resource
+     * @param string   $data chunk of data being downloaded
+     *
+     * @return int|bool length of $data or false if we need to stop the download
+     */
+    return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) {
+        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
+        if (!empty($responseCode) && $responseCode != 200) {
+            return false;
+        }
+        $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
+        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
+            return false;
+        }
+        if (empty($charset)) {
+            $charset = header_extract_charset($contentType);
+        }
+        if (empty($charset)) {
+            $charset = html_extract_charset($data);
+        }
+        if (empty($title)) {
+            $title = html_extract_title($data);
+        }
+        // We got everything we want, stop the download.
+        if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
+            return false;
+        }
+        return strlen($data);
+    };
 }
 /**
- * Determine charset from downloaded page.
+ * Extract title from an HTML document.
- * Priority:
- *   1. HTTP headers (Content type).
- *   2. HTML content page (tag <meta charset>).
- *   3. Use a default charset (default: UTF-8).
 *
- * @param array  $headers           HTTP headers array.
+ * @param string $html HTML content where to look for a title.
- * @param string $htmlContent       HTML content where to look for charset.
- * @param string $defaultCharset    Default charset to apply if other methods failed.
 *
- * @return string Determined charset.
+ * @return bool|string Extracted title if found, false otherwise.
 */
-function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')
+function html_extract_title($html)
 {
-    if ($charset = headers_extract_charset($headers)) {
+    if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
-        return $charset;
+        return trim(str_replace("\n", '', $matches[1]));
-    }
-    if ($charset = html_extract_charset($htmlContent)) {
-        return $charset;
    }
+    return false;
-    return $defaultCharset;
 }
 /**
- * Extract charset from HTTP headers if it's defined.
+ * Extract charset from HTTP header if it's defined.
 *
- * @param array $headers HTTP headers array.
+ * @param string $header HTTP header Content-Type line.
 *
 * @return bool|string Charset string if found (lowercase), false otherwise.
 */
-function headers_extract_charset($headers)
+function header_extract_charset($header)
 {
-    if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) {
+    preg_match('/charset="?([^; ]+)/i', $header, $match);
-        preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match);
+    if (! empty($match[1])) {
-        if (! empty($match[1])) {
+        return strtolower(trim($match[1]));
-            return strtolower(trim($match[1]));
-        }
    }
    return false;
@@ -89,7 +110,9 @@ function count_private($links)
 {
    $cpt = 0;
    foreach ($links as $link) {
-        $cpt = $link['private'] == true ? $cpt + 1 : $cpt;
+        if ($link['private']) {
+            $cpt += 1;
+        }
    }
    return $cpt;
@@ -100,14 +123,15 @@ function count_private($links)
 *
 * @param string $text       input string.
 * @param string $redirector if a redirector is set, use it to gerenate links.
+ * @param bool   $urlEncode  Use `urlencode()` on the URL after the redirector or not.
 *
 * @return string returns $text with all links converted to HTML links.
 *
 * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
 */
-function text2clickable($text, $redirector = '')
+function text2clickable($text, $redirector = '', $urlEncode = true)
 {
-    $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si';
+    $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
    if (empty($redirector)) {
        return preg_replace($regex, '<a href="$1">$1</a>', $text);
@@ -115,8 +139,9 @@ function text2clickable($text, $redirector = '')
    // Redirector is set, urlencode the final URL.
    return preg_replace_callback(
        $regex,
-        function ($matches) use ($redirector) {
+        function ($matches) use ($redirector, $urlEncode) {
-            return '<a href="' . $redirector . urlencode($matches[1]) .'">'. $matches[1] .'</a>';
+            $url = $urlEncode ? urlencode($matches[1]) : $matches[1];
+            return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>';
        },
        $text
    );
@@ -162,12 +187,13 @@ function space2nbsp($text)
 *
 * @param string $description shaare's description.
 * @param string $redirector  if a redirector is set, use it to gerenate links.
+ * @param bool   $urlEncode  Use `urlencode()` on the URL after the redirector or not.
 * @param string $indexUrl    URL to Shaarli's index.
- *
 * @return string formatted description.
 */
-function format_description($description, $redirector = '', $indexUrl = '') {
+function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '') {
-    return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector), $indexUrl)));
+    return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl)));
 }
 /**

diff --git a/application/LinkUtils.php b/application/LinkUtils.php index cf58f808..3705f7e9 100644 --- a/application/LinkUtils.php +++ b/application/LinkUtils.php
@@ -1,60 +1,81 @@
1	<?php	1	<?php
2		2
3	/**	3	/**
4	* Extract title from an HTML document.	4	* Get cURL callback function for CURLOPT_WRITEFUNCTION
5	*	5	*
6	* @param string $html HTML content where to look for a title.	6	* @param string $charset to extract from the downloaded page (reference)
		7	* @param string $title to extract from the downloaded page (reference)
		8	* @param string $curlGetInfo Optionnaly overrides curl_getinfo function
7	*	9	*
8	* @return bool|string Extracted title if found, false otherwise.	10	* @return Closure
9	*/	11	*/
10	function html_extract_title($html)	12	function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
11	{	13	{
12	if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {	14	/**
13	return trim(str_replace("\n", '', $matches[1]));	15	* cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
14	}	16	*
15	return false;	17	* While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text'
		18	* Then we extract the title and the charset and stop the download when it's done.
		19	*
		20	* @param resource $ch cURL resource
		21	* @param string $data chunk of data being downloaded
		22	*
		23	* @return int|bool length of $data or false if we need to stop the download
		24	*/
		25	return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) {
		26	$responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
		27	if (!empty($responseCode) && $responseCode != 200) {
		28	return false;
		29	}
		30	$contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
		31	if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
		32	return false;
		33	}
		34	if (empty($charset)) {
		35	$charset = header_extract_charset($contentType);
		36	}
		37	if (empty($charset)) {
		38	$charset = html_extract_charset($data);
		39	}
		40	if (empty($title)) {
		41	$title = html_extract_title($data);
		42	}
		43	// We got everything we want, stop the download.
		44	if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
		45	return false;
		46	}
		47
		48	return strlen($data);
		49	};
16	}	50	}
17		51
18	/**	52	/**
19	* Determine charset from downloaded page.	53	* Extract title from an HTML document.
20	* Priority:
21	* 1. HTTP headers (Content type).
22	* 2. HTML content page (tag <meta charset>).
23	* 3. Use a default charset (default: UTF-8).
24	*	54	*
25	* @param array $headers HTTP headers array.	55	* @param string $html HTML content where to look for a title.
26	* @param string $htmlContent HTML content where to look for charset.
27	* @param string $defaultCharset Default charset to apply if other methods failed.
28	*	56	*
29	* @return string Determined charset.	57	* @return bool|string Extracted title if found, false otherwise.
30	*/	58	*/
31	function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8')	59	function html_extract_title($html)
32	{	60	{
33	if ($charset = headers_extract_charset($headers)) {	61	if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) {
34	return $charset;	62	return trim(str_replace("\n", '', $matches[1]));
35	}
36
37	if ($charset = html_extract_charset($htmlContent)) {
38	return $charset;
39	}	63	}
40		64	return false;
41	return $defaultCharset;
42	}	65	}
43		66
44	/**	67	/**
45	* Extract charset from HTTP headers if it's defined.	68	* Extract charset from HTTP header if it's defined.
46	*	69	*
47	* @param array $headers HTTP headers array.	70	* @param string $header HTTP header Content-Type line.
48	*	71	*
49	* @return bool|string Charset string if found (lowercase), false otherwise.	72	* @return bool|string Charset string if found (lowercase), false otherwise.
50	*/	73	*/
51	function headers_extract_charset($headers)	74	function header_extract_charset($header)
52	{	75	{
53	if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) {	76	preg_match('/charset="?([^; ]+)/i', $header, $match);
54	preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match);	77	if (! empty($match[1])) {
55	if (! empty($match[1])) {	78	return strtolower(trim($match[1]));
56	return strtolower(trim($match[1]));
57	}
58	}	79	}
59		80
60	return false;	81	return false;
@@ -89,7 +110,9 @@ function count_private($links)
89	{	110	{
90	$cpt = 0;	111	$cpt = 0;
91	foreach ($links as $link) {	112	foreach ($links as $link) {
92	$cpt = $link['private'] == true ? $cpt + 1 : $cpt;	113	if ($link['private']) {
		114	$cpt += 1;
		115	}
93	}	116	}
94		117
95	return $cpt;	118	return $cpt;
@@ -100,14 +123,15 @@ function count_private($links)
100	*	123	*
101	* @param string $text input string.	124	* @param string $text input string.
102	* @param string $redirector if a redirector is set, use it to gerenate links.	125	* @param string $redirector if a redirector is set, use it to gerenate links.
		126	* @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not.
103	*	127	*
104	* @return string returns $text with all links converted to HTML links.	128	* @return string returns $text with all links converted to HTML links.
105	*	129	*
106	* @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722	130	* @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
107	*/	131	*/
108	function text2clickable($text, $redirector = '')	132	function text2clickable($text, $redirector = '', $urlEncode = true)
109	{	133	{
110	$regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si';	134	$regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
111		135
112	if (empty($redirector)) {	136	if (empty($redirector)) {
113	return preg_replace($regex, '<a href="$1">$1</a>', $text);	137	return preg_replace($regex, '<a href="$1">$1</a>', $text);
@@ -115,8 +139,9 @@ function text2clickable($text, $redirector = '')
115	// Redirector is set, urlencode the final URL.	139	// Redirector is set, urlencode the final URL.
116	return preg_replace_callback(	140	return preg_replace_callback(
117	$regex,	141	$regex,
118	function ($matches) use ($redirector) {	142	function ($matches) use ($redirector, $urlEncode) {
119	return '<a href="' . $redirector . urlencode($matches[1]) .'">'. $matches[1] .'</a>';	143	$url = $urlEncode ? urlencode($matches[1]) : $matches[1];
		144	return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>';
120	},	145	},
121	$text	146	$text
122	);	147	);
@@ -162,12 +187,13 @@ function space2nbsp($text)
162	*	187	*
163	* @param string $description shaare's description.	188	* @param string $description shaare's description.
164	* @param string $redirector if a redirector is set, use it to gerenate links.	189	* @param string $redirector if a redirector is set, use it to gerenate links.
		190	* @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not.
165	* @param string $indexUrl URL to Shaarli's index.	191	* @param string $indexUrl URL to Shaarli's index.
166	*	192
167	* @return string formatted description.	193	* @return string formatted description.
168	*/	194	*/
169	function format_description($description, $redirector = '', $indexUrl = '') {	195	function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '') {
170	return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector), $indexUrl)));	196	return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl)));
171	}	197	}
172		198
173	/**	199	/**