From ee6f4b64a91d76070f930cdf7602ab4686714c7a Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Fri, 6 Jan 2017 18:54:29 +0100 Subject: Cleanup: use safe boolean comparisons Signed-off-by: VirtualTam --- application/LinkUtils.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'application/LinkUtils.php') diff --git a/application/LinkUtils.php b/application/LinkUtils.php index cf58f808..976474de 100644 --- a/application/LinkUtils.php +++ b/application/LinkUtils.php @@ -89,7 +89,9 @@ function count_private($links) { $cpt = 0; foreach ($links as $link) { - $cpt = $link['private'] == true ? $cpt + 1 : $cpt; + if ($link['private']) { + $cpt += 1; + } } return $cpt; -- cgit v1.2.3 From 601faf97516a836e4ae57dc4cecb9225c0a04338 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Fri, 29 Sep 2017 18:52:38 +0200 Subject: Fix parsing for description links with parentheses With markdown plugin disabled relates to #966 --- application/LinkUtils.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'application/LinkUtils.php') diff --git a/application/LinkUtils.php b/application/LinkUtils.php index 976474de..267e62cd 100644 --- a/application/LinkUtils.php +++ b/application/LinkUtils.php @@ -109,7 +109,7 @@ function count_private($links) */ function text2clickable($text, $redirector = '') { - $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[[:alnum:]]/?)!si'; + $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si'; if (empty($redirector)) { return preg_replace($regex, '<a href="$1">$1</a>', $text); -- cgit v1.2.3 From d65342e304f92643ba922200953cfebc51e1e482 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Sat, 30 Sep 2017 11:04:13 +0200 Subject: Extract the title/charset during page download, and check content type Use CURLOPT_WRITEFUNCTION to check the response code and content type (only allow HTML). Also extract the title and charset during downloading chunk of data, and stop it when everything has been extracted. 
Closes #579 --- application/LinkUtils.php | 89 +++++++++++++++++++++++++++++------------------ 1 file changed, 55 insertions(+), 34 deletions(-) (limited to 'application/LinkUtils.php') diff --git a/application/LinkUtils.php b/application/LinkUtils.php index 976474de..c0dd32a6 100644 --- a/application/LinkUtils.php +++ b/application/LinkUtils.php @@ -1,60 +1,81 @@ (.*?)!is', $html, $matches)) { - return trim(str_replace("\n", '', $matches[1])); - } - return false; + /** + * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download). + * + * While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text' + * Then we extract the title and the charset and stop the download when it's done. + * + * @param resource $ch cURL resource + * @param string $data chunk of data being downloaded + * + * @return int|bool length of $data or false if we need to stop the download + */ + return function(&$ch, $data) use ($curlGetInfo, &$charset, &$title) { + $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); + if (!empty($responseCode) && $responseCode != 200) { + return false; + } + $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); + if (!empty($contentType) && strpos($contentType, 'text/html') === false) { + return false; + } + if (empty($charset)) { + $charset = header_extract_charset($contentType); + } + if (empty($charset)) { + $charset = html_extract_charset($data); + } + if (empty($title)) { + $title = html_extract_title($data); + } + // We got everything we want, stop the download. + if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) { + return false; + } + + return strlen($data); + }; } /** - * Determine charset from downloaded page. - * Priority: - * 1. HTTP headers (Content type). - * 2. HTML content page (tag ). - * 3. Use a default charset (default: UTF-8). + * Extract title from an HTML document. * - * @param array $headers HTTP headers array. 
- * @param string $htmlContent HTML content where to look for charset. - * @param string $defaultCharset Default charset to apply if other methods failed. + * @param string $html HTML content where to look for a title. * - * @return string Determined charset. + * @return bool|string Extracted title if found, false otherwise. */ -function get_charset($headers, $htmlContent, $defaultCharset = 'utf-8') +function html_extract_title($html) { - if ($charset = headers_extract_charset($headers)) { - return $charset; - } - - if ($charset = html_extract_charset($htmlContent)) { - return $charset; + if (preg_match('!(.*?)!is', $html, $matches)) { + return trim(str_replace("\n", '', $matches[1])); } - - return $defaultCharset; + return false; } /** - * Extract charset from HTTP headers if it's defined. + * Extract charset from HTTP header if it's defined. * - * @param array $headers HTTP headers array. + * @param string $header HTTP header Content-Type line. * * @return bool|string Charset string if found (lowercase), false otherwise. */ -function headers_extract_charset($headers) +function header_extract_charset($header) { - if (! empty($headers['Content-Type']) && strpos($headers['Content-Type'], 'charset=') !== false) { - preg_match('/charset="?([^; ]+)/i', $headers['Content-Type'], $match); - if (! empty($match[1])) { - return strtolower(trim($match[1])); - } + preg_match('/charset="?([^; ]+)/i', $header, $match); + if (! 
empty($match[1])) { + return strtolower(trim($match[1])); } return false; -- cgit v1.2.3 From fd08b50a80c3aed25f9e2a19cbfe9fb3ad35cf1f Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Tue, 7 Nov 2017 20:23:58 +0100 Subject: Don't URL encode description links if parameter 'redirector.encode_url' is set to false --- application/LinkUtils.php | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'application/LinkUtils.php') diff --git a/application/LinkUtils.php b/application/LinkUtils.php index 267e62cd..e3d95d08 100644 --- a/application/LinkUtils.php +++ b/application/LinkUtils.php @@ -102,12 +102,13 @@ function count_private($links) * * @param string $text input string. * @param string $redirector if a redirector is set, use it to gerenate links. + * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not. * * @return string returns $text with all links converted to HTML links. * * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722 */ -function text2clickable($text, $redirector = '') +function text2clickable($text, $redirector = '', $urlEncode = true) { $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si'; @@ -117,8 +118,9 @@ function text2clickable($text, $redirector = '') // Redirector is set, urlencode the final URL. return preg_replace_callback( $regex, - function ($matches) use ($redirector) { - return '<a href="' . $redirector . urlencode($matches[1]) .'">'. $matches[1] .'</a>'; + function ($matches) use ($redirector, $urlEncode) { + $url = $urlEncode ? urlencode($matches[1]) : $matches[1]; + return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>'; }, $text ); @@ -164,12 +166,13 @@ function space2nbsp($text) * * @param string $description shaare's description. * @param string $redirector if a redirector is set, use it to gerenate links. + * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not. * @param string $indexUrl URL to Shaarli's index. - * + * @return string formatted description. 
*/ -function format_description($description, $redirector = '', $indexUrl = '') { - return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector), $indexUrl))); +function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '') { + return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl))); } /** -- cgit v1.2.3