about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: ArthurHoaro <arthur@hoa.ro> 2020-09-26 13:28:38 +0200
committer: ArthurHoaro <arthur@hoa.ro> 2020-09-30 11:11:19 +0200
commit: 1ea09a1b8b8b7f68ec8c7ef069393ee58a0e623a (patch)
tree: aa002767f8d59c11c71fab3d1b27d22978cb85ec
parent: d0ae1ba273b6decea8d35ef79bfabb055b0fb6df (diff)
download: Shaarli-1ea09a1b8b8b7f68ec8c7ef069393ee58a0e623a.tar.gz
Shaarli-1ea09a1b8b8b7f68ec8c7ef069393ee58a0e623a.tar.zst
Shaarli-1ea09a1b8b8b7f68ec8c7ef069393ee58a0e623a.zip
Fix warning if the encoding retrieved from external headers is invalid
Also fixed the regex to support this failing header: charset="utf-8"\r\n"
-rw-r--r-- application/bookmark/LinkUtils.php 2
-rw-r--r-- application/front/controller/admin/ManageShaareController.php 2
-rw-r--r-- tests/bookmark/LinkUtilsTest.php 13
3 files changed, 15 insertions, 2 deletions
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index 68914fca..e7af4d55 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -26,7 +26,7 @@ function html_extract_title($html)
26 */ 26 */
27function header_extract_charset($header) 27function header_extract_charset($header)
28{ 28{
29 preg_match('/charset="?([^; ]+)/i', $header, $match); 29 preg_match('/charset=["\']?([^; "\']+)/i', $header, $match);
30 if (! empty($match[1])) { 30 if (! empty($match[1])) {
31 return strtolower(trim($match[1])); 31 return strtolower(trim($match[1]));
32 } 32 }
diff --git a/application/front/controller/admin/ManageShaareController.php b/application/front/controller/admin/ManageShaareController.php
index ca2da9b5..ffb0dae4 100644
--- a/application/front/controller/admin/ManageShaareController.php
+++ b/application/front/controller/admin/ManageShaareController.php
@@ -69,7 +69,7 @@ class ManageShaareController extends ShaarliAdminController
69 $retrieveDescription 69 $retrieveDescription
70 ) 70 )
71 ); 71 );
72 if (! empty($title) && strtolower($charset) !== 'utf-8') { 72 if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
73 $title = mb_convert_encoding($title, 'utf-8', $charset); 73 $title = mb_convert_encoding($title, 'utf-8', $charset);
74 } 74 }
75 } 75 }
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php
index 7d4a7b89..0d07897b 100644
--- a/tests/bookmark/LinkUtilsTest.php
+++ b/tests/bookmark/LinkUtilsTest.php
@@ -43,6 +43,19 @@ class LinkUtilsTest extends TestCase
43 } 43 }
44 44
45 /** 45 /**
46 * Test headers_extract_charset() when the charset is found with odd quotes.
47 */
48 public function testHeadersExtractExistentCharsetWithQuotes()
49 {
50 $charset = 'x-MacCroatian';
51 $headers = 'text/html; charset="' . $charset . '"otherstuff="test"';
52 $this->assertEquals(strtolower($charset), header_extract_charset($headers));
53
54 $headers = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
55 $this->assertEquals(strtolower($charset), header_extract_charset($headers));
56 }
57
58 /**
46 * Test headers_extract_charset() when the charset is not found. 59 * Test headers_extract_charset() when the charset is not found.
47 */ 60 */
48 public function testHeadersExtractNonExistentCharset() 61 public function testHeadersExtractNonExistentCharset()