From 1ea09a1b8b8b7f68ec8c7ef069393ee58a0e623a Mon Sep 17 00:00:00 2001
From: ArthurHoaro
Date: Sat, 26 Sep 2020 13:28:38 +0200
Subject: Fix warning if the encoding retrieved from external headers is invalid

Also fixed the regex to support this failing header: charset="utf-8"\r\n"
---
 application/bookmark/LinkUtils.php                    |  2 +-
 .../front/controller/admin/ManageShaareController.php |  2 +-
 tests/bookmark/LinkUtilsTest.php                      | 13 +++++++++++++
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index 68914fca..e7af4d55 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -26,7 +26,7 @@ function html_extract_title($html)
  */
 function header_extract_charset($header)
 {
-    preg_match('/charset="?([^; ]+)/i', $header, $match);
+    preg_match('/charset=["\']?([^; "\']+)/i', $header, $match);
     if (! empty($match[1])) {
         return strtolower(trim($match[1]));
     }
diff --git a/application/front/controller/admin/ManageShaareController.php b/application/front/controller/admin/ManageShaareController.php
index ca2da9b5..ffb0dae4 100644
--- a/application/front/controller/admin/ManageShaareController.php
+++ b/application/front/controller/admin/ManageShaareController.php
@@ -69,7 +69,7 @@ class ManageShaareController extends ShaarliAdminController
                         $retrieveDescription
                     )
                 );
-                if (! empty($title) && strtolower($charset) !== 'utf-8') {
+                if (! empty($title) && strtolower($charset) !== 'utf-8' && mb_check_encoding($charset)) {
                     $title = mb_convert_encoding($title, 'utf-8', $charset);
                 }
             }
diff --git a/tests/bookmark/LinkUtilsTest.php b/tests/bookmark/LinkUtilsTest.php
index 7d4a7b89..0d07897b 100644
--- a/tests/bookmark/LinkUtilsTest.php
+++ b/tests/bookmark/LinkUtilsTest.php
@@ -42,6 +42,19 @@ class LinkUtilsTest extends TestCase
         $this->assertEquals(strtolower($charset), header_extract_charset($headers));
     }
 
+    /**
+     * Test headers_extract_charset() when the charset is found with odd quotes.
+     */
+    public function testHeadersExtractExistentCharsetWithQuotes()
+    {
+        $charset = 'x-MacCroatian';
+        $headers = 'text/html; charset="' . $charset . '"otherstuff="test"';
+        $this->assertEquals(strtolower($charset), header_extract_charset($headers));
+
+        $headers = 'text/html; charset=\'' . $charset . '\'otherstuff="test"';
+        $this->assertEquals(strtolower($charset), header_extract_charset($headers));
+    }
+
     /**
      * Test headers_extract_charset() when the charset is not found.
      */
-- 
cgit v1.2.3