From 2925687e1e86dc113116330efd547b9db5c0f1a6 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Fri, 19 Feb 2016 19:37:13 +0100 Subject: Markdown: don't escape content + sanitize sensible tags Instead of trying to fix broken content for Markdown parsing, parse it unescaped, then sanatize sensible tags such as scripts, etc. --- application/Utils.php | 18 +++++++++++++++++- plugins/markdown/markdown.php | 37 ++++++++++++++++++++++++++++-------- tests/plugins/PluginMarkdownTest.php | 19 +++++++++++------- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/application/Utils.php b/application/Utils.php index 10d60698..868946df 100644 --- a/application/Utils.php +++ b/application/Utils.php @@ -62,13 +62,29 @@ function endsWith($haystack, $needle, $case=true) } /** - * htmlspecialchars wrapper + * Htmlspecialchars wrapper + * + * @param string $str the string to escape. + * + * @return string escaped. */ function escape($str) { return htmlspecialchars($str, ENT_COMPAT, 'UTF-8', false); } +/** + * Reverse the escape function. + * + * @param string $str the string to unescape. + * + * @return string unescaped string. + */ +function unescape($str) +{ + return htmlspecialchars_decode($str); +} + /** * Link sanitization before templating */ diff --git a/plugins/markdown/markdown.php b/plugins/markdown/markdown.php index 3630ef14..a45b6574 100644 --- a/plugins/markdown/markdown.php +++ b/plugins/markdown/markdown.php @@ -117,23 +117,43 @@ function reverse_space2nbsp($description) } /** - * Remove '>' at start of line auto generated by Shaarli core system - * to allow markdown blockquotes. + * Remove dangerous HTML tags (tags, iframe, etc.). + * Doesn't affect content (already escaped by Parsedown). * * @param string $description input description text. * - * @return string $description without HTML links. + * @return string given string escaped. */ -function reset_quote_tags($description) +function sanitize_html($description) { - return preg_replace('/^( *)> /m', '$1> ', $description); + $escapeTags = array( + 'script', + 'style', + 'link', + 'iframe', + 'frameset', + 'frame', + ); + foreach ($escapeTags as $tag) { + $description = preg_replace_callback( + '#<\s*'. $tag .'[^>]*>(.*]*>)?#is', + function ($match) { return escape($match[0]); }, + $description); + } + $description = preg_replace( + '#(<[^>]+)on[a-z]*="[^"]*"#is', + '$1', + $description); + return $description; } /** * Render shaare contents through Markdown parser. * 1. Remove HTML generated by Shaarli core. - * 2. Generate markdown descriptions. - * 3. Wrap description in 'markdown' CSS class. + * 2. Reverse the escape function. + * 3. Generate markdown descriptions. + * 4. Sanitize sensible HTML tags for security. + * 5. Wrap description in 'markdown' CSS class. * * @param string $description input description text. * @@ -147,11 +167,12 @@ function process_markdown($description) $processedDescription = reverse_text2clickable($processedDescription); $processedDescription = reverse_nl2br($processedDescription); $processedDescription = reverse_space2nbsp($processedDescription); - $processedDescription = reset_quote_tags($processedDescription); + $processedDescription = unescape($processedDescription); $processedDescription = $parsedown ->setMarkupEscaped(false) ->setBreaksEnabled(true) ->text($processedDescription); + $processedDescription = sanitize_html($processedDescription); $processedDescription = '
'. $processedDescription . '
'; return $processedDescription; diff --git a/tests/plugins/PluginMarkdownTest.php b/tests/plugins/PluginMarkdownTest.php index 455f5ba7..8e1a128a 100644 --- a/tests/plugins/PluginMarkdownTest.php +++ b/tests/plugins/PluginMarkdownTest.php @@ -100,13 +100,18 @@ class PluginMarkdownTest extends PHPUnit_Framework_TestCase } /** - * Test reset_quote_tags() + * Test sanitize_html(). */ - function testResetQuoteTags() - { - $text = '> quote1'. PHP_EOL . ' > quote2 ' . PHP_EOL . 'noquote'; - $processedText = escape($text); - $reversedText = reset_quote_tags($processedText); - $this->assertEquals($text, $reversedText); + function testSanitizeHtml() { + $input = '< script src="js.js"/>'; + $input .= '< script attr>alert(\'xss\');'; + $input .= ''; + $output = escape($input); + $input .= 'link'; + $output .= 'link'; + $this->assertEquals($output, sanitize_html($input)); + // Do not touch escaped HTML. + $input = escape($input); + $this->assertEquals($input, sanitize_html($input)); } } -- cgit v1.2.3