diff options
author | ArthurHoaro <arthur@hoa.ro> | 2016-02-19 19:37:13 +0100 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2016-02-19 19:37:13 +0100 |
commit | 2925687e1e86dc113116330efd547b9db5c0f1a6 (patch) | |
tree | 706706ddfc9472e51494db912f9bee03972ce93f | |
parent | bfec695df1205864b46ca7175e1598b184602687 (diff) | |
download | Shaarli-2925687e1e86dc113116330efd547b9db5c0f1a6.tar.gz Shaarli-2925687e1e86dc113116330efd547b9db5c0f1a6.tar.zst Shaarli-2925687e1e86dc113116330efd547b9db5c0f1a6.zip |
Markdown: don't escape content + sanitize sensible tags
Instead of trying to fix broken content for Markdown parsing, parse it unescaped, then sanatize sensible tags such as scripts, etc.
-rw-r--r-- | application/Utils.php | 18 | ||||
-rw-r--r-- | plugins/markdown/markdown.php | 37 | ||||
-rw-r--r-- | tests/plugins/PluginMarkdownTest.php | 19 |
3 files changed, 58 insertions, 16 deletions
diff --git a/application/Utils.php b/application/Utils.php index 10d60698..868946df 100644 --- a/application/Utils.php +++ b/application/Utils.php | |||
@@ -62,7 +62,11 @@ function endsWith($haystack, $needle, $case=true) | |||
62 | } | 62 | } |
63 | 63 | ||
64 | /** | 64 | /** |
65 | * htmlspecialchars wrapper | 65 | * Htmlspecialchars wrapper |
66 | * | ||
67 | * @param string $str the string to escape. | ||
68 | * | ||
69 | * @return string escaped. | ||
66 | */ | 70 | */ |
67 | function escape($str) | 71 | function escape($str) |
68 | { | 72 | { |
@@ -70,6 +74,18 @@ function escape($str) | |||
70 | } | 74 | } |
71 | 75 | ||
72 | /** | 76 | /** |
77 | * Reverse the escape function. | ||
78 | * | ||
79 | * @param string $str the string to unescape. | ||
80 | * | ||
81 | * @return string unescaped string. | ||
82 | */ | ||
83 | function unescape($str) | ||
84 | { | ||
85 | return htmlspecialchars_decode($str); | ||
86 | } | ||
87 | |||
88 | /** | ||
73 | * Link sanitization before templating | 89 | * Link sanitization before templating |
74 | */ | 90 | */ |
75 | function sanitizeLink(&$link) | 91 | function sanitizeLink(&$link) |
diff --git a/plugins/markdown/markdown.php b/plugins/markdown/markdown.php index 3630ef14..a45b6574 100644 --- a/plugins/markdown/markdown.php +++ b/plugins/markdown/markdown.php | |||
@@ -117,23 +117,43 @@ function reverse_space2nbsp($description) | |||
117 | } | 117 | } |
118 | 118 | ||
119 | /** | 119 | /** |
120 | * Remove '>' at start of line auto generated by Shaarli core system | 120 | * Remove dangerous HTML tags (tags, iframe, etc.). |
121 | * to allow markdown blockquotes. | 121 | * Doesn't affect <code> content (already escaped by Parsedown). |
122 | * | 122 | * |
123 | * @param string $description input description text. | 123 | * @param string $description input description text. |
124 | * | 124 | * |
125 | * @return string $description without HTML links. | 125 | * @return string given string escaped. |
126 | */ | 126 | */ |
127 | function reset_quote_tags($description) | 127 | function sanitize_html($description) |
128 | { | 128 | { |
129 | return preg_replace('/^( *)> /m', '$1> ', $description); | 129 | $escapeTags = array( |
130 | 'script', | ||
131 | 'style', | ||
132 | 'link', | ||
133 | 'iframe', | ||
134 | 'frameset', | ||
135 | 'frame', | ||
136 | ); | ||
137 | foreach ($escapeTags as $tag) { | ||
138 | $description = preg_replace_callback( | ||
139 | '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is', | ||
140 | function ($match) { return escape($match[0]); }, | ||
141 | $description); | ||
142 | } | ||
143 | $description = preg_replace( | ||
144 | '#(<[^>]+)on[a-z]*="[^"]*"#is', | ||
145 | '$1', | ||
146 | $description); | ||
147 | return $description; | ||
130 | } | 148 | } |
131 | 149 | ||
132 | /** | 150 | /** |
133 | * Render shaare contents through Markdown parser. | 151 | * Render shaare contents through Markdown parser. |
134 | * 1. Remove HTML generated by Shaarli core. | 152 | * 1. Remove HTML generated by Shaarli core. |
135 | * 2. Generate markdown descriptions. | 153 | * 2. Reverse the escape function. |
136 | * 3. Wrap description in 'markdown' CSS class. | 154 | * 3. Generate markdown descriptions. |
155 | * 4. Sanitize sensible HTML tags for security. | ||
156 | * 5. Wrap description in 'markdown' CSS class. | ||
137 | * | 157 | * |
138 | * @param string $description input description text. | 158 | * @param string $description input description text. |
139 | * | 159 | * |
@@ -147,11 +167,12 @@ function process_markdown($description) | |||
147 | $processedDescription = reverse_text2clickable($processedDescription); | 167 | $processedDescription = reverse_text2clickable($processedDescription); |
148 | $processedDescription = reverse_nl2br($processedDescription); | 168 | $processedDescription = reverse_nl2br($processedDescription); |
149 | $processedDescription = reverse_space2nbsp($processedDescription); | 169 | $processedDescription = reverse_space2nbsp($processedDescription); |
150 | $processedDescription = reset_quote_tags($processedDescription); | 170 | $processedDescription = unescape($processedDescription); |
151 | $processedDescription = $parsedown | 171 | $processedDescription = $parsedown |
152 | ->setMarkupEscaped(false) | 172 | ->setMarkupEscaped(false) |
153 | ->setBreaksEnabled(true) | 173 | ->setBreaksEnabled(true) |
154 | ->text($processedDescription); | 174 | ->text($processedDescription); |
175 | $processedDescription = sanitize_html($processedDescription); | ||
155 | $processedDescription = '<div class="markdown">'. $processedDescription . '</div>'; | 176 | $processedDescription = '<div class="markdown">'. $processedDescription . '</div>'; |
156 | 177 | ||
157 | return $processedDescription; | 178 | return $processedDescription; |
diff --git a/tests/plugins/PluginMarkdownTest.php b/tests/plugins/PluginMarkdownTest.php index 455f5ba7..8e1a128a 100644 --- a/tests/plugins/PluginMarkdownTest.php +++ b/tests/plugins/PluginMarkdownTest.php | |||
@@ -100,13 +100,18 @@ class PluginMarkdownTest extends PHPUnit_Framework_TestCase | |||
100 | } | 100 | } |
101 | 101 | ||
102 | /** | 102 | /** |
103 | * Test reset_quote_tags() | 103 | * Test sanitize_html(). |
104 | */ | 104 | */ |
105 | function testResetQuoteTags() | 105 | function testSanitizeHtml() { |
106 | { | 106 | $input = '< script src="js.js"/>'; |
107 | $text = '> quote1'. PHP_EOL . ' > quote2 ' . PHP_EOL . 'noquote'; | 107 | $input .= '< script attr>alert(\'xss\');</script>'; |
108 | $processedText = escape($text); | 108 | $input .= '<style> * { display: none }</style>'; |
109 | $reversedText = reset_quote_tags($processedText); | 109 | $output = escape($input); |
110 | $this->assertEquals($text, $reversedText); | 110 | $input .= '<a href="#" onmouseHover="alert(\'xss\');" attr="tt">link</a>'; |
111 | $output .= '<a href="#" attr="tt">link</a>'; | ||
112 | $this->assertEquals($output, sanitize_html($input)); | ||
113 | // Do not touch escaped HTML. | ||
114 | $input = escape($input); | ||
115 | $this->assertEquals($input, sanitize_html($input)); | ||
111 | } | 116 | } |
112 | } | 117 | } |