From 336a28fa4a09b968ce4705900bf57693e672f0bf Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Sat, 25 May 2019 15:46:47 +0200 Subject: Introduce Bookmark object and Service layer to retrieve them See https://github.com/shaarli/Shaarli/issues/1307 for details --- .../formatter/BookmarkMarkdownFormatter.php | 198 +++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 application/formatter/BookmarkMarkdownFormatter.php (limited to 'application/formatter/BookmarkMarkdownFormatter.php') diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php new file mode 100644 index 00000000..f60c61f4 --- /dev/null +++ b/application/formatter/BookmarkMarkdownFormatter.php @@ -0,0 +1,198 @@ +parsedown = new \Parsedown(); + $this->escape = $conf->get('security.markdown_escape', true); + $this->allowedProtocols = $conf->get('security.allowed_protocols', []); + } + + /** + * @inheritdoc + */ + public function formatDescription($bookmark) + { + if (in_array(self::NO_MD_TAG, $bookmark->getTags())) { + return parent::formatDescription($bookmark); + } + + $processedDescription = $bookmark->getDescription(); + $processedDescription = $this->filterProtocols($processedDescription); + $processedDescription = $this->formatHashTags($processedDescription); + $processedDescription = $this->parsedown + ->setMarkupEscaped($this->escape) + ->setBreaksEnabled(true) + ->text($processedDescription); + $processedDescription = $this->sanitizeHtml($processedDescription); + + if (!empty($processedDescription)) { + $processedDescription = '
<div class="markdown">'. $processedDescription . '</div>
'; + } + + return $processedDescription; + } + + /** + * Remove the NO markdown tag if it is present + * + * @inheritdoc + */ + protected function formatTagList($bookmark) + { + $out = parent::formatTagList($bookmark); + if (($pos = array_search(self::NO_MD_TAG, $out)) !== false) { + unset($out[$pos]); + return array_values($out); + } + return $out; + } + + /** + * Replace not whitelisted protocols with http:// in given description. + * Also adds `index_url` to relative links if it's specified + * + * @param string $description input description text. + * + * @return string $description without malicious link. + */ + protected function filterProtocols($description) + { + $allowedProtocols = $this->allowedProtocols; + $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; + + return preg_replace_callback( + '#]\((.*?)\)#is', + function ($match) use ($allowedProtocols, $indexUrl) { + $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : ''; + $link .= whitelist_protocols($match[1], $allowedProtocols); + return ']('. $link.')'; + }, + $description + ); + } + + /** + * Replace hashtag in Markdown links format + * E.g. `#hashtag` becomes `[#hashtag](?addtag=hashtag)` + * It includes the index URL if specified. + * + * @param string $description + * + * @return string + */ + protected function formatHashTags($description) + { + $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; + + /* + * To support unicode: http://stackoverflow.com/a/35498078/1484919 + * \p{Pc} - to match underscore + * \p{N} - numeric character in any script + * \p{L} - letter from any language + * \p{Mn} - any non marking space (accents, umlauts, etc) + */ + $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui'; + $replacement = '$1[#$2]('. 
$indexUrl .'?addtag=$2)'; + + $descriptionLines = explode(PHP_EOL, $description); + $descriptionOut = ''; + $codeBlockOn = false; + $lineCount = 0; + + foreach ($descriptionLines as $descriptionLine) { + // Detect line of code: starting with 4 spaces, + // except lists which can start with +/*/- or `2.` after spaces. + $codeLineOn = preg_match('/^ +(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0; + // Detect and toggle block of code + if (!$codeBlockOn) { + $codeBlockOn = preg_match('/^```/', $descriptionLine) > 0; + } elseif (preg_match('/^```/', $descriptionLine) > 0) { + $codeBlockOn = false; + } + + if (!$codeBlockOn && !$codeLineOn) { + $descriptionLine = preg_replace($regex, $replacement, $descriptionLine); + } + + $descriptionOut .= $descriptionLine; + if ($lineCount++ < count($descriptionLines) - 1) { + $descriptionOut .= PHP_EOL; + } + } + + return $descriptionOut; + } + + /** + * Remove dangerous HTML tags (tags, iframe, etc.). + * Doesn't affect content (already escaped by Parsedown). + * + * @param string $description input description text. + * + * @return string given string escaped. + */ + protected function sanitizeHtml($description) + { + $escapeTags = array( + 'script', + 'style', + 'link', + 'iframe', + 'frameset', + 'frame', + ); + foreach ($escapeTags as $tag) { + $description = preg_replace_callback( + '#<\s*'. 
$tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is', + function ($match) { + return escape($match[0]); + }, + $description + ); + } + $description = preg_replace( + '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is', + '$1', + $description + ); + return $description; + } +} -- cgit v1.2.3 From cf92b4dd1521241eefc58eaf6dcd202cd83969d8 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Sat, 25 May 2019 15:52:27 +0200 Subject: Apply the new system (Bookmark + Service) to the whole code base See https://github.com/shaarli/Shaarli/issues/1307 --- application/formatter/BookmarkMarkdownFormatter.php | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'application/formatter/BookmarkMarkdownFormatter.php') diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php index f60c61f4..7797bfbf 100644 --- a/application/formatter/BookmarkMarkdownFormatter.php +++ b/application/formatter/BookmarkMarkdownFormatter.php @@ -57,6 +57,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter $processedDescription = $bookmark->getDescription(); $processedDescription = $this->filterProtocols($processedDescription); $processedDescription = $this->formatHashTags($processedDescription); + $processedDescription = $this->reverseEscapedHtml($processedDescription); $processedDescription = $this->parsedown ->setMarkupEscaped($this->escape) ->setBreaksEnabled(true) @@ -195,4 +196,9 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter ); return $description; } + + protected function reverseEscapedHtml($description) + { + return unescape($description); + } } -- cgit v1.2.3