3 namespace Shaarli\Formatter
;
5 use Shaarli\Config\ConfigManager
;
6 use Shaarli\Formatter\Parsedown\ShaarliParsedown
;
/**
 * Class BookmarkMarkdownFormatter
 *
 * Format bookmark description into Markdown format.
 *
 * @package Shaarli\Formatter
 */
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
{
    /**
     * When this tag is present in a bookmark, its description should not be processed with Markdown
     */
    public const NO_MD_TAG = 'nomarkdown';

    // NOTE(review): the declarations of $parsedown and $escape were missing from the reviewed text;
    // they are restored from their docblocks and constructor assignments. `protected` visibility
    // mirrors $allowedProtocols - confirm against the canonical file.

    /** @var \Parsedown instance */
    protected $parsedown;

    /**
     * @var bool used to escape HTML in Markdown or not.
     *           It MUST be set to true for shared instance as HTML content can
     *           introduce XSS vulnerabilities.
     */
    protected $escape;

    /**
     * @var array List of allowed protocols for links inside bookmark's description.
     */
    protected $allowedProtocols;

    /**
     * BookmarkMarkdownFormatter constructor.
     *
     * Reads `security.markdown_escape` (default: true) and `security.allowed_protocols`
     * (default: empty list) from the configuration.
     *
     * @param ConfigManager $conf       instance
     * @param bool          $isLoggedIn
     */
    public function __construct(ConfigManager $conf, bool $isLoggedIn)
    {
        parent::__construct($conf, $isLoggedIn);

        $this->parsedown = new ShaarliParsedown();
        $this->escape = $conf->get('security.markdown_escape', true);
        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    }

    /**
     * Render the bookmark description as Markdown, wrapped in `<div class="markdown">`.
     *
     * Bookmarks tagged with NO_MD_TAG are delegated to the parent formatter instead.
     * The order of the processing steps below matters: links and hashtags are rewritten on the
     * Markdown source, then the text is unescaped and parsed, and only the resulting HTML is
     * sanitized before the search highlight tokens are replaced.
     *
     * @param mixed $bookmark Bookmark to format; must expose getTags(), getDescription()
     *                        and getAdditionalContentEntry().
     *
     * @return string Formatted description, or an empty value when there is nothing to display.
     */
    public function formatDescription($bookmark)
    {
        // Strict comparison: only the exact string tag disables Markdown.
        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
            return parent::formatDescription($bookmark);
        }

        $processedDescription = $this->tokenizeSearchHighlightField(
            $bookmark->getDescription() ?? '',
            $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
        );
        $processedDescription = $this->filterProtocols($processedDescription);
        $processedDescription = $this->formatHashTags($processedDescription);
        $processedDescription = $this->reverseEscapedHtml($processedDescription);
        // Raw HTML escaping by the parser follows the `security.markdown_escape` setting.
        $processedDescription = $this->parsedown
            ->setMarkupEscaped($this->escape)
            ->setBreaksEnabled(true)
            ->text($processedDescription);
        $processedDescription = $this->sanitizeHtml($processedDescription);
        $processedDescription = $this->replaceTokens($processedDescription);

        if (!empty($processedDescription)) {
            $processedDescription = '<div class="markdown">' . $processedDescription . '</div>';
        }

        return $processedDescription;
    }

    /**
     * Remove the NO markdown tag if it is present
     *
     * The tag is only hidden from logged out visitors; the list is re-indexed after removal.
     *
     * @param mixed $bookmark Bookmark whose tag list is being formatted.
     *
     * @return array Tag list produced by the parent formatter, minus NO_MD_TAG when logged out.
     */
    protected function formatTagList($bookmark)
    {
        $out = parent::formatTagList($bookmark);
        if ($this->isLoggedIn === false && ($pos = array_search(self::NO_MD_TAG, $out, true)) !== false) {
            // NOTE(review): this `unset` and the trailing `return $out` were missing from the
            // reviewed text; they are restored from the use of $pos and array_values() - confirm.
            unset($out[$pos]);
            return array_values($out);
        }

        return $out;
    }

    /**
     * Replace not whitelisted protocols with http:// in given description.
     * Also adds `index_url` to relative links if it's specified
     *
     * @param string $description input description text.
     *
     * @return string $description without malicious link.
     */
    protected function filterProtocols($description)
    {
        $allowedProtocols = $this->allowedProtocols;
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        return preg_replace_callback(
            // NOTE(review): this pattern was missing from the reviewed text. It is reconstructed
            // from the callback, which reads the link target from $match[1] and re-emits
            // `](target)` - confirm the exact pattern and flags against the canonical file.
            '#]\((.*?)\)#is',
            static function ($match) use ($allowedProtocols, $indexUrl) {
                // Links starting with `?` or `/` are relative to the instance: prefix the index URL.
                $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : '';
                $link .= whitelist_protocols($match[1], $allowedProtocols);
                return '](' . $link . ')';
            },
            $description
        );
    }

    /**
     * Replace hashtag in Markdown links format
     * E.g. `#hashtag` becomes `[#hashtag](./add-tag/hashtag)`
     * It includes the index URL if specified.
     *
     * Hashtags inside fenced code blocks (```) or indented code lines are left untouched.
     *
     * @param string $description
     *
     * @return string Description with hashtags converted to Markdown links.
     */
    protected function formatHashTags($description)
    {
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        // Search highlight tokens may appear inside a hashtag. They are matched as whole,
        // quoted alternatives: placed inside a character class, `(?:...)` would be read as
        // literal characters and let `(`, `?`, `:` and `)` become part of the hashtag.
        $tokens = preg_quote(BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN, '/')
            . '|' . preg_quote(BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE, '/');

        /*
         * To support unicode: http://stackoverflow.com/a/35498078/1484919
         * \p{Pc} - to match underscore
         * \p{N} - numeric character in any script
         * \p{L} - letter from any language
         * \p{Mn} - any non-spacing mark (accents, umlauts, etc)
         */
        $regex = '/(^|\s)#((?:[\p{Pc}\p{N}\p{L}\p{Mn}]|' . $tokens . ')+)/mui';
        $replacement = static function (array $match) use ($indexUrl): string {
            // The link target must not contain highlight tokens; the visible label keeps them.
            // CLOSE is removed before OPEN, in that order.
            $cleanMatch = str_replace(
                [
                    BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE,
                    BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN,
                ],
                '',
                $match[2]
            );

            return $match[1] . '[#' . $match[2] . '](' . $indexUrl . './add-tag/' . $cleanMatch . ')';
        };

        $processedLines = [];
        $codeBlockOn = false;

        foreach (explode(PHP_EOL, $description) as $descriptionLine) {
            // Detect line of code: starting with 4 spaces,
            // except lists which can start with +/*/- or `2.` after spaces.
            // The count is written as a quantifier so it cannot be lost to whitespace changes.
            $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;

            // Detect and toggle block of code: every fence line flips the state, so the
            // opening fence is already "inside" and the closing fence is already "outside".
            if (preg_match('/^```/', $descriptionLine) > 0) {
                $codeBlockOn = !$codeBlockOn;
            }

            if (!$codeBlockOn && !$codeLineOn) {
                // preg_replace_callback() returns null on a PCRE error (e.g. invalid UTF-8 with
                // the `u` modifier): keep the line as is rather than silently dropping it.
                $descriptionLine = preg_replace_callback($regex, $replacement, $descriptionLine)
                    ?? $descriptionLine;
            }

            $processedLines[] = $descriptionLine;
        }

        return implode(PHP_EOL, $processedLines);
    }

    /**
     * Remove dangerous HTML tags (tags, iframe, etc.).
     * Doesn't affect <code> content (already escaped by Parsedown).
     *
     * @param string $description input description text.
     *
     * @return string given string escaped.
     */
    protected function sanitizeHtml($description)
    {
        // NOTE(review): the definition of this list was missing from the reviewed text (only its
        // use in the loop below was visible). This is security-sensitive: confirm the exact
        // set of tags against the canonical file before merging.
        $escapeTags = [
            'script',
            'style',
            'link',
            'iframe',
            'frameset',
            'frame',
        ];

        foreach ($escapeTags as $tag) {
            // Escape the opening tag and, when present, everything up to its closing tag.
            $description = preg_replace_callback(
                '#<\s*' . $tag . '[^>]*>(.*</\s*' . $tag . '[^>]*>)?#is',
                static function ($match) {
                    return escape($match[0]);
                },
                $description
            );
        }

        // Drop `on*` attributes from tags, keeping the part of the tag captured before them.
        // NOTE(review): the replacement argument was missing from the reviewed text; `$1` is
        // reconstructed from the single capture group of the pattern - confirm.
        $description = preg_replace(
            '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
            '$1',
            $description
        );

        return $description;
    }

    /**
     * Pass the description through unescape() so it can be handed to the Markdown parser.
     *
     * @param string $description input description text.
     *
     * @return string Result of unescape() on the given description.
     */
    protected function reverseEscapedHtml($description)
    {
        return unescape($description);
    }
}