]>
Commit | Line | Data |
---|---|---|
336a28fa A |
1 | <?php |
2 | ||
3 | namespace Shaarli\Formatter; | |
4 | ||
5 | use Shaarli\Config\ConfigManager; | |
9ef8555a | 6 | use Shaarli\Formatter\Parsedown\ShaarliParsedown; |
336a28fa A |
7 | |
/**
 * Class BookmarkMarkdownFormatter
 *
 * Format bookmark description into Markdown format.
 *
 * @package Shaarli\Formatter
 */
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
{
    /**
     * When this tag is present in a bookmark, its description should not be processed with Markdown
     */
    public const NO_MD_TAG = 'nomarkdown';

    /** @var \Parsedown instance */
    protected $parsedown;

    /**
     * @var bool used to escape HTML in Markdown or not.
     *           It MUST be set to true for shared instance as HTML content can
     *           introduce XSS vulnerabilities.
     */
    protected $escape;

    /**
     * @var array List of allowed protocols for links inside bookmark's description.
     */
    protected $allowedProtocols;

    /**
     * BookmarkMarkdownFormatter constructor.
     *
     * Reads `security.markdown_escape` (default: true) and `security.allowed_protocols`
     * (default: empty list) from the configuration.
     *
     * @param ConfigManager $conf       instance
     * @param bool          $isLoggedIn
     */
    public function __construct(ConfigManager $conf, bool $isLoggedIn)
    {
        parent::__construct($conf, $isLoggedIn);

        $this->parsedown = new ShaarliParsedown();
        $this->escape = $conf->get('security.markdown_escape', true);
        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    }

    /**
     * Render the bookmark description as Markdown-generated HTML.
     *
     * Bookmarks carrying the NO_MD_TAG tag are delegated to the parent formatter.
     * Otherwise the description goes through, in this order: search highlight tokenization,
     * protocol filtering, hashtag linking, HTML un-escaping, Parsedown rendering,
     * HTML sanitizing and token replacement. A non-empty result is wrapped in
     * a `<div class="markdown">` element.
     *
     * @inheritdoc
     */
    public function formatDescription($bookmark)
    {
        // Strict comparison so that only the exact tag string disables Markdown.
        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
            return parent::formatDescription($bookmark);
        }

        $processedDescription = $this->tokenizeSearchHighlightField(
            $bookmark->getDescription() ?? '',
            $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
        );
        $processedDescription = $this->filterProtocols($processedDescription);
        $processedDescription = $this->formatHashTags($processedDescription);
        $processedDescription = $this->reverseEscapedHtml($processedDescription);
        $processedDescription = $this->parsedown
            ->setMarkupEscaped($this->escape)
            ->setBreaksEnabled(true)
            ->text($processedDescription);
        $processedDescription = $this->sanitizeHtml($processedDescription);
        $processedDescription = $this->replaceTokens($processedDescription);

        if (!empty($processedDescription)) {
            $processedDescription = '<div class="markdown">' . $processedDescription . '</div>';
        }

        return $processedDescription;
    }

    /**
     * Hide the NO markdown tag from the tag list when the visitor is not logged in.
     *
     * Only the first occurrence found is removed, and the list is re-indexed afterwards.
     *
     * @inheritdoc
     */
    protected function formatTagList($bookmark)
    {
        $out = parent::formatTagList($bookmark);
        if ($this->isLoggedIn !== false) {
            return $out;
        }

        $pos = array_search(self::NO_MD_TAG, $out, true);
        if ($pos === false) {
            return $out;
        }

        unset($out[$pos]);

        return array_values($out);
    }

    /**
     * Replace not whitelisted protocols with http:// in given description.
     * Also adds `index_url` to relative links if it's specified
     *
     * Only Markdown link targets, i.e. the `](...)` part of a link, are rewritten.
     *
     * @param string $description input description text.
     *
     * @return string $description without malicious link.
     */
    protected function filterProtocols($description)
    {
        $allowedProtocols = $this->allowedProtocols;
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        return preg_replace_callback(
            '#]\((.*?)\)#is',
            static function ($match) use ($allowedProtocols, $indexUrl) {
                // Targets starting with `?` or `/` are relative: prefix them with the index URL.
                $isRelative = startsWith($match[1], '?') || startsWith($match[1], '/');
                $link = ($isRelative ? $indexUrl : '') . whitelist_protocols($match[1], $allowedProtocols);

                return '](' . $link . ')';
            },
            $description
        );
    }

    /**
     * Replace hashtag in Markdown links format
     * E.g. `#hashtag` becomes `[#hashtag](./add-tag/hashtag)`
     * It includes the index URL if specified.
     *
     * Hashtags located on indented code lines or inside ``` fenced blocks are left untouched.
     *
     * @param string $description
     *
     * @return string
     */
    protected function formatHashTags($description)
    {
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
        $highlightOpen = BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN;
        $highlightClose = BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE;

        /*
         * To support unicode: http://stackoverflow.com/a/35498078/1484919
         *   \p{Pc} - to match underscore
         *   \p{N} - numeric character in any script
         *   \p{L} - letter from any language
         *   \p{Mn} - any non-spacing mark (accents, umlauts, etc)
         *
         * The search highlight tokens are matched as whole alternatives next to the character class:
         * inside a character class, a `(?:...)` group is read as a set of literal characters,
         * which would let punctuation such as `)`, `?` or `:` become part of the hashtag.
         */
        $regex = '/(^|\s)#((?:[\p{Pc}\p{N}\p{L}\p{Mn}]'
            . '|' . preg_quote($highlightOpen, '/')
            . '|' . preg_quote($highlightClose, '/')
            . ')+)/mui';

        $replacement = static function (array $match) use ($indexUrl, $highlightOpen, $highlightClose): string {
            // The link target must not contain highlight tokens; the link label keeps them.
            $cleanMatch = str_replace([$highlightClose, $highlightOpen], '', $match[2]);

            return $match[1] . '[#' . $match[2] . '](' . $indexUrl . './add-tag/' . $cleanMatch . ')';
        };

        // Split on "\n" rather than PHP_EOL so that line detection does not depend on the host platform.
        // With CRLF content each line simply keeps its trailing "\r", which is restored when joining.
        $descriptionLines = explode("\n", $description);
        $codeBlockOn = false;

        foreach ($descriptionLines as $index => $descriptionLine) {
            // Detect line of code: starting with 4 spaces,
            // except lists which can start with +/*/- or `2.` after spaces.
            $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) === 1;

            // A ``` marker toggles the fenced block state. The opening marker line is already considered
            // as code, while the closing marker line is processed as regular text.
            if (preg_match('/^```/', $descriptionLine) === 1) {
                $codeBlockOn = !$codeBlockOn;
            }

            if ($codeBlockOn || $codeLineOn) {
                continue;
            }

            // Keep the line unchanged if PCRE fails (e.g. invalid UTF-8) instead of dropping it.
            $descriptionLines[$index] = preg_replace_callback($regex, $replacement, $descriptionLine)
                ?? $descriptionLine;
        }

        return implode("\n", $descriptionLines);
    }

    /**
     * Escape dangerous HTML tags (script, style, link, iframe, frameset, frame)
     * and strip `on*` attributes from tags.
     * Doesn't affect <code> content (already escaped by Parsedown).
     *
     * @param string $description input description text.
     *
     * @return string given string escaped.
     */
    protected function sanitizeHtml($description)
    {
        $escapeTags = [
            'script',
            'style',
            'link',
            'iframe',
            'frameset',
            'frame',
        ];

        $escapeMatch = static function ($match) {
            return escape($match[0]);
        };

        foreach ($escapeTags as $tag) {
            // Matches the opening tag and, when present, everything up to its closing tag.
            $description = preg_replace_callback(
                '#<\s*' . $tag . '[^>]*>(.*</\s*' . $tag . '[^>]*>)?#is',
                $escapeMatch,
                $description
            );
        }

        // Drop an `on...=` attribute (event handler) found inside a tag.
        return preg_replace(
            '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
            '$1',
            $description
        );
    }

    /**
     * Pass the description through `unescape()` before it is handed to Parsedown.
     *
     * NOTE(review): presumably reverts HTML escaping applied earlier to the description,
     * confirm against the `unescape()` helper.
     *
     * @param string $description input description text.
     *
     * @return string result of `unescape()` on the given description.
     */
    protected function reverseEscapedHtml($description)
    {
        return unescape($description);
    }
}