]>
Commit | Line | Data |
---|---|---|
336a28fa A |
1 | <?php |
2 | ||
3 | namespace Shaarli\Formatter; | |
4 | ||
5 | use Shaarli\Config\ConfigManager; | |
6 | ||
7 | /** | |
8 | * Class BookmarkMarkdownFormatter | |
9 | * | |
10 | * Format bookmark description into Markdown format. | |
11 | * | |
12 | * @package Shaarli\Formatter | |
13 | */ | |
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
{
    /**
     * When this tag is present in a bookmark, its description should not be processed with Markdown
     */
    const NO_MD_TAG = 'nomarkdown';

    /** @var \Parsedown instance */
    protected $parsedown;

    /** @var bool used to escape HTML in Markdown or not.
     *            It MUST be set to true for shared instance as HTML content can
     *            introduce XSS vulnerabilities.
     */
    protected $escape;

    /**
     * @var array List of allowed protocols for links inside bookmark's description.
     */
    protected $allowedProtocols;

    /**
     * BookmarkMarkdownFormatter constructor.
     *
     * Reads `security.markdown_escape` (default: true) and
     * `security.allowed_protocols` (default: empty list) from the configuration.
     *
     * @param ConfigManager $conf instance
     */
    public function __construct(ConfigManager $conf)
    {
        parent::__construct($conf);
        $this->parsedown = new \Parsedown();
        $this->escape = $conf->get('security.markdown_escape', true);
        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    }

    /**
     * Render the bookmark description as Markdown, unless the bookmark carries
     * the NO_MD_TAG tag, in which case the parent formatter is used instead.
     *
     * Pipeline: protocol filtering -> hashtag linking -> Parsedown -> HTML sanitizing.
     * A non-empty result is wrapped in `<div class="markdown">`.
     *
     * @inheritdoc
     */
    public function formatDescription($bookmark)
    {
        // Strict comparison: only the exact tag string disables Markdown.
        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
            return parent::formatDescription($bookmark);
        }

        $processedDescription = $bookmark->getDescription();
        $processedDescription = $this->filterProtocols($processedDescription);
        $processedDescription = $this->formatHashTags($processedDescription);
        $processedDescription = $this->parsedown
            ->setMarkupEscaped($this->escape)
            ->setBreaksEnabled(true)
            ->text($processedDescription);
        $processedDescription = $this->sanitizeHtml($processedDescription);

        if (!empty($processedDescription)) {
            $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
        }

        return $processedDescription;
    }

    /**
     * Remove the NO markdown tag if it is present
     *
     * The returned list is re-indexed after the removal.
     *
     * @inheritdoc
     */
    protected function formatTagList($bookmark)
    {
        $out = parent::formatTagList($bookmark);
        $pos = array_search(self::NO_MD_TAG, $out, true);
        if ($pos === false) {
            return $out;
        }

        unset($out[$pos]);

        return array_values($out);
    }

    /**
     * Filter the protocol of every Markdown link target `](...)` found in the description
     * through whitelist_protocols() (helper defined elsewhere in the project).
     * Also adds `index_url` to relative links (starting with `?` or `/`) if it's specified.
     *
     * @param string $description input description text.
     *
     * @return string $description without malicious link.
     */
    protected function filterProtocols($description)
    {
        $allowedProtocols = $this->allowedProtocols;
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        return preg_replace_callback(
            '#]\((.*?)\)#is',
            function ($match) use ($allowedProtocols, $indexUrl) {
                $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : '';
                $link .= whitelist_protocols($match[1], $allowedProtocols);
                return ']('. $link.')';
            },
            $description
        );
    }

    /**
     * Replace hashtag in Markdown links format
     * E.g. `#hashtag` becomes `[#hashtag](?addtag=hashtag)`
     * It includes the index URL if specified.
     *
     * Lines inside a fenced code block (```), and lines indented as code,
     * are left untouched.
     *
     * @param string $description
     *
     * @return string
     */
    protected function formatHashTags($description)
    {
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        /*
         * To support unicode: http://stackoverflow.com/a/35498078/1484919
         * \p{Pc} - to match underscore
         * \p{N} - numeric character in any script
         * \p{L} - letter from any language
         * \p{Mn} - any non-spacing mark (accents, umlauts, etc)
         */
        $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
        $replacement = '$1[#$2]('. $indexUrl .'?addtag=$2)';

        $descriptionLines = explode(PHP_EOL, $description);
        $codeBlockOn = false;

        foreach ($descriptionLines as $index => $descriptionLine) {
            // Detect line of code: starting with at least 4 spaces,
            // except lists which can start with +/*/- or `2.` after spaces.
            $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;

            // A fence line toggles the code block state. The opening fence itself
            // is therefore already considered part of the block.
            if (preg_match('/^```/', $descriptionLine) > 0) {
                $codeBlockOn = !$codeBlockOn;
            }

            if ($codeBlockOn || $codeLineOn) {
                continue;
            }

            // preg_replace() returns null on PCRE errors (e.g. invalid UTF-8 with
            // the /u modifier): keep the line untouched rather than dropping it.
            $linkedLine = preg_replace($regex, $replacement, $descriptionLine);
            if ($linkedLine !== null) {
                $descriptionLines[$index] = $linkedLine;
            }
        }

        return implode(PHP_EOL, $descriptionLines);
    }

    /**
     * Neutralize dangerous HTML: escape a fixed list of tags (script, style, link,
     * iframe, frameset, frame) through escape() (helper defined elsewhere in the
     * project), then strip `on*` event handler attributes from the remaining tags.
     *
     * @param string $description input description text.
     *
     * @return string|null sanitized string, or null if a PCRE error occurred.
     */
    protected function sanitizeHtml($description)
    {
        $escapeTags = [
            'script',
            'style',
            'link',
            'iframe',
            'frameset',
            'frame',
        ];
        foreach ($escapeTags as $tag) {
            $description = preg_replace_callback(
                '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
                function ($match) {
                    return escape($match[0]);
                },
                $description
            );
        }

        // The greedy `<[^>]+\s` prefix makes a single pass strip only the LAST
        // handler of each tag, so repeat until the string is stable. Every
        // effective pass shortens the string, which guarantees termination.
        do {
            $previous = $description;
            $description = preg_replace(
                '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
                '$1',
                $previous
            );
        } while ($description !== null && $description !== $previous);

        return $description;
    }
}