diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:05:08 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-10-13 12:05:08 +0200 |
commit | b6f678a5a1d15acf284ebcec16c905e976671ce1 (patch) | |
tree | 33c7da831482ed79c44896ef19c73c72ada84f2e /application/formatter/BookmarkMarkdownFormatter.php | |
parent | b14687036b9b800681197f51fdc47e62f0c88e2e (diff) | |
parent | 1c1520b6b98ab20201bfe15577782a52320339df (diff) | |
download | Shaarli-b6f678a5a1d15acf284ebcec16c905e976671ce1.tar.gz Shaarli-b6f678a5a1d15acf284ebcec16c905e976671ce1.tar.zst Shaarli-b6f678a5a1d15acf284ebcec16c905e976671ce1.zip |
Merge branch 'v0.12' into latest
Diffstat (limited to 'application/formatter/BookmarkMarkdownFormatter.php')
-rw-r--r-- | application/formatter/BookmarkMarkdownFormatter.php | 206 |
1 files changed, 206 insertions, 0 deletions
diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php new file mode 100644 index 00000000..5d244d4c --- /dev/null +++ b/application/formatter/BookmarkMarkdownFormatter.php | |||
@@ -0,0 +1,206 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Shaarli\Formatter; | ||
4 | |||
5 | use Shaarli\Config\ConfigManager; | ||
6 | |||
/**
 * Class BookmarkMarkdownFormatter
 *
 * Format bookmark description into Markdown format.
 *
 * The description goes through protocol filtering and hashtag linking, is rendered
 * by Parsedown, and the resulting HTML is finally sanitized.
 *
 * @package Shaarli\Formatter
 */
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
{
    /**
     * When this tag is present in a bookmark, its description should not be processed with Markdown
     */
    const NO_MD_TAG = 'nomarkdown';

    /** @var \Parsedown instance */
    protected $parsedown;

    /** @var bool used to escape HTML in Markdown or not.
     *            It MUST be set to true for shared instance as HTML content can
     *            introduce XSS vulnerabilities.
     */
    protected $escape;

    /**
     * @var array List of allowed protocols for links inside bookmark's description.
     */
    protected $allowedProtocols;

    /**
     * BookmarkMarkdownFormatter constructor.
     *
     * Reads `security.markdown_escape` (default: true) and `security.allowed_protocols`
     * (default: empty list) from the configuration.
     *
     * @param ConfigManager $conf       instance
     * @param bool          $isLoggedIn
     */
    public function __construct(ConfigManager $conf, bool $isLoggedIn)
    {
        parent::__construct($conf, $isLoggedIn);

        $this->parsedown = new \Parsedown();
        $this->escape = $conf->get('security.markdown_escape', true);
        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    }

    /**
     * Render the bookmark description as Markdown, wrapped in a `<div class="markdown">`.
     *
     * Bookmarks carrying the NO_MD_TAG tag are delegated to the parent formatter instead.
     *
     * @inheritdoc
     */
    public function formatDescription($bookmark)
    {
        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
            return parent::formatDescription($bookmark);
        }

        // Order matters: links are filtered and hashtags linked on the raw Markdown text,
        // then the text is rendered, and only the rendered HTML is sanitized.
        $processedDescription = $bookmark->getDescription();
        $processedDescription = $this->filterProtocols($processedDescription);
        $processedDescription = $this->formatHashTags($processedDescription);
        $processedDescription = $this->reverseEscapedHtml($processedDescription);
        $processedDescription = $this->parsedown
            ->setMarkupEscaped($this->escape)
            ->setBreaksEnabled(true)
            ->text($processedDescription);
        $processedDescription = $this->sanitizeHtml($processedDescription);

        if (!empty($processedDescription)) {
            $processedDescription = '<div class="markdown">' . $processedDescription . '</div>';
        }

        return $processedDescription;
    }

    /**
     * Remove the NO markdown tag if it is present.
     * The tag is only hidden from visitors: it is kept when the user is logged in.
     *
     * @inheritdoc
     */
    protected function formatTagList($bookmark)
    {
        $out = parent::formatTagList($bookmark);
        if ($this->isLoggedIn !== false) {
            return $out;
        }

        $pos = array_search(self::NO_MD_TAG, $out, true);
        if ($pos === false) {
            return $out;
        }

        unset($out[$pos]);

        // Re-index so the result is still a sequential list.
        return array_values($out);
    }

    /**
     * Replace not whitelisted protocols with http:// in given description.
     * Also adds `index_url` to relative links if it's specified
     *
     * Only Markdown link targets, i.e. the `](...)` part of a link, are rewritten.
     *
     * @param string $description input description text.
     *
     * @return string $description without malicious link.
     */
    protected function filterProtocols($description)
    {
        $allowedProtocols = $this->allowedProtocols;
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        return preg_replace_callback(
            '#]\((.*?)\)#is',
            static function ($match) use ($allowedProtocols, $indexUrl) {
                $target = $match[1];
                // Targets starting with `?` or `/` are relative: prefix them with the index URL.
                $isRelative = startsWith($target, '?') || startsWith($target, '/');
                $link = ($isRelative ? $indexUrl : '') . whitelist_protocols($target, $allowedProtocols);

                return '](' . $link . ')';
            },
            $description
        );
    }

    /**
     * Replace hashtag in Markdown links format
     * E.g. `#hashtag` becomes `[#hashtag](./add-tag/hashtag)`
     * It includes the index URL if specified.
     *
     * Hashtags located in fenced code blocks (between lines starting with three backticks)
     * or in indented code lines are left untouched. Line breaks of the input are preserved.
     *
     * @param string $description
     *
     * @return string
     */
    protected function formatHashTags($description)
    {
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        /*
         * To support unicode: http://stackoverflow.com/a/35498078/1484919
         * \p{Pc} - to match underscore
         * \p{N} - numeric character in any script
         * \p{L} - letter from any language
         * \p{Mn} - any non marking space (accents, umlauts, etc)
         */
        $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';

        // A callback is used rather than a replacement string so that `$n` or `\n`
        // sequences inside the index URL are never interpreted as backreferences.
        $linkHashTag = static function (array $match) use ($indexUrl) {
            return $match[1] . '[#' . $match[2] . '](' . $indexUrl . './add-tag/' . $match[2] . ')';
        };

        // Split on LF/CRLF regardless of the host platform, capturing the delimiters:
        // even indexes hold the lines, odd indexes hold the line breaks between them.
        $parts = preg_split('/(\r?\n)/', (string) $description, -1, PREG_SPLIT_DELIM_CAPTURE);
        if ($parts === false) {
            $parts = [(string) $description];
        }

        $descriptionOut = '';
        $codeBlockOn = false;

        foreach ($parts as $index => $part) {
            if ($index % 2 === 1) {
                // Line break: emitted as found in the input.
                $descriptionOut .= $part;
                continue;
            }

            // Detect line of code: starting with 4 spaces,
            // except lists which can start with +/*/- or `2.` after spaces.
            $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $part) > 0;

            // A fence line toggles the block state. The fence lines themselves are never
            // processed: the opening one switches the block on before the check below,
            // and the closing one is itself a line of backticks.
            if (preg_match('/^```/', $part) > 0) {
                $codeBlockOn = !$codeBlockOn;
            }

            if (!$codeBlockOn && !$codeLineOn) {
                // preg_replace_callback() returns null on failure (e.g. invalid UTF-8 with
                // the `u` modifier): keep the line unchanged instead of dropping it.
                $linked = preg_replace_callback($regex, $linkHashTag, $part);
                if ($linked !== null) {
                    $part = $linked;
                }
            }

            $descriptionOut .= $part;
        }

        return $descriptionOut;
    }

    /**
     * Escape dangerous HTML tags (script, style, link, iframe, frameset, frame)
     * and strip inline `on*` event handler attributes from the remaining tags.
     * Doesn't affect <code> content (already escaped by Parsedown).
     *
     * @param string $description input description text.
     *
     * @return string given string escaped.
     */
    protected function sanitizeHtml($description)
    {
        $escapeTags = [
            'script',
            'style',
            'link',
            'iframe',
            'frameset',
            'frame',
        ];
        foreach ($escapeTags as $tag) {
            // Matches the opening tag and, when present, everything up to its closing tag.
            $description = preg_replace_callback(
                '#<\s*' . $tag . '[^>]*>(.*</\s*' . $tag . '[^>]*>)?#is',
                static function ($match) {
                    return escape($match[0]);
                },
                $description
            );
        }

        // A single pass removes at most one handler per tag, because the greedy `[^>]+`
        // anchors on the last ` on…=` of the tag. Repeat until nothing is left to strip.
        // Every replacement shortens the string, so the loop always terminates.
        do {
            $description = preg_replace(
                '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
                '$1',
                $description,
                -1,
                $removed
            );
        } while ($removed > 0);

        return $description;
    }

    /**
     * Hook applied to the description right before it is handed to Parsedown.
     *
     * Delegates to the global `unescape()` helper; presumably this reverts the HTML
     * escaping applied when the description was stored (verify against the helper).
     *
     * @param string $description
     *
     * @return string
     */
    protected function reverseEscapedHtml($description)
    {
        return unescape($description);
    }
}