aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/formatter/BookmarkMarkdownFormatter.php
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2020-10-13 12:05:08 +0200
committerArthurHoaro <arthur@hoa.ro>2020-10-13 12:05:08 +0200
commitb6f678a5a1d15acf284ebcec16c905e976671ce1 (patch)
tree33c7da831482ed79c44896ef19c73c72ada84f2e /application/formatter/BookmarkMarkdownFormatter.php
parentb14687036b9b800681197f51fdc47e62f0c88e2e (diff)
parent1c1520b6b98ab20201bfe15577782a52320339df (diff)
downloadShaarli-b6f678a5a1d15acf284ebcec16c905e976671ce1.tar.gz
Shaarli-b6f678a5a1d15acf284ebcec16c905e976671ce1.tar.zst
Shaarli-b6f678a5a1d15acf284ebcec16c905e976671ce1.zip
Merge branch 'v0.12' into latest
Diffstat (limited to 'application/formatter/BookmarkMarkdownFormatter.php')
-rw-r--r--application/formatter/BookmarkMarkdownFormatter.php206
1 files changed, 206 insertions, 0 deletions
diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php
new file mode 100644
index 00000000..5d244d4c
--- /dev/null
+++ b/application/formatter/BookmarkMarkdownFormatter.php
@@ -0,0 +1,206 @@
1<?php
2
3namespace Shaarli\Formatter;
4
5use Shaarli\Config\ConfigManager;
6
7/**
8 * Class BookmarkMarkdownFormatter
9 *
10 * Format bookmark description into Markdown format.
11 *
12 * @package Shaarli\Formatter
13 */
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
{
    /**
     * When this tag is present in a bookmark, its description should not be processed with Markdown
     */
    const NO_MD_TAG = 'nomarkdown';

    /** @var \Parsedown instance used to render descriptions */
    protected $parsedown;

    /**
     * @var bool used to escape HTML in Markdown or not.
     *           It MUST be set to true for shared instance as HTML content can
     *           introduce XSS vulnerabilities.
     */
    protected $escape;

    /**
     * @var array List of allowed protocols for links inside bookmark's description.
     */
    protected $allowedProtocols;

    /**
     * BookmarkMarkdownFormatter constructor.
     *
     * Reads `security.markdown_escape` (fallback: true) and `security.allowed_protocols`
     * (fallback: empty array) from the configuration.
     *
     * @param ConfigManager $conf       instance
     * @param bool          $isLoggedIn forwarded to the parent formatter
     */
    public function __construct(ConfigManager $conf, bool $isLoggedIn)
    {
        parent::__construct($conf, $isLoggedIn);

        $this->parsedown = new \Parsedown();
        $this->escape = $conf->get('security.markdown_escape', true);
        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    }

    /**
     * Render the bookmark description through Markdown.
     *
     * Bookmarks carrying the NO_MD_TAG tag are delegated to the parent formatter untouched.
     * Otherwise the description goes through, in order: protocol filtering, hashtag linking,
     * HTML un-escaping, Parsedown rendering and HTML sanitizing. A non-empty result is wrapped
     * in `<div class="markdown">`.
     *
     * @inheritdoc
     */
    public function formatDescription($bookmark)
    {
        // Strict comparison: only the exact string tag disables Markdown.
        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
            return parent::formatDescription($bookmark);
        }

        $processedDescription = $bookmark->getDescription();
        $processedDescription = $this->filterProtocols($processedDescription);
        $processedDescription = $this->formatHashTags($processedDescription);
        $processedDescription = $this->reverseEscapedHtml($processedDescription);
        $processedDescription = $this->parsedown
            ->setMarkupEscaped($this->escape)
            ->setBreaksEnabled(true)
            ->text($processedDescription);
        $processedDescription = $this->sanitizeHtml($processedDescription);

        if (!empty($processedDescription)) {
            $processedDescription = '<div class="markdown">' . $processedDescription . '</div>';
        }

        return $processedDescription;
    }

    /**
     * Remove the NO markdown tag from the tag list when the user is not logged in.
     * Logged in users get the parent's list unchanged.
     *
     * @inheritdoc
     */
    protected function formatTagList($bookmark)
    {
        $out = parent::formatTagList($bookmark);
        if ($this->isLoggedIn !== false) {
            return $out;
        }

        $pos = array_search(self::NO_MD_TAG, $out, true);
        if ($pos === false) {
            return $out;
        }

        unset($out[$pos]);

        // Re-index so the result stays a sequential list.
        return array_values($out);
    }

    /**
     * Replace not whitelisted protocols with http:// in given description
     * (the protocol check itself is delegated to whitelist_protocols()).
     * Also adds `index_url` to relative links if it's specified.
     *
     * Only Markdown inline link targets, i.e. text matching `](...)`, are processed.
     *
     * @param string $description input description text.
     *
     * @return string $description without malicious link.
     */
    protected function filterProtocols($description)
    {
        $allowedProtocols = $this->allowedProtocols;
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        return preg_replace_callback(
            '#]\((.*?)\)#is',
            function ($match) use ($allowedProtocols, $indexUrl) {
                $target = $match[1];
                // Targets starting with `?` or `/` are relative: prefix them with the index URL.
                $isRelative = startsWith($target, '?') || startsWith($target, '/');
                $link = ($isRelative ? $indexUrl : '') . whitelist_protocols($target, $allowedProtocols);

                return '](' . $link . ')';
            },
            $description
        );
    }

    /**
     * Replace hashtag in Markdown links format
     * E.g. `#hashtag` becomes `[#hashtag](./add-tag/hashtag)`
     * It includes the index URL if specified.
     *
     * Hashtags are left untouched inside fenced code blocks (``` ... ```) and on
     * indented code lines.
     *
     * @param string $description
     *
     * @return string
     */
    protected function formatHashTags($description)
    {
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        /*
         * To support unicode: http://stackoverflow.com/a/35498078/1484919
         *   \p{Pc} - to match underscore
         *   \p{N} - numeric character in any script
         *   \p{L} - letter from any language
         *   \p{Mn} - any non-spacing mark (accents, umlauts, etc)
         */
        $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
        $replacement = '$1[#$2](' . $indexUrl . './add-tag/$2)';

        $codeBlockOn = false;
        $outputLines = [];

        // NOTE(review): lines are split on PHP_EOL, so the result depends on the platform's
        // line ending - confirm descriptions are normalized accordingly.
        foreach (explode(PHP_EOL, $description) as $descriptionLine) {
            // Detect line of code: starting with at least 4 spaces,
            // except lists which can start with +/*/- or `2.` after spaces.
            $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;

            // A line starting with ``` toggles the fenced code block state.
            if (preg_match('/^```/', $descriptionLine) > 0) {
                $codeBlockOn = !$codeBlockOn;
            }

            if (!$codeBlockOn && !$codeLineOn) {
                $descriptionLine = preg_replace($regex, $replacement, $descriptionLine);
            }

            $outputLines[] = $descriptionLine;
        }

        return implode(PHP_EOL, $outputLines);
    }

    /**
     * Remove dangerous HTML tags (script, style, iframe, etc.) by escaping them,
     * and strip inline `on*` event handler attributes.
     * Doesn't affect <code> content (already escaped by Parsedown).
     *
     * @param string $description input description text.
     *
     * @return string given string escaped.
     */
    protected function sanitizeHtml($description)
    {
        $escapeTags = [
            'script',
            'style',
            'link',
            'iframe',
            'frameset',
            'frame',
        ];
        foreach ($escapeTags as $tag) {
            $description = preg_replace_callback(
                '#<\s*' . $tag . '[^>]*>(.*</\s*' . $tag . '[^>]*>)?#is',
                function ($match) {
                    return escape($match[0]);
                },
                $description
            );
        }

        // Each match is anchored on the tag's opening `<`, so a single pass strips at most
        // one handler per tag: repeat until no handler is left. Every replacement shortens
        // the string, so the loop always terminates.
        do {
            $description = preg_replace(
                '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
                '$1',
                $description,
                -1,
                $count
            );
        } while ($count > 0);

        return $description;
    }

    /**
     * Revert HTML escaping on the description before it is handed to Parsedown
     * (thin wrapper around the unescape() helper).
     *
     * @param string $description input description text.
     *
     * @return string unescaped description.
     */
    protected function reverseEscapedHtml($description)
    {
        return unescape($description);
    }
}