]> git.immae.eu Git - github/shaarli/Shaarli.git/blob - application/formatter/BookmarkMarkdownFormatter.php
Apply the new system (Bookmark + Service) to the whole code base
[github/shaarli/Shaarli.git] / application / formatter / BookmarkMarkdownFormatter.php
1 <?php
2
3 namespace Shaarli\Formatter;
4
5 use Shaarli\Config\ConfigManager;
6
/**
 * Class BookmarkMarkdownFormatter
 *
 * Format bookmark description into Markdown format.
 *
 * The description goes through the following pipeline (see formatDescription()):
 * link protocol filtering, hashtag linkification, HTML un-escaping,
 * Parsedown rendering and finally HTML sanitization.
 *
 * @package Shaarli\Formatter
 */
class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
{
    /**
     * When this tag is present in a bookmark, its description should not be processed with Markdown
     */
    const NO_MD_TAG = 'nomarkdown';

    /** @var \Parsedown instance */
    protected $parsedown;

    /**
     * @var bool used to escape HTML in Markdown or not.
     *           It MUST be set to true for shared instance as HTML content can
     *           introduce XSS vulnerabilities.
     */
    protected $escape;

    /**
     * @var array List of allowed protocols for links inside bookmark's description.
     */
    protected $allowedProtocols;

    /**
     * BookmarkMarkdownFormatter constructor.
     *
     * Reads `security.markdown_escape` (default: true) and
     * `security.allowed_protocols` (default: empty list) from the configuration.
     *
     * @param ConfigManager $conf instance
     */
    public function __construct(ConfigManager $conf)
    {
        parent::__construct($conf);
        $this->parsedown = new \Parsedown();
        $this->escape = $conf->get('security.markdown_escape', true);
        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    }

    /**
     * Render the bookmark description as Markdown, wrapped in `<div class="markdown">`.
     *
     * Bookmarks carrying the NO_MD_TAG tag are delegated to the parent formatter instead.
     * An empty rendering result is returned as is, without the wrapping div.
     *
     * @inheritdoc
     */
    public function formatDescription($bookmark)
    {
        // Strict comparison: a non-string tag must never be mistaken for the opt-out tag.
        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
            return parent::formatDescription($bookmark);
        }

        // The order of these steps matters: links and hashtags are rewritten on the raw
        // Markdown source, and sanitization runs last, on the generated HTML.
        $processedDescription = $bookmark->getDescription();
        $processedDescription = $this->filterProtocols($processedDescription);
        $processedDescription = $this->formatHashTags($processedDescription);
        $processedDescription = $this->reverseEscapedHtml($processedDescription);
        $processedDescription = $this->parsedown
            ->setMarkupEscaped($this->escape)
            ->setBreaksEnabled(true)
            ->text($processedDescription);
        $processedDescription = $this->sanitizeHtml($processedDescription);

        if (!empty($processedDescription)) {
            $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
        }

        return $processedDescription;
    }

    /**
     * Remove the NO markdown tag if it is present
     *
     * The returned list is re-indexed after the removal.
     *
     * @inheritdoc
     */
    protected function formatTagList($bookmark)
    {
        $out = parent::formatTagList($bookmark);
        if (($pos = array_search(self::NO_MD_TAG, $out, true)) !== false) {
            unset($out[$pos]);
            return array_values($out);
        }
        return $out;
    }

    /**
     * Filter the target of every Markdown link (`](target)`) found in the description.
     *
     * Each target is passed through whitelist_protocols() with the configured allowed
     * protocols (which is expected to replace non-whitelisted protocols with http://).
     * Also adds `index_url` to relative links (starting with `?` or `/`) if it's specified.
     *
     * @param string $description input description text.
     *
     * @return string $description without malicious link.
     */
    protected function filterProtocols($description)
    {
        $allowedProtocols = $this->allowedProtocols;
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        return preg_replace_callback(
            '#]\((.*?)\)#is',
            function ($match) use ($allowedProtocols, $indexUrl) {
                $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : '';
                $link .= whitelist_protocols($match[1], $allowedProtocols);
                return ']('. $link.')';
            },
            $description
        );
    }

    /**
     * Replace hashtag in Markdown links format
     * E.g. `#hashtag` becomes `[#hashtag](?addtag=hashtag)`
     * It includes the index URL if specified.
     *
     * Hashtags located in code are left untouched: lines indented with at least
     * 4 spaces (except list items) and lines inside a ``` fenced block.
     *
     * @param string $description
     *
     * @return string
     */
    protected function formatHashTags($description)
    {
        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

        /*
         * To support unicode: http://stackoverflow.com/a/35498078/1484919
         * \p{Pc} - to match underscore
         * \p{N} - numeric character in any script
         * \p{L} - letter from any language
         * \p{Mn} - non-spacing mark (accents, umlauts, etc)
         */
        $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
        // Built in a callback rather than a replacement string, so that `$n` or `\n`
        // sequences in the index URL can not be interpreted as backreferences.
        $linkify = function ($match) use ($indexUrl) {
            return $match[1] . '[#' . $match[2] . '](' . $indexUrl . '?addtag=' . $match[2] . ')';
        };

        $descriptionLines = explode(PHP_EOL, $description);
        $codeBlockOn = false;

        foreach ($descriptionLines as $index => $descriptionLine) {
            // Detect line of code: starting with 4 spaces,
            // except lists which can start with +/*/- or `2.` after spaces.
            $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;
            // A fence line toggles the code block state (opening or closing it).
            if (preg_match('/^```/', $descriptionLine) > 0) {
                $codeBlockOn = !$codeBlockOn;
            }

            if ($codeBlockOn || $codeLineOn) {
                continue;
            }

            $linkified = preg_replace_callback($regex, $linkify, $descriptionLine);
            // preg_* returns null on failure (e.g. invalid UTF-8 with the `u` modifier):
            // keep the original line rather than silently dropping it.
            if ($linkified !== null) {
                $descriptionLines[$index] = $linkified;
            }
        }

        return implode(PHP_EOL, $descriptionLines);
    }

    /**
     * Remove dangerous HTML tags (script, iframe, etc.) and inline event handlers.
     * Doesn't affect <code> content (already escaped by Parsedown).
     *
     * Dangerous tags (with their content, when a closing tag is found) are escaped
     * rather than deleted. `on*` attributes are stripped from every tag.
     *
     * @param string $description input description text.
     *
     * @return string given string escaped.
     */
    protected function sanitizeHtml($description)
    {
        $escapeTags = [
            'script',
            'style',
            'link',
            'iframe',
            'frameset',
            'frame',
        ];
        foreach ($escapeTags as $tag) {
            $description = preg_replace_callback(
                '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
                function ($match) {
                    return escape($match[0]);
                },
                $description
            );
        }

        // The leading group is greedy, so a single pass only strips the LAST handler of
        // each tag: `<a onclick="x" onmouseover="y">` would keep its `onclick`.
        // Repeat until nothing matches; every replacement shortens the string,
        // so the loop always terminates.
        do {
            $description = preg_replace(
                '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
                '$1',
                $description,
                -1,
                $count
            );
        } while ($count > 0);

        return $description;
    }

    /**
     * Revert HTML escaping on the description before it is handed to Parsedown.
     *
     * Thin wrapper around unescape() (presumably the counterpart of escape();
     * to be confirmed against the helper's definition).
     *
     * @param string $description input description text.
     *
     * @return string unescaped description.
     */
    protected function reverseEscapedHtml($description)
    {
        return unescape($description);
    }
}