3 namespace Shaarli\Formatter
;
5 use Shaarli\Config\ConfigManager
;
8 * Class BookmarkMarkdownFormatter
10 * Format bookmark description into Markdown format.
12 * @package Shaarli\Formatter
14 class BookmarkMarkdownFormatter
extends BookmarkDefaultFormatter
17 * When this tag is present in a bookmark, its description should not be processed with Markdown
19 const NO_MD_TAG
= 'nomarkdown';
21 /** @var \Parsedown instance */
24 /** @var bool used to escape HTML in Markdown or not.
25 * It MUST be set to true for shared instance as HTML content can
26 * introduce XSS vulnerabilities.
31 * @var array List of allowed protocols for links inside bookmark's description.
33 protected $allowedProtocols;
36 * BookmarkMarkdownFormatter constructor.
38 * @param ConfigManager $conf instance
/**
 * Set up the Markdown formatter from the application configuration.
 *
 * @param ConfigManager $conf instance; read for the `security.markdown_escape`
 *                            and `security.allowed_protocols` settings.
 */
public function __construct(ConfigManager $conf)
{
    parent::__construct($conf);

    // `true` is passed as the fallback value for the escape setting.
    $this->escape = $conf->get('security.markdown_escape', true);
    // An empty list is passed as the fallback value for the protocol whitelist.
    $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
    $this->parsedown = new \Parsedown();
}
/**
 * Render a bookmark description as HTML generated from Markdown.
 *
 * A bookmark carrying the NO_MD_TAG tag is delegated to the parent formatter
 * and never goes through Markdown. Otherwise the description goes through, in
 * order: protocol filtering, hashtag linking, Parsedown rendering and HTML
 * sanitizing. A non-empty result is wrapped in `<div class="markdown">`.
 *
 * @param mixed $bookmark object exposing getTags() and getDescription()
 *
 * @return string formatted description; returned unwrapped when it is empty
 */
public function formatDescription($bookmark)
{
    // Strict comparison: only an exact string match of the tag opts out of Markdown.
    if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
        return parent::formatDescription($bookmark);
    }

    // Pre-processing operates on the Markdown source, before rendering.
    $markdown = $this->filterProtocols($bookmark->getDescription());
    $markdown = $this->formatHashTags($markdown);

    $html = $this->parsedown
        ->setMarkupEscaped($this->escape)
        ->setBreaksEnabled(true)
        ->text($markdown);
    $html = $this->sanitizeHtml($html);

    // Do not emit an empty wrapper element.
    if (empty($html)) {
        return $html;
    }

    return '<div class="markdown">' . $html . '</div>';
}
74 * Remove the NO markdown tag if it is present
/**
 * Build the tag list from the parent formatter, without the NO_MD_TAG marker.
 *
 * @param mixed $bookmark forwarded unchanged to the parent implementation
 *
 * @return array the parent's tag list; re-indexed when the marker was removed
 */
protected function formatTagList($bookmark)
{
    $out = parent::formatTagList($bookmark);

    // Strict search: only an exact string match counts as the marker tag.
    $pos = array_search(self::NO_MD_TAG, $out, true);
    if ($pos === false) {
        return $out;
    }

    unset($out[$pos]);

    // unset() leaves a gap in the keys; re-index so callers get a plain list.
    return array_values($out);
}
89 * Replace not whitelisted protocols with http:// in given description.
90 * Also adds `index_url` to relative links if it's specified
92 * @param string $description input description text.
94 * @return string $description without malicious link.
// Rewrites every span of $description matched by the preg_replace_callback()
// pattern: capture group 1 is passed through whitelist_protocols(), prefixed
// with the context's index URL when it starts with '?' or '/', and re-emitted
// between `](` and `)`.
96 protected function filterProtocols($description)
// Local copies so the closure below can capture them through use().
98 $allowedProtocols = $this->allowedProtocols
;
// Falls back to an empty string when the context data has no 'index_url'.
99 $indexUrl = ! empty($this->contextData
['index_url']) ? $this->contextData
['index_url'] : '';
// NOTE(review): the pattern argument is not visible in this excerpt; the
// callback only relies on capture group 1 (presumably the link target).
101 return preg_replace_callback(
103 function ($match) use ($allowedProtocols, $indexUrl) {
// Only values beginning with '?' or '/' receive the index URL prefix.
104 $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : '';
// whitelist_protocols() is defined elsewhere; per this method's doc comment it
// swaps non-whitelisted protocols for http:// - TODO confirm against the helper.
105 $link .= whitelist_protocols($match[1], $allowedProtocols);
// Restore the `](...)` delimiters around the rewritten value.
106 return ']('. $link.')';
113 * Replace hashtag in Markdown links format
114 * E.g. `#hashtag` becomes `[#hashtag](?addtag=hashtag)`
115 * It includes the index URL if specified.
117 * @param string $description
/**
 * Replace hashtags with Markdown links.
 * E.g. `#hashtag` becomes `[#hashtag](?addtag=hashtag)`, prefixed with the
 * index URL when the context data provides one.
 *
 * Lines inside a ``` fenced block and lines indented as code are left untouched.
 *
 * @param string $description Markdown source, split on PHP_EOL
 *
 * @return string description with hashtags turned into links
 */
protected function formatHashTags($description)
{
    $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';

    /*
     * To support unicode: http://stackoverflow.com/a/35498078/1484919
     * \p{Pc} - connector punctuation, to match underscore
     * \p{N}  - numeric character in any script
     * \p{L}  - letter from any language
     * \p{Mn} - any non-spacing mark (accents, umlauts, etc)
     */
    $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
    $replacement = '$1[#$2]('. $indexUrl .'?addtag=$2)';

    $linesOut = [];
    $codeBlockOn = false;

    foreach (explode(PHP_EOL, $description) as $descriptionLine) {
        // Indented code line: at least 4 leading spaces, except list items,
        // which can start with +/*/- or `2.` after the spaces.
        $codeLineOn = preg_match('/^ {4,}(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;

        // A line starting with ``` opens a fenced block when outside one and
        // closes it when inside one, i.e. it toggles the state.
        if (preg_match('/^```/', $descriptionLine) > 0) {
            $codeBlockOn = !$codeBlockOn;
        }

        if (!$codeBlockOn && !$codeLineOn) {
            $descriptionLine = preg_replace($regex, $replacement, $descriptionLine);
        }

        $linesOut[] = $descriptionLine;
    }

    // Joining restores exactly one PHP_EOL between lines and none after the last.
    return implode(PHP_EOL, $linesOut);
}
165 * Remove dangerous HTML tags (tags, iframe, etc.).
166 * Doesn't affect <code> content (already escaped by Parsedown).
168 * @param string $description input description text.
170 * @return string given string escaped.
172 protected function sanitizeHtml($description)
182 foreach ($escapeTags as $tag) {
183 $description = preg_replace_callback(
184 '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
186 return escape($match[0]);
191 $description = preg_replace(
192 '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',