]> git.immae.eu Git - github/shaarli/Shaarli.git/blobdiff - application/formatter/BookmarkMarkdownFormatter.php
Introduce Bookmark object and Service layer to retrieve them
[github/shaarli/Shaarli.git] / application / formatter / BookmarkMarkdownFormatter.php
diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php
new file mode 100644 (file)
index 0000000..f60c61f
--- /dev/null
@@ -0,0 +1,198 @@
+<?php
+
+namespace Shaarli\Formatter;
+
+use Shaarli\Config\ConfigManager;
+
+/**
+ * Class BookmarkMarkdownFormatter
+ *
+ * Format bookmark description into Markdown format.
+ *
+ * @package Shaarli\Formatter
+ */
+class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
+{
+    /**
+     * When this tag is present in a bookmark, its description should not be processed with Markdown
+     */
+    const NO_MD_TAG = 'nomarkdown';
+
+    /** @var \Parsedown instance */
+    protected $parsedown;
+
+    /** @var bool used to escape HTML in Markdown or not.
+     *            It MUST be set to true for shared instance as HTML content can
+     *            introduce XSS vulnerabilities.
+     */
+    protected $escape;
+
+    /**
+     * @var array List of allowed protocols for links inside bookmark's description.
+     */
+    protected $allowedProtocols;
+
+    /**
+     * BookmarkMarkdownFormatter constructor.
+     *
+     * Reads `security.markdown_escape` (default: true) and
+     * `security.allowed_protocols` (default: empty list) from the configuration.
+     *
+     * @param ConfigManager $conf instance
+     */
+    public function __construct(ConfigManager $conf)
+    {
+        parent::__construct($conf);
+        $this->parsedown = new \Parsedown();
+        $this->escape = $conf->get('security.markdown_escape', true);
+        $this->allowedProtocols = $conf->get('security.allowed_protocols', []);
+    }
+
+    /**
+     * Render the bookmark description through Markdown and wrap the non-empty
+     * result in a `<div class="markdown">` element.
+     * Bookmarks carrying the NO_MD_TAG tag are delegated to the parent formatter.
+     *
+     * @inheritdoc
+     */
+    public function formatDescription($bookmark)
+    {
+        // Strict comparison: only the exact tag string disables Markdown processing.
+        if (in_array(self::NO_MD_TAG, $bookmark->getTags(), true)) {
+            return parent::formatDescription($bookmark);
+        }
+
+        // Links and hashtags are rewritten on the raw Markdown, before Parsedown renders it.
+        $processedDescription = $bookmark->getDescription();
+        $processedDescription = $this->filterProtocols($processedDescription);
+        $processedDescription = $this->formatHashTags($processedDescription);
+        $processedDescription = $this->parsedown
+            ->setMarkupEscaped($this->escape)
+            ->setBreaksEnabled(true)
+            ->text($processedDescription);
+        // The generated HTML is sanitized last, whatever the escape setting is.
+        $processedDescription = $this->sanitizeHtml($processedDescription);
+
+        if (!empty($processedDescription)) {
+            $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
+        }
+
+        return $processedDescription;
+    }
+
+    /**
+     * Remove the NO markdown tag if it is present
+     *
+     * @inheritdoc
+     */
+    protected function formatTagList($bookmark)
+    {
+        $out = parent::formatTagList($bookmark);
+        // Strict search so that only the exact tag string is removed.
+        if (($pos = array_search(self::NO_MD_TAG, $out, true)) !== false) {
+            unset($out[$pos]);
+            // Reindex to keep a sequential list after the removal.
+            return array_values($out);
+        }
+        return $out;
+    }
+
+    /**
+     * Replace not whitelisted protocols with http:// in given description.
+     * Also adds `index_url` to relative links if it's specified
+     *
+     * @param string $description      input description text.
+     *
+     * @return string $description without malicious link.
+     */
+    protected function filterProtocols($description)
+    {
+        $allowedProtocols = $this->allowedProtocols;
+        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
+
+        // Matches the `](target)` part of Markdown links; the target is captured in group 1.
+        return preg_replace_callback(
+            '#]\((.*?)\)#is',
+            function ($match) use ($allowedProtocols, $indexUrl) {
+                // Targets starting with `?` or `/` are relative: prefix them with the index URL.
+                $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : '';
+                $link .= whitelist_protocols($match[1], $allowedProtocols);
+                return ']('. $link.')';
+            },
+            $description
+        );
+    }
+
+    /**
+     * Replace hashtag in Markdown links format
+     * E.g. `#hashtag` becomes `[#hashtag](?addtag=hashtag)`
+     * It includes the index URL if specified.
+     * Hashtags inside fenced code blocks or indented code lines are left untouched.
+     *
+     * @param string $description
+     *
+     * @return string
+     */
+    protected function formatHashTags($description)
+    {
+        $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
+
+        /*
+         * To support unicode: http://stackoverflow.com/a/35498078/1484919
+         * \p{Pc} - to match underscore
+         * \p{N} - numeric character in any script
+         * \p{L} - letter from any language
+         * \p{Mn} - any non-spacing mark (accents, umlauts, etc)
+         */
+        $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
+        $replacement = '$1[#$2]('. $indexUrl .'?addtag=$2)';
+
+        // NOTE(review): PHP_EOL is platform-dependent; a description using another
+        // line ending is handled as a single line here - confirm this is intended.
+        $descriptionLines = explode(PHP_EOL, $description);
+        $codeBlockOn = false;
+
+        foreach ($descriptionLines as $index => $descriptionLine) {
+            // Detect line of code: starting with 4 spaces,
+            // except lists which can start with +/*/- or `2.` after spaces.
+            $codeLineOn = preg_match('/^    +(?=[^\+\*\-])(?=(?!\d\.).)/', $descriptionLine) > 0;
+            // Detect and toggle block of code
+            if (!$codeBlockOn) {
+                $codeBlockOn = preg_match('/^```/', $descriptionLine) > 0;
+            } elseif (preg_match('/^```/', $descriptionLine) > 0) {
+                $codeBlockOn = false;
+            }
+
+            if (!$codeBlockOn && !$codeLineOn) {
+                $descriptionLines[$index] = preg_replace($regex, $replacement, $descriptionLine);
+            }
+        }
+
+        // Lines are joined with the same separator they were split on.
+        return implode(PHP_EOL, $descriptionLines);
+    }
+
+    /**
+     * Neutralize dangerous HTML tags (script, iframe, etc.) by passing them through escape(),
+     * and strip inline `on*` event handler attributes from the remaining tags.
+     * Doesn't affect <code> content (already escaped by Parsedown).
+     *
+     * @param string $description input description text.
+     *
+     * @return string given string escaped.
+     */
+    protected function sanitizeHtml($description)
+    {
+        $escapeTags = [
+            'script',
+            'style',
+            'link',
+            'iframe',
+            'frameset',
+            'frame',
+        ];
+        foreach ($escapeTags as $tag) {
+            // Escape the opening tag and, when present, everything up to its closing tag.
+            $description = preg_replace_callback(
+                '#<\s*'. $tag .'[^>]*>(.*</\s*'. $tag .'[^>]*>)?#is',
+                function ($match) {
+                    return escape($match[0]);
+                },
+                $description
+            );
+        }
+
+        // A single pass strips at most one handler per tag: the greedy prefix anchors on
+        // the last `on*` attribute and scanning resumes after the match. Repeat until no
+        // handler is left. Every match removes at least `on=`, so the loop terminates.
+        do {
+            $description = preg_replace(
+                '#(<[^>]+\s)on[a-z]*="?[^ "]*"?#is',
+                '$1',
+                $description,
+                -1,
+                $count
+            );
+        } while ($description !== null && $count > 0);
+
+        return $description;
+    }
+}