From 9ef8555ad298668bcb8537ccdd2ab6560f44177f Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Tue, 19 Jan 2021 17:49:19 +0100 Subject: Support search highlights when matching URL content DefaultFormatter: - format 'a' tag content and not href attribute - format hashtags properly Markdown(Extra)Formatter: - Extend Parsedown to format highlight properly: https://github.com/erusev/parsedown/wiki/Tutorial:-Create-Extensions Fixes #1681 --- application/bookmark/LinkUtils.php | 34 +++++++++++++-- application/formatter/BookmarkDefaultFormatter.php | 4 +- .../formatter/BookmarkMarkdownExtraFormatter.php | 4 +- .../formatter/BookmarkMarkdownFormatter.php | 19 ++++++-- .../formatter/Parsedown/ShaarliParsedown.php | 10 +++++ .../formatter/Parsedown/ShaarliParsedownExtra.php | 10 +++++ .../formatter/Parsedown/ShaarliParsedownTrait.php | 50 ++++++++++++++++++++++ .../controller/visitor/BookmarkListController.php | 2 + tests/formatter/BookmarkDefaultFormatterTest.php | 11 ++++- tests/formatter/BookmarkMarkdownFormatterTest.php | 43 +++++++++++++++++++ 10 files changed, 173 insertions(+), 14 deletions(-) create mode 100644 application/formatter/Parsedown/ShaarliParsedown.php create mode 100644 application/formatter/Parsedown/ShaarliParsedownExtra.php create mode 100644 application/formatter/Parsedown/ShaarliParsedownTrait.php diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php index 0ab2d213..8fa2953a 100644 --- a/application/bookmark/LinkUtils.php +++ b/application/bookmark/LinkUtils.php @@ -1,6 +1,7 @@ $1', $text); + $format = function (array $match): string { + return '' . $match[1] . '' + ; + }; + + return preg_replace_callback($regex, $format, $text); } /** @@ -111,6 +123,9 @@ function text2clickable($text) */ function hashtag_autolink($description, $indexUrl = '') { + $tokens = '(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN . ')' . + '(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE . 
')' + ; /* * To support unicode: http://stackoverflow.com/a/35498078/1484919 * \p{Pc} - to match underscore @@ -118,9 +133,20 @@ function hashtag_autolink($description, $indexUrl = '') * \p{L} - letter from any language * \p{Mn} - any non marking space (accents, umlauts, etc) */ - $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui'; - $replacement = '$1#$2'; - return preg_replace($regex, $replacement, $description); + $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}' . $tokens . ']+)/mui'; + $format = function (array $match) use ($indexUrl): string { + $cleanMatch = str_replace( + BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN, + '', + str_replace(BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE, '', $match[2]) + ); + return $match[1] . '' . + '#' . $match[2] . + ''; + }; + + return preg_replace_callback($regex, $format, $description); } /** diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php index 7e0afafc..7e93bf71 100644 --- a/application/formatter/BookmarkDefaultFormatter.php +++ b/application/formatter/BookmarkDefaultFormatter.php @@ -12,8 +12,8 @@ namespace Shaarli\Formatter; */ class BookmarkDefaultFormatter extends BookmarkFormatter { - protected const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT'; - protected const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|'; + public const SEARCH_HIGHLIGHT_OPEN = '||O_HIGHLIGHT'; + public const SEARCH_HIGHLIGHT_CLOSE = '||C_HIGHLIGHT'; /** * @inheritdoc diff --git a/application/formatter/BookmarkMarkdownExtraFormatter.php b/application/formatter/BookmarkMarkdownExtraFormatter.php index 0694b23f..da539bfd 100644 --- a/application/formatter/BookmarkMarkdownExtraFormatter.php +++ b/application/formatter/BookmarkMarkdownExtraFormatter.php @@ -3,6 +3,7 @@ namespace Shaarli\Formatter; use Shaarli\Config\ConfigManager; +use Shaarli\Formatter\Parsedown\ShaarliParsedownExtra; /** * Class BookmarkMarkdownExtraFormatter @@ -18,7 +19,6 @@ class BookmarkMarkdownExtraFormatter extends 
BookmarkMarkdownFormatter public function __construct(ConfigManager $conf, bool $isLoggedIn) { parent::__construct($conf, $isLoggedIn); - - $this->parsedown = new \ParsedownExtra(); + $this->parsedown = new ShaarliParsedownExtra(); } } diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php index ee4e8dca..d4dccee6 100644 --- a/application/formatter/BookmarkMarkdownFormatter.php +++ b/application/formatter/BookmarkMarkdownFormatter.php @@ -3,6 +3,7 @@ namespace Shaarli\Formatter; use Shaarli\Config\ConfigManager; +use Shaarli\Formatter\Parsedown\ShaarliParsedown; /** * Class BookmarkMarkdownFormatter @@ -42,7 +43,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter { parent::__construct($conf, $isLoggedIn); - $this->parsedown = new \Parsedown(); + $this->parsedown = new ShaarliParsedown(); $this->escape = $conf->get('security.markdown_escape', true); $this->allowedProtocols = $conf->get('security.allowed_protocols', []); } @@ -128,6 +129,9 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter protected function formatHashTags($description) { $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; + $tokens = '(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN . ')' . + '(?:' . BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE . ')' + ; /* * To support unicode: http://stackoverflow.com/a/35498078/1484919 @@ -136,8 +140,15 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter * \p{L} - letter from any language * \p{Mn} - any non marking space (accents, umlauts, etc) */ - $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui'; - $replacement = '$1[#$2](' . $indexUrl . './add-tag/$2)'; + $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}' . $tokens . 
']+)/mui'; + $replacement = function (array $match) use ($indexUrl): string { + $cleanMatch = str_replace( + BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_OPEN, + '', + str_replace(BookmarkDefaultFormatter::SEARCH_HIGHLIGHT_CLOSE, '', $match[2]) + ); + return $match[1] . '[#' . $match[2] . '](' . $indexUrl . './add-tag/' . $cleanMatch . ')'; + }; $descriptionLines = explode(PHP_EOL, $description); $descriptionOut = ''; @@ -156,7 +167,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter } if (!$codeBlockOn && !$codeLineOn) { - $descriptionLine = preg_replace($regex, $replacement, $descriptionLine); + $descriptionLine = preg_replace_callback($regex, $replacement, $descriptionLine); } $descriptionOut .= $descriptionLine; diff --git a/application/formatter/Parsedown/ShaarliParsedown.php b/application/formatter/Parsedown/ShaarliParsedown.php new file mode 100644 index 00000000..d577bdfa --- /dev/null +++ b/application/formatter/Parsedown/ShaarliParsedown.php @@ -0,0 +1,10 @@ +shaarliFormatLink(parent::inlineLink($excerpt), true); + } + + protected function inlineUrl($excerpt) + { + return $this->shaarliFormatLink(parent::inlineUrl($excerpt), false); + } + + protected function shaarliFormatLink(?array $link, bool $fullWrap): ?array + { + if ( + is_array($link) + && strpos($link['element']['attributes']['href'], Formatter::SEARCH_HIGHLIGHT_OPEN) !== false + && strpos($link['element']['attributes']['href'], Formatter::SEARCH_HIGHLIGHT_CLOSE) !== false + ) { + $link['element']['attributes']['href'] = $this->shaarliRemoveSearchTokens( + $link['element']['attributes']['href'] + ); + + if ($fullWrap) { + $link['element']['text'] = Formatter::SEARCH_HIGHLIGHT_OPEN . + $link['element']['text'] . 
+ Formatter::SEARCH_HIGHLIGHT_CLOSE + ; + } + } + + return $link; + } + + protected function shaarliRemoveSearchTokens(string $entry): string + { + $entry = str_replace(Formatter::SEARCH_HIGHLIGHT_OPEN, '', $entry); + $entry = str_replace(Formatter::SEARCH_HIGHLIGHT_CLOSE, '', $entry); + + return $entry; + } +} diff --git a/application/front/controller/visitor/BookmarkListController.php b/application/front/controller/visitor/BookmarkListController.php index fe8231be..106440b6 100644 --- a/application/front/controller/visitor/BookmarkListController.php +++ b/application/front/controller/visitor/BookmarkListController.php @@ -33,6 +33,7 @@ class BookmarkListController extends ShaarliVisitorController $formatter = $this->container->formatterFactory->getFormatter(); $formatter->addContextData('base_path', $this->container->basePath); + $formatter->addContextData('index_url', index_url($this->container->environment)); $searchTags = normalize_spaces($request->getParam('searchtags') ?? ''); $searchTerm = escape(normalize_spaces($request->getParam('searchterm') ?? 
'')); @@ -157,6 +158,7 @@ class BookmarkListController extends ShaarliVisitorController $formatter = $this->container->formatterFactory->getFormatter(); $formatter->addContextData('base_path', $this->container->basePath); + $formatter->addContextData('index_url', index_url($this->container->environment)); $data = array_merge( $this->initializeTemplateVars(), diff --git a/tests/formatter/BookmarkDefaultFormatterTest.php b/tests/formatter/BookmarkDefaultFormatterTest.php index 4fcc5dd1..983960b6 100644 --- a/tests/formatter/BookmarkDefaultFormatterTest.php +++ b/tests/formatter/BookmarkDefaultFormatterTest.php @@ -211,13 +211,17 @@ class BookmarkDefaultFormatterTest extends TestCase $this->formatter = new BookmarkDefaultFormatter($this->conf, false); $bookmark = new Bookmark(); - $bookmark->setDescription('This guide extends and expands on PSR-1, the basic coding standard.'); + $bookmark->setDescription( + 'This guide extends and expands on PSR-1, the basic coding standard.' . PHP_EOL . + 'https://www.php-fig.org/psr/psr-1/' + ); $bookmark->addAdditionalContentEntry( 'search_highlight', ['description' => [ ['start' => 0, 'end' => 10], // "This guide" ['start' => 45, 'end' => 50], // basic ['start' => 58, 'end' => 67], // standard. + ['start' => 84, 'end' => 87], // fig ]] ); @@ -226,7 +230,10 @@ class BookmarkDefaultFormatterTest extends TestCase $this->assertSame( 'This guide extends and expands on PSR-1, the ' . 'basic coding ' . - 'standard.', + 'standard.
' . PHP_EOL . + '' . + 'https://www.php-fig.org/psr/psr-1/' . + '', $link['description'] ); } diff --git a/tests/formatter/BookmarkMarkdownFormatterTest.php b/tests/formatter/BookmarkMarkdownFormatterTest.php index ab6b4080..32f7b444 100644 --- a/tests/formatter/BookmarkMarkdownFormatterTest.php +++ b/tests/formatter/BookmarkMarkdownFormatterTest.php @@ -132,6 +132,49 @@ class BookmarkMarkdownFormatterTest extends TestCase $this->assertEquals($description, $link['description']); } + /** + * Make sure that the description is properly formatted by the default formatter. + */ + public function testFormatDescriptionWithSearchHighlight() + { + $description = 'This a description'. PHP_EOL; + $description .= 'text https://sub.domain.tld?query=here&for=real#hash more text'. PHP_EOL; + $description .= 'Also, there is an #hashtag added'. PHP_EOL; + $description .= ' A N D KEEP SPACES ! '. PHP_EOL; + $description .= 'And [yet another link](https://other.domain.tld)'. PHP_EOL; + + $bookmark = new Bookmark(); + $bookmark->setDescription($description); + $bookmark->addAdditionalContentEntry( + 'search_highlight', + ['description' => [ + ['start' => 18, 'end' => 26], // cription + ['start' => 49, 'end' => 52], // sub + ['start' => 84, 'end' => 88], // hash + ['start' => 118, 'end' => 123], // hasht + ['start' => 203, 'end' => 215], // other.domain + ]] + ); + + $link = $this->formatter->format($bookmark); + + $description = '

'; + $description .= 'This a <strong>description</strong>
' . + PHP_EOL; + $url = 'https://sub.domain.tld?query=here&for=real#hash'; + $highlighted = 'https://sub.domain.tld'; + $highlighted .= '?query=here&for=real#hash'; + $description .= 'text '. $highlighted .' more text
'. PHP_EOL; + $description .= 'Also, there is an #hasht' . + 'ag added
'. PHP_EOL; + $description .= 'A N D KEEP SPACES !
' . PHP_EOL; + $description .= 'And ' . + 'yet another link'; + $description .= '

'; + + $this->assertEquals($description, $link['description']); + } + /** * Test formatting URL with an index_url set * It should prepend relative links. -- cgit v1.2.3 From a1cd7a3b2ff32bf6a0f6083007f59104a85eb4bf Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 4 Feb 2021 10:53:23 +0100 Subject: ShaarliParsedown: add PHPDoc/comments --- .../formatter/Parsedown/ShaarliParsedown.php | 5 ++++ .../formatter/Parsedown/ShaarliParsedownExtra.php | 5 ++++ .../formatter/Parsedown/ShaarliParsedownTrait.php | 35 ++++++++++++++++++++-- 3 files changed, 43 insertions(+), 2 deletions(-) diff --git a/application/formatter/Parsedown/ShaarliParsedown.php b/application/formatter/Parsedown/ShaarliParsedown.php index d577bdfa..8eb48fda 100644 --- a/application/formatter/Parsedown/ShaarliParsedown.php +++ b/application/formatter/Parsedown/ShaarliParsedown.php @@ -4,6 +4,11 @@ declare(strict_types=1); namespace Shaarli\Formatter\Parsedown; +/** + * Parsedown extension for Shaarli. + * + * Extension for both Parsedown and ParsedownExtra centralized in ShaarliParsedownTrait. + */ class ShaarliParsedown extends \Parsedown { use ShaarliParsedownTrait; diff --git a/application/formatter/Parsedown/ShaarliParsedownExtra.php b/application/formatter/Parsedown/ShaarliParsedownExtra.php index 92ad26ca..15a35da4 100644 --- a/application/formatter/Parsedown/ShaarliParsedownExtra.php +++ b/application/formatter/Parsedown/ShaarliParsedownExtra.php @@ -4,6 +4,11 @@ declare(strict_types=1); namespace Shaarli\Formatter\Parsedown; +/** + * ParsedownExtra extension for Shaarli. + * + * Extension for both Parsedown and ParsedownExtra centralized in ShaarliParsedownTrait. 
+ */ class ShaarliParsedownExtra extends \ParsedownExtra { use ShaarliParsedownTrait; diff --git a/application/formatter/Parsedown/ShaarliParsedownTrait.php b/application/formatter/Parsedown/ShaarliParsedownTrait.php index e6f4dabb..ed7b1747 100644 --- a/application/formatter/Parsedown/ShaarliParsedownTrait.php +++ b/application/formatter/Parsedown/ShaarliParsedownTrait.php @@ -6,24 +6,48 @@ namespace Shaarli\Formatter\Parsedown; use Shaarli\Formatter\BookmarkDefaultFormatter as Formatter; +/** + * Trait used for Parsedown and ParsedownExtra extension. + * + * Extended: + * - Format links properly in search context + */ trait ShaarliParsedownTrait { + /** + * @inheritDoc + */ protected function inlineLink($excerpt) { return $this->shaarliFormatLink(parent::inlineLink($excerpt), true); } + /** + * @inheritDoc + */ protected function inlineUrl($excerpt) { return $this->shaarliFormatLink(parent::inlineUrl($excerpt), false); } + /** + * Properly format markdown link: + * - remove highlight tags from HREF attribute + * - (optional) add highlight tags to link caption + * + * @param array|null $link Parsedown formatted link array. + * It can be empty. + * @param bool $fullWrap Add highlight tags to the whole link caption + * + * @return array|null + */ protected function shaarliFormatLink(?array $link, bool $fullWrap): ?array { + // If open and close search tokens are found in the link, process. if ( is_array($link) - && strpos($link['element']['attributes']['href'], Formatter::SEARCH_HIGHLIGHT_OPEN) !== false - && strpos($link['element']['attributes']['href'], Formatter::SEARCH_HIGHLIGHT_CLOSE) !== false + && strpos($link['element']['attributes']['href'] ?? '', Formatter::SEARCH_HIGHLIGHT_OPEN) !== false + && strpos($link['element']['attributes']['href'] ?? 
'', Formatter::SEARCH_HIGHLIGHT_CLOSE) !== false ) { $link['element']['attributes']['href'] = $this->shaarliRemoveSearchTokens( $link['element']['attributes']['href'] @@ -40,6 +64,13 @@ trait ShaarliParsedownTrait return $link; } + /** + * Remove open and close tags from provided string. + * + * @param string $entry input + * + * @return string Stripped input + */ protected function shaarliRemoveSearchTokens(string $entry): string { $entry = str_replace(Formatter::SEARCH_HIGHLIGHT_OPEN, '', $entry); -- cgit v1.2.3