From 8fabcd0224b1122a48b495326854bb3562cd2e9d Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 27 Aug 2020 15:25:18 +0200 Subject: Add Markdown Extra formatter Library: [Parsedown Extra](https://github.com/erusev/parsedown-extra) Also sort dependencies alphabetically. Fixes #1169 --- .../formatter/BookmarkMarkdownExtraFormatter.php | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 application/formatter/BookmarkMarkdownExtraFormatter.php (limited to 'application/formatter') diff --git a/application/formatter/BookmarkMarkdownExtraFormatter.php b/application/formatter/BookmarkMarkdownExtraFormatter.php new file mode 100644 index 00000000..0694b23f --- /dev/null +++ b/application/formatter/BookmarkMarkdownExtraFormatter.php @@ -0,0 +1,24 @@ +parsedown = new \ParsedownExtra(); + } +} -- cgit v1.2.3 From 4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Mon, 12 Oct 2020 11:35:55 +0200 Subject: Feature: highlight fulltext search results How it works: 1. when a fulltext search is made, Shaarli looks for the first occurence position of every term matching the search. No change here, but we store these positions in an array, in Bookmark's additionalContent. 2. when formatting bookmarks (through BookmarkFormatter implementation): 1. first we insert specific tokens at every search result positions 2. we format the content (escape HTML, apply markdown, etc.) 3. as a last step, we replace our token with displayable span elements Cons: this tightens coupling between search filters and formatters Pros: it was absolutely necessary not to perform the search twice. this solution has close to no impact on performances. Fixes #205 --- application/formatter/BookmarkDefaultFormatter.php | 132 ++++++++++++++++++++- application/formatter/BookmarkFormatter.php | 79 ++++++++++-- .../formatter/BookmarkMarkdownFormatter.php | 6 +- 3 files changed, 203 insertions(+), 14 deletions(-) (limited to 'application/formatter') diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php index 9d4a0fa0..d58a5e39 100644 --- a/application/formatter/BookmarkDefaultFormatter.php +++ b/application/formatter/BookmarkDefaultFormatter.php @@ -12,10 +12,13 @@ namespace Shaarli\Formatter; */ class BookmarkDefaultFormatter extends BookmarkFormatter { + const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT'; + const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|'; + /** * @inheritdoc */ - public function formatTitle($bookmark) + protected function formatTitle($bookmark) { return escape($bookmark->getTitle()); } @@ -23,10 +26,28 @@ class BookmarkDefaultFormatter extends BookmarkFormatter /** * @inheritdoc */ - public function formatDescription($bookmark) + protected function formatTitleHtml($bookmark) + { + $title = $this->tokenizeSearchHighlightField( + $bookmark->getTitle() ?? '', + $bookmark->getAdditionalContentEntry('search_highlight')['title'] ?? [] + ); + + return $this->replaceTokens(escape($title)); + } + + /** + * @inheritdoc + */ + protected function formatDescription($bookmark) { $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; - return format_description(escape($bookmark->getDescription()), $indexUrl); + $description = $this->tokenizeSearchHighlightField( + $bookmark->getDescription() ?? '', + $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? [] + ); + + return $this->replaceTokens(format_description(escape($description), $indexUrl)); } /** @@ -40,7 +61,27 @@ class BookmarkDefaultFormatter extends BookmarkFormatter /** * @inheritdoc */ - public function formatTagString($bookmark) + protected function formatTagListHtml($bookmark) + { + if (empty($bookmark->getAdditionalContentEntry('search_highlight')['tags'])) { + return $this->formatTagList($bookmark); + } + + $tags = $this->tokenizeSearchHighlightField( + $bookmark->getTagsString(), + $bookmark->getAdditionalContentEntry('search_highlight')['tags'] + ); + $tags = $this->filterTagList(explode(' ', $tags)); + $tags = escape($tags); + $tags = $this->replaceTokensArray($tags); + + return $tags; + } + + /** + * @inheritdoc + */ + protected function formatTagString($bookmark) { return implode(' ', $this->formatTagList($bookmark)); } @@ -48,7 +89,7 @@ class BookmarkDefaultFormatter extends BookmarkFormatter /** * @inheritdoc */ - public function formatUrl($bookmark) + protected function formatUrl($bookmark) { if ($bookmark->isNote() && isset($this->contextData['index_url'])) { return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/')); @@ -77,6 +118,19 @@ class BookmarkDefaultFormatter extends BookmarkFormatter return escape($bookmark->getUrl()); } + /** + * @inheritdoc + */ + protected function formatUrlHtml($bookmark) + { + $url = $this->tokenizeSearchHighlightField( + $bookmark->getUrl() ?? '', + $bookmark->getAdditionalContentEntry('search_highlight')['url'] ?? [] + ); + + return $this->replaceTokens(escape($url)); + } + /** * @inheritdoc */ @@ -84,4 +138,72 @@ class BookmarkDefaultFormatter extends BookmarkFormatter { return escape($bookmark->getThumbnail()); } + + /** + * Insert search highlight token in provided field content based on a list of search result positions + * + * @param string $fieldContent + * @param array|null $positions List of of search results with 'start' and 'end' positions. + * + * @return string Updated $fieldContent. + */ + protected function tokenizeSearchHighlightField(string $fieldContent, ?array $positions): string + { + if (empty($positions)) { + return $fieldContent; + } + + $insertedTokens = 0; + $tokenLength = strlen(static::SEARCH_HIGHLIGHT_OPEN); + foreach ($positions as $position) { + $position = [ + 'start' => $position['start'] + ($insertedTokens * $tokenLength), + 'end' => $position['end'] + ($insertedTokens * $tokenLength), + ]; + + $content = mb_substr($fieldContent, 0, $position['start']); + $content .= static::SEARCH_HIGHLIGHT_OPEN; + $content .= mb_substr($fieldContent, $position['start'], $position['end'] - $position['start']); + $content .= static::SEARCH_HIGHLIGHT_CLOSE; + $content .= mb_substr($fieldContent, $position['end']); + + $fieldContent = $content; + + $insertedTokens += 2; + } + + return $fieldContent; + } + + /** + * Replace search highlight tokens with HTML highlighted span. + * + * @param string $fieldContent + * + * @return string updated content. + */ + protected function replaceTokens(string $fieldContent): string + { + return str_replace( + [static::SEARCH_HIGHLIGHT_OPEN, static::SEARCH_HIGHLIGHT_CLOSE], + ['', ''], + $fieldContent + ); + } + + /** + * Apply replaceTokens to an array of content strings. + * + * @param string[] $fieldContents + * + * @return array + */ + protected function replaceTokensArray(array $fieldContents): array + { + foreach ($fieldContents as &$entry) { + $entry = $this->replaceTokens($entry); + } + + return $fieldContents; + } } diff --git a/application/formatter/BookmarkFormatter.php b/application/formatter/BookmarkFormatter.php index 0042dafe..e1b7f705 100644 --- a/application/formatter/BookmarkFormatter.php +++ b/application/formatter/BookmarkFormatter.php @@ -2,7 +2,7 @@ namespace Shaarli\Formatter; -use DateTime; +use DateTimeInterface; use Shaarli\Bookmark\Bookmark; use Shaarli\Config\ConfigManager; @@ -11,6 +11,29 @@ use Shaarli\Config\ConfigManager; * * Abstract class processing all bookmark attributes through methods designed to be overridden. * + * List of available formatted fields: + * - id ID + * - shorturl Unique identifier, used in permalinks + * - url URL, can be altered in some way, e.g. passing through an HTTP reverse proxy + * - real_url (legacy) same as `url` + * - url_html URL to be displayed in HTML content (it can contain HTML tags) + * - title Title + * - title_html Title to be displayed in HTML content (it can contain HTML tags) + * - description Description content. It most likely contains HTML tags + * - thumbnail Thumbnail: path to local cache file, false if there is none, null if hasn't been retrieved + * - taglist List of tags (array) + * - taglist_urlencoded List of tags (array) URL encoded: it must be used to create a link to a URL containing a tag + * - taglist_html List of tags (array) to be displayed in HTML content (it can contain HTML tags) + * - tags Tags separated by a single whitespace + * - tags_urlencoded Tags separated by a single whitespace, URL encoded: must be used to create a link + * - sticky Is sticky (bool) + * - private Is private (bool) + * - class Additional CSS class + * - created Creation DateTime + * - updated Last edit DateTime + * - timestamp Creation timestamp + * - updated_timestamp Last edit timestamp + * * @package Shaarli\Formatter */ abstract class BookmarkFormatter @@ -55,13 +78,16 @@ abstract class BookmarkFormatter $out['shorturl'] = $this->formatShortUrl($bookmark); $out['url'] = $this->formatUrl($bookmark); $out['real_url'] = $this->formatRealUrl($bookmark); + $out['url_html'] = $this->formatUrlHtml($bookmark); $out['title'] = $this->formatTitle($bookmark); + $out['title_html'] = $this->formatTitleHtml($bookmark); $out['description'] = $this->formatDescription($bookmark); $out['thumbnail'] = $this->formatThumbnail($bookmark); - $out['urlencoded_taglist'] = $this->formatUrlEncodedTagList($bookmark); $out['taglist'] = $this->formatTagList($bookmark); - $out['urlencoded_tags'] = $this->formatUrlEncodedTagString($bookmark); + $out['taglist_urlencoded'] = $this->formatTagListUrlEncoded($bookmark); + $out['taglist_html'] = $this->formatTagListHtml($bookmark); $out['tags'] = $this->formatTagString($bookmark); + $out['tags_urlencoded'] = $this->formatTagStringUrlEncoded($bookmark); $out['sticky'] = $bookmark->isSticky(); $out['private'] = $bookmark->isPrivate(); $out['class'] = $this->formatClass($bookmark); @@ -69,6 +95,7 @@ abstract class BookmarkFormatter $out['updated'] = $this->formatUpdated($bookmark); $out['timestamp'] = $this->formatCreatedTimestamp($bookmark); $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark); + return $out; } @@ -135,6 +162,18 @@ abstract class BookmarkFormatter return $this->formatUrl($bookmark); } + /** + * Format Url Html: to be displayed in HTML content, it can contains HTML tags. + * + * @param Bookmark $bookmark instance + * + * @return string formatted Url HTML + */ + protected function formatUrlHtml($bookmark) + { + return $this->formatUrl($bookmark); + } + /** * Format Title * @@ -147,6 +186,18 @@ abstract class BookmarkFormatter return $bookmark->getTitle(); } + /** + * Format Title HTML: to be displayed in HTML content, it can contains HTML tags. + * + * @param Bookmark $bookmark instance + * + * @return string formatted Title + */ + protected function formatTitleHtml($bookmark) + { + return $bookmark->getTitle(); + } + /** * Format Description * @@ -190,11 +241,23 @@ abstract class BookmarkFormatter * * @return array formatted Tags */ - protected function formatUrlEncodedTagList($bookmark) + protected function formatTagListUrlEncoded($bookmark) { return array_map('urlencode', $this->filterTagList($bookmark->getTags())); } + /** + * Format Tags HTML: to be displayed in HTML content, it can contains HTML tags. + * + * @param Bookmark $bookmark instance + * + * @return array formatted Tags + */ + protected function formatTagListHtml($bookmark) + { + return $this->formatTagList($bookmark); + } + /** * Format TagString * @@ -214,9 +277,9 @@ abstract class BookmarkFormatter * * @return string formatted TagString */ - protected function formatUrlEncodedTagString($bookmark) + protected function formatTagStringUrlEncoded($bookmark) { - return implode(' ', $this->formatUrlEncodedTagList($bookmark)); + return implode(' ', $this->formatTagListUrlEncoded($bookmark)); } /** @@ -237,7 +300,7 @@ abstract class BookmarkFormatter * * @param Bookmark $bookmark instance * - * @return DateTime instance + * @return DateTimeInterface instance */ protected function formatCreated(Bookmark $bookmark) { @@ -249,7 +312,7 @@ abstract class BookmarkFormatter * * @param Bookmark $bookmark instance * - * @return DateTime instance + * @return DateTimeInterface instance */ protected function formatUpdated(Bookmark $bookmark) { diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php index 5d244d4c..f7714be9 100644 --- a/application/formatter/BookmarkMarkdownFormatter.php +++ b/application/formatter/BookmarkMarkdownFormatter.php @@ -56,7 +56,10 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter return parent::formatDescription($bookmark); } - $processedDescription = $bookmark->getDescription(); + $processedDescription = $this->tokenizeSearchHighlightField( + $bookmark->getDescription() ?? '', + $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? [] + ); $processedDescription = $this->filterProtocols($processedDescription); $processedDescription = $this->formatHashTags($processedDescription); $processedDescription = $this->reverseEscapedHtml($processedDescription); @@ -65,6 +68,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter ->setBreaksEnabled(true) ->text($processedDescription); $processedDescription = $this->sanitizeHtml($processedDescription); + $processedDescription = $this->replaceTokens($processedDescription); if (!empty($processedDescription)) { $processedDescription = '
'. $processedDescription . '
'; -- cgit v1.2.3 From 740b32b520e6b1723512c6f9b78cef6575b1725b Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Tue, 3 Nov 2020 12:38:38 +0100 Subject: Default formatter: add a setting to disable auto-linkification + update documentation + single parameter for both URL and hashtags Fixes #1094 --- application/formatter/BookmarkDefaultFormatter.php | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'application/formatter') diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php index d58a5e39..149a3eb9 100644 --- a/application/formatter/BookmarkDefaultFormatter.php +++ b/application/formatter/BookmarkDefaultFormatter.php @@ -46,8 +46,13 @@ class BookmarkDefaultFormatter extends BookmarkFormatter $bookmark->getDescription() ?? '', $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? [] ); + $description = format_description( + escape($description), + $indexUrl, + $this->conf->get('formatter_settings.autolink', true) + ); - return $this->replaceTokens(format_description(escape($description), $indexUrl)); + return $this->replaceTokens($description); } /** -- cgit v1.2.3 From b3bd8c3e8d367975980043e772f7cd78b7f96bc6 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 22 Oct 2020 16:21:03 +0200 Subject: Feature: support any tag separator So it allows to have multiple words tags. Breaking change: commas ',' are no longer a default separator. Fixes #594 --- application/formatter/BookmarkDefaultFormatter.php | 7 ++++--- application/formatter/BookmarkFormatter.php | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'application/formatter') diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php index 149a3eb9..51bea0f1 100644 --- a/application/formatter/BookmarkDefaultFormatter.php +++ b/application/formatter/BookmarkDefaultFormatter.php @@ -68,15 +68,16 @@ class BookmarkDefaultFormatter extends BookmarkFormatter */ protected function formatTagListHtml($bookmark) { + $tagsSeparator = $this->conf->get('general.tags_separator', ' '); if (empty($bookmark->getAdditionalContentEntry('search_highlight')['tags'])) { return $this->formatTagList($bookmark); } $tags = $this->tokenizeSearchHighlightField( - $bookmark->getTagsString(), + $bookmark->getTagsString($tagsSeparator), $bookmark->getAdditionalContentEntry('search_highlight')['tags'] ); - $tags = $this->filterTagList(explode(' ', $tags)); + $tags = $this->filterTagList(tags_str2array($tags, $tagsSeparator)); $tags = escape($tags); $tags = $this->replaceTokensArray($tags); @@ -88,7 +89,7 @@ class BookmarkDefaultFormatter extends BookmarkFormatter */ protected function formatTagString($bookmark) { - return implode(' ', $this->formatTagList($bookmark)); + return implode($this->conf->get('general.tags_separator'), $this->formatTagList($bookmark)); } /** diff --git a/application/formatter/BookmarkFormatter.php b/application/formatter/BookmarkFormatter.php index e1b7f705..124ce78b 100644 --- a/application/formatter/BookmarkFormatter.php +++ b/application/formatter/BookmarkFormatter.php @@ -267,7 +267,7 @@ abstract class BookmarkFormatter */ protected function formatTagString($bookmark) { - return implode(' ', $this->formatTagList($bookmark)); + return implode($this->conf->get('general.tags_separator', ' '), $this->formatTagList($bookmark)); } /** @@ -351,6 +351,7 @@ abstract class BookmarkFormatter /** * Format tag list, e.g. remove private tags if the user is not logged in. + * TODO: this method is called multiple time to format tags, the result should be cached. * * @param array $tags * -- cgit v1.2.3 From 53054b2bf6a919fd4ff9b44b6ad1986f21f488b6 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Tue, 22 Sep 2020 20:25:47 +0200 Subject: Apply PHP Code Beautifier on source code for linter automatic fixes --- application/formatter/BookmarkMarkdownFormatter.php | 12 ++++++------ application/formatter/BookmarkRawFormatter.php | 4 +++- application/formatter/FormatterFactory.php | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'application/formatter') diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php index f7714be9..052333ca 100644 --- a/application/formatter/BookmarkMarkdownFormatter.php +++ b/application/formatter/BookmarkMarkdownFormatter.php @@ -71,7 +71,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter $processedDescription = $this->replaceTokens($processedDescription); if (!empty($processedDescription)) { - $processedDescription = '
'. $processedDescription . '
'; + $processedDescription = '
' . $processedDescription . '
'; } return $processedDescription; @@ -110,7 +110,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter function ($match) use ($allowedProtocols, $indexUrl) { $link = startsWith($match[1], '?') || startsWith($match[1], '/') ? $indexUrl : ''; $link .= whitelist_protocols($match[1], $allowedProtocols); - return ']('. $link.')'; + return '](' . $link . ')'; }, $description ); @@ -137,7 +137,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter * \p{Mn} - any non marking space (accents, umlauts, etc) */ $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui'; - $replacement = '$1[#$2]('. $indexUrl .'./add-tag/$2)'; + $replacement = '$1[#$2](' . $indexUrl . './add-tag/$2)'; $descriptionLines = explode(PHP_EOL, $description); $descriptionOut = ''; @@ -178,17 +178,17 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter */ protected function sanitizeHtml($description) { - $escapeTags = array( + $escapeTags = [ 'script', 'style', 'link', 'iframe', 'frameset', 'frame', - ); + ]; foreach ($escapeTags as $tag) { $description = preg_replace_callback( - '#<\s*'. $tag .'[^>]*>(.*]*>)?#is', + '#<\s*' . $tag . '[^>]*>(.*]*>)?#is', function ($match) { return escape($match[0]); }, diff --git a/application/formatter/BookmarkRawFormatter.php b/application/formatter/BookmarkRawFormatter.php index bc372273..4ff07cdf 100644 --- a/application/formatter/BookmarkRawFormatter.php +++ b/application/formatter/BookmarkRawFormatter.php @@ -10,4 +10,6 @@ namespace Shaarli\Formatter; * * @package Shaarli\Formatter */ -class BookmarkRawFormatter extends BookmarkFormatter {} +class BookmarkRawFormatter extends BookmarkFormatter +{ +} diff --git a/application/formatter/FormatterFactory.php b/application/formatter/FormatterFactory.php index a029579f..bb865aed 100644 --- a/application/formatter/FormatterFactory.php +++ b/application/formatter/FormatterFactory.php @@ -41,7 +41,7 @@ class FormatterFactory public function getFormatter(string $type = null): BookmarkFormatter { $type = $type ? $type : $this->conf->get('formatter', 'default'); - $className = '\\Shaarli\\Formatter\\Bookmark'. ucfirst($type) .'Formatter'; + $className = '\\Shaarli\\Formatter\\Bookmark' . ucfirst($type) . 'Formatter'; if (!class_exists($className)) { $className = '\\Shaarli\\Formatter\\BookmarkDefaultFormatter'; } -- cgit v1.2.3 From b99e00f7cd5f7e2090f44cd97bfb426db55340c2 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Sun, 8 Nov 2020 15:02:45 +0100 Subject: Manually fix remaining PHPCS errors --- application/formatter/BookmarkDefaultFormatter.php | 4 ++-- application/formatter/BookmarkMarkdownFormatter.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'application/formatter') diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php index 51bea0f1..7e0afafc 100644 --- a/application/formatter/BookmarkDefaultFormatter.php +++ b/application/formatter/BookmarkDefaultFormatter.php @@ -12,8 +12,8 @@ namespace Shaarli\Formatter; */ class BookmarkDefaultFormatter extends BookmarkFormatter { - const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT'; - const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|'; + protected const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT'; + protected const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|'; /** * @inheritdoc diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php index 052333ca..ee4e8dca 100644 --- a/application/formatter/BookmarkMarkdownFormatter.php +++ b/application/formatter/BookmarkMarkdownFormatter.php @@ -16,7 +16,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter /** * When this tag is present in a bookmark, its description should not be processed with Markdown */ - const NO_MD_TAG = 'nomarkdown'; + public const NO_MD_TAG = 'nomarkdown'; /** @var \Parsedown instance */ protected $parsedown; -- cgit v1.2.3