diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-10-12 11:35:55 +0200 |
---|---|---|
committer | ArthurHoaro <arthur@hoa.ro> | 2020-10-16 20:31:12 +0200 |
commit | 4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b (patch) | |
tree | 4deb157f03ce7d5402dbfeb65743951d97e527cf /application/formatter | |
parent | 64cac2562661c55f679dba5a7c308e7764f430b5 (diff) | |
download | Shaarli-4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b.tar.gz Shaarli-4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b.tar.zst Shaarli-4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b.zip |
Feature: highlight fulltext search results
How it works:
1. when a fulltext search is made, Shaarli looks for the first
occurrence position of every term matching the search. No change here,
but we store these positions in an array, in Bookmark's additionalContent.
2. when formatting bookmarks (through BookmarkFormatter
implementation):
1. first we insert specific tokens at every search result position
2. we format the content (escape HTML, apply markdown, etc.)
3. as a last step, we replace our token with displayable span
elements
Cons: this tightens coupling between search filters and formatters
Pros: it was absolutely necessary not to perform the
search twice. This solution has close to no impact on performance.
Fixes #205
Diffstat (limited to 'application/formatter')
-rw-r--r-- | application/formatter/BookmarkDefaultFormatter.php | 132 | ||||
-rw-r--r-- | application/formatter/BookmarkFormatter.php | 79 | ||||
-rw-r--r-- | application/formatter/BookmarkMarkdownFormatter.php | 6 |
3 files changed, 203 insertions, 14 deletions
diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php index 9d4a0fa0..d58a5e39 100644 --- a/application/formatter/BookmarkDefaultFormatter.php +++ b/application/formatter/BookmarkDefaultFormatter.php | |||
@@ -12,10 +12,13 @@ namespace Shaarli\Formatter; | |||
12 | */ | 12 | */ |
13 | class BookmarkDefaultFormatter extends BookmarkFormatter | 13 | class BookmarkDefaultFormatter extends BookmarkFormatter |
14 | { | 14 | { |
15 | const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT'; | ||
16 | const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|'; | ||
17 | |||
15 | /** | 18 | /** |
16 | * @inheritdoc | 19 | * @inheritdoc |
17 | */ | 20 | */ |
18 | public function formatTitle($bookmark) | 21 | protected function formatTitle($bookmark) |
19 | { | 22 | { |
20 | return escape($bookmark->getTitle()); | 23 | return escape($bookmark->getTitle()); |
21 | } | 24 | } |
@@ -23,10 +26,28 @@ class BookmarkDefaultFormatter extends BookmarkFormatter | |||
23 | /** | 26 | /** |
24 | * @inheritdoc | 27 | * @inheritdoc |
25 | */ | 28 | */ |
26 | public function formatDescription($bookmark) | 29 | protected function formatTitleHtml($bookmark) |
30 | { | ||
31 | $title = $this->tokenizeSearchHighlightField( | ||
32 | $bookmark->getTitle() ?? '', | ||
33 | $bookmark->getAdditionalContentEntry('search_highlight')['title'] ?? [] | ||
34 | ); | ||
35 | |||
36 | return $this->replaceTokens(escape($title)); | ||
37 | } | ||
38 | |||
39 | /** | ||
40 | * @inheritdoc | ||
41 | */ | ||
42 | protected function formatDescription($bookmark) | ||
27 | { | 43 | { |
28 | $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; | 44 | $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; |
29 | return format_description(escape($bookmark->getDescription()), $indexUrl); | 45 | $description = $this->tokenizeSearchHighlightField( |
46 | $bookmark->getDescription() ?? '', | ||
47 | $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? [] | ||
48 | ); | ||
49 | |||
50 | return $this->replaceTokens(format_description(escape($description), $indexUrl)); | ||
30 | } | 51 | } |
31 | 52 | ||
32 | /** | 53 | /** |
@@ -40,7 +61,27 @@ class BookmarkDefaultFormatter extends BookmarkFormatter | |||
40 | /** | 61 | /** |
41 | * @inheritdoc | 62 | * @inheritdoc |
42 | */ | 63 | */ |
43 | public function formatTagString($bookmark) | 64 | protected function formatTagListHtml($bookmark) |
65 | { | ||
66 | if (empty($bookmark->getAdditionalContentEntry('search_highlight')['tags'])) { | ||
67 | return $this->formatTagList($bookmark); | ||
68 | } | ||
69 | |||
70 | $tags = $this->tokenizeSearchHighlightField( | ||
71 | $bookmark->getTagsString(), | ||
72 | $bookmark->getAdditionalContentEntry('search_highlight')['tags'] | ||
73 | ); | ||
74 | $tags = $this->filterTagList(explode(' ', $tags)); | ||
75 | $tags = escape($tags); | ||
76 | $tags = $this->replaceTokensArray($tags); | ||
77 | |||
78 | return $tags; | ||
79 | } | ||
80 | |||
81 | /** | ||
82 | * @inheritdoc | ||
83 | */ | ||
84 | protected function formatTagString($bookmark) | ||
44 | { | 85 | { |
45 | return implode(' ', $this->formatTagList($bookmark)); | 86 | return implode(' ', $this->formatTagList($bookmark)); |
46 | } | 87 | } |
@@ -48,7 +89,7 @@ class BookmarkDefaultFormatter extends BookmarkFormatter | |||
48 | /** | 89 | /** |
49 | * @inheritdoc | 90 | * @inheritdoc |
50 | */ | 91 | */ |
51 | public function formatUrl($bookmark) | 92 | protected function formatUrl($bookmark) |
52 | { | 93 | { |
53 | if ($bookmark->isNote() && isset($this->contextData['index_url'])) { | 94 | if ($bookmark->isNote() && isset($this->contextData['index_url'])) { |
54 | return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/')); | 95 | return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/')); |
@@ -80,8 +121,89 @@ class BookmarkDefaultFormatter extends BookmarkFormatter | |||
80 | /** | 121 | /** |
81 | * @inheritdoc | 122 | * @inheritdoc |
82 | */ | 123 | */ |
124 | protected function formatUrlHtml($bookmark) | ||
125 | { | ||
126 | $url = $this->tokenizeSearchHighlightField( | ||
127 | $bookmark->getUrl() ?? '', | ||
128 | $bookmark->getAdditionalContentEntry('search_highlight')['url'] ?? [] | ||
129 | ); | ||
130 | |||
131 | return $this->replaceTokens(escape($url)); | ||
132 | } | ||
133 | |||
134 | /** | ||
135 | * @inheritdoc | ||
136 | */ | ||
83 | protected function formatThumbnail($bookmark) | 137 | protected function formatThumbnail($bookmark) |
84 | { | 138 | { |
85 | return escape($bookmark->getThumbnail()); | 139 | return escape($bookmark->getThumbnail()); |
86 | } | 140 | } |
141 | |||
142 | /** | ||
143 | * Insert search highlight token in provided field content based on a list of search result positions | ||
144 | * | ||
145 | * @param string $fieldContent | ||
146 | * @param array|null $positions List of of search results with 'start' and 'end' positions. | ||
147 | * | ||
148 | * @return string Updated $fieldContent. | ||
149 | */ | ||
150 | protected function tokenizeSearchHighlightField(string $fieldContent, ?array $positions): string | ||
151 | { | ||
152 | if (empty($positions)) { | ||
153 | return $fieldContent; | ||
154 | } | ||
155 | |||
156 | $insertedTokens = 0; | ||
157 | $tokenLength = strlen(static::SEARCH_HIGHLIGHT_OPEN); | ||
158 | foreach ($positions as $position) { | ||
159 | $position = [ | ||
160 | 'start' => $position['start'] + ($insertedTokens * $tokenLength), | ||
161 | 'end' => $position['end'] + ($insertedTokens * $tokenLength), | ||
162 | ]; | ||
163 | |||
164 | $content = mb_substr($fieldContent, 0, $position['start']); | ||
165 | $content .= static::SEARCH_HIGHLIGHT_OPEN; | ||
166 | $content .= mb_substr($fieldContent, $position['start'], $position['end'] - $position['start']); | ||
167 | $content .= static::SEARCH_HIGHLIGHT_CLOSE; | ||
168 | $content .= mb_substr($fieldContent, $position['end']); | ||
169 | |||
170 | $fieldContent = $content; | ||
171 | |||
172 | $insertedTokens += 2; | ||
173 | } | ||
174 | |||
175 | return $fieldContent; | ||
176 | } | ||
177 | |||
178 | /** | ||
179 | * Replace search highlight tokens with HTML highlighted span. | ||
180 | * | ||
181 | * @param string $fieldContent | ||
182 | * | ||
183 | * @return string updated content. | ||
184 | */ | ||
185 | protected function replaceTokens(string $fieldContent): string | ||
186 | { | ||
187 | return str_replace( | ||
188 | [static::SEARCH_HIGHLIGHT_OPEN, static::SEARCH_HIGHLIGHT_CLOSE], | ||
189 | ['<span class="search-highlight">', '</span>'], | ||
190 | $fieldContent | ||
191 | ); | ||
192 | } | ||
193 | |||
194 | /** | ||
195 | * Apply replaceTokens to an array of content strings. | ||
196 | * | ||
197 | * @param string[] $fieldContents | ||
198 | * | ||
199 | * @return array | ||
200 | */ | ||
201 | protected function replaceTokensArray(array $fieldContents): array | ||
202 | { | ||
203 | foreach ($fieldContents as &$entry) { | ||
204 | $entry = $this->replaceTokens($entry); | ||
205 | } | ||
206 | |||
207 | return $fieldContents; | ||
208 | } | ||
87 | } | 209 | } |
diff --git a/application/formatter/BookmarkFormatter.php b/application/formatter/BookmarkFormatter.php index 0042dafe..e1b7f705 100644 --- a/application/formatter/BookmarkFormatter.php +++ b/application/formatter/BookmarkFormatter.php | |||
@@ -2,7 +2,7 @@ | |||
2 | 2 | ||
3 | namespace Shaarli\Formatter; | 3 | namespace Shaarli\Formatter; |
4 | 4 | ||
5 | use DateTime; | 5 | use DateTimeInterface; |
6 | use Shaarli\Bookmark\Bookmark; | 6 | use Shaarli\Bookmark\Bookmark; |
7 | use Shaarli\Config\ConfigManager; | 7 | use Shaarli\Config\ConfigManager; |
8 | 8 | ||
@@ -11,6 +11,29 @@ use Shaarli\Config\ConfigManager; | |||
11 | * | 11 | * |
12 | * Abstract class processing all bookmark attributes through methods designed to be overridden. | 12 | * Abstract class processing all bookmark attributes through methods designed to be overridden. |
13 | * | 13 | * |
14 | * List of available formatted fields: | ||
15 | * - id ID | ||
16 | * - shorturl Unique identifier, used in permalinks | ||
17 | * - url URL, can be altered in some way, e.g. passing through an HTTP reverse proxy | ||
18 | * - real_url (legacy) same as `url` | ||
19 | * - url_html URL to be displayed in HTML content (it can contain HTML tags) | ||
20 | * - title Title | ||
21 | * - title_html Title to be displayed in HTML content (it can contain HTML tags) | ||
22 | * - description Description content. It most likely contains HTML tags | ||
23 | * - thumbnail Thumbnail: path to local cache file, false if there is none, null if hasn't been retrieved | ||
24 | * - taglist List of tags (array) | ||
25 | * - taglist_urlencoded List of tags (array) URL encoded: it must be used to create a link to a URL containing a tag | ||
26 | * - taglist_html List of tags (array) to be displayed in HTML content (it can contain HTML tags) | ||
27 | * - tags Tags separated by a single whitespace | ||
28 | * - tags_urlencoded Tags separated by a single whitespace, URL encoded: must be used to create a link | ||
29 | * - sticky Is sticky (bool) | ||
30 | * - private Is private (bool) | ||
31 | * - class Additional CSS class | ||
32 | * - created Creation DateTime | ||
33 | * - updated Last edit DateTime | ||
34 | * - timestamp Creation timestamp | ||
35 | * - updated_timestamp Last edit timestamp | ||
36 | * | ||
14 | * @package Shaarli\Formatter | 37 | * @package Shaarli\Formatter |
15 | */ | 38 | */ |
16 | abstract class BookmarkFormatter | 39 | abstract class BookmarkFormatter |
@@ -55,13 +78,16 @@ abstract class BookmarkFormatter | |||
55 | $out['shorturl'] = $this->formatShortUrl($bookmark); | 78 | $out['shorturl'] = $this->formatShortUrl($bookmark); |
56 | $out['url'] = $this->formatUrl($bookmark); | 79 | $out['url'] = $this->formatUrl($bookmark); |
57 | $out['real_url'] = $this->formatRealUrl($bookmark); | 80 | $out['real_url'] = $this->formatRealUrl($bookmark); |
81 | $out['url_html'] = $this->formatUrlHtml($bookmark); | ||
58 | $out['title'] = $this->formatTitle($bookmark); | 82 | $out['title'] = $this->formatTitle($bookmark); |
83 | $out['title_html'] = $this->formatTitleHtml($bookmark); | ||
59 | $out['description'] = $this->formatDescription($bookmark); | 84 | $out['description'] = $this->formatDescription($bookmark); |
60 | $out['thumbnail'] = $this->formatThumbnail($bookmark); | 85 | $out['thumbnail'] = $this->formatThumbnail($bookmark); |
61 | $out['urlencoded_taglist'] = $this->formatUrlEncodedTagList($bookmark); | ||
62 | $out['taglist'] = $this->formatTagList($bookmark); | 86 | $out['taglist'] = $this->formatTagList($bookmark); |
63 | $out['urlencoded_tags'] = $this->formatUrlEncodedTagString($bookmark); | 87 | $out['taglist_urlencoded'] = $this->formatTagListUrlEncoded($bookmark); |
88 | $out['taglist_html'] = $this->formatTagListHtml($bookmark); | ||
64 | $out['tags'] = $this->formatTagString($bookmark); | 89 | $out['tags'] = $this->formatTagString($bookmark); |
90 | $out['tags_urlencoded'] = $this->formatTagStringUrlEncoded($bookmark); | ||
65 | $out['sticky'] = $bookmark->isSticky(); | 91 | $out['sticky'] = $bookmark->isSticky(); |
66 | $out['private'] = $bookmark->isPrivate(); | 92 | $out['private'] = $bookmark->isPrivate(); |
67 | $out['class'] = $this->formatClass($bookmark); | 93 | $out['class'] = $this->formatClass($bookmark); |
@@ -69,6 +95,7 @@ abstract class BookmarkFormatter | |||
69 | $out['updated'] = $this->formatUpdated($bookmark); | 95 | $out['updated'] = $this->formatUpdated($bookmark); |
70 | $out['timestamp'] = $this->formatCreatedTimestamp($bookmark); | 96 | $out['timestamp'] = $this->formatCreatedTimestamp($bookmark); |
71 | $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark); | 97 | $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark); |
98 | |||
72 | return $out; | 99 | return $out; |
73 | } | 100 | } |
74 | 101 | ||
@@ -136,6 +163,18 @@ abstract class BookmarkFormatter | |||
136 | } | 163 | } |
137 | 164 | ||
138 | /** | 165 | /** |
166 | * Format Url Html: to be displayed in HTML content, it can contains HTML tags. | ||
167 | * | ||
168 | * @param Bookmark $bookmark instance | ||
169 | * | ||
170 | * @return string formatted Url HTML | ||
171 | */ | ||
172 | protected function formatUrlHtml($bookmark) | ||
173 | { | ||
174 | return $this->formatUrl($bookmark); | ||
175 | } | ||
176 | |||
177 | /** | ||
139 | * Format Title | 178 | * Format Title |
140 | * | 179 | * |
141 | * @param Bookmark $bookmark instance | 180 | * @param Bookmark $bookmark instance |
@@ -148,6 +187,18 @@ abstract class BookmarkFormatter | |||
148 | } | 187 | } |
149 | 188 | ||
150 | /** | 189 | /** |
190 | * Format Title HTML: to be displayed in HTML content, it can contains HTML tags. | ||
191 | * | ||
192 | * @param Bookmark $bookmark instance | ||
193 | * | ||
194 | * @return string formatted Title | ||
195 | */ | ||
196 | protected function formatTitleHtml($bookmark) | ||
197 | { | ||
198 | return $bookmark->getTitle(); | ||
199 | } | ||
200 | |||
201 | /** | ||
151 | * Format Description | 202 | * Format Description |
152 | * | 203 | * |
153 | * @param Bookmark $bookmark instance | 204 | * @param Bookmark $bookmark instance |
@@ -190,12 +241,24 @@ abstract class BookmarkFormatter | |||
190 | * | 241 | * |
191 | * @return array formatted Tags | 242 | * @return array formatted Tags |
192 | */ | 243 | */ |
193 | protected function formatUrlEncodedTagList($bookmark) | 244 | protected function formatTagListUrlEncoded($bookmark) |
194 | { | 245 | { |
195 | return array_map('urlencode', $this->filterTagList($bookmark->getTags())); | 246 | return array_map('urlencode', $this->filterTagList($bookmark->getTags())); |
196 | } | 247 | } |
197 | 248 | ||
198 | /** | 249 | /** |
250 | * Format Tags HTML: to be displayed in HTML content, it can contains HTML tags. | ||
251 | * | ||
252 | * @param Bookmark $bookmark instance | ||
253 | * | ||
254 | * @return array formatted Tags | ||
255 | */ | ||
256 | protected function formatTagListHtml($bookmark) | ||
257 | { | ||
258 | return $this->formatTagList($bookmark); | ||
259 | } | ||
260 | |||
261 | /** | ||
199 | * Format TagString | 262 | * Format TagString |
200 | * | 263 | * |
201 | * @param Bookmark $bookmark instance | 264 | * @param Bookmark $bookmark instance |
@@ -214,9 +277,9 @@ abstract class BookmarkFormatter | |||
214 | * | 277 | * |
215 | * @return string formatted TagString | 278 | * @return string formatted TagString |
216 | */ | 279 | */ |
217 | protected function formatUrlEncodedTagString($bookmark) | 280 | protected function formatTagStringUrlEncoded($bookmark) |
218 | { | 281 | { |
219 | return implode(' ', $this->formatUrlEncodedTagList($bookmark)); | 282 | return implode(' ', $this->formatTagListUrlEncoded($bookmark)); |
220 | } | 283 | } |
221 | 284 | ||
222 | /** | 285 | /** |
@@ -237,7 +300,7 @@ abstract class BookmarkFormatter | |||
237 | * | 300 | * |
238 | * @param Bookmark $bookmark instance | 301 | * @param Bookmark $bookmark instance |
239 | * | 302 | * |
240 | * @return DateTime instance | 303 | * @return DateTimeInterface instance |
241 | */ | 304 | */ |
242 | protected function formatCreated(Bookmark $bookmark) | 305 | protected function formatCreated(Bookmark $bookmark) |
243 | { | 306 | { |
@@ -249,7 +312,7 @@ abstract class BookmarkFormatter | |||
249 | * | 312 | * |
250 | * @param Bookmark $bookmark instance | 313 | * @param Bookmark $bookmark instance |
251 | * | 314 | * |
252 | * @return DateTime instance | 315 | * @return DateTimeInterface instance |
253 | */ | 316 | */ |
254 | protected function formatUpdated(Bookmark $bookmark) | 317 | protected function formatUpdated(Bookmark $bookmark) |
255 | { | 318 | { |
diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php index 5d244d4c..f7714be9 100644 --- a/application/formatter/BookmarkMarkdownFormatter.php +++ b/application/formatter/BookmarkMarkdownFormatter.php | |||
@@ -56,7 +56,10 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter | |||
56 | return parent::formatDescription($bookmark); | 56 | return parent::formatDescription($bookmark); |
57 | } | 57 | } |
58 | 58 | ||
59 | $processedDescription = $bookmark->getDescription(); | 59 | $processedDescription = $this->tokenizeSearchHighlightField( |
60 | $bookmark->getDescription() ?? '', | ||
61 | $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? [] | ||
62 | ); | ||
60 | $processedDescription = $this->filterProtocols($processedDescription); | 63 | $processedDescription = $this->filterProtocols($processedDescription); |
61 | $processedDescription = $this->formatHashTags($processedDescription); | 64 | $processedDescription = $this->formatHashTags($processedDescription); |
62 | $processedDescription = $this->reverseEscapedHtml($processedDescription); | 65 | $processedDescription = $this->reverseEscapedHtml($processedDescription); |
@@ -65,6 +68,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter | |||
65 | ->setBreaksEnabled(true) | 68 | ->setBreaksEnabled(true) |
66 | ->text($processedDescription); | 69 | ->text($processedDescription); |
67 | $processedDescription = $this->sanitizeHtml($processedDescription); | 70 | $processedDescription = $this->sanitizeHtml($processedDescription); |
71 | $processedDescription = $this->replaceTokens($processedDescription); | ||
68 | 72 | ||
69 | if (!empty($processedDescription)) { | 73 | if (!empty($processedDescription)) { |
70 | $processedDescription = '<div class="markdown">'. $processedDescription . '</div>'; | 74 | $processedDescription = '<div class="markdown">'. $processedDescription . '</div>'; |