aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/formatter
diff options
context:
space:
mode:
authorArthurHoaro <arthur@hoa.ro>2020-10-12 11:35:55 +0200
committerArthurHoaro <arthur@hoa.ro>2020-10-16 20:31:12 +0200
commit4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b (patch)
tree4deb157f03ce7d5402dbfeb65743951d97e527cf /application/formatter
parent64cac2562661c55f679dba5a7c308e7764f430b5 (diff)
downloadShaarli-4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b.tar.gz
Shaarli-4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b.tar.zst
Shaarli-4e3875c0ce7f3b17e3d358dc5ecb1f8bed64546b.zip
Feature: highlight fulltext search results
How it works:
1. When a fulltext search is made, Shaarli looks for the first occurrence position of every term matching the search. No change here, but we now store these positions in an array, in the Bookmark's additionalContent.
2. When formatting bookmarks (through a BookmarkFormatter implementation):
   1. first we insert specific tokens at every search result position
   2. then we format the content (escape HTML, apply markdown, etc.)
   3. as a last step, we replace our tokens with displayable span elements
Cons: this tightens coupling between search filters and formatters.
Pros: it was absolutely necessary not to perform the search twice, and this solution has close to no impact on performance.
Fixes #205
Diffstat (limited to 'application/formatter')
-rw-r--r--application/formatter/BookmarkDefaultFormatter.php132
-rw-r--r--application/formatter/BookmarkFormatter.php79
-rw-r--r--application/formatter/BookmarkMarkdownFormatter.php6
3 files changed, 203 insertions, 14 deletions
diff --git a/application/formatter/BookmarkDefaultFormatter.php b/application/formatter/BookmarkDefaultFormatter.php
index 9d4a0fa0..d58a5e39 100644
--- a/application/formatter/BookmarkDefaultFormatter.php
+++ b/application/formatter/BookmarkDefaultFormatter.php
@@ -12,10 +12,13 @@ namespace Shaarli\Formatter;
12 */ 12 */
13class BookmarkDefaultFormatter extends BookmarkFormatter 13class BookmarkDefaultFormatter extends BookmarkFormatter
14{ 14{
15 const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT';
16 const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|';
17
15 /** 18 /**
16 * @inheritdoc 19 * @inheritdoc
17 */ 20 */
18 public function formatTitle($bookmark) 21 protected function formatTitle($bookmark)
19 { 22 {
20 return escape($bookmark->getTitle()); 23 return escape($bookmark->getTitle());
21 } 24 }
@@ -23,10 +26,28 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
23 /** 26 /**
24 * @inheritdoc 27 * @inheritdoc
25 */ 28 */
26 public function formatDescription($bookmark) 29 protected function formatTitleHtml($bookmark)
30 {
31 $title = $this->tokenizeSearchHighlightField(
32 $bookmark->getTitle() ?? '',
33 $bookmark->getAdditionalContentEntry('search_highlight')['title'] ?? []
34 );
35
36 return $this->replaceTokens(escape($title));
37 }
38
39 /**
40 * @inheritdoc
41 */
42 protected function formatDescription($bookmark)
27 { 43 {
28 $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : ''; 44 $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
29 return format_description(escape($bookmark->getDescription()), $indexUrl); 45 $description = $this->tokenizeSearchHighlightField(
46 $bookmark->getDescription() ?? '',
47 $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
48 );
49
50 return $this->replaceTokens(format_description(escape($description), $indexUrl));
30 } 51 }
31 52
32 /** 53 /**
@@ -40,7 +61,27 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
40 /** 61 /**
41 * @inheritdoc 62 * @inheritdoc
42 */ 63 */
43 public function formatTagString($bookmark) 64 protected function formatTagListHtml($bookmark)
65 {
66 if (empty($bookmark->getAdditionalContentEntry('search_highlight')['tags'])) {
67 return $this->formatTagList($bookmark);
68 }
69
70 $tags = $this->tokenizeSearchHighlightField(
71 $bookmark->getTagsString(),
72 $bookmark->getAdditionalContentEntry('search_highlight')['tags']
73 );
74 $tags = $this->filterTagList(explode(' ', $tags));
75 $tags = escape($tags);
76 $tags = $this->replaceTokensArray($tags);
77
78 return $tags;
79 }
80
81 /**
82 * @inheritdoc
83 */
84 protected function formatTagString($bookmark)
44 { 85 {
45 return implode(' ', $this->formatTagList($bookmark)); 86 return implode(' ', $this->formatTagList($bookmark));
46 } 87 }
@@ -48,7 +89,7 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
48 /** 89 /**
49 * @inheritdoc 90 * @inheritdoc
50 */ 91 */
51 public function formatUrl($bookmark) 92 protected function formatUrl($bookmark)
52 { 93 {
53 if ($bookmark->isNote() && isset($this->contextData['index_url'])) { 94 if ($bookmark->isNote() && isset($this->contextData['index_url'])) {
54 return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/')); 95 return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/'));
@@ -80,8 +121,89 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
80 /** 121 /**
81 * @inheritdoc 122 * @inheritdoc
82 */ 123 */
124 protected function formatUrlHtml($bookmark)
125 {
126 $url = $this->tokenizeSearchHighlightField(
127 $bookmark->getUrl() ?? '',
128 $bookmark->getAdditionalContentEntry('search_highlight')['url'] ?? []
129 );
130
131 return $this->replaceTokens(escape($url));
132 }
133
134 /**
135 * @inheritdoc
136 */
83 protected function formatThumbnail($bookmark) 137 protected function formatThumbnail($bookmark)
84 { 138 {
85 return escape($bookmark->getThumbnail()); 139 return escape($bookmark->getThumbnail());
86 } 140 }
141
142 /**
143 * Insert search highlight token in provided field content based on a list of search result positions
144 *
145 * @param string $fieldContent
146 * @param array|null $positions List of search results with 'start' and 'end' positions.
147 *
148 * @return string Updated $fieldContent.
149 */
150 protected function tokenizeSearchHighlightField(string $fieldContent, ?array $positions): string
151 {
152 if (empty($positions)) {
153 return $fieldContent;
154 }
155
156 $insertedTokens = 0;
157 $tokenLength = strlen(static::SEARCH_HIGHLIGHT_OPEN);
158 foreach ($positions as $position) {
159 $position = [
160 'start' => $position['start'] + ($insertedTokens * $tokenLength),
161 'end' => $position['end'] + ($insertedTokens * $tokenLength),
162 ];
163
164 $content = mb_substr($fieldContent, 0, $position['start']);
165 $content .= static::SEARCH_HIGHLIGHT_OPEN;
166 $content .= mb_substr($fieldContent, $position['start'], $position['end'] - $position['start']);
167 $content .= static::SEARCH_HIGHLIGHT_CLOSE;
168 $content .= mb_substr($fieldContent, $position['end']);
169
170 $fieldContent = $content;
171
172 $insertedTokens += 2;
173 }
174
175 return $fieldContent;
176 }
177
178 /**
179 * Replace search highlight tokens with HTML highlighted span.
180 *
181 * @param string $fieldContent
182 *
183 * @return string updated content.
184 */
185 protected function replaceTokens(string $fieldContent): string
186 {
187 return str_replace(
188 [static::SEARCH_HIGHLIGHT_OPEN, static::SEARCH_HIGHLIGHT_CLOSE],
189 ['<span class="search-highlight">', '</span>'],
190 $fieldContent
191 );
192 }
193
194 /**
195 * Apply replaceTokens to an array of content strings.
196 *
197 * @param string[] $fieldContents
198 *
199 * @return array
200 */
201 protected function replaceTokensArray(array $fieldContents): array
202 {
203 foreach ($fieldContents as &$entry) {
204 $entry = $this->replaceTokens($entry);
205 }
206
207 return $fieldContents;
208 }
87} 209}
diff --git a/application/formatter/BookmarkFormatter.php b/application/formatter/BookmarkFormatter.php
index 0042dafe..e1b7f705 100644
--- a/application/formatter/BookmarkFormatter.php
+++ b/application/formatter/BookmarkFormatter.php
@@ -2,7 +2,7 @@
2 2
3namespace Shaarli\Formatter; 3namespace Shaarli\Formatter;
4 4
5use DateTime; 5use DateTimeInterface;
6use Shaarli\Bookmark\Bookmark; 6use Shaarli\Bookmark\Bookmark;
7use Shaarli\Config\ConfigManager; 7use Shaarli\Config\ConfigManager;
8 8
@@ -11,6 +11,29 @@ use Shaarli\Config\ConfigManager;
11 * 11 *
12 * Abstract class processing all bookmark attributes through methods designed to be overridden. 12 * Abstract class processing all bookmark attributes through methods designed to be overridden.
13 * 13 *
14 * List of available formatted fields:
15 * - id ID
16 * - shorturl Unique identifier, used in permalinks
17 * - url URL, can be altered in some way, e.g. passing through an HTTP reverse proxy
18 * - real_url (legacy) same as `url`
19 * - url_html URL to be displayed in HTML content (it can contain HTML tags)
20 * - title Title
21 * - title_html Title to be displayed in HTML content (it can contain HTML tags)
22 * - description Description content. It most likely contains HTML tags
23 * - thumbnail Thumbnail: path to local cache file, false if there is none, null if hasn't been retrieved
24 * - taglist List of tags (array)
25 * - taglist_urlencoded List of tags (array) URL encoded: it must be used to create a link to a URL containing a tag
26 * - taglist_html List of tags (array) to be displayed in HTML content (it can contain HTML tags)
27 * - tags Tags separated by a single whitespace
28 * - tags_urlencoded Tags separated by a single whitespace, URL encoded: must be used to create a link
29 * - sticky Is sticky (bool)
30 * - private Is private (bool)
31 * - class Additional CSS class
32 * - created Creation DateTime
33 * - updated Last edit DateTime
34 * - timestamp Creation timestamp
35 * - updated_timestamp Last edit timestamp
36 *
14 * @package Shaarli\Formatter 37 * @package Shaarli\Formatter
15 */ 38 */
16abstract class BookmarkFormatter 39abstract class BookmarkFormatter
@@ -55,13 +78,16 @@ abstract class BookmarkFormatter
55 $out['shorturl'] = $this->formatShortUrl($bookmark); 78 $out['shorturl'] = $this->formatShortUrl($bookmark);
56 $out['url'] = $this->formatUrl($bookmark); 79 $out['url'] = $this->formatUrl($bookmark);
57 $out['real_url'] = $this->formatRealUrl($bookmark); 80 $out['real_url'] = $this->formatRealUrl($bookmark);
81 $out['url_html'] = $this->formatUrlHtml($bookmark);
58 $out['title'] = $this->formatTitle($bookmark); 82 $out['title'] = $this->formatTitle($bookmark);
83 $out['title_html'] = $this->formatTitleHtml($bookmark);
59 $out['description'] = $this->formatDescription($bookmark); 84 $out['description'] = $this->formatDescription($bookmark);
60 $out['thumbnail'] = $this->formatThumbnail($bookmark); 85 $out['thumbnail'] = $this->formatThumbnail($bookmark);
61 $out['urlencoded_taglist'] = $this->formatUrlEncodedTagList($bookmark);
62 $out['taglist'] = $this->formatTagList($bookmark); 86 $out['taglist'] = $this->formatTagList($bookmark);
63 $out['urlencoded_tags'] = $this->formatUrlEncodedTagString($bookmark); 87 $out['taglist_urlencoded'] = $this->formatTagListUrlEncoded($bookmark);
88 $out['taglist_html'] = $this->formatTagListHtml($bookmark);
64 $out['tags'] = $this->formatTagString($bookmark); 89 $out['tags'] = $this->formatTagString($bookmark);
90 $out['tags_urlencoded'] = $this->formatTagStringUrlEncoded($bookmark);
65 $out['sticky'] = $bookmark->isSticky(); 91 $out['sticky'] = $bookmark->isSticky();
66 $out['private'] = $bookmark->isPrivate(); 92 $out['private'] = $bookmark->isPrivate();
67 $out['class'] = $this->formatClass($bookmark); 93 $out['class'] = $this->formatClass($bookmark);
@@ -69,6 +95,7 @@ abstract class BookmarkFormatter
69 $out['updated'] = $this->formatUpdated($bookmark); 95 $out['updated'] = $this->formatUpdated($bookmark);
70 $out['timestamp'] = $this->formatCreatedTimestamp($bookmark); 96 $out['timestamp'] = $this->formatCreatedTimestamp($bookmark);
71 $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark); 97 $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark);
98
72 return $out; 99 return $out;
73 } 100 }
74 101
@@ -136,6 +163,18 @@ abstract class BookmarkFormatter
136 } 163 }
137 164
138 /** 165 /**
166 * Format Url Html: to be displayed in HTML content, it can contain HTML tags.
167 *
168 * @param Bookmark $bookmark instance
169 *
170 * @return string formatted Url HTML
171 */
172 protected function formatUrlHtml($bookmark)
173 {
174 return $this->formatUrl($bookmark);
175 }
176
177 /**
139 * Format Title 178 * Format Title
140 * 179 *
141 * @param Bookmark $bookmark instance 180 * @param Bookmark $bookmark instance
@@ -148,6 +187,18 @@ abstract class BookmarkFormatter
148 } 187 }
149 188
150 /** 189 /**
190 * Format Title HTML: to be displayed in HTML content, it can contain HTML tags.
191 *
192 * @param Bookmark $bookmark instance
193 *
194 * @return string formatted Title
195 */
196 protected function formatTitleHtml($bookmark)
197 {
198 return $bookmark->getTitle();
199 }
200
201 /**
151 * Format Description 202 * Format Description
152 * 203 *
153 * @param Bookmark $bookmark instance 204 * @param Bookmark $bookmark instance
@@ -190,12 +241,24 @@ abstract class BookmarkFormatter
190 * 241 *
191 * @return array formatted Tags 242 * @return array formatted Tags
192 */ 243 */
193 protected function formatUrlEncodedTagList($bookmark) 244 protected function formatTagListUrlEncoded($bookmark)
194 { 245 {
195 return array_map('urlencode', $this->filterTagList($bookmark->getTags())); 246 return array_map('urlencode', $this->filterTagList($bookmark->getTags()));
196 } 247 }
197 248
198 /** 249 /**
250 * Format Tags HTML: to be displayed in HTML content, it can contain HTML tags.
251 *
252 * @param Bookmark $bookmark instance
253 *
254 * @return array formatted Tags
255 */
256 protected function formatTagListHtml($bookmark)
257 {
258 return $this->formatTagList($bookmark);
259 }
260
261 /**
199 * Format TagString 262 * Format TagString
200 * 263 *
201 * @param Bookmark $bookmark instance 264 * @param Bookmark $bookmark instance
@@ -214,9 +277,9 @@ abstract class BookmarkFormatter
214 * 277 *
215 * @return string formatted TagString 278 * @return string formatted TagString
216 */ 279 */
217 protected function formatUrlEncodedTagString($bookmark) 280 protected function formatTagStringUrlEncoded($bookmark)
218 { 281 {
219 return implode(' ', $this->formatUrlEncodedTagList($bookmark)); 282 return implode(' ', $this->formatTagListUrlEncoded($bookmark));
220 } 283 }
221 284
222 /** 285 /**
@@ -237,7 +300,7 @@ abstract class BookmarkFormatter
237 * 300 *
238 * @param Bookmark $bookmark instance 301 * @param Bookmark $bookmark instance
239 * 302 *
240 * @return DateTime instance 303 * @return DateTimeInterface instance
241 */ 304 */
242 protected function formatCreated(Bookmark $bookmark) 305 protected function formatCreated(Bookmark $bookmark)
243 { 306 {
@@ -249,7 +312,7 @@ abstract class BookmarkFormatter
249 * 312 *
250 * @param Bookmark $bookmark instance 313 * @param Bookmark $bookmark instance
251 * 314 *
252 * @return DateTime instance 315 * @return DateTimeInterface instance
253 */ 316 */
254 protected function formatUpdated(Bookmark $bookmark) 317 protected function formatUpdated(Bookmark $bookmark)
255 { 318 {
diff --git a/application/formatter/BookmarkMarkdownFormatter.php b/application/formatter/BookmarkMarkdownFormatter.php
index 5d244d4c..f7714be9 100644
--- a/application/formatter/BookmarkMarkdownFormatter.php
+++ b/application/formatter/BookmarkMarkdownFormatter.php
@@ -56,7 +56,10 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
56 return parent::formatDescription($bookmark); 56 return parent::formatDescription($bookmark);
57 } 57 }
58 58
59 $processedDescription = $bookmark->getDescription(); 59 $processedDescription = $this->tokenizeSearchHighlightField(
60 $bookmark->getDescription() ?? '',
61 $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
62 );
60 $processedDescription = $this->filterProtocols($processedDescription); 63 $processedDescription = $this->filterProtocols($processedDescription);
61 $processedDescription = $this->formatHashTags($processedDescription); 64 $processedDescription = $this->formatHashTags($processedDescription);
62 $processedDescription = $this->reverseEscapedHtml($processedDescription); 65 $processedDescription = $this->reverseEscapedHtml($processedDescription);
@@ -65,6 +68,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
65 ->setBreaksEnabled(true) 68 ->setBreaksEnabled(true)
66 ->text($processedDescription); 69 ->text($processedDescription);
67 $processedDescription = $this->sanitizeHtml($processedDescription); 70 $processedDescription = $this->sanitizeHtml($processedDescription);
71 $processedDescription = $this->replaceTokens($processedDescription);
68 72
69 if (!empty($processedDescription)) { 73 if (!empty($processedDescription)) {
70 $processedDescription = '<div class="markdown">'. $processedDescription . '</div>'; 74 $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';