diff options
Diffstat (limited to 'application/bookmark')
-rw-r--r-- | application/bookmark/Bookmark.php | 46 | ||||
-rw-r--r-- | application/bookmark/BookmarkFilter.php | 111 |
2 files changed, 139 insertions, 18 deletions
diff --git a/application/bookmark/Bookmark.php b/application/bookmark/Bookmark.php index fa45d2fc..ea565d1f 100644 --- a/application/bookmark/Bookmark.php +++ b/application/bookmark/Bookmark.php | |||
@@ -54,6 +54,9 @@ class Bookmark | |||
54 | /** @var bool True if the bookmark can only be seen while logged in */ | 54 | /** @var bool True if the bookmark can only be seen while logged in */ |
55 | protected $private; | 55 | protected $private; |
56 | 56 | ||
57 | /** @var mixed[] Available to store any additional content for a bookmark. Currently used for search highlight. */ | ||
58 | protected $additionalContent = []; | ||
59 | |||
57 | /** | 60 | /** |
58 | * Initialize a link from array data. Especially useful to create a Bookmark from former link storage format. | 61 | * Initialize a link from array data. Especially useful to create a Bookmark from former link storage format. |
59 | * | 62 | * |
@@ -95,6 +98,8 @@ class Bookmark | |||
95 | * - the URL with the permalink | 98 | * - the URL with the permalink |
96 | * - the title with the URL | 99 | * - the title with the URL |
97 | * | 100 | * |
101 | * Also make sure that we do not save search highlights in the datastore. | ||
102 | * | ||
98 | * @throws InvalidBookmarkException | 103 | * @throws InvalidBookmarkException |
99 | */ | 104 | */ |
100 | public function validate(): void | 105 | public function validate(): void |
@@ -112,6 +117,9 @@ class Bookmark | |||
112 | if (empty($this->title)) { | 117 | if (empty($this->title)) { |
113 | $this->title = $this->url; | 118 | $this->title = $this->url; |
114 | } | 119 | } |
120 | if (array_key_exists('search_highlight', $this->additionalContent)) { | ||
121 | unset($this->additionalContent['search_highlight']); | ||
122 | } | ||
115 | } | 123 | } |
116 | 124 | ||
117 | /** | 125 | /** |
@@ -436,6 +444,44 @@ class Bookmark | |||
436 | } | 444 | } |
437 | 445 | ||
/**
 * Return every additional content entry currently attached to this bookmark.
 *
 * @return mixed[] The complete additionalContent array.
 */
public function getAdditionalContent(): array
{
    return $this->additionalContent;
}
455 | |||
/**
 * Store one value in additionalContent under the given key,
 * overwriting whatever was previously stored under that key.
 *
 * @param string     $key   Name of the entry to write.
 * @param mixed|null $value Value to store; no restriction on its type.
 *
 * @return $this The current instance, so calls can be chained.
 */
public function addAdditionalContentEntry(string $key, $value): self
{
    $this->additionalContent[$key] = $value;

    return $this;
}
470 | |||
/**
 * Look up one additionalContent entry by its key.
 *
 * @param string     $key     Name of the entry to read.
 * @param mixed|null $default Returned when no entry exists under $key.
 *
 * @return mixed|null The stored value (which may itself be null), or $default when the key is absent.
 */
public function getAdditionalContentEntry(string $key, $default = null)
{
    // array_key_exists (not isset/??) so that an entry explicitly set to null is returned as null.
    if (!array_key_exists($key, $this->additionalContent)) {
        return $default;
    }

    return $this->additionalContent[$key];
}
483 | |||
484 | /** | ||
439 | * Rename a tag in tags list. | 485 | * Rename a tag in tags list. |
440 | * | 486 | * |
441 | * @param string $fromTag | 487 | * @param string $fromTag |
diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php index 4232f114..c79386ea 100644 --- a/application/bookmark/BookmarkFilter.php +++ b/application/bookmark/BookmarkFilter.php | |||
@@ -201,7 +201,7 @@ class BookmarkFilter | |||
201 | return $this->noFilter($visibility); | 201 | return $this->noFilter($visibility); |
202 | } | 202 | } |
203 | 203 | ||
204 | $filtered = array(); | 204 | $filtered = []; |
205 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); | 205 | $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); |
206 | $exactRegex = '/"([^"]+)"/'; | 206 | $exactRegex = '/"([^"]+)"/'; |
207 | // Retrieve exact search terms. | 207 | // Retrieve exact search terms. |
@@ -213,8 +213,8 @@ class BookmarkFilter | |||
213 | $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); | 213 | $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); |
214 | 214 | ||
215 | // Filter excluding terms and update andSearch. | 215 | // Filter excluding terms and update andSearch. |
216 | $excludeSearch = array(); | 216 | $excludeSearch = []; |
217 | $andSearch = array(); | 217 | $andSearch = []; |
218 | foreach ($explodedSearchAnd as $needle) { | 218 | foreach ($explodedSearchAnd as $needle) { |
219 | if ($needle[0] == '-' && strlen($needle) > 1) { | 219 | if ($needle[0] == '-' && strlen($needle) > 1) { |
220 | $excludeSearch[] = substr($needle, 1); | 220 | $excludeSearch[] = substr($needle, 1); |
@@ -234,33 +234,38 @@ class BookmarkFilter | |||
234 | } | 234 | } |
235 | } | 235 | } |
236 | 236 | ||
237 | // Concatenate link fields to search across fields. | 237 | $lengths = []; |
238 | // Adds a '\' separator for exact search terms. | 238 | $content = $this->buildFullTextSearchableLink($link, $lengths); |
239 | $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\'; | ||
240 | $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\'; | ||
241 | $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\'; | ||
242 | $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\'; | ||
243 | 239 | ||
244 | // Be optimistic | 240 | // Be optimistic |
245 | $found = true; | 241 | $found = true; |
242 | $foundPositions = []; | ||
246 | 243 | ||
247 | // First, we look for exact term search | 244 | // First, we look for exact term search |
248 | for ($i = 0; $i < count($exactSearch) && $found; $i++) { | 245 | // Then iterate over keywords, if keyword is not found, |
249 | $found = strpos($content, $exactSearch[$i]) !== false; | ||
250 | } | ||
251 | |||
252 | // Iterate over keywords, if keyword is not found, | ||
253 | // no need to check for the others. We want all or nothing. | 246 | // no need to check for the others. We want all or nothing. |
254 | for ($i = 0; $i < count($andSearch) && $found; $i++) { | 247 | foreach ([$exactSearch, $andSearch] as $search) { |
255 | $found = strpos($content, $andSearch[$i]) !== false; | 248 | for ($i = 0; $i < count($search) && $found !== false; $i++) { |
249 | $found = mb_strpos($content, $search[$i]); | ||
250 | if ($found === false) { | ||
251 | break; | ||
252 | } | ||
253 | |||
254 | $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])]; | ||
255 | } | ||
256 | } | 256 | } |
257 | 257 | ||
258 | // Exclude terms. | 258 | // Exclude terms. |
259 | for ($i = 0; $i < count($excludeSearch) && $found; $i++) { | 259 | for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) { |
260 | $found = strpos($content, $excludeSearch[$i]) === false; | 260 | $found = strpos($content, $excludeSearch[$i]) === false; |
261 | } | 261 | } |
262 | 262 | ||
263 | if ($found) { | 263 | if ($found !== false) { |
264 | $link->addAdditionalContentEntry( | ||
265 | 'search_highlight', | ||
266 | $this->postProcessFoundPositions($lengths, $foundPositions) | ||
267 | ); | ||
268 | |||
264 | $filtered[$id] = $link; | 269 | $filtered[$id] = $link; |
265 | } | 270 | } |
266 | } | 271 | } |
@@ -477,4 +482,74 @@ class BookmarkFilter | |||
477 | 482 | ||
478 | return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); | 483 | return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); |
479 | } | 484 | } |
485 | |||
/**
 * Finalize the content of the foundPositions array: associate every search result
 * with the bookmark field it was found in, drop overlapping results, and convert
 * positions from aggregated-content offsets to offsets relative to the field.
 *
 * @param array $fieldLengths   Field name => ['start' => int, 'end' => int]: boundaries of every bookmark field
 *                              in the aggregated content. 'end' is treated as exclusive (the separator position).
 * @param array $foundPositions List of ['start' => int, 'end' => int] where search terms were found
 *                              in the aggregated content.
 *
 * @return array Field name => list of ['start' => int, 'end' => int], relative to the beginning of that field.
 *               Fields without any match are absent from the result.
 */
protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
{
    // Sort results by starting position ASC. The spaceship operator returns 0 for equal starts,
    // which keeps the comparator consistent (the previous `? 1 : -1` form never reported equality).
    usort($foundPositions, static function (array $entryA, array $entryB): int {
        return $entryA['start'] <=> $entryB['start'];
    });

    $out = [];
    $currentMax = -1;
    foreach ($foundPositions as $foundPosition) {
        // We do not allow overlapping highlights: skip anything starting before the end of the last kept one.
        if ($foundPosition['start'] < $currentMax) {
            continue;
        }

        $currentMax = $foundPosition['end'];
        foreach ($fieldLengths as $part => $length) {
            // `>=`: a match starting exactly at 'end' starts on the field separator, not inside the field.
            if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] >= $length['end']) {
                continue;
            }

            $out[$part][] = [
                'start' => $foundPosition['start'] - $length['start'],
                // Clamp so a match running across the separator never extends past its own field.
                'end' => min($foundPosition['end'], $length['end']) - $length['start'],
            ];
            break;
        }
    }

    return $out;
}
527 | |||
/**
 * Concatenate link fields to search across fields. Adds a '\' separator after every field
 * so that exact search terms cannot silently match across two fields.
 * Also populate the $lengths array with the starting and ending positions of every
 * bookmark field inside the concatenated content.
 *
 * Positions are measured on the lowercased text, i.e. on the very string that is returned
 * and searched: lowercasing multibyte text may change its number of characters, so measuring
 * the original field values could shift the boundaries of every following field.
 *
 * @param Bookmark $link    Bookmark whose title, description, URL and tags string are aggregated.
 * @param array    $lengths (by reference) Receives, for the keys 'title', 'description', 'url' and 'tags',
 *                          an array ['start' => int, 'end' => int]; 'end' is the position of the
 *                          separator following the field.
 *
 * @return string Lowercase concatenated fields content.
 */
protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
{
    $fields = [
        'title' => $link->getTitle(),
        'description' => $link->getDescription(),
        'url' => $link->getUrl(),
        'tags' => $link->getTagsString(),
    ];

    $content = '';
    $nextField = 0;
    foreach ($fields as $name => $value) {
        $lowercased = mb_convert_case($value, MB_CASE_LOWER, 'UTF-8');
        // Measured on the lowercased text so boundaries share the coordinates of the searched content.
        $fieldLength = mb_strlen($lowercased);

        $lengths[$name] = ['start' => $nextField, 'end' => $nextField + $fieldLength];

        $content .= $lowercased . '\\';
        // +1 skips the separator appended after the field.
        $nextField += $fieldLength + 1;
    }

    return $content;
}
480 | } | 555 | } |