aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/bookmark/BookmarkFilter.php
diff options
context:
space:
mode:
Diffstat (limited to 'application/bookmark/BookmarkFilter.php')
-rw-r--r--application/bookmark/BookmarkFilter.php111
1 files changed, 93 insertions, 18 deletions
diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php
index 4232f114..c79386ea 100644
--- a/application/bookmark/BookmarkFilter.php
+++ b/application/bookmark/BookmarkFilter.php
@@ -201,7 +201,7 @@ class BookmarkFilter
201 return $this->noFilter($visibility); 201 return $this->noFilter($visibility);
202 } 202 }
203 203
204 $filtered = array(); 204 $filtered = [];
205 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); 205 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
206 $exactRegex = '/"([^"]+)"/'; 206 $exactRegex = '/"([^"]+)"/';
207 // Retrieve exact search terms. 207 // Retrieve exact search terms.
@@ -213,8 +213,8 @@ class BookmarkFilter
213 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); 213 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
214 214
215 // Filter excluding terms and update andSearch. 215 // Filter excluding terms and update andSearch.
216 $excludeSearch = array(); 216 $excludeSearch = [];
217 $andSearch = array(); 217 $andSearch = [];
218 foreach ($explodedSearchAnd as $needle) { 218 foreach ($explodedSearchAnd as $needle) {
219 if ($needle[0] == '-' && strlen($needle) > 1) { 219 if ($needle[0] == '-' && strlen($needle) > 1) {
220 $excludeSearch[] = substr($needle, 1); 220 $excludeSearch[] = substr($needle, 1);
@@ -234,33 +234,38 @@ class BookmarkFilter
234 } 234 }
235 } 235 }
236 236
237 // Concatenate link fields to search across fields. 237 $lengths = [];
238 // Adds a '\' separator for exact search terms. 238 $content = $this->buildFullTextSearchableLink($link, $lengths);
239 $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\';
240 $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\';
241 $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\';
242 $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\';
243 239
244 // Be optimistic 240 // Be optimistic
245 $found = true; 241 $found = true;
242 $foundPositions = [];
246 243
247 // First, we look for exact term search 244 // First, we look for exact term search
248 for ($i = 0; $i < count($exactSearch) && $found; $i++) { 245 // Then iterate over keywords, if keyword is not found,
249 $found = strpos($content, $exactSearch[$i]) !== false;
250 }
251
252 // Iterate over keywords, if keyword is not found,
253 // no need to check for the others. We want all or nothing. 246 // no need to check for the others. We want all or nothing.
254 for ($i = 0; $i < count($andSearch) && $found; $i++) { 247 foreach ([$exactSearch, $andSearch] as $search) {
255 $found = strpos($content, $andSearch[$i]) !== false; 248 for ($i = 0; $i < count($search) && $found !== false; $i++) {
249 $found = mb_strpos($content, $search[$i]);
250 if ($found === false) {
251 break;
252 }
253
254 $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])];
255 }
256 } 256 }
257 257
258 // Exclude terms. 258 // Exclude terms.
259 for ($i = 0; $i < count($excludeSearch) && $found; $i++) { 259 for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) {
260 $found = strpos($content, $excludeSearch[$i]) === false; 260 $found = strpos($content, $excludeSearch[$i]) === false;
261 } 261 }
262 262
263 if ($found) { 263 if ($found !== false) {
264 $link->addAdditionalContentEntry(
265 'search_highlight',
266 $this->postProcessFoundPositions($lengths, $foundPositions)
267 );
268
264 $filtered[$id] = $link; 269 $filtered[$id] = $link;
265 } 270 }
266 } 271 }
@@ -477,4 +482,74 @@ class BookmarkFilter
477 482
478 return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); 483 return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
479 } 484 }
485
/**
 * Finalize the raw search hits collected on the aggregated bookmark content:
 * associate every hit with the bookmark field it starts in, make its offsets
 * relative to that field, and discard hits overlapping an already accepted one.
 *
 * @param array $fieldLengths   Start and end positions of every bookmark field in the aggregated bookmark
 *                              content, indexed by field name: [field => ['start' => int, 'end' => int]].
 * @param array $foundPositions Positions where the search terms were found in the aggregated content:
 *                              list of ['start' => int, 'end' => int].
 *
 * @return array Found positions grouped by bookmark field, with offsets relative to the field start.
 *               Fields without any accepted hit are absent from the result.
 */
protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
{
    // Sort hits by starting position ASC. On equal starts the longest hit comes first, so the
    // overlap filter below keeps the widest highlight. Unlike a `>`-based ternary, the spaceship
    // operator returns 0 for identical entries, which usort() needs for a consistent ordering.
    usort($foundPositions, function (array $entryA, array $entryB): int {
        return ($entryA['start'] <=> $entryB['start']) ?: ($entryB['end'] <=> $entryA['end']);
    });

    $out = [];
    $currentMax = -1;
    foreach ($foundPositions as $foundPosition) {
        // We do not allow overlapping highlights: skip hits starting before the last accepted end.
        if ($foundPosition['start'] < $currentMax) {
            continue;
        }

        $currentMax = $foundPosition['end'];
        foreach ($fieldLengths as $part => $length) {
            // A field spans [start, end): 'end' is the index of the separator that follows it,
            // so a hit starting exactly on 'end' does not belong to this field.
            if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] >= $length['end']) {
                continue;
            }

            $out[$part][] = [
                'start' => $foundPosition['start'] - $length['start'],
                // Never let a highlight run past the end of the field it belongs to.
                'end' => min($foundPosition['end'], $length['end']) - $length['start'],
            ];
            break;
        }
    }

    return $out;
}
527
/**
 * Concatenate the lowercase title, description, URL and tags of a bookmark so that a search
 * can run across all fields at once. A '\' separator is appended after every field so that
 * exact search terms are kept from matching across two fields.
 *
 * Also populates $lengths with the starting and ending positions of every bookmark field
 * inside the concatenated content. 'end' is exclusive: it is the index of the separator
 * following the field.
 *
 * @param Bookmark $link
 * @param array    $lengths (by reference) Filled with [field => ['start' => int, 'end' => int]]
 *                          for the keys 'title', 'description', 'url' and 'tags'.
 *
 * @return string Lowercase concatenated fields content.
 */
protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
{
    $fields = [
        'title' => $link->getTitle(),
        'description' => $link->getDescription(),
        'url' => $link->getUrl(),
        'tags' => $link->getTagsString(),
    ];

    $content = '';
    $nextField = 0;
    foreach ($fields as $name => $value) {
        $lowered = mb_convert_case($value, MB_CASE_LOWER, 'UTF-8');

        // Measure the lower-cased text, not the original one: full case mapping may change the
        // number of characters, and positions must be valid inside the returned content.
        // mb_strlen() deliberately uses the default encoding, as the original code did.
        $end = $nextField + mb_strlen($lowered);
        $lengths[$name] = ['start' => $nextField, 'end' => $end];

        $content .= $lowered . '\\';
        // Skip the one-character separator.
        $nextField = $end + 1;
    }

    return $content;
}
480} 555}