about | summary | refs | log | tree | commit | diff | homepage
path: root/application/bookmark/BookmarkFilter.php
diff options
context:
space:
mode:
Diffstat (limited to 'application/bookmark/BookmarkFilter.php')
-rw-r--r-- application/bookmark/BookmarkFilter.php 203
1 files changed, 150 insertions, 53 deletions
diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php
index 6636bbfe..db83c51c 100644
--- a/application/bookmark/BookmarkFilter.php
+++ b/application/bookmark/BookmarkFilter.php
@@ -1,9 +1,12 @@
1<?php 1<?php
2 2
3declare(strict_types=1);
4
3namespace Shaarli\Bookmark; 5namespace Shaarli\Bookmark;
4 6
5use Exception; 7use Exception;
6use Shaarli\Bookmark\Exception\BookmarkNotFoundException; 8use Shaarli\Bookmark\Exception\BookmarkNotFoundException;
9use Shaarli\Config\ConfigManager;
7 10
8/** 11/**
9 * Class LinkFilter. 12 * Class LinkFilter.
@@ -56,12 +59,16 @@ class BookmarkFilter
56 */ 59 */
57 private $bookmarks; 60 private $bookmarks;
58 61
62 /** @var ConfigManager */
63 protected $conf;
64
59 /** 65 /**
60 * @param Bookmark[] $bookmarks initialization. 66 * @param Bookmark[] $bookmarks initialization.
61 */ 67 */
62 public function __construct($bookmarks) 68 public function __construct($bookmarks, ConfigManager $conf)
63 { 69 {
64 $this->bookmarks = $bookmarks; 70 $this->bookmarks = $bookmarks;
71 $this->conf = $conf;
65 } 72 }
66 73
67 /** 74 /**
@@ -77,8 +84,13 @@ class BookmarkFilter
77 * 84 *
78 * @throws BookmarkNotFoundException 85 * @throws BookmarkNotFoundException
79 */ 86 */
80 public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false) 87 public function filter(
81 { 88 string $type,
89 $request,
90 bool $casesensitive = false,
91 string $visibility = 'all',
92 bool $untaggedonly = false
93 ) {
82 if (!in_array($visibility, ['all', 'public', 'private'])) { 94 if (!in_array($visibility, ['all', 'public', 'private'])) {
83 $visibility = 'all'; 95 $visibility = 'all';
84 } 96 }
@@ -100,10 +112,14 @@ class BookmarkFilter
100 $filtered = $this->bookmarks; 112 $filtered = $this->bookmarks;
101 } 113 }
102 if (!empty($request[0])) { 114 if (!empty($request[0])) {
103 $filtered = (new BookmarkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); 115 $filtered = (new BookmarkFilter($filtered, $this->conf))
116 ->filterTags($request[0], $casesensitive, $visibility)
117 ;
104 } 118 }
105 if (!empty($request[1])) { 119 if (!empty($request[1])) {
106 $filtered = (new BookmarkFilter($filtered))->filterFulltext($request[1], $visibility); 120 $filtered = (new BookmarkFilter($filtered, $this->conf))
121 ->filterFulltext($request[1], $visibility)
122 ;
107 } 123 }
108 return $filtered; 124 return $filtered;
109 case self::$FILTER_TEXT: 125 case self::$FILTER_TEXT:
@@ -128,13 +144,13 @@ class BookmarkFilter
128 * 144 *
129 * @return Bookmark[] filtered bookmarks. 145 * @return Bookmark[] filtered bookmarks.
130 */ 146 */
131 private function noFilter($visibility = 'all') 147 private function noFilter(string $visibility = 'all')
132 { 148 {
133 if ($visibility === 'all') { 149 if ($visibility === 'all') {
134 return $this->bookmarks; 150 return $this->bookmarks;
135 } 151 }
136 152
137 $out = array(); 153 $out = [];
138 foreach ($this->bookmarks as $key => $value) { 154 foreach ($this->bookmarks as $key => $value) {
139 if ($value->isPrivate() && $visibility === 'private') { 155 if ($value->isPrivate() && $visibility === 'private') {
140 $out[$key] = $value; 156 $out[$key] = $value;
@@ -151,11 +167,11 @@ class BookmarkFilter
151 * 167 *
152 * @param string $smallHash permalink hash. 168 * @param string $smallHash permalink hash.
153 * 169 *
154 * @return array $filtered array containing permalink data. 170 * @return Bookmark[] $filtered array containing permalink data.
155 * 171 *
156 * @throws \Shaarli\Bookmark\Exception\BookmarkNotFoundException if the smallhash doesn't match any link. 172 * @throws BookmarkNotFoundException if the smallhash doesn't match any link.
157 */ 173 */
158 private function filterSmallHash($smallHash) 174 private function filterSmallHash(string $smallHash)
159 { 175 {
160 foreach ($this->bookmarks as $key => $l) { 176 foreach ($this->bookmarks as $key => $l) {
161 if ($smallHash == $l->getShortUrl()) { 177 if ($smallHash == $l->getShortUrl()) {
@@ -186,15 +202,15 @@ class BookmarkFilter
186 * @param string $searchterms search query. 202 * @param string $searchterms search query.
187 * @param string $visibility Optional: return only all/private/public bookmarks. 203 * @param string $visibility Optional: return only all/private/public bookmarks.
188 * 204 *
189 * @return array search results. 205 * @return Bookmark[] search results.
190 */ 206 */
191 private function filterFulltext($searchterms, $visibility = 'all') 207 private function filterFulltext(string $searchterms, string $visibility = 'all')
192 { 208 {
193 if (empty($searchterms)) { 209 if (empty($searchterms)) {
194 return $this->noFilter($visibility); 210 return $this->noFilter($visibility);
195 } 211 }
196 212
197 $filtered = array(); 213 $filtered = [];
198 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); 214 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
199 $exactRegex = '/"([^"]+)"/'; 215 $exactRegex = '/"([^"]+)"/';
200 // Retrieve exact search terms. 216 // Retrieve exact search terms.
@@ -206,8 +222,8 @@ class BookmarkFilter
206 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); 222 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
207 223
208 // Filter excluding terms and update andSearch. 224 // Filter excluding terms and update andSearch.
209 $excludeSearch = array(); 225 $excludeSearch = [];
210 $andSearch = array(); 226 $andSearch = [];
211 foreach ($explodedSearchAnd as $needle) { 227 foreach ($explodedSearchAnd as $needle) {
212 if ($needle[0] == '-' && strlen($needle) > 1) { 228 if ($needle[0] == '-' && strlen($needle) > 1) {
213 $excludeSearch[] = substr($needle, 1); 229 $excludeSearch[] = substr($needle, 1);
@@ -227,33 +243,38 @@ class BookmarkFilter
227 } 243 }
228 } 244 }
229 245
230 // Concatenate link fields to search across fields. 246 $lengths = [];
231 // Adds a '\' separator for exact search terms. 247 $content = $this->buildFullTextSearchableLink($link, $lengths);
232 $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\';
233 $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\';
234 $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\';
235 $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\';
236 248
237 // Be optimistic 249 // Be optimistic
238 $found = true; 250 $found = true;
251 $foundPositions = [];
239 252
240 // First, we look for exact term search 253 // First, we look for exact term search
241 for ($i = 0; $i < count($exactSearch) && $found; $i++) { 254 // Then iterate over keywords, if keyword is not found,
242 $found = strpos($content, $exactSearch[$i]) !== false;
243 }
244
245 // Iterate over keywords, if keyword is not found,
246 // no need to check for the others. We want all or nothing. 255 // no need to check for the others. We want all or nothing.
247 for ($i = 0; $i < count($andSearch) && $found; $i++) { 256 foreach ([$exactSearch, $andSearch] as $search) {
248 $found = strpos($content, $andSearch[$i]) !== false; 257 for ($i = 0; $i < count($search) && $found !== false; $i++) {
258 $found = mb_strpos($content, $search[$i]);
259 if ($found === false) {
260 break;
261 }
262
263 $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])];
264 }
249 } 265 }
250 266
251 // Exclude terms. 267 // Exclude terms.
252 for ($i = 0; $i < count($excludeSearch) && $found; $i++) { 268 for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) {
253 $found = strpos($content, $excludeSearch[$i]) === false; 269 $found = strpos($content, $excludeSearch[$i]) === false;
254 } 270 }
255 271
256 if ($found) { 272 if ($found !== false) {
273 $link->addAdditionalContentEntry(
274 'search_highlight',
275 $this->postProcessFoundPositions($lengths, $foundPositions)
276 );
277
257 $filtered[$id] = $link; 278 $filtered[$id] = $link;
258 } 279 }
259 } 280 }
@@ -268,8 +289,9 @@ class BookmarkFilter
268 * 289 *
269 * @return string generated regex fragment 290 * @return string generated regex fragment
270 */ 291 */
271 private static function tag2regex($tag) 292 protected function tag2regex(string $tag): string
272 { 293 {
294 $tagsSeparator = $this->conf->get('general.tags_separator', ' ');
273 $len = strlen($tag); 295 $len = strlen($tag);
274 if (!$len || $tag === "-" || $tag === "*") { 296 if (!$len || $tag === "-" || $tag === "*") {
275 // nothing to search, return empty regex 297 // nothing to search, return empty regex
@@ -283,12 +305,13 @@ class BookmarkFilter
283 $i = 0; // start at first character 305 $i = 0; // start at first character
284 $regex = '(?='; // use positive lookahead 306 $regex = '(?='; // use positive lookahead
285 } 307 }
286 $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning 308 // before tag may only be the separator or the beginning
309 $regex .= '.*(?:^|' . $tagsSeparator . ')';
287 // iterate over string, separating it into placeholder and content 310 // iterate over string, separating it into placeholder and content
288 for (; $i < $len; $i++) { 311 for (; $i < $len; $i++) {
289 if ($tag[$i] === '*') { 312 if ($tag[$i] === '*') {
290 // placeholder found 313 // placeholder found
291 $regex .= '[^ ]*?'; 314 $regex .= '[^' . $tagsSeparator . ']*?';
292 } else { 315 } else {
293 // regular characters 316 // regular characters
294 $offset = strpos($tag, '*', $i); 317 $offset = strpos($tag, '*', $i);
@@ -304,7 +327,8 @@ class BookmarkFilter
304 $i = $offset; 327 $i = $offset;
305 } 328 }
306 } 329 }
307 $regex .= '(?:$| ))'; // after the tag may only be a space or the end 330 // after the tag may only be the separator or the end
331 $regex .= '(?:$|' . $tagsSeparator . '))';
308 return $regex; 332 return $regex;
309 } 333 }
310 334
@@ -314,22 +338,23 @@ class BookmarkFilter
314 * You can specify one or more tags, separated by space or a comma, e.g. 338 * You can specify one or more tags, separated by space or a comma, e.g.
315 * print_r($mydb->filterTags('linux programming')); 339 * print_r($mydb->filterTags('linux programming'));
316 * 340 *
317 * @param string $tags list of tags separated by commas or blank spaces. 341 * @param string|array $tags list of tags, separated by commas or blank spaces if passed as string.
318 * @param bool $casesensitive ignore case if false. 342 * @param bool $casesensitive ignore case if false.
319 * @param string $visibility Optional: return only all/private/public bookmarks. 343 * @param string $visibility Optional: return only all/private/public bookmarks.
320 * 344 *
321 * @return array filtered bookmarks. 345 * @return Bookmark[] filtered bookmarks.
322 */ 346 */
323 public function filterTags($tags, $casesensitive = false, $visibility = 'all') 347 public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
324 { 348 {
349 $tagsSeparator = $this->conf->get('general.tags_separator', ' ');
325 // get single tags (we may get passed an array, even though the docs say different) 350 // get single tags (we may get passed an array, even though the docs say different)
326 $inputTags = $tags; 351 $inputTags = $tags;
327 if (!is_array($tags)) { 352 if (!is_array($tags)) {
328 // we got an input string, split tags 353 // we got an input string, split tags
329 $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); 354 $inputTags = tags_str2array($inputTags, $tagsSeparator);
330 } 355 }
331 356
332 if (!count($inputTags)) { 357 if (count($inputTags) === 0) {
333 // no input tags 358 // no input tags
334 return $this->noFilter($visibility); 359 return $this->noFilter($visibility);
335 } 360 }
@@ -346,7 +371,7 @@ class BookmarkFilter
346 } 371 }
347 372
348 // build regex from all tags 373 // build regex from all tags
349 $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; 374 $re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/';
350 if (!$casesensitive) { 375 if (!$casesensitive) {
351 // make regex case insensitive 376 // make regex case insensitive
352 $re .= 'i'; 377 $re .= 'i';
@@ -366,10 +391,11 @@ class BookmarkFilter
366 continue; 391 continue;
367 } 392 }
368 } 393 }
369 $search = $link->getTagsString(); // build search string, start with tags of current link 394 // build search string, start with tags of current link
395 $search = $link->getTagsString($tagsSeparator);
370 if (strlen(trim($link->getDescription())) && strpos($link->getDescription(), '#') !== false) { 396 if (strlen(trim($link->getDescription())) && strpos($link->getDescription(), '#') !== false) {
371 // description given and at least one possible tag found 397 // description given and at least one possible tag found
372 $descTags = array(); 398 $descTags = [];
373 // find all tags in the form of #tag in the description 399 // find all tags in the form of #tag in the description
374 preg_match_all( 400 preg_match_all(
375 '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm', 401 '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
@@ -378,9 +404,9 @@ class BookmarkFilter
378 ); 404 );
379 if (count($descTags[1])) { 405 if (count($descTags[1])) {
380 // there were some tags in the description, add them to the search string 406 // there were some tags in the description, add them to the search string
381 $search .= ' ' . implode(' ', $descTags[1]); 407 $search .= $tagsSeparator . tags_array2str($descTags[1], $tagsSeparator);
382 } 408 }
383 }; 409 }
384 // match regular expression with search string 410 // match regular expression with search string
385 if (!preg_match($re, $search)) { 411 if (!preg_match($re, $search)) {
386 // this entry does _not_ match our regex 412 // this entry does _not_ match our regex
@@ -396,9 +422,9 @@ class BookmarkFilter
396 * 422 *
397 * @param string $visibility return only all/private/public bookmarks. 423 * @param string $visibility return only all/private/public bookmarks.
398 * 424 *
399 * @return array filtered bookmarks. 425 * @return Bookmark[] filtered bookmarks.
400 */ 426 */
401 public function filterUntagged($visibility) 427 public function filterUntagged(string $visibility)
402 { 428 {
403 $filtered = []; 429 $filtered = [];
404 foreach ($this->bookmarks as $key => $link) { 430 foreach ($this->bookmarks as $key => $link) {
@@ -410,7 +436,7 @@ class BookmarkFilter
410 } 436 }
411 } 437 }
412 438
413 if (empty(trim($link->getTagsString()))) { 439 if (empty($link->getTags())) {
414 $filtered[$key] = $link; 440 $filtered[$key] = $link;
415 } 441 }
416 } 442 }
@@ -427,11 +453,11 @@ class BookmarkFilter
427 * @param string $day day to filter. 453 * @param string $day day to filter.
428 * @param string $visibility return only all/private/public bookmarks. 454 * @param string $visibility return only all/private/public bookmarks.
429 455
430 * @return array all link matching given day. 456 * @return Bookmark[] all link matching given day.
431 * 457 *
432 * @throws Exception if date format is invalid. 458 * @throws Exception if date format is invalid.
433 */ 459 */
434 public function filterDay($day, $visibility) 460 public function filterDay(string $day, string $visibility)
435 { 461 {
436 if (!checkDateFormat('Ymd', $day)) { 462 if (!checkDateFormat('Ymd', $day)) {
437 throw new Exception('Invalid date format'); 463 throw new Exception('Invalid date format');
@@ -460,9 +486,9 @@ class BookmarkFilter
460 * @param string $tags string containing a list of tags. 486 * @param string $tags string containing a list of tags.
461 * @param bool $casesensitive will convert everything to lowercase if false. 487 * @param bool $casesensitive will convert everything to lowercase if false.
462 * 488 *
463 * @return array filtered tags string. 489 * @return string[] filtered tags string.
464 */ 490 */
465 public static function tagsStrToArray($tags, $casesensitive) 491 public static function tagsStrToArray(string $tags, bool $casesensitive): array
466 { 492 {
467 // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) 493 // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
468 $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); 494 $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
@@ -470,4 +496,75 @@ class BookmarkFilter
470 496
471 return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); 497 return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
472 } 498 }
499
500 /**
501 * This method finalizes the content of the foundPositions array
502 * by associating every search result with its bookmark field,
503 * making sure that there are no overlapping results, etc.
504 *
505 * @param array $fieldLengths Start and end positions of every bookmark field in the aggregated bookmark content.
506 * @param array $foundPositions Positions where the search results were found in the aggregated content.
507 *
508 * @return array Updated $foundPositions, by bookmark field.
509 */
510 protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
511 {
512 // Sort results by starting position ASC.
513 usort($foundPositions, function (array $entryA, array $entryB): int {
514 return $entryA['start'] > $entryB['start'] ? 1 : -1;
515 });
516
517 $out = [];
518 $currentMax = -1;
519 foreach ($foundPositions as $foundPosition) {
520 // we do not allow overlapping highlights
521 if ($foundPosition['start'] < $currentMax) {
522 continue;
523 }
524
525 $currentMax = $foundPosition['end'];
526 foreach ($fieldLengths as $part => $length) {
527 if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) {
528 continue;
529 }
530
531 $out[$part][] = [
532 'start' => $foundPosition['start'] - $length['start'],
533 'end' => $foundPosition['end'] - $length['start'],
534 ];
535 break;
536 }
537 }
538
539 return $out;
540 }
541
542 /**
543 * Concatenate link fields to search across fields. Adds a '\' separator for exact search terms.
544 * Also populates the $lengths array with the starting and ending positions of every bookmark field
545 * inside concatenated content.
546 *
547 * @param Bookmark $link
548 * @param array $lengths (by reference)
549 *
550 * @return string Lowercase concatenated fields content.
551 */
552 protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
553 {
554 $tagString = $link->getTagsString($this->conf->get('general.tags_separator', ' '));
555 $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') . '\\';
556 $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') . '\\';
557 $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') . '\\';
558 $content .= mb_convert_case($tagString, MB_CASE_LOWER, 'UTF-8') . '\\';
559
560 $lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())];
561 $nextField = $lengths['title']['end'] + 1;
562 $lengths['description'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getDescription())];
563 $nextField = $lengths['description']['end'] + 1;
564 $lengths['url'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getUrl())];
565 $nextField = $lengths['url']['end'] + 1;
566 $lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($tagString)];
567
568 return $content;
569 }
473} 570}