]> git.immae.eu Git - github/shaarli/Shaarli.git/blame - application/bookmark/BookmarkFilter.php
Merge pull request #1698 from ArthurHoaro/feature/plugins-search-filter
[github/shaarli/Shaarli.git] / application / bookmark / BookmarkFilter.php
CommitLineData
822bffce
A
1<?php
2
efb7d21b
A
3declare(strict_types=1);
4
6696729b
V
5namespace Shaarli\Bookmark;
6
336a28fa 7use Shaarli\Bookmark\Exception\BookmarkNotFoundException;
b3bd8c3e 8use Shaarli\Config\ConfigManager;
bcba6bd3 9use Shaarli\Plugin\PluginManager;
f24896b2 10
822bffce
A
11/**
12 * Class BookmarkFilter.
13 *
14 * Perform search and filter operation on link data list.
15 */
336a28fa 16class BookmarkFilter
822bffce
A
17{
18 /**
19 * @var string permalinks.
20 */
6696729b 21 public static $FILTER_HASH = 'permalink';
822bffce
A
22
23 /**
24 * @var string text search.
25 */
6696729b 26 public static $FILTER_TEXT = 'fulltext';
822bffce
A
27
28 /**
29 * @var string tag filter.
30 */
6696729b 31 public static $FILTER_TAG = 'tags';
822bffce 32
336a28fa
A
33 /**
34 * @var string default filter type: no specific filter is applied.
35 */
36 public static $DEFAULT = 'NO_FILTER';
37
38 /** @var string Visibility: all */
39 public static $ALL = 'all';
40
41 /** @var string Visibility: public */
42 public static $PUBLIC = 'public';
43
44 /** @var string Visibility: private */
45 public static $PRIVATE = 'private';
46
9ccca401
A
47 /**
48 * @var string Allowed characters for hashtags (regex syntax).
49 */
50 public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
51
822bffce 52 /**
336a28fa 53 * @var Bookmark[] all available bookmarks.
822bffce 54 */
336a28fa 55 private $bookmarks;
822bffce 56
b3bd8c3e
A
57 /** @var ConfigManager */
58 protected $conf;
59
bcba6bd3
A
60 /** @var PluginManager */
61 protected $pluginManager;
62
/**
 * Create a filter operating on the given set of bookmarks.
 *
 * @param Bookmark[]    $bookmarks     Bookmarks every filter method iterates over.
 * @param ConfigManager $conf          Configuration; read for the `general.tags_separator` setting.
 * @param PluginManager $pluginManager Plugin manager; its filterSearchEntry() is consulted for each entry.
 */
public function __construct($bookmarks, ConfigManager $conf, PluginManager $pluginManager)
{
    $this->pluginManager = $pluginManager;
    $this->conf = $conf;
    $this->bookmarks = $bookmarks;
}
72
/**
 * Entry point: dispatch to the filter matching $type.
 *
 * @param string $type          Type of filter (eg. tags, permalink, etc.).
 * @param mixed  $request       Filter content. For the combined tag + text type, index 0 holds
 *                              the tags and index 1 holds the full-text query.
 * @param bool   $casesensitive Optional: perform a case sensitive tag filter if true.
 * @param string $visibility    Optional: all/private/public; any other value falls back to 'all'.
 * @param bool   $untaggedonly  Optional: return only untagged bookmarks.
 *                              Applies only if $type includes FILTER_TAG.
 *
 * @return Bookmark[] filtered bookmark list.
 *
 * @throws BookmarkNotFoundException
 */
public function filter(
    string $type,
    $request,
    bool $casesensitive = false,
    string $visibility = 'all',
    bool $untaggedonly = false
) {
    $knownVisibilities = ['all', 'public', 'private'];
    if (!in_array($visibility, $knownVisibilities)) {
        $visibility = 'all';
    }

    switch ($type) {
        case self::$FILTER_HASH:
            return $this->filterSmallHash($request);

        // Combined type: the byte-wise OR of the tag and text type strings.
        case self::$FILTER_TAG | self::$FILTER_TEXT:
            $hasTags = !empty($request) && !empty($request[0]);
            $hasText = !empty($request) && !empty($request[1]);

            if (!$hasTags && !$hasText) {
                return $untaggedonly
                    ? $this->filterUntagged($visibility)
                    : $this->noFilter($visibility);
            }

            $result = $untaggedonly ? $this->filterUntagged($visibility) : $this->bookmarks;

            // Each step narrows the previous result through a fresh filter instance.
            if ($hasTags) {
                $tagFilter = new BookmarkFilter($result, $this->conf, $this->pluginManager);
                $result = $tagFilter->filterTags($request[0], $casesensitive, $visibility);
            }
            if ($hasText) {
                $textFilter = new BookmarkFilter($result, $this->conf, $this->pluginManager);
                $result = $textFilter->filterFulltext($request[1], $visibility);
            }

            return $result;

        case self::$FILTER_TEXT:
            return $this->filterFulltext($request, $visibility);

        case self::$FILTER_TAG:
            return $untaggedonly
                ? $this->filterUntagged($visibility)
                : $this->filterTags($request, $casesensitive, $visibility);

        default:
            return $this->noFilter($visibility);
    }
}
136
/**
 * No search criteria: return every bookmark accepted by the plugins
 * and matching the requested visibility.
 *
 * @param string $visibility Optional: return only all/private/public bookmarks
 *
 * @return Bookmark[] filtered bookmarks, original keys preserved.
 */
private function noFilter(string $visibility = 'all')
{
    $kept = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        $accepted = $this->pluginManager->filterSearchEntry(
            $bookmark,
            ['source' => 'no_filter', 'visibility' => $visibility]
        );
        if (!$accepted) {
            continue;
        }

        $visible = $visibility === 'all'
            || ($bookmark->isPrivate() && $visibility === 'private')
            || (!$bookmark->isPrivate() && $visibility === 'public');

        if ($visible) {
            $kept[$id] = $bookmark;
        }
    }

    return $kept;
}
168
/**
 * Returns the bookmark whose short URL matches a small hash.
 *
 * The comparison is strict on purpose: with a loose `==`, PHP compares two
 * numeric-looking strings as numbers, so hashes such as "0e1234" and "0e5678"
 * would be considered equal and the wrong bookmark could be returned.
 *
 * @param string $smallHash permalink hash.
 *
 * @return Bookmark[] single-entry array holding the matching bookmark under its original key.
 *
 * @throws BookmarkNotFoundException if the small hash doesn't match any bookmark.
 */
private function filterSmallHash(string $smallHash)
{
    // Linear scan: bookmarks are not indexed by short URL here.
    foreach ($this->bookmarks as $key => $bookmark) {
        // The cast keeps a missing (null) short URL comparable as an empty string.
        if ($smallHash === (string) $bookmark->getShortUrl()) {
            return [$key => $bookmark];
        }
    }

    throw new BookmarkNotFoundException();
}
189
/**
 * Returns the list of bookmarks matching a full-text search.
 *
 * Searches:
 *  - run against the lowercase content built by buildFullTextSearchableLink()
 *    (title, description, URL and tags);
 *  - are case-insensitive;
 *  - terms surrounded by quotes " are exact terms search;
 *  - terms starting with a dash - are excluded (except exact terms).
 *
 * Every required term (exact or not) must be present, and no excluded term may be present.
 * Matching bookmarks receive a 'search_highlight' additional content entry
 * describing where the required terms were found.
 *
 * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
 *  - allows to perform searches on Unicode text
 *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
 *
 * @param string $searchterms search query.
 * @param string $visibility  Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] search results, original keys preserved.
 */
private function filterFulltext(string $searchterms, string $visibility = 'all')
{
    if (empty($searchterms)) {
        return $this->noFilter($visibility);
    }

    $normalizedQuery = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
    $exactRegex = '/"([^"]+)"/';

    // Quoted chunks are exact terms.
    preg_match_all($exactRegex, $normalizedQuery, $quotedMatches);
    $exactSearch = array_values(array_filter($quotedMatches[1]));

    // What remains once quoted chunks are removed is split on spaces.
    $unquoted = trim(preg_replace($exactRegex, '', $normalizedQuery));
    $words = array_values(array_filter(explode(' ', $unquoted)));

    // A leading dash (followed by at least one character) marks an excluded term.
    $excludeSearch = [];
    $andSearch = [];
    foreach ($words as $word) {
        if ($word[0] == '-' && strlen($word) > 1) {
            $excludeSearch[] = substr($word, 1);
        } else {
            $andSearch[] = $word;
        }
    }

    // Exact terms are checked first, then the regular keywords.
    $requiredTerms = array_merge($exactSearch, $andSearch);

    $filtered = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        $accepted = $this->pluginManager->filterSearchEntry(
            $bookmark,
            [
                'source' => 'fulltext',
                'searchterms' => $searchterms,
                'andSearch' => $andSearch,
                'exactSearch' => $exactSearch,
                'excludeSearch' => $excludeSearch,
                'visibility' => $visibility
            ]
        );
        if (!$accepted) {
            continue;
        }

        // Skip bookmarks that do not match the requested visibility.
        $hidden = ($visibility === 'private' && !$bookmark->isPrivate())
            || ($visibility === 'public' && $bookmark->isPrivate());
        if ($hidden) {
            continue;
        }

        $lengths = [];
        $content = $this->buildFullTextSearchableLink($bookmark, $lengths);

        // All or nothing: stop at the first required term that is missing.
        $matched = true;
        $foundPositions = [];
        foreach ($requiredTerms as $term) {
            $offset = mb_strpos($content, $term);
            if ($offset === false) {
                $matched = false;
                break;
            }

            $foundPositions[] = ['start' => $offset, 'end' => $offset + mb_strlen($term)];
        }

        // A single excluded term present in the content rejects the bookmark.
        if ($matched) {
            foreach ($excludeSearch as $term) {
                if (strpos($content, $term) !== false) {
                    $matched = false;
                    break;
                }
            }
        }

        if (!$matched) {
            continue;
        }

        $bookmark->addAdditionalContentEntry(
            'search_highlight',
            $this->postProcessFoundPositions($lengths, $foundPositions)
        );
        $filtered[$id] = $bookmark;
    }

    return $filtered;
}
304
/**
 * Returns the list of bookmarks associated with a given list of tags.
 *
 * You can specify one or more tags, e.g.
 *  print_r($mydb->filterTags('linux programming'));
 *
 * Hashtags (#tag) found in a bookmark description are matched as if they were tags.
 * With public visibility, searched tags starting with a dot are dropped before matching.
 *
 * @param string|array $tags          list of tags; a string is split using the configured tags separator.
 * @param bool         $casesensitive ignore case if false.
 * @param string       $visibility    Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] filtered bookmarks, original keys preserved.
 */
public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
{
    $tagsSeparator = $this->conf->get('general.tags_separator', ' ');

    // Callers may hand over either an array or a separator-delimited string.
    $wantedTags = is_array($tags) ? $tags : tags_str2array($tags, $tagsSeparator);

    if (count($wantedTags) === 0) {
        // nothing to look for
        return $this->noFilter($visibility);
    }

    // If we only have public visibility, we can't look for hidden tags
    if ($visibility === self::$PUBLIC) {
        $wantedTags = array_values(array_filter($wantedTags, function ($tag) {
            return ! startsWith($tag, '.');
        }));

        if (empty($wantedTags)) {
            return [];
        }
    }

    // One lookahead per tag, all anchored at the start of the searched string.
    $pattern = '/^' . implode(array_map([$this, 'tag2regex'], $wantedTags)) . '.*$/';
    if (!$casesensitive) {
        $pattern .= 'i';
    }

    $matching = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        $accepted = $this->pluginManager->filterSearchEntry(
            $bookmark,
            [
                'source' => 'tags',
                'tags' => $tags,
                'casesensitive' => $casesensitive,
                'visibility' => $visibility
            ]
        );
        if (!$accepted) {
            continue;
        }

        // Skip bookmarks that do not match the requested visibility.
        $hidden = ($visibility === 'private' && !$bookmark->isPrivate())
            || ($visibility === 'public' && $bookmark->isPrivate());
        if ($hidden) {
            continue;
        }

        // The searched string starts with the bookmark's own tags...
        $haystack = $bookmark->getTagsString($tagsSeparator);

        // ...followed by any #hashtag found in its description.
        $description = $bookmark->getDescription();
        if (trim($description) !== '' && strpos($description, '#') !== false) {
            $hashtags = [];
            preg_match_all(
                '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
                $description,
                $hashtags
            );
            if (count($hashtags[1])) {
                $haystack .= $tagsSeparator . tags_array2str($hashtags[1], $tagsSeparator);
            }
        }

        if (preg_match($pattern, $haystack)) {
            $matching[$id] = $bookmark;
        }
    }

    return $matching;
}
404
/**
 * Return only the bookmarks that have no tag at all.
 *
 * @param string $visibility return only all/private/public bookmarks.
 *
 * @return Bookmark[] filtered bookmarks, original keys preserved.
 */
public function filterUntagged(string $visibility)
{
    $untagged = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        $accepted = $this->pluginManager->filterSearchEntry(
            $bookmark,
            ['source' => 'untagged', 'visibility' => $visibility]
        );
        if (!$accepted) {
            continue;
        }

        // Skip bookmarks that do not match the requested visibility.
        $hidden = ($visibility === 'private' && !$bookmark->isPrivate())
            || ($visibility === 'public' && $bookmark->isPrivate());
        if ($hidden) {
            continue;
        }

        if (empty($bookmark->getTags())) {
            $untagged[$id] = $bookmark;
        }
    }

    return $untagged;
}
440
/**
 * Split a string of tags into an array of individual tags.
 *  - lowercases the whole string first unless $casesensitive is true;
 *  - treats commas and any run of whitespace as separators;
 *  - never returns empty entries.
 *
 * @param string $tags          string containing a list of tags.
 * @param bool   $casesensitive will convert everything to lowercase if false.
 *
 * @return string[] list of tags.
 */
public static function tagsStrToArray(string $tags, bool $casesensitive): array
{
    if (!$casesensitive) {
        // UTF-8 aware lowercasing, to handle various graphemes (i.e. cyrillic, or greek)
        $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
    }

    $spaceSeparated = str_replace(',', ' ', $tags);

    return preg_split('/\s+/', $spaceSeparated, -1, PREG_SPLIT_NO_EMPTY);
}
4e3875c0 459
bcba6bd3
A
/**
 * Generate a regex fragment (a lookahead) out of a tag.
 *
 * The configured tags separator is escaped with preg_quote() before being inserted into the
 * pattern: a separator that is a regex metacharacter (e.g. '|' or '+') or the '/' delimiter
 * would otherwise change the meaning of the pattern or make it invalid.
 *
 * @param string $tag tag to generate the regex from. May start with '-' to negate, contain '*' as wildcard
 *
 * @return string generated regex fragment, or an empty string when there is nothing to search
 *                (empty tag, lone '-' or lone '*').
 */
protected function tag2regex(string $tag): string
{
    $len = strlen($tag);
    if (!$len || $tag === '-' || $tag === '*') {
        // nothing to search, return empty regex
        return '';
    }

    // Escaped once, reused both as an alternative and inside a character class.
    $separator = preg_quote((string) $this->conf->get('general.tags_separator', ' '), '/');

    if ($tag[0] === '-') {
        // query is negated: skip the '-' and use a negative lookahead
        $i = 1;
        $regex = '(?!';
    } else {
        // regular query: start at the first character and use a positive lookahead
        $i = 0;
        $regex = '(?=';
    }

    // before tag may only be the separator or the beginning
    $regex .= '.*(?:^|' . $separator . ')';

    // walk through the tag, alternating between wildcards and literal chunks
    for (; $i < $len; $i++) {
        if ($tag[$i] === '*') {
            // wildcard: anything but the separator, as few characters as possible
            $regex .= '[^' . $separator . ']*?';
            continue;
        }

        // literal chunk: runs up to the next wildcard, or to the end of the tag
        $next = strpos($tag, '*', $i);
        if ($next === false) {
            $next = $len;
        }

        // escape any regex characters to prevent conflicts
        $regex .= preg_quote(substr($tag, $i, $next - $i), '/');

        // resume on the last character of the chunk; the loop increment moves past it
        $i = $next - 1;
    }

    // after the tag may only be the separator or the end
    $regex .= '(?:$|' . $separator . '))';

    return $regex;
}
509
4e3875c0
A
/**
 * Finalize the content of the foundPositions array: every search result is attached
 * to the bookmark field it starts in, with positions made relative to that field,
 * and results overlapping an already kept result are dropped.
 *
 * @param array $fieldLengths   Start and end positions of every bookmark field in the aggregated bookmark content.
 * @param array $foundPositions Positions where the search results were found in the aggregated content.
 *
 * @return array Updated $foundPositions, by bookmark field.
 */
protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
{
    // Sort results by starting position ASC. The spaceship operator returns 0 for equal
    // starts, as a usort() comparator is required to, so ties are not ordered arbitrarily.
    usort($foundPositions, function (array $entryA, array $entryB): int {
        return $entryA['start'] <=> $entryB['start'];
    });

    $out = [];
    $currentMax = -1;
    foreach ($foundPositions as $foundPosition) {
        // we do not allow overlapping highlights
        if ($foundPosition['start'] < $currentMax) {
            continue;
        }

        $currentMax = $foundPosition['end'];

        // Attach the result to the first field whose range contains its start.
        foreach ($fieldLengths as $part => $length) {
            if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) {
                continue;
            }

            // Positions become relative to the beginning of the field.
            $out[$part][] = [
                'start' => $foundPosition['start'] - $length['start'],
                'end' => $foundPosition['end'] - $length['start'],
            ];
            break;
        }
    }

    return $out;
}
551
/**
 * Concatenate bookmark fields (title, description, URL, tags - in that order) into a single
 * lowercase string to search across fields. Each field is followed by a '\' separator,
 * so exact search terms can not match across two fields.
 * Also populate the $lengths array with the starting and ending positions of every field
 * inside the concatenated content.
 *
 * @param Bookmark $link
 * @param array    $lengths (by reference) filled with ['start' => int, 'end' => int] for the keys
 *                          'title', 'description', 'url' and 'tags'.
 *
 * @return string Lowercase concatenated fields content.
 */
protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
{
    $tagsSeparator = $this->conf->get('general.tags_separator', ' ');

    $fields = [
        'title' => $link->getTitle(),
        'description' => $link->getDescription(),
        'url' => $link->getUrl(),
        'tags' => $link->getTagsString($tagsSeparator),
    ];

    $content = '';
    $cursor = 0;
    foreach ($fields as $name => $value) {
        $content .= mb_convert_case($value, MB_CASE_LOWER, 'UTF-8') . '\\';

        $fieldEnd = $cursor + mb_strlen($value);
        $lengths[$name] = ['start' => $cursor, 'end' => $fieldEnd];

        // Skip the one-character separator appended after the field.
        $cursor = $fieldEnd + 1;
    }

    return $content;
}
822bffce 580}