]> git.immae.eu Git - github/shaarli/Shaarli.git/blame - application/bookmark/BookmarkFilter.php
Apply PHP Code Beautifier on source code for linter automatic fixes
[github/shaarli/Shaarli.git] / application / bookmark / BookmarkFilter.php
CommitLineData
822bffce
A
1<?php
2
efb7d21b
A
3declare(strict_types=1);
4
6696729b
V
5namespace Shaarli\Bookmark;
6
7use Exception;
336a28fa 8use Shaarli\Bookmark\Exception\BookmarkNotFoundException;
b3bd8c3e 9use Shaarli\Config\ConfigManager;
f24896b2 10
822bffce
A
11/**
12 * Class BookmarkFilter.
13 *
14 * Perform search and filter operation on link data list.
15 */
336a28fa 16class BookmarkFilter
822bffce
A
17{
18 /**
19 * @var string permalinks.
20 */
6696729b 21 public static $FILTER_HASH = 'permalink';
822bffce
A
22
23 /**
24 * @var string text search.
25 */
6696729b 26 public static $FILTER_TEXT = 'fulltext';
822bffce
A
27
28 /**
29 * @var string tag filter.
30 */
6696729b 31 public static $FILTER_TAG = 'tags';
822bffce
A
32
33 /**
34 * @var string filter by day.
35 */
6696729b 36 public static $FILTER_DAY = 'FILTER_DAY';
822bffce 37
336a28fa
A
38 /**
39 * @var string default filter type: no filter applied.
40 */
41 public static $DEFAULT = 'NO_FILTER';
42
43 /** @var string Visibility: all */
44 public static $ALL = 'all';
45
46 /** @var string Visibility: public */
47 public static $PUBLIC = 'public';
48
49 /** @var string Visibility: private */
50 public static $PRIVATE = 'private';
51
9ccca401
A
52 /**
53 * @var string Allowed characters for hashtags (regex syntax).
54 */
55 public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
56
822bffce 57 /**
336a28fa 58 * @var Bookmark[] all available bookmarks.
822bffce 59 */
336a28fa 60 private $bookmarks;
822bffce 61
b3bd8c3e
A
62 /** @var ConfigManager */
63 protected $conf;
64
/**
 * Build a filter working on the given bookmark collection.
 *
 * @param Bookmark[]    $bookmarks Bookmarks every filter of this instance searches through.
 * @param ConfigManager $conf      Configuration manager, read later for settings such as the tags separator.
 */
public function __construct($bookmarks, ConfigManager $conf)
{
    $this->conf = $conf;
    $this->bookmarks = $bookmarks;
}
73
/**
 * Entry point: dispatch the request to the filter matching $type.
 *
 * @param string $type          Type of filter (one of the FILTER_* static values, or a bitwise OR
 *                              of FILTER_TAG and FILTER_TEXT for a combined search).
 * @param mixed  $request       Filter content. For the combined search: [0] holds the tags, [1] the search terms.
 * @param bool   $casesensitive Optional: perform a case sensitive tag filter if true.
 * @param string $visibility    Optional: return only all/private/public bookmarks.
 *                              Any other value is treated as 'all'.
 * @param bool   $untaggedonly  Optional: return only untagged bookmarks. Applies only if $type includes FILTER_TAG.
 *
 * @return Bookmark[] filtered bookmark list.
 *
 * @throws BookmarkNotFoundException
 */
public function filter(
    string $type,
    $request,
    bool $casesensitive = false,
    string $visibility = 'all',
    bool $untaggedonly = false
) {
    // Unknown visibility values fall back to the widest one.
    $visibility = in_array($visibility, ['all', 'public', 'private']) ? $visibility : 'all';

    switch ($type) {
        case self::$FILTER_HASH:
            return $this->filterSmallHash($request);

        // Bytewise OR of the two type strings: this is the value callers build for a combined search.
        case self::$FILTER_TAG | self::$FILTER_TEXT:
            return $this->filterTagsAndFulltext($request, $casesensitive, $visibility, $untaggedonly);

        case self::$FILTER_TEXT:
            return $this->filterFulltext($request, $visibility);

        case self::$FILTER_TAG:
            return $untaggedonly
                ? $this->filterUntagged($visibility)
                : $this->filterTags($request, $casesensitive, $visibility);

        case self::$FILTER_DAY:
            return $this->filterDay($request, $visibility);

        default:
            return $this->noFilter($visibility);
    }
}

/**
 * Combined search: narrow the bookmarks by tags first, then by full-text terms.
 *
 * @param mixed  $request       [0] holds the tags, [1] the full-text search terms; both are optional.
 * @param bool   $casesensitive Perform a case sensitive tag filter if true.
 * @param string $visibility    Return only all/private/public bookmarks.
 * @param bool   $untaggedonly  Start from untagged bookmarks only.
 *
 * @return Bookmark[] filtered bookmark list.
 */
private function filterTagsAndFulltext($request, bool $casesensitive, string $visibility, bool $untaggedonly)
{
    $hasTags = !empty($request) && !empty($request[0]);
    $hasText = !empty($request) && !empty($request[1]);

    // Nothing to search for: only the untagged/visibility restrictions apply.
    if (!$hasTags && !$hasText) {
        return $untaggedonly ? $this->filterUntagged($visibility) : $this->noFilter($visibility);
    }

    $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->bookmarks;

    // Each step runs on a fresh filter wrapping the previous step's result.
    if ($hasTags) {
        $tagFilter = new self($filtered, $this->conf);
        $filtered = $tagFilter->filterTags($request[0], $casesensitive, $visibility);
    }
    if ($hasText) {
        $textFilter = new self($filtered, $this->conf);
        $filtered = $textFilter->filterFulltext($request[1], $visibility);
    }

    return $filtered;
}
139
/**
 * Apply no search criteria: only the visibility restriction is honoured.
 *
 * @param string $visibility Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] bookmarks matching the requested visibility, original keys preserved.
 */
private function noFilter(string $visibility = 'all')
{
    if ($visibility === 'all') {
        return $this->bookmarks;
    }

    $kept = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        // A bookmark is kept when its own visibility is the requested one.
        $bookmarkVisibility = $bookmark->isPrivate() ? 'private' : 'public';
        if ($bookmarkVisibility === $visibility) {
            $kept[$id] = $bookmark;
        }
    }

    return $kept;
}
164
/**
 * Returns the shaare corresponding to a smallHash.
 *
 * @param string $smallHash permalink hash.
 *
 * @return Bookmark[] single-entry array (original key preserved) containing the matching bookmark.
 *
 * @throws BookmarkNotFoundException if the smallhash doesn't match any link.
 */
private function filterSmallHash(string $smallHash)
{
    // Linear scan over every bookmark: slow, but the collection is not indexed by hash here.
    foreach ($this->bookmarks as $key => $bookmark) {
        // Strict comparison on purpose: with a loose `==`, two different numeric-looking
        // hashes (e.g. "0e1234" and "0e5678") would be considered equal.
        if ($smallHash === (string) $bookmark->getShortUrl()) {
            return [$key => $bookmark];
        }
    }

    throw new BookmarkNotFoundException();
}
185
/**
 * Returns the list of bookmarks corresponding to a full-text search.
 *
 * Searches:
 *  - run against the concatenated, lowercased bookmark fields
 *    (see buildFullTextSearchableLink());
 *  - are case-insensitive;
 *  - terms surrounded by quotes " are exact terms search;
 *  - terms starting with a dash - are excluded (except exact terms).
 *
 * Every matching bookmark receives a 'search_highlight' additional content entry
 * describing where the terms were found.
 *
 * Example:
 *    print_r($mydb->filterFulltext('hollandais'));
 *
 * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
 *  - allows to perform searches on Unicode text
 *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
 *
 * @param string $searchterms search query.
 * @param string $visibility  Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] search results.
 */
private function filterFulltext(string $searchterms, string $visibility = 'all')
{
    if (empty($searchterms)) {
        return $this->noFilter($visibility);
    }

    $query = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
    $exactRegex = '/"([^"]+)"/';

    // Quoted parts of the query are exact terms.
    preg_match_all($exactRegex, $query, $quoted);
    $exactTerms = array_values(array_filter($quoted[1]));

    // What remains once the quoted parts are removed is split on spaces.
    $words = array_values(array_filter(explode(' ', trim(preg_replace($exactRegex, '', $query)))));

    // A leading dash (followed by something) marks a word as excluded.
    $requiredTerms = [];
    $excludedTerms = [];
    foreach ($words as $word) {
        if ($word[0] == '-' && strlen($word) > 1) {
            $excludedTerms[] = substr($word, 1);
        } else {
            $requiredTerms[] = $word;
        }
    }

    // Exact terms are looked up first, then the plain keywords.
    $mandatoryTerms = array_merge($exactTerms, $requiredTerms);

    $results = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        // Skip bookmarks which do not have the requested visibility.
        $private = $bookmark->isPrivate();
        if (($visibility === 'private' && !$private) || ($visibility === 'public' && $private)) {
            continue;
        }

        $lengths = [];
        $content = $this->buildFullTextSearchableLink($bookmark, $lengths);

        // All or nothing: stop at the first mandatory term which is missing.
        $matches = true;
        $foundPositions = [];
        foreach ($mandatoryTerms as $term) {
            $position = mb_strpos($content, $term);
            if ($position === false) {
                $matches = false;
                break;
            }

            $foundPositions[] = ['start' => $position, 'end' => $position + mb_strlen($term)];
        }

        // A single excluded term present in the content rejects the bookmark.
        if ($matches) {
            foreach ($excludedTerms as $excluded) {
                if (strpos($content, $excluded) !== false) {
                    $matches = false;
                    break;
                }
            }
        }

        if (!$matches) {
            continue;
        }

        $bookmark->addAdditionalContentEntry(
            'search_highlight',
            $this->postProcessFoundPositions($lengths, $foundPositions)
        );
        $results[$id] = $bookmark;
    }

    return $results;
}
284
341527ba
WE
/**
 * Generate a regex fragment (a lookahead group) out of a tag.
 *
 * The fragment asserts that the tag is present (or absent, when negated) in a string of tags
 * joined with the configured 'general.tags_separator'. It is meant to be embedded in a
 * pattern delimited by '/'.
 *
 * @param string $tag tag to generate the regex from. May start with '-' to negate,
 *                    and contain '*' as wildcard.
 *
 * @return string generated regex fragment, or an empty string when there is nothing to search
 *                (empty tag, lone '-' or lone '*').
 */
protected function tag2regex(string $tag): string
{
    if ($tag === '' || $tag === '-' || $tag === '*') {
        // nothing to search, return empty regex
        return '';
    }

    // The separator is a configuration value: escape it so that characters such as
    // '|', '.' or '/' are matched literally instead of altering the pattern.
    $separator = preg_quote((string) $this->conf->get('general.tags_separator', ' '), '/');

    // A leading '-' negates the query: negative lookahead instead of a positive one.
    $negated = $tag[0] === '-';
    $lookahead = $negated ? '(?!' : '(?=';
    $name = $negated ? substr($tag, 1) : $tag;

    // Each '*' matches any run of characters which stays inside a single tag.
    $wildcard = '[^' . $separator . ']*?';

    // Literal chunks between wildcards are escaped to prevent conflicts with regex syntax.
    $literals = array_map(
        function (string $literal): string {
            return preg_quote($literal, '/');
        },
        explode('*', $name)
    );

    return $lookahead
        // before the tag may only be the separator or the beginning
        . '.*(?:^|' . $separator . ')'
        . implode($wildcard, $literals)
        // after the tag may only be the separator or the end
        . '(?:$|' . $separator . '))';
}
334
/**
 * Returns the list of bookmarks associated with a given list of tags.
 *
 * You can specify one or more tags, e.g.
 *    print_r($mydb->filterTags('linux programming'));
 *
 * Hashtags (#tag) found in a bookmark description are searched as well as its regular tags.
 * With public visibility, searched tags starting with a dot are dropped from the query.
 *
 * @param string|array $tags          list of tags; a string is split using the configured tags separator.
 * @param bool         $casesensitive ignore case if false.
 * @param string       $visibility    Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] filtered bookmarks.
 */
public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
{
    $tagsSeparator = $this->conf->get('general.tags_separator', ' ');

    // we may get passed an array as well as a string: only strings need to be split
    $inputTags = is_array($tags) ? $tags : tags_str2array($tags, $tagsSeparator);

    if (count($inputTags) === 0) {
        // no input tags
        return $this->noFilter($visibility);
    }

    // If we only have public visibility, we can't look for hidden tags
    if ($visibility === self::$PUBLIC) {
        $inputTags = array_values(array_filter($inputTags, function ($tag) {
            return ! startsWith($tag, '.');
        }));

        if (empty($inputTags)) {
            return [];
        }
    }

    // build regex from all tags: one lookahead per tag, all of them must hold
    $re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/';
    if (!$casesensitive) {
        // make regex case insensitive
        $re .= 'i';
    }

    // The 'u' modifier is required for the \p{..} classes to see UTF-8 characters
    // rather than single bytes; without it multi-byte hashtags are cut short.
    $hashtagRegex = '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/smu';

    $filtered = [];
    foreach ($this->bookmarks as $key => $link) {
        // check level of visibility
        $private = $link->isPrivate();
        if (($visibility === 'private' && !$private) || ($visibility === 'public' && $private)) {
            continue;
        }

        // build search string, start with tags of current link
        $search = $link->getTagsString($tagsSeparator);

        $description = (string) $link->getDescription();
        if (strpos($description, '#') !== false) {
            // at least one possible hashtag: find all tags in the form of #tag in the description
            $descTags = [];
            $matched = preg_match_all($hashtagRegex, $description, $descTags);
            // preg_match_all() returns false on failure (e.g. a description which is not
            // valid UTF-8): description hashtags are then simply not taken into account.
            if ($matched && !empty($descTags[1])) {
                $search .= $tagsSeparator . tags_array2str($descTags[1], $tagsSeparator);
            }
        }

        // match regular expression with search string
        if (preg_match($re, $search)) {
            $filtered[$key] = $link;
        }
    }

    return $filtered;
}
419
/**
 * Keep the bookmarks which do not carry any tag.
 *
 * @param string $visibility return only all/private/public bookmarks.
 *
 * @return Bookmark[] untagged bookmarks, original keys preserved.
 */
public function filterUntagged(string $visibility)
{
    $untagged = [];

    foreach ($this->bookmarks as $id => $bookmark) {
        // Skip bookmarks which do not have the requested visibility.
        $private = $bookmark->isPrivate();
        if (($visibility === 'private' && !$private) || ($visibility === 'public' && $private)) {
            continue;
        }

        if (!empty($bookmark->getTags())) {
            continue;
        }

        $untagged[$id] = $bookmark;
    }

    return $untagged;
}
446
822bffce
A
/**
 * Returns the list of articles for a given day, in the reverse of their stored order.
 *
 * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
 *    print_r($mydb->filterDay('20120125'));
 *
 * @param string $day        day to filter.
 * @param string $visibility return only all/private/public bookmarks.
 *
 * @return Bookmark[] all link matching given day.
 *
 * @throws Exception if date format is invalid.
 */
public function filterDay(string $day, string $visibility)
{
    if (!checkDateFormat('Ymd', $day)) {
        throw new Exception('Invalid date format');
    }

    $filtered = [];
    foreach ($this->bookmarks as $key => $bookmark) {
        // Honour both restricted visibilities, like the other filters of this class do
        // (private visibility used to be ignored here and let public bookmarks through).
        $private = $bookmark->isPrivate();
        if (($visibility === static::$PUBLIC && $private) || ($visibility === static::$PRIVATE && !$private)) {
            continue;
        }

        if ($bookmark->getCreated()->format('Ymd') === $day) {
            $filtered[$key] = $bookmark;
        }
    }

    // sort by date ASC
    // NOTE(review): this only reverses the iteration order, so it presumably relies on
    // the bookmarks being stored newest first - confirm against the caller.
    return array_reverse($filtered, true);
}
480
/**
 * Split a string of tags into a list of tags.
 *  - commas and any run of whitespace both act as separators;
 *  - empty entries are dropped;
 *  - unless the conversion is case sensitive, tags are lowercased.
 *
 * @param string $tags          string containing a list of tags.
 * @param bool   $casesensitive will convert everything to lowercase if false.
 *
 * @return string[] list of tags.
 */
public static function tagsStrToArray(string $tags, bool $casesensitive): array
{
    if (!$casesensitive) {
        // UTF-8 aware conversion, to handle various graphemes (i.e. cyrillic, or greek)
        $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
    }

    // Commas become spaces, so that a single whitespace split handles both separators.
    $spaced = strtr($tags, ',', ' ');

    return preg_split('/\s+/', $spaced, -1, PREG_SPLIT_NO_EMPTY);
}
4e3875c0
A
499
/**
 * Finalize the content of the foundPositions array: every search result is attached to the
 * bookmark field it starts in, with positions made relative to that field, and results
 * overlapping an already kept one are dropped.
 *
 * @param array $fieldLengths   Start and end positions of every bookmark fields in the aggregated bookmark content,
 *                              as ['field' => ['start' => int, 'end' => int]].
 * @param array $foundPositions Positions where the search results were found in the aggregated content,
 *                              as a list of ['start' => int, 'end' => int].
 *
 * @return array Updated $foundPositions, by bookmark field. Fields without any result are absent.
 */
protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
{
    // Sort results by starting position ASC. The spaceship operator returns 0 for equal
    // positions, as usort() expects from a comparison callback.
    usort($foundPositions, function (array $entryA, array $entryB): int {
        return $entryA['start'] <=> $entryB['start'];
    });

    $out = [];
    $currentMax = -1;
    foreach ($foundPositions as $foundPosition) {
        // we do not allow overlapping highlights: skip results starting before the end of the last kept one
        if ($foundPosition['start'] < $currentMax) {
            continue;
        }

        $currentMax = $foundPosition['end'];
        foreach ($fieldLengths as $part => $length) {
            $startsInField = $foundPosition['start'] >= $length['start']
                && $foundPosition['start'] <= $length['end'];
            if (!$startsInField) {
                continue;
            }

            // Translate positions in the aggregated content into positions inside the field.
            $out[$part][] = [
                'start' => $foundPosition['start'] - $length['start'],
                'end' => $foundPosition['end'] - $length['start'],
            ];
            // A result belongs to a single field.
            break;
        }
    }

    return $out;
}
541
/**
 * Concatenate link fields to search across fields. Adds a '\' separator after every field,
 * so that exact search terms can not match across two fields.
 * Also populate $lengths array with starting and ending positions of every bookmark field
 * (title, description, url, tags - in that order) inside concatenated content.
 *
 * Positions are measured on the lowercased text which is actually concatenated: lowercasing
 * can change the length of a string, and search positions are computed on that same content.
 *
 * @param Bookmark $link
 * @param array    $lengths (by reference) filled as ['field' => ['start' => int, 'end' => int]].
 *
 * @return string Lowercase concatenated fields content.
 */
protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
{
    $fields = [
        'title' => $link->getTitle(),
        'description' => $link->getDescription(),
        'url' => $link->getUrl(),
        'tags' => $link->getTagsString($this->conf->get('general.tags_separator', ' ')),
    ];

    $content = '';
    $start = 0;
    foreach ($fields as $name => $value) {
        // Cast to string: under strict_types a null field value would make mb_convert_case() throw.
        $lowered = mb_convert_case((string) $value, MB_CASE_LOWER, 'UTF-8');
        $end = $start + mb_strlen($lowered);

        $lengths[$name] = ['start' => $start, 'end' => $end];
        $content .= $lowered . '\\';

        // Next field starts right after the '\' separator.
        $start = $end + 1;
    }

    return $content;
}
822bffce 570}