]>
Commit | Line | Data |
---|---|---|
822bffce A |
1 | <?php |
2 | ||
efb7d21b A |
3 | declare(strict_types=1); |
4 | ||
6696729b V |
5 | namespace Shaarli\Bookmark; |
6 | ||
7 | use Exception; | |
336a28fa | 8 | use Shaarli\Bookmark\Exception\BookmarkNotFoundException; |
f24896b2 | 9 | |
822bffce A |
/**
 * Class BookmarkFilter.
 *
 * Perform search and filter operations on a list of bookmarks.
 *
 * The list is only ever iterated with foreach and returned as-is, so it may be
 * a plain array or any iterable collection of bookmark objects.
 */
class BookmarkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var string no filter: not matched by any case of filter(), so it falls through to the default branch.
     */
    public static $DEFAULT = 'NO_FILTER';

    /** @var string Visibility: all */
    public static $ALL = 'all';

    /** @var string Visibility: public */
    public static $PUBLIC = 'public';

    /** @var string Visibility: private */
    public static $PRIVATE = 'private';

    /**
     * @var string Allowed characters for hashtags (regex syntax).
     */
    public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';

    /**
     * @var Bookmark[] all available bookmarks.
     */
    private $bookmarks;

    /**
     * @param Bookmark[] $bookmarks initialization.
     */
    public function __construct($bookmarks)
    {
        $this->bookmarks = $bookmarks;
    }

    /**
     * Filter bookmarks according to parameters.
     *
     * @param string $type          Type of filter (eg. tags, permalink, etc.).
     * @param mixed  $request       Filter content. For the combined tag + text filter this is indexed:
     *                              [0] holds the tags, [1] holds the full-text search terms.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
     * @param string $visibility    Optional: return only all/private/public bookmarks.
     *                              Any other value is treated as 'all'.
     * @param bool   $untaggedonly  Optional: return only untagged bookmarks. Applies only if $type includes FILTER_TAG
     *
     * @return Bookmark[] filtered bookmark list.
     *
     * @throws BookmarkNotFoundException if a permalink hash matches no bookmark.
     * @throws Exception                 if a day filter is given an invalid date.
     */
    public function filter(
        string $type,
        $request,
        bool $casesensitive = false,
        string $visibility = 'all',
        bool $untaggedonly = false
    ) {
        if (!in_array($visibility, ['all', 'public', 'private'], true)) {
            $visibility = 'all';
        }

        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);
            // Byte-wise OR of the two filter names: a distinct key meaning "tags AND text".
            case self::$FILTER_TAG | self::$FILTER_TEXT:
                $noRequest = empty($request) || (empty($request[0]) && empty($request[1]));
                if ($noRequest) {
                    return $untaggedonly
                        ? $this->filterUntagged($visibility)
                        : $this->noFilter($visibility);
                }

                $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->bookmarks;
                // Each criterion is applied on the result of the previous one.
                if (!empty($request[0])) {
                    $filtered = (new self($filtered))->filterTags($request[0], $casesensitive, $visibility);
                }
                if (!empty($request[1])) {
                    $filtered = (new self($filtered))->filterFulltext($request[1], $visibility);
                }

                return $filtered;
            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $visibility);
            case self::$FILTER_TAG:
                if ($untaggedonly) {
                    return $this->filterUntagged($visibility);
                }

                return $this->filterTags($request, $casesensitive, $visibility);
            case self::$FILTER_DAY:
                return $this->filterDay($request, $visibility);
            default:
                return $this->noFilter($visibility);
        }
    }

    /**
     * Tell whether a bookmark passes a visibility restriction.
     *
     * @param Bookmark $bookmark   bookmark to check (only isPrivate() is called).
     * @param string   $visibility 'private' rejects public bookmarks, 'public' rejects private ones,
     *                             any other value accepts every bookmark.
     *
     * @return bool true if the bookmark must be kept.
     */
    private static function matchesVisibility($bookmark, string $visibility): bool
    {
        if ($visibility === 'private') {
            return (bool) $bookmark->isPrivate();
        }
        if ($visibility === 'public') {
            return !$bookmark->isPrivate();
        }

        return true;
    }

    /**
     * Unknown filter, but handle private only.
     *
     * @param string $visibility Optional: return only all/private/public bookmarks.
     *                           A value other than these three yields an empty list.
     *
     * @return Bookmark[] filtered bookmarks.
     */
    private function noFilter(string $visibility = 'all')
    {
        if ($visibility === 'all') {
            return $this->bookmarks;
        }

        $out = [];
        foreach ($this->bookmarks as $key => $bookmark) {
            $keepPrivate = $visibility === 'private' && $bookmark->isPrivate();
            $keepPublic = $visibility === 'public' && !$bookmark->isPrivate();
            if ($keepPrivate || $keepPublic) {
                $out[$key] = $bookmark;
            }
        }

        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return Bookmark[] $filtered array containing permalink data.
     *
     * @throws BookmarkNotFoundException if the smallhash doesn't match any link.
     */
    private function filterSmallHash(string $smallHash)
    {
        foreach ($this->bookmarks as $key => $bookmark) {
            // Strict comparison: with ==, numeric-looking hashes such as
            // '0e1234' and '0e5678' would be compared as numbers and match each other.
            if ($smallHash === $bookmark->getShortUrl()) {
                return [$key => $bookmark];
            }
        }

        throw new BookmarkNotFoundException();
    }

    /**
     * Returns the list of bookmarks corresponding to a full-text search
     *
     * Searches:
     *  - in the title, description, URL and tags;
     *  - are case-insensitive;
     *  - terms surrounded by quotes " are exact terms search.
     *  - terms starting with a dash - are excluded (except exact terms).
     *
     * Example:
     *    print_r($mydb->filterFulltext('hollandais'));
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * @param string $searchterms search query.
     * @param string $visibility  Optional: return only all/private/public bookmarks.
     *
     * @return Bookmark[] search results.
     */
    private function filterFulltext(string $searchterms, string $visibility = 'all')
    {
        // Compare against '' rather than using empty(), which would also discard the search term '0'.
        if ($searchterms === '') {
            return $this->noFilter($visibility);
        }

        $isNotBlank = static function ($term): bool {
            return $term !== '';
        };

        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
        $exactRegex = '/"([^"]+)"/';

        // Retrieve exact search terms.
        preg_match_all($exactRegex, $search, $exactMatches);
        $exactSearch = array_values(array_filter($exactMatches[1], $isNotBlank));

        // Remove exact search terms to get AND terms search.
        $looseTerms = explode(' ', trim(preg_replace($exactRegex, '', $search)));
        $looseTerms = array_values(array_filter($looseTerms, $isNotBlank));

        // Split remaining terms between excluded ones (leading dash) and required ones.
        $excludeSearch = [];
        $andSearch = [];
        foreach ($looseTerms as $needle) {
            if ($needle[0] === '-' && strlen($needle) > 1) {
                $excludeSearch[] = substr($needle, 1);
            } else {
                $andSearch[] = $needle;
            }
        }

        // Exact and AND terms are checked the same way: all of them must be present.
        $requiredTerms = array_merge($exactSearch, $andSearch);

        $filtered = [];
        foreach ($this->bookmarks as $id => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // Concatenate link fields to search across fields.
            // Adds a '\' separator for exact search terms.
            $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') . '\\';
            $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') . '\\';
            $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') . '\\';
            $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') . '\\';

            // Be optimistic: all or nothing, stop at the first failing term.
            $found = true;
            foreach ($requiredTerms as $term) {
                if (strpos($content, $term) === false) {
                    $found = false;
                    break;
                }
            }

            if ($found) {
                foreach ($excludeSearch as $term) {
                    if (strpos($content, $term) !== false) {
                        $found = false;
                        break;
                    }
                }
            }

            if ($found) {
                $filtered[$id] = $link;
            }
        }

        return $filtered;
    }

    /**
     * generate a regex fragment out of a tag
     *
     * The fragment is a lookahead (negative when the tag starts with '-') which requires the
     * tag to be delimited by a space or by the beginning/end of the searched string.
     *
     * @param string $tag to generate regexs from. may start with '-' to negate, contain '*' as wildcard
     *
     * @return string generated regex fragment, or an empty string for '', '-' and '*'.
     */
    private static function tag2regex(string $tag): string
    {
        if ($tag === '' || $tag === '-' || $tag === '*') {
            // nothing to search, return empty regex
            return '';
        }

        $negated = $tag[0] === '-';
        $pattern = $negated ? substr($tag, 1) : $tag;

        // Literal chunks are escaped for the regex; each '*' between them becomes
        // a lazy "anything but a space" placeholder.
        $chunks = array_map(
            static function (string $chunk): string {
                return preg_quote($chunk, '/');
            },
            explode('*', $pattern)
        );

        return ($negated ? '(?!' : '(?=')
            . '.*(?:^| )' // before tag may only be a space or the beginning
            . implode('[^ ]*?', $chunks)
            . '(?:$| ))'; // after the tag may only be a space or the end
    }

    /**
     * Returns the list of bookmarks associated with a given list of tags
     *
     * You can specify one or more tags, separated by space or a comma, e.g.
     *  print_r($mydb->filterTags('linux programming'));
     *
     * Hashtags found in the description (#tag) are searched as well as the bookmark tags.
     *
     * @param string|array $tags          list of tags, separated by commas or blank spaces if passed as string.
     * @param bool         $casesensitive ignore case if false.
     * @param string       $visibility    Optional: return only all/private/public bookmarks.
     *
     * @return Bookmark[] filtered bookmarks.
     */
    public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
    {
        // get single tags (we may get passed an array, even though the docs say different)
        $inputTags = $tags;
        if (!is_array($tags)) {
            // we got an input string, split tags
            $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
        }

        if (!count($inputTags)) {
            // no input tags
            return $this->noFilter($visibility);
        }

        // If we only have public visibility, we can't look for hidden tags
        if ($visibility === self::$PUBLIC) {
            $inputTags = array_values(array_filter($inputTags, function ($tag) {
                return !startsWith($tag, '.');
            }));

            if (empty($inputTags)) {
                return [];
            }
        }

        // build regex from all tags; array callable instead of the "self::method" string form,
        // which is deprecated since PHP 8.2.
        $re = '/^' . implode('', array_map([self::class, 'tag2regex'], $inputTags)) . '.*$/';
        if (!$casesensitive) {
            // make regex case insensitive
            $re .= 'i';
        }

        $hashtagRegex = '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm';

        $filtered = [];
        foreach ($this->bookmarks as $key => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // build search string, start with tags of current link
            $search = $link->getTagsString();

            $description = $link->getDescription();
            if (strlen(trim($description)) && strpos($description, '#') !== false) {
                // description given and at least one possible tag found:
                // find all tags in the form of #tag in the description
                $descTags = [];
                preg_match_all($hashtagRegex, $description, $descTags);
                if (count($descTags[1])) {
                    // there were some tags in the description, add them to the search string
                    $search .= ' ' . implode(' ', $descTags[1]);
                }
            }

            // match regular expression with search string
            if (preg_match($re, $search)) {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Return only bookmarks without any tag.
     *
     * @param string $visibility return only all/private/public bookmarks.
     *
     * @return Bookmark[] filtered bookmarks.
     */
    public function filterUntagged(string $visibility)
    {
        $filtered = [];
        foreach ($this->bookmarks as $key => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // Not empty(): a bookmark whose only tag is '0' is tagged.
            if (trim($link->getTagsString()) === '') {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, in reverse order of the stored list
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($mydb->filterDay('20120125'));
     *
     * @param string $day        day to filter.
     * @param string $visibility return only all/private/public bookmarks.
     *
     * @return Bookmark[] all link matching given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay(string $day, string $visibility)
    {
        if (!checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $filtered = [];
        foreach ($this->bookmarks as $key => $bookmark) {
            // Honour both 'public' and 'private', like the other filters.
            if (!self::matchesVisibility($bookmark, $visibility)) {
                continue;
            }

            if ($bookmark->getCreated()->format('Ymd') === $day) {
                $filtered[$key] = $bookmark;
            }
        }

        // sort by date ASC
        // NOTE(review): this only reverses the iteration order, so it presumably relies on
        // the stored list being sorted by date DESC - confirm against callers.
        return array_reverse($filtered, true);
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handle case sensitivity.
     * - accepts spaces and commas as separators.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return string[] list of tags, without empty entries.
     */
    public static function tagsStrToArray(string $tags, bool $casesensitive): array
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        $tagsOut = str_replace(',', ' ', $tagsOut);

        return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
    }
}