]>
Commit | Line | Data |
---|---|---|
1 | <?php | |
2 | ||
3 | namespace Shaarli\Bookmark; | |
4 | ||
5 | use Exception; | |
6 | use Shaarli\Bookmark\Exception\BookmarkNotFoundException; | |
7 | ||
/**
 * Class BookmarkFilter.
 *
 * Performs search and filter operations on a list of bookmarks.
 *
 * The bookmark list is only ever iterated with foreach and its keys are
 * preserved in every result, so any iterable keyed collection can be used.
 */
class BookmarkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var string no filter: any unknown filter type falls back to this behaviour.
     */
    public static $DEFAULT = 'NO_FILTER';

    /** @var string Visibility: all */
    public static $ALL = 'all';

    /** @var string Visibility: public */
    public static $PUBLIC = 'public';

    /** @var string Visibility: private */
    public static $PRIVATE = 'private';

    /**
     * @var string Allowed characters for hashtags (regex syntax).
     */
    public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';

    /**
     * @var Bookmark[] all available bookmarks.
     */
    private $bookmarks;

    /**
     * @param Bookmark[] $bookmarks initialization.
     */
    public function __construct($bookmarks)
    {
        $this->bookmarks = $bookmarks;
    }

    /**
     * Filter bookmarks according to parameters.
     *
     * @param string $type          Type of filter (eg. tags, permalink, etc.).
     * @param mixed  $request       Filter content. For the combined tag + text filter this is
     *                              an array: index 0 holds the tags, index 1 the search terms.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
     * @param string $visibility    Optional: return only all/private/public bookmarks.
     *                              Any other value is treated as 'all'.
     * @param bool   $untaggedonly  Optional: return only untagged bookmarks. Applies only if $type includes FILTER_TAG
     *
     * @return Bookmark[] filtered bookmark list.
     *
     * @throws BookmarkNotFoundException
     */
    public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false)
    {
        // Strict check: with a loose comparison, values such as 0 or true would pass validation.
        if (!in_array($visibility, ['all', 'public', 'private'], true)) {
            $visibility = 'all';
        }

        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);
            // Byte-wise OR of the two filter names: callers build the combined
            // filter type with the very same expression, so it must stay as is.
            case self::$FILTER_TAG | self::$FILTER_TEXT:
                $noRequest = empty($request) || (empty($request[0]) && empty($request[1]));
                if ($noRequest) {
                    return $untaggedonly
                        ? $this->filterUntagged($visibility)
                        : $this->noFilter($visibility);
                }

                $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->bookmarks;
                // Each criterion narrows down the result of the previous one.
                if (!empty($request[0])) {
                    $filtered = (new self($filtered))->filterTags($request[0], $casesensitive, $visibility);
                }
                if (!empty($request[1])) {
                    $filtered = (new self($filtered))->filterFulltext($request[1], $visibility);
                }
                return $filtered;
            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $visibility);
            case self::$FILTER_TAG:
                if ($untaggedonly) {
                    return $this->filterUntagged($visibility);
                }
                return $this->filterTags($request, $casesensitive, $visibility);
            case self::$FILTER_DAY:
                return $this->filterDay($request, $visibility);
            default:
                return $this->noFilter($visibility);
        }
    }

    /**
     * Tell whether a bookmark passes a visibility restriction.
     *
     * 'private' keeps private bookmarks only, 'public' keeps public bookmarks only,
     * any other value (including 'all') does not restrict anything.
     *
     * @param Bookmark $bookmark   Bookmark to check.
     * @param string   $visibility all/private/public.
     *
     * @return bool true if the bookmark must be kept.
     */
    private static function matchesVisibility($bookmark, $visibility)
    {
        if ($visibility === self::$PRIVATE) {
            return (bool) $bookmark->isPrivate();
        }
        if ($visibility === self::$PUBLIC) {
            return !$bookmark->isPrivate();
        }

        return true;
    }

    /**
     * Unknown filter, but handle private only.
     *
     * Unlike the other filters, a visibility which is neither 'all', 'private'
     * nor 'public' yields an empty result here.
     *
     * @param string $visibility Optional: return only all/private/public bookmarks
     *
     * @return Bookmark[] filtered bookmarks.
     */
    private function noFilter($visibility = 'all')
    {
        if ($visibility === 'all') {
            return $this->bookmarks;
        }

        $out = [];
        foreach ($this->bookmarks as $key => $bookmark) {
            $keep = $bookmark->isPrivate()
                ? $visibility === 'private'
                : $visibility === 'public';
            if ($keep) {
                $out[$key] = $bookmark;
            }
        }

        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return array $filtered array containing permalink data (a single key => bookmark pair).
     *
     * @throws \Shaarli\Bookmark\Exception\BookmarkNotFoundException if the smallhash doesn't match any link.
     */
    private function filterSmallHash($smallHash)
    {
        $wanted = (string) $smallHash;
        foreach ($this->bookmarks as $key => $bookmark) {
            // Strict string comparison: with '==', numeric-looking hashes such as
            // '0e1234' and '0e5678' are compared as numbers and considered equal.
            if ($wanted === (string) $bookmark->getShortUrl()) {
                // Linear scan: the list is not indexed by hash.
                return [$key => $bookmark];
            }
        }

        throw new BookmarkNotFoundException();
    }

    /**
     * Returns the list of bookmarks corresponding to a full-text search
     *
     * Searches:
     *  - in the title, description, URL and tags;
     *  - are case-insensitive;
     *  - terms surrounded by quotes " are exact terms search.
     *  - terms starting with a dash - are excluded (except exact terms).
     *
     * Example:
     *    print_r($mydb->filterFulltext('hollandais'));
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * @param string $searchterms search query.
     * @param string $visibility  Optional: return only all/private/public bookmarks.
     *
     * @return array search results.
     */
    private function filterFulltext($searchterms, $visibility = 'all')
    {
        if (empty($searchterms)) {
            return $this->noFilter($visibility);
        }

        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
        $exactRegex = '/"([^"]+)"/';

        // Retrieve exact search terms.
        preg_match_all($exactRegex, $search, $exactMatches);
        $exactSearch = array_values(array_filter($exactMatches[1]));

        // Remove exact search terms to get the remaining space separated terms.
        $words = explode(' ', trim(preg_replace($exactRegex, '', $search)));
        $words = array_values(array_filter($words));

        // Split remaining terms between required ones and excluded ones ('-term').
        $excludeSearch = [];
        $andSearch = [];
        foreach ($words as $needle) {
            if ($needle[0] === '-' && strlen($needle) > 1) {
                $excludeSearch[] = substr($needle, 1);
            } else {
                $andSearch[] = $needle;
            }
        }

        // Exact terms and plain terms are checked the same way: all of them must be found.
        $requiredSearch = array_merge($exactSearch, $andSearch);

        $filtered = [];
        foreach ($this->bookmarks as $id => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // Concatenate link fields to search across fields.
            // Adds a '\' separator so exact terms can not span over two fields.
            $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') . '\\';
            $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') . '\\';
            $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') . '\\';
            $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') . '\\';

            // All or nothing: skip the bookmark as soon as a required term is missing...
            foreach ($requiredSearch as $needle) {
                if (strpos($content, $needle) === false) {
                    continue 2;
                }
            }
            // ...or as soon as an excluded term is present.
            foreach ($excludeSearch as $needle) {
                if (strpos($content, $needle) !== false) {
                    continue 2;
                }
            }

            $filtered[$id] = $link;
        }

        return $filtered;
    }

    /**
     * generate a regex fragment out of a tag
     *
     * The fragment is a lookahead, so several fragments can be concatenated
     * to require (or forbid) several tags in the same space separated string.
     *
     * @param string $tag to generate regexs from. may start with '-' to negate, contain '*' as wildcard
     *
     * @return string generated regex fragment, empty string if there is nothing to search.
     */
    private static function tag2regex($tag)
    {
        if ($tag === '' || $tag === '-' || $tag === '*') {
            // nothing to search, return empty regex
            return '';
        }

        if ($tag[0] === '-') {
            // query is negated: negative lookahead on the tag without its leading '-'
            $lookahead = '(?!';
            $name = substr($tag, 1);
        } else {
            $lookahead = '(?=';
            $name = $tag;
        }

        // Literal parts are escaped to prevent conflicts with regex syntax,
        // each '*' becomes "any run of non space characters".
        $pattern = implode(
            '[^ ]*?',
            array_map(
                function ($part) {
                    return preg_quote($part, '/');
                },
                explode('*', $name)
            )
        );

        // before the tag may only be a space or the beginning,
        // after the tag may only be a space or the end
        return $lookahead . '.*(?:^| )' . $pattern . '(?:$| ))';
    }

    /**
     * Returns the list of bookmarks associated with a given list of tags
     *
     * You can specify one or more tags, separated by space or a comma, e.g.
     *  print_r($mydb->filterTags('linux programming'));
     *
     * Hashtags (#tag) found in the bookmark description are searched as well.
     *
     * @param string|array $tags          list of tags separated by commas or blank spaces, or an array of tags.
     * @param bool         $casesensitive ignore case if false.
     * @param string       $visibility    Optional: return only all/private/public bookmarks.
     *
     * @return array filtered bookmarks.
     */
    public function filterTags($tags, $casesensitive = false, $visibility = 'all')
    {
        // get single tags (we may get passed an array, even though the docs say different)
        $inputTags = $tags;
        if (!is_array($tags)) {
            // we got an input string, split tags
            $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
        }

        if (!count($inputTags)) {
            // no input tags
            return $this->noFilter($visibility);
        }

        // If we only have public visibility, we can't look for hidden tags
        if ($visibility === self::$PUBLIC) {
            $inputTags = array_values(array_filter($inputTags, function ($tag) {
                return ! startsWith($tag, '.');
            }));

            if (empty($inputTags)) {
                return [];
            }
        }

        // build regex from all tags
        // (closure instead of the "self::tag2regex" string callable, deprecated since PHP 8.2)
        $fragments = array_map(
            function ($tag) {
                return self::tag2regex($tag);
            },
            $inputTags
        );
        $re = '/^' . implode($fragments) . '.*$/';
        if (!$casesensitive) {
            // make regex case insensitive
            $re .= 'i';
        }

        $hashtagRegex = '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm';

        $filtered = [];
        foreach ($this->bookmarks as $key => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // build search string, start with tags of current link
            $search = $link->getTagsString();
            if (strlen(trim($link->getDescription())) && strpos($link->getDescription(), '#') !== false) {
                // description given and at least one possible tag found:
                // find all tags in the form of #tag in the description
                $descTags = [];
                preg_match_all($hashtagRegex, $link->getDescription(), $descTags);
                if (count($descTags[1])) {
                    // there were some tags in the description, add them to the search string
                    $search .= ' ' . implode(' ', $descTags[1]);
                }
            }

            // match regular expression with search string
            if (preg_match($re, $search)) {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Return only bookmarks without any tag.
     *
     * @param string $visibility return only all/private/public bookmarks.
     *
     * @return array filtered bookmarks.
     */
    public function filterUntagged($visibility)
    {
        $filtered = [];
        foreach ($this->bookmarks as $key => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // Explicit comparison: empty() would also consider the single tag "0" as "no tag".
            if (trim($link->getTagsString()) === '') {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, chronologically sorted
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($mydb->filterDay('20120125'));
     *
     * @param string $day        day to filter.
     * @param string $visibility return only all/private/public bookmarks.
     *
     * @return array all link matching given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay($day, $visibility)
    {
        if (!checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $filtered = [];
        foreach ($this->bookmarks as $key => $bookmark) {
            // Honours 'private' as well as 'public', like every other filter.
            if (!self::matchesVisibility($bookmark, $visibility)) {
                continue;
            }

            if ($bookmark->getCreated()->format('Ymd') == $day) {
                $filtered[$key] = $bookmark;
            }
        }

        // sort by date ASC
        // NOTE(review): this only reverses the iteration order, which presumably
        // is newest first in the stored list - confirm against callers.
        return array_reverse($filtered, true);
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handle case sensitivity.
     * - accepts spaces commas as separator.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return array filtered tags string.
     */
    public static function tagsStrToArray($tags, $casesensitive)
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        $tagsOut = str_replace(',', ' ', $tagsOut);

        return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
    }
}