diff options
Diffstat (limited to 'application/bookmark')
-rw-r--r-- | application/bookmark/LinkDB.php | 26 | ||||
-rw-r--r-- | application/bookmark/LinkFilter.php | 449 | ||||
-rw-r--r-- | application/bookmark/exception/LinkNotFoundException.php | 15 |
3 files changed, 477 insertions, 13 deletions
diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php index 3b77422a..6041c088 100644 --- a/application/bookmark/LinkDB.php +++ b/application/bookmark/LinkDB.php | |||
@@ -6,8 +6,8 @@ use ArrayAccess; | |||
6 | use Countable; | 6 | use Countable; |
7 | use DateTime; | 7 | use DateTime; |
8 | use Iterator; | 8 | use Iterator; |
9 | use LinkFilter; | 9 | use Shaarli\Bookmark\LinkFilter; |
10 | use LinkNotFoundException; | 10 | use Shaarli\Bookmark\Exception\LinkNotFoundException; |
11 | use Shaarli\Exceptions\IOException; | 11 | use Shaarli\Exceptions\IOException; |
12 | use Shaarli\FileUtils; | 12 | use Shaarli\FileUtils; |
13 | 13 | ||
@@ -107,10 +107,10 @@ class LinkDB implements Iterator, Countable, ArrayAccess | |||
107 | * | 107 | * |
108 | * Checks if the datastore exists; else, attempts to create a dummy one. | 108 | * Checks if the datastore exists; else, attempts to create a dummy one. |
109 | * | 109 | * |
110 | * @param string $datastore datastore file path. | 110 | * @param string $datastore datastore file path. |
111 | * @param boolean $isLoggedIn is the user logged in? | 111 | * @param boolean $isLoggedIn is the user logged in? |
112 | * @param boolean $hidePublicLinks if true all links are private. | 112 | * @param boolean $hidePublicLinks if true all links are private. |
113 | * @param string $redirector link redirector set in user settings. | 113 | * @param string $redirector link redirector set in user settings. |
114 | * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true). | 114 | * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true). |
115 | */ | 115 | */ |
116 | public function __construct( | 116 | public function __construct( |
@@ -426,12 +426,12 @@ You use the community supported version of the original Shaarli project, by Seba | |||
426 | /** | 426 | /** |
427 | * Filter links according to search parameters. | 427 | * Filter links according to search parameters. |
428 | * | 428 | * |
429 | * @param array $filterRequest Search request content. Supported keys: | 429 | * @param array $filterRequest Search request content. Supported keys: |
430 | * - searchtags: list of tags | 430 | * - searchtags: list of tags |
431 | * - searchterm: term search | 431 | * - searchterm: term search |
432 | * @param bool $casesensitive Optional: Perform case sensitive filter | 432 | * @param bool $casesensitive Optional: Perform case sensitive filter |
433 | * @param string $visibility return only all/private/public links | 433 | * @param string $visibility return only all/private/public links |
434 | * @param string $untaggedonly return only untagged links | 434 | * @param bool $untaggedonly return only untagged links |
435 | * | 435 | * |
436 | * @return array filtered links, all links if no suitable filter was provided. | 436 | * @return array filtered links, all links if no suitable filter was provided. |
437 | */ | 437 | */ |
@@ -457,8 +457,8 @@ You use the community supported version of the original Shaarli project, by Seba | |||
457 | /** | 457 | /** |
458 | * Returns the list tags appearing in the links with the given tags | 458 | * Returns the list tags appearing in the links with the given tags |
459 | * | 459 | * |
460 | * @param array $filteringTags tags selecting the links to consider | 460 | * @param array $filteringTags tags selecting the links to consider |
461 | * @param string $visibility process only all/private/public links | 461 | * @param string $visibility process only all/private/public links |
462 | * | 462 | * |
463 | * @return array tag => linksCount | 463 | * @return array tag => linksCount |
464 | */ | 464 | */ |
@@ -500,7 +500,7 @@ You use the community supported version of the original Shaarli project, by Seba | |||
500 | * Rename or delete a tag across all links. | 500 | * Rename or delete a tag across all links. |
501 | * | 501 | * |
502 | * @param string $from Tag to rename | 502 | * @param string $from Tag to rename |
503 | * @param string $to New tag. If none is provided, the from tag will be deleted | 503 | * @param string $to New tag. If none is provided, the from tag will be deleted |
504 | * | 504 | * |
505 | * @return array|bool List of altered links or false on error | 505 | * @return array|bool List of altered links or false on error |
506 | */ | 506 | */ |
diff --git a/application/bookmark/LinkFilter.php b/application/bookmark/LinkFilter.php new file mode 100644 index 00000000..9b966307 --- /dev/null +++ b/application/bookmark/LinkFilter.php | |||
@@ -0,0 +1,449 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Shaarli\Bookmark; | ||
4 | |||
5 | use Exception; | ||
6 | use Shaarli\Bookmark\Exception\LinkNotFoundException; | ||
7 | |||
/**
 * Class LinkFilter.
 *
 * Performs search and filter operations on a list of links.
 *
 * The list only needs to be iterable with `foreach`; every link is accessed
 * as an array using the keys 'private', 'shorturl', 'title', 'description',
 * 'url', 'tags' and 'created', depending on the filter being applied.
 */
class LinkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var string Allowed characters for hashtags (regex syntax).
     */
    public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';

    /**
     * @var LinkDB|array all available links (filter() also wraps plain arrays of already filtered links).
     */
    private $links;

    /**
     * @param LinkDB|array $links list of links to filter.
     */
    public function __construct($links)
    {
        $this->links = $links;
    }

    /**
     * Filter links according to parameters.
     *
     * @param string $type          Type of filter (eg. tags, permalink, etc.).
     * @param mixed  $request       Filter content. For the combined tag + text filter this is a
     *                              two-item list: [0] => tags, [1] => search terms.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
     * @param string $visibility    Optional: return only all/private/public links.
     *                              Any other value is treated as 'all'.
     * @param bool   $untaggedonly  Optional: return only untagged links. Applies only if $type includes FILTER_TAG
     *
     * @return array filtered link list.
     *
     * @throws LinkNotFoundException permalink filter: no link matches the hash.
     * @throws Exception             day filter: invalid date format.
     */
    public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false)
    {
        // Strict comparison: a loose in_array() lets values such as `true` through
        // (true == 'all'), which then silently filtered out every link.
        if (!in_array($visibility, ['all', 'public', 'private'], true)) {
            $visibility = 'all';
        }

        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);
            // Callers combine both filter names with a bitwise OR on the strings,
            // which yields a distinct string (roughly "vuotext").
            case self::$FILTER_TAG | self::$FILTER_TEXT:
                $tagsRequest = isset($request[0]) ? $request[0] : null;
                $textRequest = isset($request[1]) ? $request[1] : null;
                $hasTags = !self::isBlank($tagsRequest);
                $hasText = !self::isBlank($textRequest);

                if (!$hasTags && !$hasText) {
                    return $untaggedonly
                        ? $this->filterUntagged($visibility)
                        : $this->noFilter($visibility);
                }

                $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->links;
                // Chain the filters: each step only sees the links kept by the previous one.
                if ($hasTags) {
                    $filtered = (new self($filtered))->filterTags($tagsRequest, $casesensitive, $visibility);
                }
                if ($hasText) {
                    $filtered = (new self($filtered))->filterFulltext($textRequest, $visibility);
                }
                return $filtered;
            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $visibility);
            case self::$FILTER_TAG:
                if ($untaggedonly) {
                    return $this->filterUntagged($visibility);
                }
                return $this->filterTags($request, $casesensitive, $visibility);
            case self::$FILTER_DAY:
                return $this->filterDay($request);
            default:
                return $this->noFilter($visibility);
        }
    }

    /**
     * Tell whether a request value carries nothing to search for.
     *
     * Behaves like empty(), except that the literal string '0' is a valid
     * search term / tag and therefore not considered blank.
     *
     * @param mixed $value request value.
     *
     * @return bool true if there is nothing to search for.
     */
    private static function isBlank($value)
    {
        return empty($value) && $value !== '0';
    }

    /**
     * Check a link against the requested visibility.
     *
     * The 'private' key of the link is only read when the visibility is
     * 'private' or 'public'; any other value applies no restriction.
     *
     * @param array  $link       link data.
     * @param string $visibility all/private/public.
     *
     * @return bool true if the link must be kept.
     */
    private static function matchesVisibility($link, $visibility)
    {
        if ($visibility === 'private') {
            return (bool) $link['private'];
        }
        if ($visibility === 'public') {
            return !$link['private'];
        }
        return true;
    }

    /**
     * Unknown filter, but handle private only.
     *
     * @param string $visibility Optional: return only all/private/public links
     *
     * @return array filtered links (the stored list itself when $visibility is 'all').
     */
    private function noFilter($visibility = 'all')
    {
        if ($visibility === 'all') {
            return $this->links;
        }

        $out = [];
        foreach ($this->links as $key => $link) {
            if (self::matchesVisibility($link, $visibility)) {
                $out[$key] = $link;
            }
        }

        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return array single-item array (original key preserved) containing permalink data.
     *
     * @throws LinkNotFoundException if the smallhash doesn't match any link.
     */
    private function filterSmallHash($smallHash)
    {
        $smallHash = (string) $smallHash;
        // Linear scan over all links, stopping at the first match.
        foreach ($this->links as $key => $link) {
            // Strict string comparison: with `==`, numeric-looking hashes such as
            // '0e1234' and '0e5678' compare equal and the wrong link is returned.
            if ($smallHash === (string) $link['shorturl']) {
                return [$key => $link];
            }
        }

        throw new LinkNotFoundException();
    }

    /**
     * Returns the list of links corresponding to a full-text search
     *
     * Searches:
     *  - in the URLs, title, description and tags;
     *  - are case-insensitive;
     *  - terms surrounded by quotes " are exact terms search.
     *  - terms starting with a dash - are excluded (except exact terms).
     *
     * Example:
     *    print_r($mydb->filterFulltext('hollandais'));
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * @param string $searchterms search query.
     * @param string $visibility  Optional: return only all/private/public links.
     *
     * @return array search results.
     */
    private function filterFulltext($searchterms, $visibility = 'all')
    {
        if (self::isBlank($searchterms)) {
            return $this->noFilter($visibility);
        }

        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
        $exactRegex = '/"([^"]+)"/';

        // Retrieve exact search terms (the pattern never captures an empty string).
        preg_match_all($exactRegex, $search, $exactMatches);
        $requiredTerms = isset($exactMatches[1]) ? array_values($exactMatches[1]) : [];

        // Remove exact search terms, then split the rest into AND terms and excluded terms.
        $excludedTerms = [];
        foreach (explode(' ', trim(preg_replace($exactRegex, '', $search))) as $needle) {
            if ($needle === '') {
                // consecutive spaces
                continue;
            }
            if ($needle[0] === '-' && strlen($needle) > 1) {
                $excludedTerms[] = substr($needle, 1);
            } else {
                $requiredTerms[] = $needle;
            }
        }

        $keys = ['title', 'description', 'url', 'tags'];
        $filtered = [];

        // Iterate over every stored link.
        foreach ($this->links as $id => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // Concatenate link fields to search across fields.
            // Adds a '\' separator so exact terms cannot match across two fields.
            $content = '';
            foreach ($keys as $key) {
                $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
            }

            // All or nothing: every exact/AND term must be present...
            foreach ($requiredTerms as $needle) {
                if (strpos($content, $needle) === false) {
                    continue 2;
                }
            }
            // ...and no excluded term may be.
            foreach ($excludedTerms as $needle) {
                if (strpos($content, $needle) !== false) {
                    continue 2;
                }
            }

            $filtered[$id] = $link;
        }

        return $filtered;
    }

    /**
     * generate a regex fragment out of a tag
     *
     * @param string $tag to generate regexs from. may start with '-' to negate, contain '*' as wildcard
     *
     * @return string generated regex fragment (a lookahead), or an empty string if there is nothing to search.
     */
    private static function tag2regex($tag)
    {
        $tag = (string) $tag;
        $len = strlen($tag);
        if (!$len || $tag === '-' || $tag === '*') {
            // nothing to search, return empty regex
            return '';
        }
        if ($tag[0] === '-') {
            // query is negated
            $i = 1; // use offset to start after '-' character
            $regex = '(?!'; // create negative lookahead
        } else {
            $i = 0; // start at first character
            $regex = '(?='; // use positive lookahead
        }
        $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning
        // iterate over string, separating it into placeholder and content
        for (; $i < $len; $i++) {
            if ($tag[$i] === '*') {
                // placeholder found
                $regex .= '[^ ]*?';
                continue;
            }
            // regular characters: take everything up to the next placeholder or the end of string
            $end = strpos($tag, '*', $i);
            if ($end === false) {
                $end = $len;
            }
            // we got a tag name that we want to search for. escape any regex characters to prevent conflicts.
            $regex .= preg_quote(substr($tag, $i, $end - $i), '/');
            // move $i on to the last consumed character; the loop increment does the rest
            $i = $end - 1;
        }
        $regex .= '(?:$| ))'; // after the tag may only be a space or the end
        return $regex;
    }

    /**
     * Returns the list of links associated with a given list of tags
     *
     * You can specify one or more tags, separated by space or a comma, e.g.
     *  print_r($mydb->filterTags('linux programming'));
     *
     * Hashtags (#tag) found in the link description are searched as well.
     *
     * @param string|array $tags          list of tags separated by commas or blank spaces, or an array of tags.
     * @param bool         $casesensitive ignore case if false.
     * @param string       $visibility    Optional: return only all/private/public links.
     *
     * @return array filtered links.
     */
    public function filterTags($tags, $casesensitive = false, $visibility = 'all')
    {
        // get single tags (we may get passed an array, even though the docs say different)
        $inputTags = $tags;
        if (!is_array($tags)) {
            // we got an input string, split tags
            $inputTags = preg_split('/(?:\s+)|,/', (string) $tags, -1, PREG_SPLIT_NO_EMPTY);
        }

        if (empty($inputTags)) {
            // no input tags (or the split failed)
            return $this->noFilter($visibility);
        }

        // build regex from all tags: one lookahead per tag, all anchored at the start
        $lookaheads = '';
        foreach ($inputTags as $tag) {
            $lookaheads .= self::tag2regex($tag);
        }
        $re = '/^' . $lookaheads . '.*$/';
        if (!$casesensitive) {
            // make regex case insensitive
            $re .= 'i';
        }

        // finds all tags in the form of #tag in a description
        $hashtagRegex = '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm';

        $filtered = [];
        foreach ($this->links as $key => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }
            $search = $link['tags']; // build search string, start with tags of current link
            if (strpos($link['description'], '#') !== false) {
                // at least one possible tag found in the description
                $descTags = [];
                preg_match_all($hashtagRegex, $link['description'], $descTags);
                if (!empty($descTags[1])) {
                    // there were some tags in the description, add them to the search string
                    $search .= ' ' . implode(' ', $descTags[1]);
                }
            }
            // match regular expression with search string
            if (preg_match($re, $search)) {
                $filtered[$key] = $link;
            }
        }
        return $filtered;
    }

    /**
     * Return only links without any tag.
     *
     * @param string $visibility return only all/private/public links.
     *
     * @return array filtered links.
     */
    public function filterUntagged($visibility)
    {
        $filtered = [];
        foreach ($this->links as $key => $link) {
            if (!self::matchesVisibility($link, $visibility)) {
                continue;
            }

            // Compare against '' rather than empty(): '0' is a valid tag.
            if (trim((string) $link['tags']) === '') {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, in reverse iteration order
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($mydb->filterDay('20120125'));
     *
     * @param string $day day to filter.
     *
     * @return array all link matching given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay($day)
    {
        if (!checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $filtered = [];
        foreach ($this->links as $key => $link) {
            if ($link['created']->format('Ymd') == $day) {
                $filtered[$key] = $link;
            }
        }

        // sort by date ASC
        // NOTE(review): this only reverses the iteration order (keys preserved), so it
        // presumably relies on the stored links being ordered newest first - confirm.
        return array_reverse($filtered, true);
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handle case sensitivity.
     * - accepts spaces commas as separator.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return array list of tags.
     */
    public static function tagsStrToArray($tags, $casesensitive)
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        $tagsOut = str_replace(',', ' ', $tagsOut);

        return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
    }
}
diff --git a/application/bookmark/exception/LinkNotFoundException.php b/application/bookmark/exception/LinkNotFoundException.php new file mode 100644 index 00000000..f9414428 --- /dev/null +++ b/application/bookmark/exception/LinkNotFoundException.php | |||
@@ -0,0 +1,15 @@ | |||
1 | <?php | ||
2 | namespace Shaarli\Bookmark\Exception; | ||
3 | |||
4 | use Exception; | ||
5 | |||
/**
 * Exception thrown when a requested link cannot be found.
 */
class LinkNotFoundException extends Exception
{
    /**
     * Build the exception with its fixed, translated message.
     */
    public function __construct()
    {
        parent::__construct(t('The link you are trying to reach does not exist or has been deleted.'));
    }
}