diff options
author | ArthurHoaro <arthur@hoa.ro> | 2020-01-18 10:01:06 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-01-18 10:01:06 +0100 |
commit | 3fb29fdda04ca86e04422d49b86cf646d53c4f9d (patch) | |
tree | adf8512f93f5559ba87d0c9931969ae4ebea7133 /application/legacy/LegacyLinkFilter.php | |
parent | 796c4c57d085ae4589b53dfe8369ae9ba30ffdaf (diff) | |
parent | e26e2060f5470ce8bf4c5973284bae07b8af170a (diff) | |
download | Shaarli-3fb29fdda04ca86e04422d49b86cf646d53c4f9d.tar.gz Shaarli-3fb29fdda04ca86e04422d49b86cf646d53c4f9d.tar.zst Shaarli-3fb29fdda04ca86e04422d49b86cf646d53c4f9d.zip |
Store bookmarks as PHP objects and add a service layer to retri… (#1307)
Store bookmarks as PHP objects and add a service layer to retrieve them
Diffstat (limited to 'application/legacy/LegacyLinkFilter.php')
-rw-r--r-- | application/legacy/LegacyLinkFilter.php | 451 |
1 files changed, 451 insertions, 0 deletions
diff --git a/application/legacy/LegacyLinkFilter.php b/application/legacy/LegacyLinkFilter.php new file mode 100644 index 00000000..7cf93d60 --- /dev/null +++ b/application/legacy/LegacyLinkFilter.php | |||
@@ -0,0 +1,451 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Shaarli\Legacy; | ||
4 | |||
5 | use Exception; | ||
6 | use Shaarli\Bookmark\Exception\BookmarkNotFoundException; | ||
7 | |||
/**
 * Class LegacyLinkFilter.
 *
 * Performs search and filter operations on a list of legacy links.
 *
 * Every link is accessed as an array and is expected to provide the keys read
 * by the filters below: 'private', 'shorturl', 'title', 'description', 'url',
 * 'tags' and, for the day filter, 'created' (an object with a format() method).
 *
 * @deprecated
 */
class LegacyLinkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var string Allowed characters for hashtags (regex syntax).
     */
    public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';

    /**
     * @var LegacyLinkDB|array all available links (the filter also wraps its own
     *                         intermediate array results when chaining filters).
     */
    private $links;

    /**
     * @param LegacyLinkDB|array $links links to filter; must be iterable and yield array-accessible links.
     */
    public function __construct($links)
    {
        $this->links = $links;
    }

    /**
     * Filter links according to parameters.
     *
     * @param string $type          Type of filter (eg. tags, permalink, etc.).
     * @param mixed  $request       Filter content. For the combined tags + text filter this is
     *                              a two-element list: [0] => tags, [1] => search terms.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
     * @param string $visibility    Optional: return only all/private/public links.
     *                              Any other value is treated as 'all'.
     * @param bool   $untaggedonly  Optional: return only untagged links. Applies only if $type includes FILTER_TAG
     *
     * @return array filtered link list.
     *
     * @throws BookmarkNotFoundException permalink filter only: no link matches the hash.
     * @throws Exception                 day filter only: invalid date format.
     */
    public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false)
    {
        // Strict lookup: a loose in_array() would accept values such as `true` or `0`
        // and let them through as a visibility that no filter understands.
        if (!in_array($visibility, ['all', 'public', 'private'], true)) {
            $visibility = 'all';
        }

        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);
            // Byte-wise OR of the two filter names: callers build the combined
            // filter type with this very same expression.
            case self::$FILTER_TAG | self::$FILTER_TEXT:
                return $this->filterTagsAndText($request, $casesensitive, $visibility, $untaggedonly);
            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $visibility);
            case self::$FILTER_TAG:
                if ($untaggedonly) {
                    return $this->filterUntagged($visibility);
                }
                return $this->filterTags($request, $casesensitive, $visibility);
            case self::$FILTER_DAY:
                return $this->filterDay($request);
            default:
                return $this->noFilter($visibility);
        }
    }

    /**
     * Combined filter: tags first, then full-text search on the remaining links.
     *
     * @param mixed  $request       [0] => tags, [1] => search terms; either may be missing or blank.
     * @param bool   $casesensitive Perform a case sensitive tag filter if true.
     * @param string $visibility    return only all/private/public links.
     * @param bool   $untaggedonly  start from untagged links only.
     *
     * @return array filtered links.
     */
    private function filterTagsAndText($request, $casesensitive, $visibility, $untaggedonly)
    {
        $tags = isset($request[0]) ? $request[0] : null;
        $text = isset($request[1]) ? $request[1] : null;

        if (empty($request) || (self::isBlank($tags) && self::isBlank($text))) {
            return $untaggedonly ? $this->filterUntagged($visibility) : $this->noFilter($visibility);
        }

        $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->links;
        if (!self::isBlank($tags)) {
            $filtered = (new self($filtered))->filterTags($tags, $casesensitive, $visibility);
        }
        if (!self::isBlank($text)) {
            $filtered = (new self($filtered))->filterFulltext($text, $visibility);
        }

        return $filtered;
    }

    /**
     * Tell whether a request value carries nothing to search for.
     *
     * empty() alone is not usable here: it reports the string '0' as empty,
     * although "0" is a legitimate tag and a legitimate search term.
     *
     * @param mixed $value request value.
     *
     * @return bool true if there is nothing to search for.
     */
    private static function isBlank($value)
    {
        return empty($value) && $value !== '0';
    }

    /**
     * Check a link against the requested visibility.
     *
     * @param array|\ArrayAccess $link       link data, read through its 'private' key.
     * @param string             $visibility 'private' keeps private links, 'public' keeps
     *                                       non private ones, anything else keeps everything.
     *
     * @return bool true if the link must be kept.
     */
    private static function isVisible($link, $visibility)
    {
        if ($visibility === 'private') {
            return (bool) $link['private'];
        }
        if ($visibility === 'public') {
            return !$link['private'];
        }

        return true;
    }

    /**
     * Unknown filter, but handle private only.
     *
     * @param string $visibility Optional: return only all/private/public links
     *
     * @return array filtered links.
     */
    private function noFilter($visibility = 'all')
    {
        if ($visibility === 'all') {
            return $this->links;
        }

        $out = [];
        foreach ($this->links as $key => $link) {
            if (self::isVisible($link, $visibility)) {
                $out[$key] = $link;
            }
        }

        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return array $filtered array containing permalink data (a single entry, keyed by link id).
     *
     * @throws BookmarkNotFoundException if the smallhash doesn't match any link.
     */
    private function filterSmallHash($smallHash)
    {
        // Arrays/objects can never equal a stored hash; bail out before the string cast.
        if ($smallHash !== null && !is_scalar($smallHash)) {
            throw new BookmarkNotFoundException();
        }
        $needle = (string) $smallHash;

        foreach ($this->links as $key => $link) {
            // Compare as strings: a loose == compares numeric-looking hashes as
            // numbers, so e.g. '0e1234' and '0e5678' would be considered equal.
            if ($needle === (string) $link['shorturl']) {
                return [$key => $link];
            }
        }

        throw new BookmarkNotFoundException();
    }

    /**
     * Returns the list of links corresponding to a full-text search
     *
     * Searches:
     *  - in the URLs, title, description and tags;
     *  - are case-insensitive;
     *  - terms surrounded by quotes " are exact terms search.
     *  - terms starting with a dash - are excluded (except exact terms).
     *
     * Example:
     *    print_r($mydb->filterFulltext('hollandais'));
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * @param string $searchterms search query.
     * @param string $visibility  Optional: return only all/private/public links.
     *
     * @return array search results.
     */
    private function filterFulltext($searchterms, $visibility = 'all')
    {
        if (self::isBlank($searchterms)) {
            return $this->noFilter($visibility);
        }

        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
        $exactRegex = '/"([^"]+)"/';

        // Retrieve exact search terms. The pattern only captures non-empty
        // strings, so no extra filtering is needed (and the term "0" is kept).
        preg_match_all($exactRegex, $search, $exactMatches);
        $requiredTerms = $exactMatches[1];

        // Remove exact search terms, then split what is left into required
        // terms and excluded terms (leading dash).
        $excludedTerms = [];
        foreach (explode(' ', trim(preg_replace($exactRegex, '', $search))) as $term) {
            if ($term === '') {
                // consecutive spaces
                continue;
            }
            if ($term[0] === '-' && strlen($term) > 1) {
                $excludedTerms[] = substr($term, 1);
            } else {
                $requiredTerms[] = $term;
            }
        }

        $keys = ['title', 'description', 'url', 'tags'];
        $filtered = [];

        // Iterate over every stored link.
        foreach ($this->links as $id => $link) {
            if (!self::isVisible($link, $visibility)) {
                continue;
            }

            // Concatenate link fields to search across fields.
            // Adds a '\' separator so that an exact term can't match across two fields.
            $content = '';
            foreach ($keys as $key) {
                $content .= mb_convert_case((string) $link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
            }

            // All or nothing: every required term must be there...
            $found = true;
            foreach ($requiredTerms as $term) {
                if (strpos($content, $term) === false) {
                    $found = false;
                    break;
                }
            }

            // ...and none of the excluded ones.
            if ($found) {
                foreach ($excludedTerms as $term) {
                    if (strpos($content, $term) !== false) {
                        $found = false;
                        break;
                    }
                }
            }

            if ($found) {
                $filtered[$id] = $link;
            }
        }

        return $filtered;
    }

    /**
     * generate a regex fragment out of a tag
     *
     * The fragment is a lookahead (negative if the tag is negated) which matches
     * the tag as a whole space separated word anywhere in the subject.
     *
     * @param string $tag to generate regexs from. may start with '-' to negate, contain '*' as wildcard
     *
     * @return string generated regex fragment, or an empty string if there is nothing to search
     */
    private static function tag2regex($tag)
    {
        $tag = (string) $tag;
        if ($tag === '' || $tag === '-' || $tag === '*') {
            // nothing to search, return empty regex
            return '';
        }

        $negated = $tag[0] === '-';
        $name = $negated ? substr($tag, 1) : $tag;

        // Literal parts are escaped to prevent conflicts with regex syntax,
        // every '*' between them becomes "any run of non space characters".
        $literals = array_map(
            function ($part) {
                return preg_quote($part, '/');
            },
            explode('*', $name)
        );

        return ($negated ? '(?!' : '(?=')     // negative or positive lookahead
            . '.*(?:^| )'                     // before the tag may only be a space or the beginning
            . implode('[^ ]*?', $literals)
            . '(?:$| ))';                     // after the tag may only be a space or the end
    }

    /**
     * Find all tags written as #tag in a description.
     *
     * @param string $description link description.
     *
     * @return array list of hashtags, without the leading '#'.
     */
    private static function extractHashtags($description)
    {
        if (strpos($description, '#') === false) {
            return [];
        }

        $pattern = '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm';

        // Unicode mode first: without it \p{..} and \b work on single bytes and
        // '#café' would be cut down to 'caf'. preg_match_all() returns false if
        // the description is not valid UTF-8, in which case we fall back to
        // byte mode.
        $found = [];
        if (preg_match_all($pattern . 'u', $description, $found) === false) {
            $found = [];
            preg_match_all($pattern, $description, $found);
        }

        return isset($found[1]) ? $found[1] : [];
    }

    /**
     * Returns the list of links associated with a given list of tags
     *
     * You can specify one or more tags, separated by space or a comma, e.g.
     *  print_r($mydb->filterTags('linux programming'));
     *
     * Hashtags found in the link description are searched as well.
     *
     * @param string|array $tags          list of tags separated by commas or blank spaces, or an array of tags.
     * @param bool         $casesensitive ignore case if false.
     * @param string       $visibility    Optional: return only all/private/public links.
     *
     * @return array filtered links.
     */
    public function filterTags($tags, $casesensitive = false, $visibility = 'all')
    {
        // get single tags (we may get passed an array, even though the docs say different)
        $inputTags = is_array($tags)
            ? $tags
            : preg_split('/(?:\s+)|,/', (string) $tags, -1, PREG_SPLIT_NO_EMPTY);

        if (!count($inputTags)) {
            // no input tags
            return $this->noFilter($visibility);
        }

        // build regex from all tags: one lookahead per tag, all anchored at the start
        $pattern = '/^' . implode('', array_map([self::class, 'tag2regex'], $inputTags)) . '.*$/';
        $byteRegex = $pattern . ($casesensitive ? '' : 'i');
        // Unicode mode makes the 'i' modifier work beyond ASCII ('École' vs 'école').
        // It is only usable if the pattern itself is valid UTF-8, which is what
        // the empty /u match checks without raising a compilation warning.
        $unicodeRegex = preg_match('//u', $pattern) === 1 ? $byteRegex . 'u' : null;

        $filtered = [];
        foreach ($this->links as $key => $link) {
            if (!self::isVisible($link, $visibility)) {
                continue;
            }

            // build search string: tags of the current link + hashtags of its description
            $search = (string) $link['tags'];
            $hashtags = self::extractHashtags((string) $link['description']);
            if (count($hashtags)) {
                $search .= ' ' . implode(' ', $hashtags);
            }

            // preg_match() returns false (not 0) if the subject is not valid
            // UTF-8: retry in byte mode rather than dropping the link.
            $matched = $unicodeRegex !== null ? preg_match($unicodeRegex, $search) : false;
            if ($matched === false) {
                $matched = preg_match($byteRegex, $search);
            }

            if ($matched) {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Return only links without any tag.
     *
     * @param string $visibility return only all/private/public links.
     *
     * @return array filtered links.
     */
    public function filterUntagged($visibility)
    {
        $filtered = [];
        foreach ($this->links as $key => $link) {
            if (!self::isVisible($link, $visibility)) {
                continue;
            }

            // Not empty(): a link whose only tag is "0" is tagged.
            if (trim((string) $link['tags']) === '') {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, in reverse storage order
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($mydb->filterDay('20120125'));
     *
     * @param string $day day to filter.
     *
     * @return array all link matching given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay($day)
    {
        if (!checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $day = (string) $day;
        $filtered = [];
        foreach ($this->links as $key => $link) {
            if ($link['created']->format('Ymd') === $day) {
                $filtered[$key] = $link;
            }
        }

        // Reverse the iteration order, keys preserved.
        // NOTE(review): this gives a chronological (date ASC) list only if the
        // link list is iterated newest first - confirm against the link storage.
        return array_reverse($filtered, true);
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handle case sensitivity.
     * - accepts spaces commas as separator.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return array filtered tags string.
     */
    public static function tagsStrToArray($tags, $casesensitive)
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        if (!$casesensitive) {
            $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        }

        // commas and whitespace runs are both separators
        return preg_split('/[\s,]+/', $tags, -1, PREG_SPLIT_NO_EMPTY);
    }
}