]>
Commit | Line | Data |
---|---|---|
822bffce A |
1 | <?php |
2 | ||
3 | /** | |
4 | * Class LinkFilter. | |
5 | * | |
6 | * Perform search and filter operation on link data list. | |
7 | */ | |
class LinkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var string Allowed characters for hashtags (regex character-class syntax).
     */
    public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';

    /**
     * @var LinkDB all available links (any array-like, iterable list of link arrays).
     */
    private $links;

    /**
     * @param LinkDB $links links the filters operate on.
     */
    public function __construct($links)
    {
        $this->links = $links;
    }

    /**
     * Filter links according to parameters.
     *
     * @param string $type          Type of filter (eg. tags, permalink, etc.).
     * @param mixed  $request       Filter content. For the combined tag + text filter
     *                              this is an array: index 0 holds the tags, index 1 the search terms.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
     * @param string $visibility    Optional: return only all/private/public links.
     *                              Any other value is treated as 'all'.
     *
     * @return array filtered link list.
     *
     * @throws LinkNotFoundException permalink filter: no link matches the given hash.
     * @throws Exception             day filter: invalid date format.
     */
    public function filter($type, $request, $casesensitive = false, $visibility = 'all')
    {
        // Strict check: a loose in_array() lets values such as `true` pass the
        // whitelist, after which no link would match any visibility.
        if (! in_array($visibility, ['all', 'public', 'private'], true)) {
            $visibility = 'all';
        }

        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);
            // `|` applied to two strings is a byte-wise OR in PHP: the result is an
            // opaque key, so callers select this branch by passing the very same expression.
            case self::$FILTER_TAG | self::$FILTER_TEXT:
                $hasTags = ! empty($request) && isset($request[0]);
                $hasText = ! empty($request) && isset($request[1]);
                if (! $hasTags && ! $hasText) {
                    // Nothing usable in the request: still honour the visibility.
                    return $this->noFilter($visibility);
                }

                $filtered = $this->links;
                if ($hasTags) {
                    $filtered = $this->filterTags($request[0], $casesensitive, $visibility);
                }
                if ($hasText) {
                    // Run the text search on the links that survived the tag filter.
                    $textFilter = new self($filtered);
                    $filtered = $textFilter->filterFulltext($request[1], $visibility);
                }
                return $filtered;
            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $visibility);
            case self::$FILTER_TAG:
                return $this->filterTags($request, $casesensitive, $visibility);
            case self::$FILTER_DAY:
                return $this->filterDay($request, $visibility);
            default:
                return $this->noFilter($visibility);
        }
    }

    /**
     * Tell whether a link must be skipped for the requested visibility.
     *
     * @param array  $link       link data, must contain the 'private' key.
     * @param string $visibility all/private/public; unknown values hide nothing.
     *
     * @return bool true if the link must not appear in the results.
     */
    private static function isHiddenByVisibility($link, $visibility)
    {
        if ($visibility === 'private') {
            return ! $link['private'];
        }
        if ($visibility === 'public') {
            return (bool) $link['private'];
        }

        return false;
    }

    /**
     * Tell whether a filter request carries no query at all.
     *
     * Same as empty(), except that the string '0' counts as a real query
     * (it is a valid tag and a valid search term).
     *
     * @param mixed $value request value.
     *
     * @return bool true if there is nothing to filter on.
     */
    private static function isBlank($value)
    {
        return empty($value) && $value !== '0';
    }

    /**
     * Tell whether every needle occurs in the haystack.
     *
     * @param string $haystack text to search in.
     * @param array  $needles  non-empty strings.
     *
     * @return bool true if all needles are found (also true for an empty list).
     */
    private static function containsAll($haystack, $needles)
    {
        foreach ($needles as $needle) {
            if (strpos($haystack, $needle) === false) {
                return false;
            }
        }

        return true;
    }

    /**
     * Tell whether at least one needle occurs in the haystack.
     *
     * @param string $haystack text to search in.
     * @param array  $needles  non-empty strings.
     *
     * @return bool true if any needle is found (false for an empty list).
     */
    private static function containsAny($haystack, $needles)
    {
        foreach ($needles as $needle) {
            if (strpos($haystack, $needle) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * No content filter: only apply the visibility restriction.
     *
     * @param string $visibility Optional: return only all/private/public links.
     *
     * @return array filtered links (the original link list itself when visibility is 'all').
     */
    private function noFilter($visibility = 'all')
    {
        if ($visibility === 'all') {
            return $this->links;
        }

        $out = [];
        foreach ($this->links as $key => $value) {
            if (! self::isHiddenByVisibility($value, $visibility)) {
                $out[$key] = $value;
            }
        }

        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return array $filtered array containing permalink data (a single key => link pair).
     *
     * @throws LinkNotFoundException if the smallhash doesn't match any link.
     */
    private function filterSmallHash($smallHash)
    {
        if (is_scalar($smallHash)) {
            $wanted = (string) $smallHash;
            // Linear scan; the first matching link wins.
            foreach ($this->links as $key => $l) {
                // Strict string comparison: with `==` two numeric-looking hashes
                // such as '100000' and '1e5' would be considered equal.
                if ($wanted === (string) $l['shorturl']) {
                    return [$key => $l];
                }
            }
        }

        throw new LinkNotFoundException();
    }

    /**
     * Returns the list of links corresponding to a full-text search
     *
     * Searches:
     *  - in the URLs, title, description and tags;
     *  - are case-insensitive;
     *  - terms surrounded by quotes " are exact terms search.
     *  - terms starting with a dash - are excluded (except exact terms).
     *
     * Example:
     *    print_r($mydb->filterFulltext('hollandais'));
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * @param string $searchterms search query.
     * @param string $visibility  Optional: return only all/private/public links.
     *
     * @return array search results.
     */
    private function filterFulltext($searchterms, $visibility = 'all')
    {
        if (self::isBlank($searchterms)) {
            return $this->noFilter($visibility);
        }

        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
        $exactRegex = '/"([^"]+)"/';

        // Retrieve exact search terms; the pattern guarantees they are non-empty.
        preg_match_all($exactRegex, $search, $matches);
        $exactSearch = $matches[1];

        // Remove exact search terms to get AND terms search.
        // Only empty fragments are dropped, so that a '0' term is kept.
        $explodedSearchAnd = array_values(array_filter(
            explode(' ', trim(preg_replace($exactRegex, '', $search))),
            function ($term) {
                return $term !== '';
            }
        ));

        // Split remaining terms between excluded terms (leading dash) and required terms.
        $excludeSearch = [];
        $andSearch = [];
        foreach ($explodedSearchAnd as $needle) {
            if ($needle[0] === '-' && strlen($needle) > 1) {
                $excludeSearch[] = substr($needle, 1);
            } else {
                $andSearch[] = $needle;
            }
        }

        $keys = ['title', 'description', 'url', 'tags'];
        $filtered = [];

        // Iterate over every stored link.
        foreach ($this->links as $id => $link) {
            if (self::isHiddenByVisibility($link, $visibility)) {
                continue;
            }

            // Concatenate link fields to search across fields.
            // Adds a '\' separator so that exact terms can not span two fields.
            $content = '';
            foreach ($keys as $key) {
                $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
            }

            // All or nothing: every exact and AND term must be present,
            // and no excluded term may be.
            if (self::containsAll($content, $exactSearch)
                && self::containsAll($content, $andSearch)
                && ! self::containsAny($content, $excludeSearch)
            ) {
                $filtered[$id] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of links associated with a given list of tags
     *
     * You can specify one or more tags, separated by space or a comma, e.g.
     *  print_r($mydb->filterTags('linux programming'));
     *
     * A tag prefixed with a dash excludes the links carrying it.
     *
     * @param string|array|false $tags          list of tags separated by commas or blank spaces,
     *                                          or false to get the untagged links only.
     * @param bool               $casesensitive ignore case if false.
     * @param string             $visibility    Optional: return only all/private/public links.
     *
     * @return array filtered links.
     */
    public function filterTags($tags, $casesensitive = false, $visibility = 'all')
    {
        // Implode if array for clean up.
        $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags;
        if ($tags === false) {
            return $this->filterUntagged($visibility);
        }
        if (self::isBlank($tags)) {
            return $this->noFilter($visibility);
        }

        $searchtags = self::tagsStrToArray($tags, $casesensitive);
        $filtered = [];
        if (empty($searchtags)) {
            return $filtered;
        }

        foreach ($this->links as $key => $link) {
            if (self::isHiddenByVisibility($link, $visibility)) {
                continue;
            }

            $linktags = self::tagsStrToArray($link['tags'], $casesensitive);

            $found = true;
            foreach ($searchtags as $searchtag) {
                if ($searchtag[0] === '-') {
                    // Excluded tag: the link is rejected as soon as it carries it.
                    $rejected = $this->searchTagAndHashTag(
                        substr($searchtag, 1),
                        $linktags,
                        $link['description']
                    );
                } else {
                    // Required tag: the link is rejected as soon as it lacks it.
                    $rejected = ! $this->searchTagAndHashTag($searchtag, $linktags, $link['description']);
                }

                if ($rejected) {
                    $found = false;
                    break;
                }
            }

            if ($found) {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Return only links without any tag.
     *
     * @param string $visibility Optional: return only all/private/public links.
     *
     * @return array filtered links.
     */
    public function filterUntagged($visibility = 'all')
    {
        $filtered = [];
        foreach ($this->links as $key => $link) {
            if (self::isHiddenByVisibility($link, $visibility)) {
                continue;
            }

            // Explicit comparison: empty() would report a link tagged '0' as untagged.
            if (trim($link['tags']) === '') {
                $filtered[$key] = $link;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, in reverse order of the stored link list
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($mydb->filterDay('20120125'));
     *
     * @param string $day        day to filter.
     * @param string $visibility Optional: return only all/private/public links.
     *
     * @return array all link matching given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay($day, $visibility = 'all')
    {
        if (! checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $filtered = [];
        foreach ($this->links as $key => $l) {
            if (self::isHiddenByVisibility($l, $visibility)) {
                continue;
            }
            if ($l['created']->format('Ymd') == $day) {
                $filtered[$key] = $l;
            }
        }

        // Reverse the iteration order while keeping the link keys.
        // NOTE(review): the original comment says "sort by date ASC", which only
        // holds if the link list is stored newest first - confirm against LinkDB.
        return array_reverse($filtered, true);
    }

    /**
     * Check if a tag is found in the taglist, or as an hashtag in the link description.
     *
     * @param string $tag         Tag to search.
     * @param array  $taglist     List of tags for the current link.
     * @param string $description Link description.
     *
     * @return bool True if found, false otherwise.
     */
    protected function searchTagAndHashTag($tag, $taglist, $description)
    {
        $tag = (string) $tag;
        if ($tag === '') {
            // An empty tag (e.g. a lone '-' in the search) matches nothing.
            return false;
        }

        // Strict comparison: loosely, the tags '1e3' and '1000' are equal.
        if (in_array($tag, $taglist, true)) {
            return true;
        }

        // The tag is user input: quote it so that regex metacharacters
        // ('+', '(', '/', ...) are matched literally and can not break the pattern.
        $pattern = '/(^| )#' . preg_quote($tag, '/') . '([^' . self::$HASHTAG_CHARS . ']|$)/mui';

        return preg_match($pattern, $description) > 0;
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handle case sensitivity.
     * - accepts spaces and commas as separators.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return array list of tags, without empty entries.
     */
    public static function tagsStrToArray($tags, $casesensitive)
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        $tagsOut = str_replace(',', ' ', $tagsOut);

        return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
    }
}
528a6f8a A |
398 | |
/**
 * Class LinkNotFoundException.
 *
 * Exception carrying a ready-made message for a link that can not be found.
 */
class LinkNotFoundException extends Exception
{
    /**
     * @var string default message, used when the exception is built without one.
     */
    protected $message = 'The link you are trying to reach does not exist or has been deleted.';
}