diff options
author | Arthur <arthur@hoa.ro> | 2016-01-06 19:57:42 +0100 |
---|---|---|
committer | Arthur <arthur@hoa.ro> | 2016-01-06 19:57:42 +0100 |
commit | 88c15abb2a2b24f05ca926c0ddbdec18a407e47d (patch) | |
tree | 90f783c69dc51866dc01dc34e740023bd1e0c4ec /application | |
parent | a327d891b3a2762bb6aabba3a6572b077f6003c0 (diff) | |
parent | eefb636cea7acef9ddfd02a90749820f5fafc9f6 (diff) | |
download | Shaarli-88c15abb2a2b24f05ca926c0ddbdec18a407e47d.tar.gz Shaarli-88c15abb2a2b24f05ca926c0ddbdec18a407e47d.tar.zst Shaarli-88c15abb2a2b24f05ca926c0ddbdec18a407e47d.zip |
Merge pull request #424 from ArthurHoaro/search
Link filter refactoring
Diffstat (limited to 'application')
-rw-r--r-- | application/LinkDB.php | 120 | ||||
-rw-r--r-- | application/LinkFilter.php | 259 | ||||
-rw-r--r-- | application/Utils.php | 12 |
3 files changed, 281 insertions, 110 deletions
diff --git a/application/LinkDB.php b/application/LinkDB.php index 51fa926d..16848519 100644 --- a/application/LinkDB.php +++ b/application/LinkDB.php | |||
@@ -63,6 +63,11 @@ class LinkDB implements Iterator, Countable, ArrayAccess | |||
63 | private $_redirector; | 63 | private $_redirector; |
64 | 64 | ||
65 | /** | 65 | /** |
66 | * @var LinkFilter instance. | ||
67 | */ | ||
68 | private $linkFilter; | ||
69 | |||
70 | /** | ||
66 | * Creates a new LinkDB | 71 | * Creates a new LinkDB |
67 | * | 72 | * |
68 | * Checks if the datastore exists; else, attempts to create a dummy one. | 73 | * Checks if the datastore exists; else, attempts to create a dummy one. |
@@ -80,6 +85,7 @@ class LinkDB implements Iterator, Countable, ArrayAccess | |||
80 | $this->_redirector = $redirector; | 85 | $this->_redirector = $redirector; |
81 | $this->_checkDB(); | 86 | $this->_checkDB(); |
82 | $this->_readDB(); | 87 | $this->_readDB(); |
88 | $this->linkFilter = new LinkFilter($this->_links); | ||
83 | } | 89 | } |
84 | 90 | ||
85 | /** | 91 | /** |
@@ -334,114 +340,18 @@ You use the community supported version of the original Shaarli project, by Seba | |||
334 | } | 340 | } |
335 | 341 | ||
336 | /** | 342 | /** |
337 | * Returns the list of links corresponding to a full-text search | 343 | * Filter links. |
338 | * | 344 | * |
339 | * Searches: | 345 | * @param string $type Type of filter. |
340 | * - in the URLs, title and description; | 346 | * @param mixed $request Search request, string or array. |
341 | * - are case-insensitive. | 347 | * @param bool $casesensitive Optional: Perform case sensitive filter |
342 | * | 348 | * @param bool $privateonly Optional: Returns private links only if true. |
343 | * Example: | ||
344 | * print_r($mydb->filterFulltext('hollandais')); | ||
345 | * | ||
346 | * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8') | ||
347 | * - allows to perform searches on Unicode text | ||
348 | * - see https://github.com/shaarli/Shaarli/issues/75 for examples | ||
349 | */ | ||
350 | public function filterFulltext($searchterms) | ||
351 | { | ||
352 | // FIXME: explode(' ',$searchterms) and perform a AND search. | ||
353 | // FIXME: accept double-quotes to search for a string "as is"? | ||
354 | $filtered = array(); | ||
355 | $search = mb_convert_case($searchterms, MB_CASE_LOWER, 'UTF-8'); | ||
356 | $keys = array('title', 'description', 'url', 'tags'); | ||
357 | |||
358 | foreach ($this->_links as $link) { | ||
359 | $found = false; | ||
360 | |||
361 | foreach ($keys as $key) { | ||
362 | if (strpos(mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'), | ||
363 | $search) !== false) { | ||
364 | $found = true; | ||
365 | } | ||
366 | } | ||
367 | |||
368 | if ($found) { | ||
369 | $filtered[$link['linkdate']] = $link; | ||
370 | } | ||
371 | } | ||
372 | krsort($filtered); | ||
373 | return $filtered; | ||
374 | } | ||
375 | |||
376 | /** | ||
377 | * Returns the list of links associated with a given list of tags | ||
378 | * | 349 | * |
379 | * You can specify one or more tags, separated by space or a comma, e.g. | 350 | * @return array filtered links |
380 | * print_r($mydb->filterTags('linux programming')); | ||
381 | */ | 351 | */ |
382 | public function filterTags($tags, $casesensitive=false) | 352 | public function filter($type, $request, $casesensitive = false, $privateonly = false) { |
383 | { | 353 | $requestFilter = is_array($request) ? implode(' ', $request) : $request; |
384 | // Same as above, we use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) | 354 | return $this->linkFilter->filter($type, trim($requestFilter), $casesensitive, $privateonly); |
385 | // FIXME: is $casesensitive ever true? | ||
386 | $t = str_replace( | ||
387 | ',', ' ', | ||
388 | ($casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8')) | ||
389 | ); | ||
390 | |||
391 | $searchtags = explode(' ', $t); | ||
392 | $filtered = array(); | ||
393 | |||
394 | foreach ($this->_links as $l) { | ||
395 | $linktags = explode( | ||
396 | ' ', | ||
397 | ($casesensitive ? $l['tags']:mb_convert_case($l['tags'], MB_CASE_LOWER, 'UTF-8')) | ||
398 | ); | ||
399 | |||
400 | if (count(array_intersect($linktags, $searchtags)) == count($searchtags)) { | ||
401 | $filtered[$l['linkdate']] = $l; | ||
402 | } | ||
403 | } | ||
404 | krsort($filtered); | ||
405 | return $filtered; | ||
406 | } | ||
407 | |||
408 | |||
409 | /** | ||
410 | * Returns the list of articles for a given day, chronologically sorted | ||
411 | * | ||
412 | * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g. | ||
413 | * print_r($mydb->filterDay('20120125')); | ||
414 | */ | ||
415 | public function filterDay($day) | ||
416 | { | ||
417 | if (! checkDateFormat('Ymd', $day)) { | ||
418 | throw new Exception('Invalid date format'); | ||
419 | } | ||
420 | |||
421 | $filtered = array(); | ||
422 | foreach ($this->_links as $l) { | ||
423 | if (startsWith($l['linkdate'], $day)) { | ||
424 | $filtered[$l['linkdate']] = $l; | ||
425 | } | ||
426 | } | ||
427 | ksort($filtered); | ||
428 | return $filtered; | ||
429 | } | ||
430 | |||
431 | /** | ||
432 | * Returns the article corresponding to a smallHash | ||
433 | */ | ||
434 | public function filterSmallHash($smallHash) | ||
435 | { | ||
436 | $filtered = array(); | ||
437 | foreach ($this->_links as $l) { | ||
438 | if ($smallHash == smallHash($l['linkdate'])) { | ||
439 | // Yes, this is ugly and slow | ||
440 | $filtered[$l['linkdate']] = $l; | ||
441 | return $filtered; | ||
442 | } | ||
443 | } | ||
444 | return $filtered; | ||
445 | } | 355 | } |
446 | 356 | ||
447 | /** | 357 | /** |
diff --git a/application/LinkFilter.php b/application/LinkFilter.php new file mode 100644 index 00000000..cf647371 --- /dev/null +++ b/application/LinkFilter.php | |||
@@ -0,0 +1,259 @@ | |||
1 | <?php | ||
2 | |||
/**
 * Class LinkFilter.
 *
 * Performs search and filter operations on a list of links.
 *
 * Each link is expected to be an associative array providing at least the
 * 'linkdate', 'title', 'description', 'url', 'tags' and 'private' keys,
 * since these are the keys read by the filters below.
 */
class LinkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var array all available links.
     */
    private $links;

    /**
     * @param array $links list of links the filters operate on.
     */
    public function __construct($links)
    {
        $this->links = $links;
    }

    /**
     * Filter links according to parameters.
     *
     * @param string $type          Type of filter (one of the FILTER_* values);
     *                              any other value returns the unfiltered list.
     * @param string $request       Filter content.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true (tag filter only).
     * @param bool   $privateonly   Optional: Only returns private links if true.
     *
     * @return array filtered link list.
     */
    public function filter($type, $request, $casesensitive = false, $privateonly = false)
    {
        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);
            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $privateonly);
            case self::$FILTER_TAG:
                return $this->filterTags($request, $casesensitive, $privateonly);
            case self::$FILTER_DAY:
                return $this->filterDay($request);
            default:
                return $this->noFilter($privateonly);
        }
    }

    /**
     * Unknown or empty filter: returns every link, newest key first,
     * optionally restricted to private links.
     *
     * @param bool $privateonly returns private links only if true.
     *
     * @return array filtered links.
     */
    private function noFilter($privateonly = false)
    {
        if (! $privateonly) {
            krsort($this->links);
            return $this->links;
        }

        $out = array();
        foreach ($this->links as $value) {
            if ($value['private']) {
                $out[$value['linkdate']] = $value;
            }
        }

        krsort($out);
        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return array array containing the matching link (keyed by linkdate),
     *               or an empty array if no link matches.
     */
    private function filterSmallHash($smallHash)
    {
        $wanted = (string) $smallHash;
        $filtered = array();

        foreach ($this->links as $l) {
            // Strict string comparison: with a loose '==', numeric-looking
            // hashes such as '0e1234' and '0e9999' would be considered equal.
            if ($wanted === (string) smallHash($l['linkdate'])) {
                // Yes, this is ugly and slow
                $filtered[$l['linkdate']] = $l;
                return $filtered;
            }
        }

        return $filtered;
    }

    /**
     * Returns the list of links corresponding to a full-text search
     *
     * Searches:
     *  - in the titles, descriptions, URLs and tags;
     *  - are case-insensitive.
     *
     * An empty search returns the unfiltered list.
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * @param string $searchterms search query.
     * @param bool   $privateonly return only private links if true.
     *
     * @return array search results, sorted by key in reverse order.
     */
    private function filterFulltext($searchterms, $privateonly = false)
    {
        // TODO: explode(' ', $searchterms) and perform an AND search.
        // TODO: accept double-quotes to search for a string "as is"?
        $search = mb_convert_case($searchterms, MB_CASE_LOWER, 'UTF-8');

        // strpos() with an empty needle warns (PHP < 8) or matches everything
        // (PHP >= 8); handle the empty search explicitly instead.
        if ($search === '') {
            return $this->noFilter($privateonly === true);
        }

        $keys = array('title', 'description', 'url', 'tags');
        $filtered = array();

        // Iterate over every stored link.
        foreach ($this->links as $link) {
            // Ignore non private links when 'privateonly' is on.
            if (! $link['private'] && $privateonly === true) {
                continue;
            }

            // Iterate over searchable link fields, stop at the first match.
            foreach ($keys as $key) {
                if (! isset($link[$key])) {
                    continue;
                }

                $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8');
                if (strpos($haystack, $search) !== false) {
                    $filtered[$link['linkdate']] = $link;
                    break;
                }
            }
        }

        krsort($filtered);
        return $filtered;
    }

    /**
     * Returns the list of links associated with a given list of tags
     *
     * You can specify one or more tags, separated by spaces or commas, e.g.
     *  print_r($linkFilter->filterTags('linux programming'));
     *
     * A link matches when it carries every requested tag. An empty tag list
     * returns the unfiltered list.
     *
     * @param string $tags          list of tags separated by commas or blank spaces.
     * @param bool   $casesensitive ignore case if false.
     * @param bool   $privateonly   returns private links only.
     *
     * @return array filtered links, sorted by key in reverse order.
     */
    public function filterTags($tags, $casesensitive = false, $privateonly = false)
    {
        $searchtags = array_unique($this->tagsStrToArray($tags, $casesensitive));
        if (count($searchtags) === 0) {
            return $this->noFilter($privateonly === true);
        }

        $filtered = array();
        foreach ($this->links as $l) {
            // Ignore non private links when 'privateonly' is on.
            if (! $l['private'] && $privateonly === true) {
                continue;
            }

            $linktags = $this->tagsStrToArray($l['tags'], $casesensitive);

            // Every searched tag must be present on the link. Comparing
            // intersection counts is not reliable: duplicated tags on either
            // side skew the counts.
            if (count(array_diff($searchtags, $linktags)) === 0) {
                $filtered[$l['linkdate']] = $l;
            }
        }

        krsort($filtered);
        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, chronologically sorted
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($linkFilter->filterDay('20120125'));
     *
     * @param string $day day to filter.
     *
     * @return array all links matching the given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay($day)
    {
        if (! checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $filtered = array();
        foreach ($this->links as $l) {
            if (startsWith($l['linkdate'], $day)) {
                $filtered[$l['linkdate']] = $l;
            }
        }

        ksort($filtered);
        return $filtered;
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handles case sensitivity.
     * - accepts spaces and commas as separators (any run of them counts as one).
     * - never returns empty tags.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return array list of tags; empty array if $tags holds no tag.
     */
    public function tagsStrToArray($tags, $casesensitive)
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        $tagsOut = $casesensitive ? (string) $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        $tagsOut = str_replace(',', ' ', $tagsOut);

        // Split on whitespace runs and drop empty chunks, so that 'a, b' or
        // 'a  b' do not produce an empty-string tag.
        return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
    }
}
diff --git a/application/Utils.php b/application/Utils.php index f84f70e4..aeaef9ff 100644 --- a/application/Utils.php +++ b/application/Utils.php | |||
@@ -72,12 +72,14 @@ function sanitizeLink(&$link) | |||
72 | 72 | ||
73 | /** | 73 | /** |
74 | * Checks if a string represents a valid date | 74 | * Checks if a string represents a valid date |
75 | |||
76 | * @param string $format The expected DateTime format of the string | ||
77 | * @param string $string A string-formatted date | ||
78 | * | ||
79 | * @return bool whether the string is a valid date | ||
75 | * | 80 | * |
76 | * @param string a string-formatted date | 81 | * @see http://php.net/manual/en/class.datetime.php |
77 | * @param format the expected DateTime format of the string | 82 | * @see http://php.net/manual/en/datetime.createfromformat.php |
78 | * @return whether the string is a valid date | ||
79 | * @see http://php.net/manual/en/class.datetime.php | ||
80 | * @see http://php.net/manual/en/datetime.createfromformat.php | ||
81 | */ | 83 | */ |
82 | function checkDateFormat($format, $string) | 84 | function checkDateFormat($format, $string) |
83 | { | 85 | { |