]>
Commit | Line | Data |
---|---|---|
822bffce A |
1 | <?php |
2 | ||
3 | /** | |
4 | * Class LinkFilter. | |
5 | * | |
6 | * Perform search and filter operation on link data list. | |
7 | */ | |
class LinkFilter
{
    /**
     * @var string permalinks.
     */
    public static $FILTER_HASH = 'permalink';

    /**
     * @var string text search.
     */
    public static $FILTER_TEXT = 'fulltext';

    /**
     * @var string tag filter.
     */
    public static $FILTER_TAG = 'tags';

    /**
     * @var string filter by day.
     */
    public static $FILTER_DAY = 'FILTER_DAY';

    /**
     * @var array all available links.
     */
    private $links;

    /**
     * @param array $links initialization.
     */
    public function __construct($links)
    {
        $this->links = $links;
    }

    /**
     * Filter links according to parameters.
     *
     * @param string $type          Type of filter (eg. tags, permalink, etc.).
     * @param mixed  $request       Filter content. For the combined tag + text filter this is
     *                              an array: index 0 holds the tags, index 1 the search terms.
     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
     * @param bool   $privateonly   Optional: Only returns private links if true.
     *
     * @return array filtered link list.
     */
    public function filter($type, $request, $casesensitive = false, $privateonly = false)
    {
        switch ($type) {
            case self::$FILTER_HASH:
                return $this->filterSmallHash($request);

            // Applied to two strings, `|` is a byte-wise OR, so this label is an opaque
            // string. The branch is only reached when $type was built with the very same
            // expression (LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT).
            case self::$FILTER_TAG | self::$FILTER_TEXT:
                if (empty($request)) {
                    return $this->noFilter($privateonly);
                }

                $filtered = $this->links;
                if (isset($request[0])) {
                    $filtered = $this->filterTags($request[0], $casesensitive, $privateonly);
                }
                if (isset($request[1])) {
                    // Run the text search on the tag-filtered subset only.
                    $subFilter = new LinkFilter($filtered);
                    $filtered = $subFilter->filterFulltext($request[1], $privateonly);
                }
                return $filtered;

            case self::$FILTER_TEXT:
                return $this->filterFulltext($request, $privateonly);

            case self::$FILTER_TAG:
                return $this->filterTags($request, $casesensitive, $privateonly);

            case self::$FILTER_DAY:
                return $this->filterDay($request);

            default:
                return $this->noFilter($privateonly);
        }
    }

    /**
     * Unknown filter, but handle private only.
     *
     * Without $privateonly the stored list itself is sorted in place (keys, descending)
     * and returned. With $privateonly a new array keyed by 'linkdate' is built.
     *
     * @param bool $privateonly returns private link only if true.
     *
     * @return array filtered links.
     */
    private function noFilter($privateonly = false)
    {
        if (! $privateonly) {
            krsort($this->links);
            return $this->links;
        }

        $out = array();
        foreach ($this->links as $value) {
            if ($value['private']) {
                $out[$value['linkdate']] = $value;
            }
        }

        krsort($out);
        return $out;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $smallHash permalink hash.
     *
     * @return array $filtered array containing permalink data (empty if no link matches).
     */
    private function filterSmallHash($smallHash)
    {
        $filtered = array();
        foreach ($this->links as $l) {
            // Linear scan: the hash of every link date is recomputed until one matches.
            if ($smallHash == smallHash($l['linkdate'])) {
                $filtered[$l['linkdate']] = $l;
                return $filtered;
            }
        }
        return $filtered;
    }

    /**
     * Returns the list of links corresponding to a full-text search
     *
     * Searches:
     *  - in the URLs, title, description and tags;
     *  - are case-insensitive;
     *  - terms surrounded by quotes " are exact terms search.
     *  - terms starting with a dash - are excluded (except exact terms).
     *
     * Example:
     *    print_r($mydb->filterFulltext('hollandais'));
     *
     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
     *  - allows to perform searches on Unicode text
     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
     *
     * An empty query applies no text filter but still honours $privateonly.
     *
     * @param string $searchterms search query.
     * @param bool   $privateonly return only private links if true.
     *
     * @return array search results.
     */
    private function filterFulltext($searchterms, $privateonly = false)
    {
        if (empty($searchterms)) {
            // Delegate so that the private-only restriction is not silently dropped.
            return $this->noFilter($privateonly);
        }

        $filtered = array();
        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
        $exactRegex = '/"([^"]+)"/';
        // Retrieve exact search terms.
        preg_match_all($exactRegex, $search, $exactSearch);
        $exactSearch = array_values(array_filter($exactSearch[1]));

        // Remove exact search terms to get AND terms search.
        $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search)));
        $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));

        // Split the remaining words into required and excluded ("-word") terms.
        $excludeSearch = array();
        $andSearch = array();
        foreach ($explodedSearchAnd as $needle) {
            if ($needle[0] == '-' && strlen($needle) > 1) {
                $excludeSearch[] = substr($needle, 1);
            } else {
                $andSearch[] = $needle;
            }
        }

        // Exact terms and plain terms are both mandatory substrings.
        $requiredTerms = array_merge($exactSearch, $andSearch);
        $keys = array('title', 'description', 'url', 'tags');

        // Iterate over every stored link.
        foreach ($this->links as $link) {
            // ignore non private links when 'privatonly' is on.
            if (! $link['private'] && $privateonly === true) {
                continue;
            }

            // Concatenate link fields to search across fields.
            // Adds a '\' separator for exact search terms.
            $content = '';
            foreach ($keys as $key) {
                $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
            }

            // Be optimistic
            $found = true;

            // All or nothing: stop at the first required term that is missing.
            foreach ($requiredTerms as $term) {
                if (strpos($content, $term) === false) {
                    $found = false;
                    break;
                }
            }

            // Exclude terms: a single hit rejects the link.
            if ($found) {
                foreach ($excludeSearch as $term) {
                    if (strpos($content, $term) !== false) {
                        $found = false;
                        break;
                    }
                }
            }

            if ($found) {
                $filtered[$link['linkdate']] = $link;
            }
        }

        krsort($filtered);
        return $filtered;
    }

    /**
     * Returns the list of links associated with a given list of tags
     *
     * You can specify one or more tags, separated by space or a comma, e.g.
     *  print_r($mydb->filterTags('linux programming'));
     *
     * A tag prefixed with a dash (e.g. '-linux') excludes links carrying that tag.
     * An empty tag list applies no tag filter but still honours $privateonly.
     *
     * @param string|array $tags          list of tags separated by commas or blank spaces.
     * @param bool         $casesensitive ignore case if false.
     * @param bool         $privateonly   returns private links only.
     *
     * @return array filtered links.
     */
    public function filterTags($tags, $casesensitive = false, $privateonly = false)
    {
        // Implode if array for clean up.
        $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags;
        if (empty($tags)) {
            // Delegate so that the private-only restriction is not silently dropped.
            return $this->noFilter($privateonly);
        }

        $searchtags = self::tagsStrToArray($tags, $casesensitive);
        $filtered = array();
        if (empty($searchtags)) {
            return $filtered;
        }

        foreach ($this->links as $link) {
            // ignore non private links when 'privatonly' is on.
            if (! $link['private'] && $privateonly === true) {
                continue;
            }

            $linktags = self::tagsStrToArray($link['tags'], $casesensitive);

            $found = true;
            foreach ($searchtags as $tag) {
                // Strict comparison: tags are strings, and a loose in_array() would
                // treat numeric-looking tags such as '1' and '01' as equal.
                if ($tag[0] === '-') {
                    // Exclusive search: the link must NOT carry the tag.
                    $matches = ! in_array(substr($tag, 1), $linktags, true);
                } else {
                    $matches = in_array($tag, $linktags, true);
                }

                if (! $matches) {
                    $found = false;
                    break;
                }
            }

            if ($found) {
                $filtered[$link['linkdate']] = $link;
            }
        }
        krsort($filtered);
        return $filtered;
    }

    /**
     * Returns the list of articles for a given day, chronologically sorted
     *
     * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
     *  print_r($mydb->filterDay('20120125'));
     *
     * @param string $day day to filter.
     *
     * @return array all link matching given day.
     *
     * @throws Exception if date format is invalid.
     */
    public function filterDay($day)
    {
        if (! checkDateFormat('Ymd', $day)) {
            throw new Exception('Invalid date format');
        }

        $filtered = array();
        foreach ($this->links as $l) {
            if (startsWith($l['linkdate'], $day)) {
                $filtered[$l['linkdate']] = $l;
            }
        }
        // Ascending key order, unlike the other filters which sort descending.
        ksort($filtered);
        return $filtered;
    }

    /**
     * Convert a list of tags (str) to an array. Also
     * - handle case sensitivity.
     * - accepts spaces commas as separator.
     *
     * @param string $tags          string containing a list of tags.
     * @param bool   $casesensitive will convert everything to lowercase if false.
     *
     * @return array list of non-empty tags, indexed sequentially from 0.
     */
    public static function tagsStrToArray($tags, $casesensitive)
    {
        // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek)
        $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
        $tagsOut = str_replace(',', ' ', $tagsOut);

        // array_filter() keeps the original keys; re-index so that repeated separators
        // ("a  b", "a, b") do not leave gaps for index-based consumers.
        return array_values(array_filter(explode(' ', trim($tagsOut)), 'strlen'));
    }
}