aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/LinkFilter.php
diff options
context:
space:
mode:
authorWilli Eggeling <thewilli@gmail.com>2017-08-26 23:05:02 +0200
committerWilli Eggeling <thewilli@gmail.com>2017-08-30 13:20:22 +0200
commit341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8 (patch)
tree19527ce7721d87d553c369705d8b1d69ee5d5777 /application/LinkFilter.php
parentfc27141cf6eb04d3d8714385cb6961a8063fe61b (diff)
downloadShaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.tar.gz
Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.tar.zst
Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.zip
wildcard tag search support
- when searching for tags you can now include '*' as wildcard placeholder - new search reduces overall overhead when filtering for tags - fixed combination with description tag search ('#' prefix) - tests added
Diffstat (limited to 'application/LinkFilter.php')
-rw-r--r--application/LinkFilter.php131
1 files changed, 84 insertions, 47 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php
index 95519528..99ecd1e2 100644
--- a/application/LinkFilter.php
+++ b/application/LinkFilter.php
@@ -250,6 +250,51 @@ class LinkFilter
250 } 250 }
251 251
252 /** 252 /**
253 * generate a regex fragment out of a tag
254 * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard
255 * @return string generated regex fragment
256 */
257 private static function tag2regex($tag)
258 {
259 $len = strlen($tag);
260 if(!$len || $tag === "-" || $tag === "*"){
261 // nothing to search, return empty regex
262 return '';
263 }
264 if($tag[0] === "-") {
265 // query is negated
266 $i = 1; // use offset to start after '-' character
267 $regex = '(?!'; // create negative lookahead
268 } else {
269 $i = 0; // start at first character
270 $regex = '(?='; // use positive lookahead
271 }
272 $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning
273 // iterate over string, separating it into placeholder and content
274 for(; $i < $len; $i++){
275 if($tag[$i] === '*'){
276 // placeholder found
277 $regex .= '[^ ]*?';
278 } else {
279 // regular characters
280 $offset = strpos($tag, '*', $i);
281 if($offset === false){
282 // no placeholder found, set offset to end of string
283 $offset = $len;
284 }
285 // subtract one, as we want to get before the placeholder or end of string
286 $offset -= 1;
287 // we got a tag name that we want to search for. escape any regex characters to prevent conflicts.
288 $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/');
289 // move $i on
290 $i = $offset;
291 }
292 }
293 $regex .= '(?:$| ))'; // after the tag may only be a space or the end
294 return $regex;
295 }
296
297 /**
253 * Returns the list of links associated with a given list of tags 298 * Returns the list of links associated with a given list of tags
254 * 299 *
255 * You can specify one or more tags, separated by space or a comma, e.g. 300 * You can specify one or more tags, separated by space or a comma, e.g.
@@ -263,20 +308,32 @@ class LinkFilter
263 */ 308 */
264 public function filterTags($tags, $casesensitive = false, $visibility = 'all') 309 public function filterTags($tags, $casesensitive = false, $visibility = 'all')
265 { 310 {
266 // Implode if array for clean up. 311 // get single tags (we may get passed an array, even though the docs say different)
267 $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags; 312 $inputTags = $tags;
268 if (empty($tags)) { 313 if(!is_array($tags)) {
314 // we got an input string, split tags
315 $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
316 }
317
318 if(!count($inputTags)){
319 // no input tags
269 return $this->noFilter($visibility); 320 return $this->noFilter($visibility);
270 } 321 }
271 322
272 $searchtags = self::tagsStrToArray($tags, $casesensitive); 323 // build regex from all tags
273 $filtered = array(); 324 $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/';
274 if (empty($searchtags)) { 325 if(!$casesensitive) {
275 return $filtered; 326 // make regex case insensitive
327 $re .= 'i';
276 } 328 }
277 329
330 // create resulting array
331 $filtered = array();
332
333 // iterate over each link
278 foreach ($this->links as $key => $link) { 334 foreach ($this->links as $key => $link) {
279 // ignore non private links when 'privatonly' is on. 335 // check level of visibility
336 // ignore non private links when 'privateonly' is on.
280 if ($visibility !== 'all') { 337 if ($visibility !== 'all') {
281 if (! $link['private'] && $visibility === 'private') { 338 if (! $link['private'] && $visibility === 'private') {
282 continue; 339 continue;
@@ -284,25 +341,27 @@ class LinkFilter
284 continue; 341 continue;
285 } 342 }
286 } 343 }
287 344 $search = $link['tags']; // build search string, start with tags of current link
288 $linktags = self::tagsStrToArray($link['tags'], $casesensitive); 345 if(strlen(trim($link['description'])) && strpos($link['description'], '#') !== false){
289 346 // description given and at least one possible tag found
290 $found = true; 347 $descTags = array();
291 for ($i = 0 ; $i < count($searchtags) && $found; $i++) { 348 // find all tags in the form of #tag in the description
292 // Exclusive search, quit if tag found. 349 preg_match_all(
293 // Or, tag not found in the link, quit. 350 '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
294 if (($searchtags[$i][0] == '-' 351 $link['description'],
295 && $this->searchTagAndHashTag(substr($searchtags[$i], 1), $linktags, $link['description'])) 352 $descTags
296 || ($searchtags[$i][0] != '-') 353 );
297 && ! $this->searchTagAndHashTag($searchtags[$i], $linktags, $link['description']) 354 if(count($descTags[1])){
298 ) { 355 // there were some tags in the description, add them to the search string
299 $found = false; 356 $search .= ' ' . implode(' ', $descTags[1]);
300 } 357 }
358 };
359 // match regular expression with search string
360 if(!preg_match($re, $search)){
361 // this entry does _not_ match our regex
362 continue;
301 } 363 }
302 364 $filtered[$key] = $link;
303 if ($found) {
304 $filtered[$key] = $link;
305 }
306 } 365 }
307 return $filtered; 366 return $filtered;
308 } 367 }
@@ -364,28 +423,6 @@ class LinkFilter
364 } 423 }
365 424
366 /** 425 /**
367 * Check if a tag is found in the taglist, or as an hashtag in the link description.
368 *
369 * @param string $tag Tag to search.
370 * @param array $taglist List of tags for the current link.
371 * @param string $description Link description.
372 *
373 * @return bool True if found, false otherwise.
374 */
375 protected function searchTagAndHashTag($tag, $taglist, $description)
376 {
377 if (in_array($tag, $taglist)) {
378 return true;
379 }
380
381 if (preg_match('/(^| )#'. $tag .'([^'. self::$HASHTAG_CHARS .']|$)/mui', $description) > 0) {
382 return true;
383 }
384
385 return false;
386 }
387
388 /**
389 * Convert a list of tags (str) to an array. Also 426 * Convert a list of tags (str) to an array. Also
390 * - handle case sensitivity. 427 * - handle case sensitivity.
391 * - accepts spaces commas as separator. 428 * - accepts spaces commas as separator.