diff options
author | Willi Eggeling <thewilli@gmail.com> | 2017-08-26 23:05:02 +0200 |
---|---|---|
committer | Willi Eggeling <thewilli@gmail.com> | 2017-08-30 13:20:22 +0200 |
commit | 341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8 (patch) | |
tree | 19527ce7721d87d553c369705d8b1d69ee5d5777 | |
parent | fc27141cf6eb04d3d8714385cb6961a8063fe61b (diff) | |
download | Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.tar.gz Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.tar.zst Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.zip |
wildcard tag search support
- when searching for tags you can now include '*' as a wildcard placeholder
- new search reduces overall overhead when filtering for tags
- fixed combination with description tag search ('#' prefix)
- tests added
-rw-r--r-- | application/LinkFilter.php | 131 | ||||
-rw-r--r-- | tests/api/controllers/GetLinksTest.php | 83 |
2 files changed, 167 insertions, 47 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php index 95519528..99ecd1e2 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php | |||
@@ -250,6 +250,51 @@ class LinkFilter | |||
250 | } | 250 | } |
251 | 251 | ||
252 | /** | 252 | /** |
253 | * generate a regex fragment out of a tag | ||
254 | * @param string $tag tag to generate a regex from. May start with '-' to negate and may contain '*' as a wildcard | |
255 | * @return string generated regex fragment | ||
256 | */ | ||
257 | private static function tag2regex($tag) | ||
258 | { | ||
259 | $len = strlen($tag); | ||
260 | if(!$len || $tag === "-" || $tag === "*"){ | ||
261 | // nothing to search, return empty regex | ||
262 | return ''; | ||
263 | } | ||
264 | if($tag[0] === "-") { | ||
265 | // query is negated | ||
266 | $i = 1; // use offset to start after '-' character | ||
267 | $regex = '(?!'; // create negative lookahead | ||
268 | } else { | ||
269 | $i = 0; // start at first character | ||
270 | $regex = '(?='; // use positive lookahead | ||
271 | } | ||
272 | $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning | ||
273 | // iterate over string, separating it into placeholder and content | ||
274 | for(; $i < $len; $i++){ | ||
275 | if($tag[$i] === '*'){ | ||
276 | // placeholder found | ||
277 | $regex .= '[^ ]*?'; | ||
278 | } else { | ||
279 | // regular characters | ||
280 | $offset = strpos($tag, '*', $i); | ||
281 | if($offset === false){ | ||
282 | // no placeholder found, set offset to end of string | ||
283 | $offset = $len; | ||
284 | } | ||
285 | // subtract one, as we want to get before the placeholder or end of string | ||
286 | $offset -= 1; | ||
287 | // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. | ||
288 | $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); | ||
289 | // move $i on | ||
290 | $i = $offset; | ||
291 | } | ||
292 | } | ||
293 | $regex .= '(?:$| ))'; // after the tag may only be a space or the end | ||
294 | return $regex; | ||
295 | } | ||
296 | |||
297 | /** | ||
253 | * Returns the list of links associated with a given list of tags | 298 | * Returns the list of links associated with a given list of tags |
254 | * | 299 | * |
255 | * You can specify one or more tags, separated by space or a comma, e.g. | 300 | * You can specify one or more tags, separated by space or a comma, e.g. |
@@ -263,20 +308,32 @@ class LinkFilter | |||
263 | */ | 308 | */ |
264 | public function filterTags($tags, $casesensitive = false, $visibility = 'all') | 309 | public function filterTags($tags, $casesensitive = false, $visibility = 'all') |
265 | { | 310 | { |
266 | // Implode if array for clean up. | 311 | // get single tags (we may get passed an array, even though the docs say otherwise) |
267 | $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags; | 312 | $inputTags = $tags; |
268 | if (empty($tags)) { | 313 | if(!is_array($tags)) { |
314 | // we got an input string, split tags | ||
315 | $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); | ||
316 | } | ||
317 | |||
318 | if(!count($inputTags)){ | ||
319 | // no input tags | ||
269 | return $this->noFilter($visibility); | 320 | return $this->noFilter($visibility); |
270 | } | 321 | } |
271 | 322 | ||
272 | $searchtags = self::tagsStrToArray($tags, $casesensitive); | 323 | // build regex from all tags |
273 | $filtered = array(); | 324 | $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; |
274 | if (empty($searchtags)) { | 325 | if(!$casesensitive) { |
275 | return $filtered; | 326 | // make regex case insensitive |
327 | $re .= 'i'; | ||
276 | } | 328 | } |
277 | 329 | ||
330 | // create resulting array | ||
331 | $filtered = array(); | ||
332 | |||
333 | // iterate over each link | ||
278 | foreach ($this->links as $key => $link) { | 334 | foreach ($this->links as $key => $link) { |
279 | // ignore non private links when 'privatonly' is on. | 335 | // check level of visibility |
336 | // ignore non private links when 'privateonly' is on. | ||
280 | if ($visibility !== 'all') { | 337 | if ($visibility !== 'all') { |
281 | if (! $link['private'] && $visibility === 'private') { | 338 | if (! $link['private'] && $visibility === 'private') { |
282 | continue; | 339 | continue; |
@@ -284,25 +341,27 @@ class LinkFilter | |||
284 | continue; | 341 | continue; |
285 | } | 342 | } |
286 | } | 343 | } |
287 | 344 | $search = $link['tags']; // build search string, start with tags of current link | |
288 | $linktags = self::tagsStrToArray($link['tags'], $casesensitive); | 345 | if(strlen(trim($link['description'])) && strpos($link['description'], '#') !== false){ |
289 | 346 | // description given and at least one possible tag found | |
290 | $found = true; | 347 | $descTags = array(); |
291 | for ($i = 0 ; $i < count($searchtags) && $found; $i++) { | 348 | // find all tags in the form of #tag in the description |
292 | // Exclusive search, quit if tag found. | 349 | preg_match_all( |
293 | // Or, tag not found in the link, quit. | 350 | '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm', |
294 | if (($searchtags[$i][0] == '-' | 351 | $link['description'], |
295 | && $this->searchTagAndHashTag(substr($searchtags[$i], 1), $linktags, $link['description'])) | 352 | $descTags |
296 | || ($searchtags[$i][0] != '-') | 353 | ); |
297 | && ! $this->searchTagAndHashTag($searchtags[$i], $linktags, $link['description']) | 354 | if(count($descTags[1])){ |
298 | ) { | 355 | // there were some tags in the description, add them to the search string |
299 | $found = false; | 356 | $search .= ' ' . implode(' ', $descTags[1]); |
300 | } | 357 | } |
358 | }; | ||
359 | // match regular expression with search string | ||
360 | if(!preg_match($re, $search)){ | ||
361 | // this entry does _not_ match our regex | ||
362 | continue; | ||
301 | } | 363 | } |
302 | 364 | $filtered[$key] = $link; | |
303 | if ($found) { | ||
304 | $filtered[$key] = $link; | ||
305 | } | ||
306 | } | 365 | } |
307 | return $filtered; | 366 | return $filtered; |
308 | } | 367 | } |
@@ -364,28 +423,6 @@ class LinkFilter | |||
364 | } | 423 | } |
365 | 424 | ||
366 | /** | 425 | /** |
367 | * Check if a tag is found in the taglist, or as an hashtag in the link description. | ||
368 | * | ||
369 | * @param string $tag Tag to search. | ||
370 | * @param array $taglist List of tags for the current link. | ||
371 | * @param string $description Link description. | ||
372 | * | ||
373 | * @return bool True if found, false otherwise. | ||
374 | */ | ||
375 | protected function searchTagAndHashTag($tag, $taglist, $description) | ||
376 | { | ||
377 | if (in_array($tag, $taglist)) { | ||
378 | return true; | ||
379 | } | ||
380 | |||
381 | if (preg_match('/(^| )#'. $tag .'([^'. self::$HASHTAG_CHARS .']|$)/mui', $description) > 0) { | ||
382 | return true; | ||
383 | } | ||
384 | |||
385 | return false; | ||
386 | } | ||
387 | |||
388 | /** | ||
389 | * Convert a list of tags (str) to an array. Also | 426 | * Convert a list of tags (str) to an array. Also |
390 | * - handle case sensitivity. | 427 | * - handle case sensitivity. |
391 | * - accepts spaces commas as separator. | 428 | * - accepts spaces commas as separator. |
diff --git a/tests/api/controllers/GetLinksTest.php b/tests/api/controllers/GetLinksTest.php index 4cb70224..d22ed3bf 100644 --- a/tests/api/controllers/GetLinksTest.php +++ b/tests/api/controllers/GetLinksTest.php | |||
@@ -367,6 +367,89 @@ class GetLinksTest extends \PHPUnit_Framework_TestCase | |||
367 | $this->assertEquals(1, count($data)); | 367 | $this->assertEquals(1, count($data)); |
368 | $this->assertEquals(41, $data[0]['id']); | 368 | $this->assertEquals(41, $data[0]['id']); |
369 | $this->assertEquals(self::NB_FIELDS_LINK, count($data[0])); | 369 | $this->assertEquals(self::NB_FIELDS_LINK, count($data[0])); |
370 | |||
371 | // wildcard: placeholder at the start | ||
372 | $env = Environment::mock([ | ||
373 | 'REQUEST_METHOD' => 'GET', | ||
374 | 'QUERY_STRING' => 'searchtags=*Tuff', | ||
375 | ]); | ||
376 | $request = Request::createFromEnvironment($env); | ||
377 | $response = $this->controller->getLinks($request, new Response()); | ||
378 | $this->assertEquals(200, $response->getStatusCode()); | ||
379 | $data = json_decode((string) $response->getBody(), true); | ||
380 | $this->assertEquals(2, count($data)); | ||
381 | $this->assertEquals(41, $data[0]['id']); | ||
382 | |||
383 | // wildcard: placeholder at the end | ||
384 | $env = Environment::mock([ | ||
385 | 'REQUEST_METHOD' => 'GET', | ||
386 | 'QUERY_STRING' => 'searchtags=c*', | ||
387 | ]); | ||
388 | $request = Request::createFromEnvironment($env); | ||
389 | $response = $this->controller->getLinks($request, new Response()); | ||
390 | $this->assertEquals(200, $response->getStatusCode()); | ||
391 | $data = json_decode((string) $response->getBody(), true); | ||
392 | $this->assertEquals(4, count($data)); | ||
393 | $this->assertEquals(6, $data[0]['id']); | ||
394 | |||
395 | // wildcard: placeholder at the middle | ||
396 | $env = Environment::mock([ | ||
397 | 'REQUEST_METHOD' => 'GET', | ||
398 | 'QUERY_STRING' => 'searchtags=w*b', | ||
399 | ]); | ||
400 | $request = Request::createFromEnvironment($env); | ||
401 | $response = $this->controller->getLinks($request, new Response()); | ||
402 | $this->assertEquals(200, $response->getStatusCode()); | ||
403 | $data = json_decode((string) $response->getBody(), true); | ||
404 | $this->assertEquals(4, count($data)); | ||
405 | $this->assertEquals(6, $data[0]['id']); | ||
406 | |||
407 | // wildcard: match all | ||
408 | $env = Environment::mock([ | ||
409 | 'REQUEST_METHOD' => 'GET', | ||
410 | 'QUERY_STRING' => 'searchtags=*', | ||
411 | ]); | ||
412 | $request = Request::createFromEnvironment($env); | ||
413 | $response = $this->controller->getLinks($request, new Response()); | ||
414 | $this->assertEquals(200, $response->getStatusCode()); | ||
415 | $data = json_decode((string) $response->getBody(), true); | ||
416 | $this->assertEquals(9, count($data)); | ||
417 | $this->assertEquals(41, $data[0]['id']); | ||
418 | |||
419 | // wildcard: optional ('*' does not need to expand) | ||
420 | $env = Environment::mock([ | ||
421 | 'REQUEST_METHOD' => 'GET', | ||
422 | 'QUERY_STRING' => 'searchtags=*stuff*', | ||
423 | ]); | ||
424 | $request = Request::createFromEnvironment($env); | ||
425 | $response = $this->controller->getLinks($request, new Response()); | ||
426 | $this->assertEquals(200, $response->getStatusCode()); | ||
427 | $data = json_decode((string) $response->getBody(), true); | ||
428 | $this->assertEquals(2, count($data)); | ||
429 | $this->assertEquals(41, $data[0]['id']); | ||
430 | |||
431 | // wildcard: exclusions | ||
432 | $env = Environment::mock([ | ||
433 | 'REQUEST_METHOD' => 'GET', | ||
434 | 'QUERY_STRING' => 'searchtags=*a*+-*e*', | ||
435 | ]); | ||
436 | $request = Request::createFromEnvironment($env); | ||
437 | $response = $this->controller->getLinks($request, new Response()); | ||
438 | $this->assertEquals(200, $response->getStatusCode()); | ||
439 | $data = json_decode((string) $response->getBody(), true); | ||
440 | $this->assertEquals(1, count($data)); | ||
441 | $this->assertEquals(41, $data[0]['id']); // finds '#hashtag' in descr. | ||
442 | |||
443 | // wildcard: exclude all | ||
444 | $env = Environment::mock([ | ||
445 | 'REQUEST_METHOD' => 'GET', | ||
446 | 'QUERY_STRING' => 'searchtags=-*', | ||
447 | ]); | ||
448 | $request = Request::createFromEnvironment($env); | ||
449 | $response = $this->controller->getLinks($request, new Response()); | ||
450 | $this->assertEquals(200, $response->getStatusCode()); | ||
451 | $data = json_decode((string) $response->getBody(), true); | ||
452 | $this->assertEquals(0, count($data)); | ||
370 | } | 453 | } |
371 | 454 | ||
372 | /** | 455 | /** |