aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
author Willi Eggeling <thewilli@gmail.com> 2017-08-26 23:05:02 +0200
committer Willi Eggeling <thewilli@gmail.com> 2017-08-30 13:20:22 +0200
commit 341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8 (patch)
tree 19527ce7721d87d553c369705d8b1d69ee5d5777
parent fc27141cf6eb04d3d8714385cb6961a8063fe61b (diff)
download Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.tar.gz
Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.tar.zst
Shaarli-341527bae96e0d1c6a0cb5dfc790eacb3ef58bf8.zip
wildcard tag search support
- When searching for tags, you can now include '*' as a wildcard placeholder.
- The new search reduces the overall overhead when filtering by tags.
- Fixed the combination with description tag search ('#' prefix).
- Tests added.
-rw-r--r-- application/LinkFilter.php 131
-rw-r--r-- tests/api/controllers/GetLinksTest.php 83
2 files changed, 167 insertions, 47 deletions
diff --git a/application/LinkFilter.php b/application/LinkFilter.php
index 95519528..99ecd1e2 100644
--- a/application/LinkFilter.php
+++ b/application/LinkFilter.php
@@ -250,6 +250,51 @@ class LinkFilter
250 } 250 }
251 251
252 /** 252 /**
253 * generate a regex fragment out of a tag
254 * @param string $tag tag to generate a regex from. May start with '-' to negate and may contain '*' as wildcard
255 * @return string generated regex fragment
256 */
257 private static function tag2regex($tag)
258 {
259 $len = strlen($tag);
260 if(!$len || $tag === "-" || $tag === "*"){
261 // nothing to search, return empty regex
262 return '';
263 }
264 if($tag[0] === "-") {
265 // query is negated
266 $i = 1; // use offset to start after '-' character
267 $regex = '(?!'; // create negative lookahead
268 } else {
269 $i = 0; // start at first character
270 $regex = '(?='; // use positive lookahead
271 }
272 $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning
273 // iterate over string, separating it into placeholder and content
274 for(; $i < $len; $i++){
275 if($tag[$i] === '*'){
276 // placeholder found
277 $regex .= '[^ ]*?';
278 } else {
279 // regular characters
280 $offset = strpos($tag, '*', $i);
281 if($offset === false){
282 // no placeholder found, set offset to end of string
283 $offset = $len;
284 }
285 // subtract one, as we want to get before the placeholder or end of string
286 $offset -= 1;
287 // we got a tag name that we want to search for. escape any regex characters to prevent conflicts.
288 $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/');
289 // move $i on
290 $i = $offset;
291 }
292 }
293 $regex .= '(?:$| ))'; // after the tag may only be a space or the end
294 return $regex;
295 }
296
297 /**
253 * Returns the list of links associated with a given list of tags 298 * Returns the list of links associated with a given list of tags
254 * 299 *
255 * You can specify one or more tags, separated by space or a comma, e.g. 300 * You can specify one or more tags, separated by space or a comma, e.g.
@@ -263,20 +308,32 @@ class LinkFilter
263 */ 308 */
264 public function filterTags($tags, $casesensitive = false, $visibility = 'all') 309 public function filterTags($tags, $casesensitive = false, $visibility = 'all')
265 { 310 {
266 // Implode if array for clean up. 311 // get single tags (we may get passed an array, even though the docs say different)
267 $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags; 312 $inputTags = $tags;
268 if (empty($tags)) { 313 if(!is_array($tags)) {
314 // we got an input string, split tags
315 $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
316 }
317
318 if(!count($inputTags)){
319 // no input tags
269 return $this->noFilter($visibility); 320 return $this->noFilter($visibility);
270 } 321 }
271 322
272 $searchtags = self::tagsStrToArray($tags, $casesensitive); 323 // build regex from all tags
273 $filtered = array(); 324 $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/';
274 if (empty($searchtags)) { 325 if(!$casesensitive) {
275 return $filtered; 326 // make regex case insensitive
327 $re .= 'i';
276 } 328 }
277 329
330 // create resulting array
331 $filtered = array();
332
333 // iterate over each link
278 foreach ($this->links as $key => $link) { 334 foreach ($this->links as $key => $link) {
279 // ignore non private links when 'privatonly' is on. 335 // check level of visibility
336 // ignore non private links when 'privateonly' is on.
280 if ($visibility !== 'all') { 337 if ($visibility !== 'all') {
281 if (! $link['private'] && $visibility === 'private') { 338 if (! $link['private'] && $visibility === 'private') {
282 continue; 339 continue;
@@ -284,25 +341,27 @@ class LinkFilter
284 continue; 341 continue;
285 } 342 }
286 } 343 }
287 344 $search = $link['tags']; // build search string, start with tags of current link
288 $linktags = self::tagsStrToArray($link['tags'], $casesensitive); 345 if(strlen(trim($link['description'])) && strpos($link['description'], '#') !== false){
289 346 // description given and at least one possible tag found
290 $found = true; 347 $descTags = array();
291 for ($i = 0 ; $i < count($searchtags) && $found; $i++) { 348 // find all tags in the form of #tag in the description
292 // Exclusive search, quit if tag found. 349 preg_match_all(
293 // Or, tag not found in the link, quit. 350 '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
294 if (($searchtags[$i][0] == '-' 351 $link['description'],
295 && $this->searchTagAndHashTag(substr($searchtags[$i], 1), $linktags, $link['description'])) 352 $descTags
296 || ($searchtags[$i][0] != '-') 353 );
297 && ! $this->searchTagAndHashTag($searchtags[$i], $linktags, $link['description']) 354 if(count($descTags[1])){
298 ) { 355 // there were some tags in the description, add them to the search string
299 $found = false; 356 $search .= ' ' . implode(' ', $descTags[1]);
300 } 357 }
358 };
359 // match regular expression with search string
360 if(!preg_match($re, $search)){
361 // this entry does _not_ match our regex
362 continue;
301 } 363 }
302 364 $filtered[$key] = $link;
303 if ($found) {
304 $filtered[$key] = $link;
305 }
306 } 365 }
307 return $filtered; 366 return $filtered;
308 } 367 }
@@ -364,28 +423,6 @@ class LinkFilter
364 } 423 }
365 424
366 /** 425 /**
367 * Check if a tag is found in the taglist, or as an hashtag in the link description.
368 *
369 * @param string $tag Tag to search.
370 * @param array $taglist List of tags for the current link.
371 * @param string $description Link description.
372 *
373 * @return bool True if found, false otherwise.
374 */
375 protected function searchTagAndHashTag($tag, $taglist, $description)
376 {
377 if (in_array($tag, $taglist)) {
378 return true;
379 }
380
381 if (preg_match('/(^| )#'. $tag .'([^'. self::$HASHTAG_CHARS .']|$)/mui', $description) > 0) {
382 return true;
383 }
384
385 return false;
386 }
387
388 /**
389 * Convert a list of tags (str) to an array. Also 426 * Convert a list of tags (str) to an array. Also
390 * - handle case sensitivity. 427 * - handle case sensitivity.
391 * - accepts spaces commas as separator. 428 * - accepts spaces commas as separator.
diff --git a/tests/api/controllers/GetLinksTest.php b/tests/api/controllers/GetLinksTest.php
index 4cb70224..d22ed3bf 100644
--- a/tests/api/controllers/GetLinksTest.php
+++ b/tests/api/controllers/GetLinksTest.php
@@ -367,6 +367,89 @@ class GetLinksTest extends \PHPUnit_Framework_TestCase
367 $this->assertEquals(1, count($data)); 367 $this->assertEquals(1, count($data));
368 $this->assertEquals(41, $data[0]['id']); 368 $this->assertEquals(41, $data[0]['id']);
369 $this->assertEquals(self::NB_FIELDS_LINK, count($data[0])); 369 $this->assertEquals(self::NB_FIELDS_LINK, count($data[0]));
370
371 // wildcard: placeholder at the start
372 $env = Environment::mock([
373 'REQUEST_METHOD' => 'GET',
374 'QUERY_STRING' => 'searchtags=*Tuff',
375 ]);
376 $request = Request::createFromEnvironment($env);
377 $response = $this->controller->getLinks($request, new Response());
378 $this->assertEquals(200, $response->getStatusCode());
379 $data = json_decode((string) $response->getBody(), true);
380 $this->assertEquals(2, count($data));
381 $this->assertEquals(41, $data[0]['id']);
382
383 // wildcard: placeholder at the end
384 $env = Environment::mock([
385 'REQUEST_METHOD' => 'GET',
386 'QUERY_STRING' => 'searchtags=c*',
387 ]);
388 $request = Request::createFromEnvironment($env);
389 $response = $this->controller->getLinks($request, new Response());
390 $this->assertEquals(200, $response->getStatusCode());
391 $data = json_decode((string) $response->getBody(), true);
392 $this->assertEquals(4, count($data));
393 $this->assertEquals(6, $data[0]['id']);
394
395 // wildcard: placeholder at the middle
396 $env = Environment::mock([
397 'REQUEST_METHOD' => 'GET',
398 'QUERY_STRING' => 'searchtags=w*b',
399 ]);
400 $request = Request::createFromEnvironment($env);
401 $response = $this->controller->getLinks($request, new Response());
402 $this->assertEquals(200, $response->getStatusCode());
403 $data = json_decode((string) $response->getBody(), true);
404 $this->assertEquals(4, count($data));
405 $this->assertEquals(6, $data[0]['id']);
406
407 // wildcard: match all
408 $env = Environment::mock([
409 'REQUEST_METHOD' => 'GET',
410 'QUERY_STRING' => 'searchtags=*',
411 ]);
412 $request = Request::createFromEnvironment($env);
413 $response = $this->controller->getLinks($request, new Response());
414 $this->assertEquals(200, $response->getStatusCode());
415 $data = json_decode((string) $response->getBody(), true);
416 $this->assertEquals(9, count($data));
417 $this->assertEquals(41, $data[0]['id']);
418
419 // wildcard: optional ('*' does not need to expand)
420 $env = Environment::mock([
421 'REQUEST_METHOD' => 'GET',
422 'QUERY_STRING' => 'searchtags=*stuff*',
423 ]);
424 $request = Request::createFromEnvironment($env);
425 $response = $this->controller->getLinks($request, new Response());
426 $this->assertEquals(200, $response->getStatusCode());
427 $data = json_decode((string) $response->getBody(), true);
428 $this->assertEquals(2, count($data));
429 $this->assertEquals(41, $data[0]['id']);
430
431 // wildcard: exclusions
432 $env = Environment::mock([
433 'REQUEST_METHOD' => 'GET',
434 'QUERY_STRING' => 'searchtags=*a*+-*e*',
435 ]);
436 $request = Request::createFromEnvironment($env);
437 $response = $this->controller->getLinks($request, new Response());
438 $this->assertEquals(200, $response->getStatusCode());
439 $data = json_decode((string) $response->getBody(), true);
440 $this->assertEquals(1, count($data));
441 $this->assertEquals(41, $data[0]['id']); // finds '#hashtag' in descr.
442
443 // wildcard: exclude all
444 $env = Environment::mock([
445 'REQUEST_METHOD' => 'GET',
446 'QUERY_STRING' => 'searchtags=-*',
447 ]);
448 $request = Request::createFromEnvironment($env);
449 $response = $this->controller->getLinks($request, new Response());
450 $this->assertEquals(200, $response->getStatusCode());
451 $data = json_decode((string) $response->getBody(), true);
452 $this->assertEquals(0, count($data));
370 } 453 }
371 454
372 /** 455 /**