From b3bd8c3e8d367975980043e772f7cd78b7f96bc6 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 22 Oct 2020 16:21:03 +0200 Subject: Feature: support any tag separator So it allows to have multiple words tags. Breaking change: commas ',' are no longer a default separator. Fixes #594 --- application/bookmark/Bookmark.php | 39 ++++++++++++----------- application/bookmark/BookmarkFileService.php | 2 +- application/bookmark/BookmarkFilter.php | 47 ++++++++++++++++++---------- application/bookmark/LinkUtils.php | 46 +++++++++++++++++++++++++++ 4 files changed, 99 insertions(+), 35 deletions(-) (limited to 'application/bookmark') diff --git a/application/bookmark/Bookmark.php b/application/bookmark/Bookmark.php index 4810c5e6..8aaeb9d8 100644 --- a/application/bookmark/Bookmark.php +++ b/application/bookmark/Bookmark.php @@ -60,11 +60,13 @@ class Bookmark /** * Initialize a link from array data. Especially useful to create a Bookmark from former link storage format. * - * @param array $data + * @param array $data + * @param string $tagsSeparator Tags separator loaded from the config file. + * This is a context data, and it should *never* be stored in the Bookmark object. * * @return $this */ - public function fromArray(array $data): Bookmark + public function fromArray(array $data, string $tagsSeparator = ' '): Bookmark { $this->id = $data['id'] ?? null; $this->shortUrl = $data['shorturl'] ?? null; @@ -77,7 +79,7 @@ class Bookmark if (is_array($data['tags'])) { $this->tags = $data['tags']; } else { - $this->tags = preg_split('/\s+/', $data['tags'] ?? '', -1, PREG_SPLIT_NO_EMPTY); + $this->tags = tags_str2array($data['tags'] ?? '', $tagsSeparator); } if (! empty($data['updated'])) { $this->updated = $data['updated']; @@ -348,7 +350,12 @@ class Bookmark */ public function setTags(?array $tags): Bookmark { - $this->setTagsString(implode(' ', $tags ?? [])); + $this->tags = array_map( + function (string $tag): string { + return $tag[0] === '-' ? 
substr($tag, 1) : $tag; + }, + tags_filter($tags, ' ') + ); return $this; } @@ -420,11 +427,13 @@ class Bookmark } /** - * @return string Bookmark's tags as a string, separated by a space + * @param string $separator Tags separator loaded from the config file. + * + * @return string Bookmark's tags as a string, separated by a separator */ - public function getTagsString(): string + public function getTagsString(string $separator = ' '): string { - return implode(' ', $this->getTags()); + return tags_array2str($this->getTags(), $separator); } /** @@ -444,19 +453,13 @@ class Bookmark * - trailing dash in tags will be removed * * @param string|null $tags + * @param string $separator Tags separator loaded from the config file. * * @return $this */ - public function setTagsString(?string $tags): Bookmark + public function setTagsString(?string $tags, string $separator = ' '): Bookmark { - // Remove first '-' char in tags. - $tags = preg_replace('/(^| )\-/', '$1', $tags ?? ''); - // Explode all tags separted by spaces or commas - $tags = preg_split('/[\s,]+/', $tags); - // Remove eventual empty values - $tags = array_values(array_filter($tags)); - - $this->tags = $tags; + $this->setTags(tags_str2array($tags, $separator)); return $this; } @@ -507,7 +510,7 @@ class Bookmark */ public function renameTag(string $fromTag, string $toTag): void { - if (($pos = array_search($fromTag, $this->tags)) !== false) { + if (($pos = array_search($fromTag, $this->tags ?? [])) !== false) { $this->tags[$pos] = trim($toTag); } } @@ -519,7 +522,7 @@ class Bookmark */ public function deleteTag(string $tag): void { - if (($pos = array_search($tag, $this->tags)) !== false) { + if (($pos = array_search($tag, $this->tags ?? 
[])) !== false) { unset($this->tags[$pos]); $this->tags = array_values($this->tags); } diff --git a/application/bookmark/BookmarkFileService.php b/application/bookmark/BookmarkFileService.php index 3ea98a45..85efeea6 100644 --- a/application/bookmark/BookmarkFileService.php +++ b/application/bookmark/BookmarkFileService.php @@ -91,7 +91,7 @@ class BookmarkFileService implements BookmarkServiceInterface } } - $this->bookmarkFilter = new BookmarkFilter($this->bookmarks); + $this->bookmarkFilter = new BookmarkFilter($this->bookmarks, $this->conf); } /** diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php index c79386ea..5d8733dc 100644 --- a/application/bookmark/BookmarkFilter.php +++ b/application/bookmark/BookmarkFilter.php @@ -6,6 +6,7 @@ namespace Shaarli\Bookmark; use Exception; use Shaarli\Bookmark\Exception\BookmarkNotFoundException; +use Shaarli\Config\ConfigManager; /** * Class LinkFilter. @@ -58,12 +59,16 @@ class BookmarkFilter */ private $bookmarks; + /** @var ConfigManager */ + protected $conf; + /** * @param Bookmark[] $bookmarks initialization. 
*/ - public function __construct($bookmarks) + public function __construct($bookmarks, ConfigManager $conf) { $this->bookmarks = $bookmarks; + $this->conf = $conf; } /** @@ -107,10 +112,14 @@ class BookmarkFilter $filtered = $this->bookmarks; } if (!empty($request[0])) { - $filtered = (new BookmarkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); + $filtered = (new BookmarkFilter($filtered, $this->conf)) + ->filterTags($request[0], $casesensitive, $visibility) + ; } if (!empty($request[1])) { - $filtered = (new BookmarkFilter($filtered))->filterFulltext($request[1], $visibility); + $filtered = (new BookmarkFilter($filtered, $this->conf)) + ->filterFulltext($request[1], $visibility) + ; } return $filtered; case self::$FILTER_TEXT: @@ -280,8 +289,9 @@ class BookmarkFilter * * @return string generated regex fragment */ - private static function tag2regex(string $tag): string + protected function tag2regex(string $tag): string { + $tagsSeparator = $this->conf->get('general.tags_separator', ' '); $len = strlen($tag); if (!$len || $tag === "-" || $tag === "*") { // nothing to search, return empty regex @@ -295,12 +305,13 @@ class BookmarkFilter $i = 0; // start at first character $regex = '(?='; // use positive lookahead } - $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning + // before tag may only be the separator or the beginning + $regex .= '.*(?:^|' . $tagsSeparator . ')'; // iterate over string, separating it into placeholder and content for (; $i < $len; $i++) { if ($tag[$i] === '*') { // placeholder found - $regex .= '[^ ]*?'; + $regex .= '[^' . $tagsSeparator . ']*?'; } else { // regular characters $offset = strpos($tag, '*', $i); @@ -316,7 +327,8 @@ class BookmarkFilter $i = $offset; } } - $regex .= '(?:$| ))'; // after the tag may only be a space or the end + // after the tag may only be the separator or the end + $regex .= '(?:$|' . $tagsSeparator . 
'))'; return $regex; } @@ -334,14 +346,15 @@ class BookmarkFilter */ public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all') { + $tagsSeparator = $this->conf->get('general.tags_separator', ' '); // get single tags (we may get passed an array, even though the docs say different) $inputTags = $tags; if (!is_array($tags)) { // we got an input string, split tags - $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); + $inputTags = tags_str2array($inputTags, $tagsSeparator); } - if (!count($inputTags)) { + if (count($inputTags) === 0) { // no input tags return $this->noFilter($visibility); } @@ -358,7 +371,7 @@ class BookmarkFilter } // build regex from all tags - $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + $re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/'; if (!$casesensitive) { // make regex case insensitive $re .= 'i'; @@ -378,7 +391,8 @@ class BookmarkFilter continue; } } - $search = $link->getTagsString(); // build search string, start with tags of current link + // build search string, start with tags of current link + $search = $link->getTagsString($tagsSeparator); if (strlen(trim($link->getDescription())) && strpos($link->getDescription(), '#') !== false) { // description given and at least one possible tag found $descTags = array(); @@ -390,9 +404,9 @@ class BookmarkFilter ); if (count($descTags[1])) { // there were some tags in the description, add them to the search string - $search .= ' ' . implode(' ', $descTags[1]); + $search .= $tagsSeparator . 
tags_array2str($descTags[1], $tagsSeparator); } - }; + } // match regular expression with search string if (!preg_match($re, $search)) { // this entry does _not_ match our regex @@ -422,7 +436,7 @@ class BookmarkFilter } } - if (empty(trim($link->getTagsString()))) { + if (empty($link->getTags())) { $filtered[$key] = $link; } } @@ -537,10 +551,11 @@ class BookmarkFilter */ protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string { + $tagString = $link->getTagsString($this->conf->get('general.tags_separator', ' ')); $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\'; $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\'; $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\'; + $content .= mb_convert_case($tagString, MB_CASE_LOWER, 'UTF-8') .'\\'; $lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())]; $nextField = $lengths['title']['end'] + 1; @@ -548,7 +563,7 @@ class BookmarkFilter $nextField = $lengths['description']['end'] + 1; $lengths['url'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getUrl())]; $nextField = $lengths['url']['end'] + 1; - $lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getTagsString())]; + $lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($tagString)]; return $content; } diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php index 17c37979..9493b0aa 100644 --- a/application/bookmark/LinkUtils.php +++ b/application/bookmark/LinkUtils.php @@ -176,3 +176,49 @@ function is_note($linkUrl) { return isset($linkUrl[0]) && $linkUrl[0] === '?'; } + +/** + * Extract an array of tags from a given tag string, with provided separator. + * + * @param string|null $tags String containing a list of tags separated by $separator. 
+ * @param string $separator Shaarli's default: ' ' (whitespace) + * + * @return array List of tags + */ +function tags_str2array(?string $tags, string $separator): array +{ + // For whitespaces, we use the special \s regex character + $separator = $separator === ' ' ? '\s' : $separator; + + return preg_split('/\s*' . $separator . '+\s*/', trim($tags) ?? '', -1, PREG_SPLIT_NO_EMPTY); +} + +/** + * Return a tag string with provided separator from a list of tags. + * Note that the given array is cleaned up by tags_filter(). + * + * @param array|null $tags List of tags + * @param string $separator + * + * @return string + */ +function tags_array2str(?array $tags, string $separator): string +{ + return implode($separator, tags_filter($tags, $separator)); +} + +/** + * Clean an array of tags: trim + remove empty entries + * + * @param array|null $tags List of tags + * @param string $separator + * + * @return array + */ +function tags_filter(?array $tags, string $separator): array +{ + $trimDefault = " \t\n\r\0\x0B"; + return array_values(array_filter(array_map(function (string $entry) use ($separator, $trimDefault): string { + return trim($entry, $trimDefault . $separator); + }, $tags ?? []))); +} -- cgit v1.2.3