X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;ds=sidebyside;f=application%2Fbookmark%2FBookmarkFilter.php;h=8b41dbb86766991dcc5a46ca665bfe605dc0c8c6;hb=HEAD;hp=6636bbfeec63e6e759154eda8bde4c377337d26c;hpb=e2dff28b44fafcf11a1db7985c50cd40e6945821;p=github%2Fshaarli%2FShaarli.git diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php index 6636bbfe..8b41dbb8 100644 --- a/application/bookmark/BookmarkFilter.php +++ b/application/bookmark/BookmarkFilter.php @@ -1,9 +1,12 @@ bookmarks = $bookmarks; + $this->conf = $conf; + $this->pluginManager = $pluginManager; } /** @@ -77,8 +83,13 @@ class BookmarkFilter * * @throws BookmarkNotFoundException */ - public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false) - { + public function filter( + string $type, + $request, + bool $casesensitive = false, + string $visibility = 'all', + bool $untaggedonly = false + ) { if (!in_array($visibility, ['all', 'public', 'private'])) { $visibility = 'all'; } @@ -100,10 +111,14 @@ class BookmarkFilter $filtered = $this->bookmarks; } if (!empty($request[0])) { - $filtered = (new BookmarkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); + $filtered = (new BookmarkFilter($filtered, $this->conf, $this->pluginManager)) + ->filterTags($request[0], $casesensitive, $visibility) + ; } if (!empty($request[1])) { - $filtered = (new BookmarkFilter($filtered))->filterFulltext($request[1], $visibility); + $filtered = (new BookmarkFilter($filtered, $this->conf, $this->pluginManager)) + ->filterFulltext($request[1], $visibility) + ; } return $filtered; case self::$FILTER_TEXT: @@ -114,8 +129,6 @@ class BookmarkFilter } else { return $this->filterTags($request, $casesensitive, $visibility); } - case self::$FILTER_DAY: - return $this->filterDay($request, $visibility); default: return $this->noFilter($visibility); } @@ -128,15 +141,22 @@ class BookmarkFilter * * @return Bookmark[] filtered bookmarks. */ - private function noFilter($visibility = 'all') + private function noFilter(string $visibility = 'all') { - if ($visibility === 'all') { - return $this->bookmarks; - } - - $out = array(); + $out = []; foreach ($this->bookmarks as $key => $value) { - if ($value->isPrivate() && $visibility === 'private') { + if ( + !$this->pluginManager->filterSearchEntry( + $value, + ['source' => 'no_filter', 'visibility' => $visibility] + ) + ) { + continue; + } + + if ($visibility === 'all') { + $out[$key] = $value; + } elseif ($value->isPrivate() && $visibility === 'private') { $out[$key] = $value; } elseif (!$value->isPrivate() && $visibility === 'public') { $out[$key] = $value; @@ -151,11 +171,11 @@ class BookmarkFilter * * @param string $smallHash permalink hash. * - * @return array $filtered array containing permalink data. + * @return Bookmark[] $filtered array containing permalink data. * - * @throws \Shaarli\Bookmark\Exception\BookmarkNotFoundException if the smallhash doesn't match any link. + * @throws BookmarkNotFoundException if the smallhash doesn't match any link. */ - private function filterSmallHash($smallHash) + private function filterSmallHash(string $smallHash) { foreach ($this->bookmarks as $key => $l) { if ($smallHash == $l->getShortUrl()) { @@ -186,15 +206,15 @@ class BookmarkFilter * @param string $searchterms search query. * @param string $visibility Optional: return only all/private/public bookmarks. * - * @return array search results. + * @return Bookmark[] search results. */ - private function filterFulltext($searchterms, $visibility = 'all') + private function filterFulltext(string $searchterms, string $visibility = 'all') { if (empty($searchterms)) { return $this->noFilter($visibility); } - $filtered = array(); + $filtered = []; $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); $exactRegex = '/"([^"]+)"/'; // Retrieve exact search terms. @@ -206,8 +226,8 @@ class BookmarkFilter $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); // Filter excluding terms and update andSearch. - $excludeSearch = array(); - $andSearch = array(); + $excludeSearch = []; + $andSearch = []; foreach ($explodedSearchAnd as $needle) { if ($needle[0] == '-' && strlen($needle) > 1) { $excludeSearch[] = substr($needle, 1); @@ -217,119 +237,94 @@ class BookmarkFilter } // Iterate over every stored link. - foreach ($this->bookmarks as $id => $link) { + foreach ($this->bookmarks as $id => $bookmark) { + if ( + !$this->pluginManager->filterSearchEntry( + $bookmark, + [ + 'source' => 'fulltext', + 'searchterms' => $searchterms, + 'andSearch' => $andSearch, + 'exactSearch' => $exactSearch, + 'excludeSearch' => $excludeSearch, + 'visibility' => $visibility + ] + ) + ) { + continue; + } + // ignore non private bookmarks when 'privatonly' is on. if ($visibility !== 'all') { - if (!$link->isPrivate() && $visibility === 'private') { + if (!$bookmark->isPrivate() && $visibility === 'private') { continue; - } elseif ($link->isPrivate() && $visibility === 'public') { + } elseif ($bookmark->isPrivate() && $visibility === 'public') { continue; } } - // Concatenate link fields to search across fields. - // Adds a '\' separator for exact search terms. - $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\'; + $lengths = []; + $content = $this->buildFullTextSearchableLink($bookmark, $lengths); // Be optimistic $found = true; + $foundPositions = []; // First, we look for exact term search - for ($i = 0; $i < count($exactSearch) && $found; $i++) { - $found = strpos($content, $exactSearch[$i]) !== false; - } - - // Iterate over keywords, if keyword is not found, + // Then iterate over keywords, if keyword is not found, // no need to check for the others. We want all or nothing. - for ($i = 0; $i < count($andSearch) && $found; $i++) { - $found = strpos($content, $andSearch[$i]) !== false; + foreach ([$exactSearch, $andSearch] as $search) { + for ($i = 0; $i < count($search) && $found !== false; $i++) { + $found = mb_strpos($content, $search[$i]); + if ($found === false) { + break; + } + + $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])]; + } } // Exclude terms. - for ($i = 0; $i < count($excludeSearch) && $found; $i++) { + for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) { $found = strpos($content, $excludeSearch[$i]) === false; } - if ($found) { - $filtered[$id] = $link; + if ($found !== false) { + $bookmark->addAdditionalContentEntry( + 'search_highlight', + $this->postProcessFoundPositions($lengths, $foundPositions) + ); + + $filtered[$id] = $bookmark; } } return $filtered; } - /** - * generate a regex fragment out of a tag - * - * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard - * - * @return string generated regex fragment - */ - private static function tag2regex($tag) - { - $len = strlen($tag); - if (!$len || $tag === "-" || $tag === "*") { - // nothing to search, return empty regex - return ''; - } - if ($tag[0] === "-") { - // query is negated - $i = 1; // use offset to start after '-' character - $regex = '(?!'; // create negative lookahead - } else { - $i = 0; // start at first character - $regex = '(?='; // use positive lookahead - } - $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning - // iterate over string, separating it into placeholder and content - for (; $i < $len; $i++) { - if ($tag[$i] === '*') { - // placeholder found - $regex .= '[^ ]*?'; - } else { - // regular characters - $offset = strpos($tag, '*', $i); - if ($offset === false) { - // no placeholder found, set offset to end of string - $offset = $len; - } - // subtract one, as we want to get before the placeholder or end of string - $offset -= 1; - // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. - $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); - // move $i on - $i = $offset; - } - } - $regex .= '(?:$| ))'; // after the tag may only be a space or the end - return $regex; - } - /** * Returns the list of bookmarks associated with a given list of tags * * You can specify one or more tags, separated by space or a comma, e.g. * print_r($mydb->filterTags('linux programming')); * - * @param string $tags list of tags separated by commas or blank spaces. - * @param bool $casesensitive ignore case if false. - * @param string $visibility Optional: return only all/private/public bookmarks. + * @param string|array $tags list of tags, separated by commas or blank spaces if passed as string. + * @param bool $casesensitive ignore case if false. + * @param string $visibility Optional: return only all/private/public bookmarks. * - * @return array filtered bookmarks. + * @return Bookmark[] filtered bookmarks. */ - public function filterTags($tags, $casesensitive = false, $visibility = 'all') + public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all') { + $tagsSeparator = $this->conf->get('general.tags_separator', ' '); // get single tags (we may get passed an array, even though the docs say different) $inputTags = $tags; if (!is_array($tags)) { // we got an input string, split tags - $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); + $inputTags = tags_str2array($inputTags, $tagsSeparator); } - if (!count($inputTags)) { + if (count($inputTags) === 0) { // no input tags return $this->noFilter($visibility); } @@ -346,7 +341,7 @@ class BookmarkFilter } // build regex from all tags - $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + $re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/'; if (!$casesensitive) { // make regex case insensitive $re .= 'i'; @@ -356,38 +351,54 @@ class BookmarkFilter $filtered = []; // iterate over each link - foreach ($this->bookmarks as $key => $link) { + foreach ($this->bookmarks as $key => $bookmark) { + if ( + !$this->pluginManager->filterSearchEntry( + $bookmark, + [ + 'source' => 'tags', + 'tags' => $tags, + 'casesensitive' => $casesensitive, + 'visibility' => $visibility + ] + ) + ) { + continue; + } + // check level of visibility // ignore non private bookmarks when 'privateonly' is on. if ($visibility !== 'all') { - if (!$link->isPrivate() && $visibility === 'private') { + if (!$bookmark->isPrivate() && $visibility === 'private') { continue; - } elseif ($link->isPrivate() && $visibility === 'public') { + } elseif ($bookmark->isPrivate() && $visibility === 'public') { continue; } } - $search = $link->getTagsString(); // build search string, start with tags of current link - if (strlen(trim($link->getDescription())) && strpos($link->getDescription(), '#') !== false) { + // build search string, start with tags of current link + $search = $bookmark->getTagsString($tagsSeparator); + if (strlen(trim($bookmark->getDescription())) && strpos($bookmark->getDescription(), '#') !== false) { // description given and at least one possible tag found - $descTags = array(); + $descTags = []; // find all tags in the form of #tag in the description preg_match_all( '/(?getDescription(), + $bookmark->getDescription(), $descTags ); if (count($descTags[1])) { // there were some tags in the description, add them to the search string - $search .= ' ' . implode(' ', $descTags[1]); + $search .= $tagsSeparator . tags_array2str($descTags[1], $tagsSeparator); } - }; + } // match regular expression with search string if (!preg_match($re, $search)) { // this entry does _not_ match our regex continue; } - $filtered[$key] = $link; + $filtered[$key] = $bookmark; } + return $filtered; } @@ -396,22 +407,31 @@ class BookmarkFilter * * @param string $visibility return only all/private/public bookmarks. * - * @return array filtered bookmarks. + * @return Bookmark[] filtered bookmarks. */ - public function filterUntagged($visibility) + public function filterUntagged(string $visibility) { $filtered = []; - foreach ($this->bookmarks as $key => $link) { + foreach ($this->bookmarks as $key => $bookmark) { + if ( + !$this->pluginManager->filterSearchEntry( + $bookmark, + ['source' => 'untagged', 'visibility' => $visibility] + ) + ) { + continue; + } + if ($visibility !== 'all') { - if (!$link->isPrivate() && $visibility === 'private') { + if (!$bookmark->isPrivate() && $visibility === 'private') { continue; - } elseif ($link->isPrivate() && $visibility === 'public') { + } elseif ($bookmark->isPrivate() && $visibility === 'public') { continue; } } - if (empty(trim($link->getTagsString()))) { - $filtered[$key] = $link; + if (empty($bookmark->getTags())) { + $filtered[$key] = $bookmark; } } @@ -419,55 +439,142 @@ class BookmarkFilter } /** - * Returns the list of articles for a given day, chronologically sorted + * Convert a list of tags (str) to an array. Also + * - handle case sensitivity. + * - accepts spaces commas as separator. * - * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g. - * print_r($mydb->filterDay('20120125')); + * @param string $tags string containing a list of tags. + * @param bool $casesensitive will convert everything to lowercase if false. * - * @param string $day day to filter. - * @param string $visibility return only all/private/public bookmarks. + * @return string[] filtered tags string. + */ + public static function tagsStrToArray(string $tags, bool $casesensitive): array + { + // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) + $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); + $tagsOut = str_replace(',', ' ', $tagsOut); + + return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); + } - * @return array all link matching given day. + /** + * generate a regex fragment out of a tag * - * @throws Exception if date format is invalid. + * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard + * + * @return string generated regex fragment */ - public function filterDay($day, $visibility) + protected function tag2regex(string $tag): string { - if (!checkDateFormat('Ymd', $day)) { - throw new Exception('Invalid date format'); + $tagsSeparator = $this->conf->get('general.tags_separator', ' '); + $len = strlen($tag); + if (!$len || $tag === "-" || $tag === "*") { + // nothing to search, return empty regex + return ''; } + if ($tag[0] === "-") { + // query is negated + $i = 1; // use offset to start after '-' character + $regex = '(?!'; // create negative lookahead + } else { + $i = 0; // start at first character + $regex = '(?='; // use positive lookahead + } + // before tag may only be the separator or the beginning + $regex .= '.*(?:^|' . $tagsSeparator . ')'; + // iterate over string, separating it into placeholder and content + for (; $i < $len; $i++) { + if ($tag[$i] === '*') { + // placeholder found + $regex .= '[^' . $tagsSeparator . ']*?'; + } else { + // regular characters + $offset = strpos($tag, '*', $i); + if ($offset === false) { + // no placeholder found, set offset to end of string + $offset = $len; + } + // subtract one, as we want to get before the placeholder or end of string + $offset -= 1; + // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. + $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); + // move $i on + $i = $offset; + } + } + // after the tag may only be the separator or the end + $regex .= '(?:$|' . $tagsSeparator . '))'; + return $regex; + } - $filtered = []; - foreach ($this->bookmarks as $key => $bookmark) { - if ($visibility === static::$PUBLIC && $bookmark->isPrivate()) { + /** + * This method finalize the content of the foundPositions array, + * by associated all search results to their associated bookmark field, + * making sure that there is no overlapping results, etc. + * + * @param array $fieldLengths Start and end positions of every bookmark fields in the aggregated bookmark content. + * @param array $foundPositions Positions where the search results were found in the aggregated content. + * + * @return array Updated $foundPositions, by bookmark field. + */ + protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array + { + // Sort results by starting position ASC. + usort($foundPositions, function (array $entryA, array $entryB): int { + return $entryA['start'] > $entryB['start'] ? 1 : -1; + }); + + $out = []; + $currentMax = -1; + foreach ($foundPositions as $foundPosition) { + // we do not allow overlapping highlights + if ($foundPosition['start'] < $currentMax) { continue; } - if ($bookmark->getCreated()->format('Ymd') == $day) { - $filtered[$key] = $bookmark; + $currentMax = $foundPosition['end']; + foreach ($fieldLengths as $part => $length) { + if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) { + continue; + } + + $out[$part][] = [ + 'start' => $foundPosition['start'] - $length['start'], + 'end' => $foundPosition['end'] - $length['start'], + ]; + break; } } - // sort by date ASC - return array_reverse($filtered, true); + return $out; } /** - * Convert a list of tags (str) to an array. Also - * - handle case sensitivity. - * - accepts spaces commas as separator. + * Concatenate link fields to search across fields. Adds a '\' separator for exact search terms. + * Also populate $length array with starting and ending positions of every bookmark field + * inside concatenated content. * - * @param string $tags string containing a list of tags. - * @param bool $casesensitive will convert everything to lowercase if false. + * @param Bookmark $link + * @param array $lengths (by reference) * - * @return array filtered tags string. + * @return string Lowercase concatenated fields content. */ - public static function tagsStrToArray($tags, $casesensitive) + protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string { - // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) - $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); - $tagsOut = str_replace(',', ' ', $tagsOut); - - return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); + $tagString = $link->getTagsString($this->conf->get('general.tags_separator', ' ')); + $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') . '\\'; + $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') . '\\'; + $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') . '\\'; + $content .= mb_convert_case($tagString, MB_CASE_LOWER, 'UTF-8') . '\\'; + + $lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())]; + $nextField = $lengths['title']['end'] + 1; + $lengths['description'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getDescription())]; + $nextField = $lengths['description']['end'] + 1; + $lengths['url'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getUrl())]; + $nextField = $lengths['url']['end'] + 1; + $lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($tagString)]; + + return $content; } }