X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2Fbookmark%2FBookmarkFilter.php;h=c79386ea7ba750db4d1d7d7974ea7564154e943a;hb=156061d445fd23d033a52f84954484a3349c988a;hp=797a36b8ecd54c34a7aaea1ac63d3db0c2496bc8;hpb=a975d97a8da64864c3c49f1c54f571eb4ea5b81a;p=github%2Fshaarli%2FShaarli.git diff --git a/application/bookmark/BookmarkFilter.php b/application/bookmark/BookmarkFilter.php index 797a36b8..c79386ea 100644 --- a/application/bookmark/BookmarkFilter.php +++ b/application/bookmark/BookmarkFilter.php @@ -1,5 +1,7 @@ filterTags($request, $casesensitive, $visibility); } case self::$FILTER_DAY: - return $this->filterDay($request); + return $this->filterDay($request, $visibility); default: return $this->noFilter($visibility); } @@ -128,7 +135,7 @@ class BookmarkFilter * * @return Bookmark[] filtered bookmarks. */ - private function noFilter($visibility = 'all') + private function noFilter(string $visibility = 'all') { if ($visibility === 'all') { return $this->bookmarks; @@ -151,11 +158,11 @@ class BookmarkFilter * * @param string $smallHash permalink hash. * - * @return array $filtered array containing permalink data. + * @return Bookmark[] $filtered array containing permalink data. * - * @throws \Shaarli\Bookmark\Exception\BookmarkNotFoundException if the smallhash doesn't match any link. + * @throws BookmarkNotFoundException if the smallhash doesn't match any link. */ - private function filterSmallHash($smallHash) + private function filterSmallHash(string $smallHash) { foreach ($this->bookmarks as $key => $l) { if ($smallHash == $l->getShortUrl()) { @@ -186,15 +193,15 @@ class BookmarkFilter * @param string $searchterms search query. * @param string $visibility Optional: return only all/private/public bookmarks. * - * @return array search results. + * @return Bookmark[] search results. */ - private function filterFulltext($searchterms, $visibility = 'all') + private function filterFulltext(string $searchterms, string $visibility = 'all') { if (empty($searchterms)) { return $this->noFilter($visibility); } - $filtered = array(); + $filtered = []; $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); $exactRegex = '/"([^"]+)"/'; // Retrieve exact search terms. @@ -206,8 +213,8 @@ class BookmarkFilter $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); // Filter excluding terms and update andSearch. - $excludeSearch = array(); - $andSearch = array(); + $excludeSearch = []; + $andSearch = []; foreach ($explodedSearchAnd as $needle) { if ($needle[0] == '-' && strlen($needle) > 1) { $excludeSearch[] = substr($needle, 1); @@ -227,33 +234,38 @@ class BookmarkFilter } } - // Concatenate link fields to search across fields. - // Adds a '\' separator for exact search terms. - $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\'; - $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\'; + $lengths = []; + $content = $this->buildFullTextSearchableLink($link, $lengths); // Be optimistic $found = true; + $foundPositions = []; // First, we look for exact term search - for ($i = 0; $i < count($exactSearch) && $found; $i++) { - $found = strpos($content, $exactSearch[$i]) !== false; - } - - // Iterate over keywords, if keyword is not found, + // Then iterate over keywords, if keyword is not found, // no need to check for the others. We want all or nothing. - for ($i = 0; $i < count($andSearch) && $found; $i++) { - $found = strpos($content, $andSearch[$i]) !== false; + foreach ([$exactSearch, $andSearch] as $search) { + for ($i = 0; $i < count($search) && $found !== false; $i++) { + $found = mb_strpos($content, $search[$i]); + if ($found === false) { + break; + } + + $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])]; + } } // Exclude terms. - for ($i = 0; $i < count($excludeSearch) && $found; $i++) { + for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) { $found = strpos($content, $excludeSearch[$i]) === false; } - if ($found) { + if ($found !== false) { + $link->addAdditionalContentEntry( + 'search_highlight', + $this->postProcessFoundPositions($lengths, $foundPositions) + ); + $filtered[$id] = $link; } } @@ -268,7 +280,7 @@ class BookmarkFilter * * @return string generated regex fragment */ - private static function tag2regex($tag) + private static function tag2regex(string $tag): string { $len = strlen($tag); if (!$len || $tag === "-" || $tag === "*") { @@ -314,13 +326,13 @@ class BookmarkFilter * You can specify one or more tags, separated by space or a comma, e.g. * print_r($mydb->filterTags('linux programming')); * - * @param string $tags list of tags separated by commas or blank spaces. - * @param bool $casesensitive ignore case if false. - * @param string $visibility Optional: return only all/private/public bookmarks. + * @param string|array $tags list of tags, separated by commas or blank spaces if passed as string. + * @param bool $casesensitive ignore case if false. + * @param string $visibility Optional: return only all/private/public bookmarks. * - * @return array filtered bookmarks. + * @return Bookmark[] filtered bookmarks. */ - public function filterTags($tags, $casesensitive = false, $visibility = 'all') + public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all') { // get single tags (we may get passed an array, even though the docs say different) $inputTags = $tags; @@ -396,9 +408,9 @@ class BookmarkFilter * * @param string $visibility return only all/private/public bookmarks. * - * @return array filtered bookmarks. + * @return Bookmark[] filtered bookmarks. */ - public function filterUntagged($visibility) + public function filterUntagged(string $visibility) { $filtered = []; foreach ($this->bookmarks as $key => $link) { @@ -425,21 +437,26 @@ class BookmarkFilter * print_r($mydb->filterDay('20120125')); * * @param string $day day to filter. - * - * @return array all link matching given day. + * @param string $visibility return only all/private/public bookmarks. + + * @return Bookmark[] all link matching given day. * * @throws Exception if date format is invalid. */ - public function filterDay($day) + public function filterDay(string $day, string $visibility) { if (!checkDateFormat('Ymd', $day)) { throw new Exception('Invalid date format'); } $filtered = []; - foreach ($this->bookmarks as $key => $l) { - if ($l->getCreated()->format('Ymd') == $day) { - $filtered[$key] = $l; + foreach ($this->bookmarks as $key => $bookmark) { + if ($visibility === static::$PUBLIC && $bookmark->isPrivate()) { + continue; + } + + if ($bookmark->getCreated()->format('Ymd') == $day) { + $filtered[$key] = $bookmark; } } @@ -455,9 +472,9 @@ class BookmarkFilter * @param string $tags string containing a list of tags. * @param bool $casesensitive will convert everything to lowercase if false. * - * @return array filtered tags string. + * @return string[] filtered tags string. */ - public static function tagsStrToArray($tags, $casesensitive) + public static function tagsStrToArray(string $tags, bool $casesensitive): array { // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); @@ -465,4 +482,74 @@ class BookmarkFilter return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); } + + /** + * This method finalize the content of the foundPositions array, + * by associated all search results to their associated bookmark field, + * making sure that there is no overlapping results, etc. + * + * @param array $fieldLengths Start and end positions of every bookmark fields in the aggregated bookmark content. + * @param array $foundPositions Positions where the search results were found in the aggregated content. + * + * @return array Updated $foundPositions, by bookmark field. + */ + protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array + { + // Sort results by starting position ASC. + usort($foundPositions, function (array $entryA, array $entryB): int { + return $entryA['start'] > $entryB['start'] ? 1 : -1; + }); + + $out = []; + $currentMax = -1; + foreach ($foundPositions as $foundPosition) { + // we do not allow overlapping highlights + if ($foundPosition['start'] < $currentMax) { + continue; + } + + $currentMax = $foundPosition['end']; + foreach ($fieldLengths as $part => $length) { + if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) { + continue; + } + + $out[$part][] = [ + 'start' => $foundPosition['start'] - $length['start'], + 'end' => $foundPosition['end'] - $length['start'], + ]; + break; + } + } + + return $out; + } + + /** + * Concatenate link fields to search across fields. Adds a '\' separator for exact search terms. + * Also populate $length array with starting and ending positions of every bookmark field + * inside concatenated content. + * + * @param Bookmark $link + * @param array $lengths (by reference) + * + * @return string Lowercase concatenated fields content. + */ + protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string + { + $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\'; + $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\'; + $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\'; + $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\'; + + $lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())]; + $nextField = $lengths['title']['end'] + 1; + $lengths['description'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getDescription())]; + $nextField = $lengths['description']['end'] + 1; + $lengths['url'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getUrl())]; + $nextField = $lengths['url']['end'] + 1; + $lengths['tags'] = ['start' => $nextField, 'end' => $nextField + mb_strlen($link->getTagsString())]; + + return $content; + } }