X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FLinkFilter.php;h=91c79905280851473878059a7e6d8e57dd9b3265;hb=f24896b237e40718fb6eaa2869592eb0855a47fd;hp=ceb47d16c21133e670c20066d126aa23019ed816;hpb=1e7331126d81a5759ab91c221f7e0f164aeebfb5;p=github%2Fshaarli%2FShaarli.git diff --git a/application/LinkFilter.php b/application/LinkFilter.php index ceb47d16..91c79905 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php @@ -1,5 +1,7 @@ filterSmallHash($request); - break; + case self::$FILTER_TAG | self::$FILTER_TEXT: // == "vuotext" + $noRequest = empty($request) || (empty($request[0]) && empty($request[1])); + if ($noRequest) { + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } + return $this->noFilter($visibility); + } + if ($untaggedonly) { + $filtered = $this->filterUntagged($visibility); + } else { + $filtered = $this->links; + } + if (!empty($request[0])) { + $filtered = (new LinkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); + } + if (!empty($request[1])) { + $filtered = (new LinkFilter($filtered))->filterFulltext($request[1], $visibility); + } + return $filtered; case self::$FILTER_TEXT: - return $this->filterFulltext($request, $privateonly); - break; + return $this->filterFulltext($request, $visibility); case self::$FILTER_TAG: - return $this->filterTags($request, $casesensitive, $privateonly); - break; + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } else { + return $this->filterTags($request, $casesensitive, $visibility); + } case self::$FILTER_DAY: return $this->filterDay($request); - break; default: - return $this->noFilter($privateonly); + return $this->noFilter($visibility); } } /** * Unknown filter, but handle private only. * - * @param bool $privateonly returns private link only if true. + * @param string $visibility Optional: return only all/private/public links * * @return array filtered links. */ - private function noFilter($privateonly = false) + private function noFilter($visibility = 'all') { - if (! $privateonly) { - krsort($this->links); + if ($visibility === 'all') { return $this->links; } $out = array(); - foreach ($this->links as $value) { - if ($value['private']) { - $out[$value['linkdate']] = $value; + foreach ($this->links as $key => $value) { + if ($value['private'] && $visibility === 'private') { + $out[$key] = $value; + } elseif (! $value['private'] && $visibility === 'public') { + $out[$key] = $value; } } - krsort($out); return $out; } @@ -101,17 +133,24 @@ class LinkFilter * @param string $smallHash permalink hash. * * @return array $filtered array containing permalink data. + * + * @throws LinkNotFoundException if the smallhash doesn't match any link. */ private function filterSmallHash($smallHash) { $filtered = array(); - foreach ($this->links as $l) { - if ($smallHash == smallHash($l['linkdate'])) { + foreach ($this->links as $key => $l) { + if ($smallHash == $l['shorturl']) { // Yes, this is ugly and slow - $filtered[$l['linkdate']] = $l; + $filtered[$key] = $l; return $filtered; } } + + if (empty($filtered)) { + throw new LinkNotFoundException(); + } + return $filtered; } @@ -120,7 +159,9 @@ class LinkFilter * * Searches: * - in the URLs, title and description; - * - are case-insensitive. + * - are case-insensitive; + * - terms surrounded by quotes " are exact terms search. + * - terms starting with a dash - are excluded (except exact terms). * * Example: * print_r($mydb->filterFulltext('hollandais')); @@ -130,71 +171,130 @@ class LinkFilter * - see https://github.com/shaarli/Shaarli/issues/75 for examples * * @param string $searchterms search query. - * @param bool $privateonly return only private links if true. + * @param string $visibility Optional: return only all/private/public links. * * @return array search results. */ - private function filterFulltext($searchterms, $privateonly = false) + private function filterFulltext($searchterms, $visibility = 'all') { + if (empty($searchterms)) { + return $this->noFilter($visibility); + } + + $filtered = array(); $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); - $explodedSearch = explode(' ', trim($search)); - $keys = array('title', 'description', 'url', 'tags'); - $found = true; - $searchExactPhrase = false; - - // Check if we're using double-quotes to search for the exact string - if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { - $searchExactPhrase = true; - - // Remove the double-quotes as they are not what we search for - $search = substr($search, 1, -1); + $exactRegex = '/"([^"]+)"/'; + // Retrieve exact search terms. + preg_match_all($exactRegex, $search, $exactSearch); + $exactSearch = array_values(array_filter($exactSearch[1])); + + // Remove exact search terms to get AND terms search. + $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); + $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); + + // Filter excluding terms and update andSearch. + $excludeSearch = array(); + $andSearch = array(); + foreach ($explodedSearchAnd as $needle) { + if ($needle[0] == '-' && strlen($needle) > 1) { + $excludeSearch[] = substr($needle, 1); + } else { + $andSearch[] = $needle; + } } - // Iterate over every stored link. - foreach ($this->links as $link) { + $keys = array('title', 'description', 'url', 'tags'); + + // Iterate over every stored link. + foreach ($this->links as $id => $link) { // ignore non private links when 'privatonly' is on. - if (! $link['private'] && $privateonly === true) { - continue; + if ($visibility !== 'all') { + if (! $link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } } - // Iterate over searchable link fields. + // Concatenate link fields to search across fields. + // Adds a '\' separator for exact search terms. + $content = ''; foreach ($keys as $key) { - // Be optimistic - $found = true; - - // FIXME: Find a better word for where you're searching in - $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); - - // When searching for the phrase, check if it's in the haystack... - if ( $searchExactPhrase && strpos($haystack, $search) !== false) { - break; - } - else { - // Iterate over keywords, if keyword is not found, - // no need to check for the others. We want all or nothing. - foreach($explodedSearch as $keyword) { - if(strpos($haystack, $keyword) === false) { - $found = false; - break; - } - } - } - - // One of the fields of the link matches, no need to check the other. - if ($found) { - break; - } + $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; + } + + // Be optimistic + $found = true; + + // First, we look for exact term search + for ($i = 0; $i < count($exactSearch) && $found; $i++) { + $found = strpos($content, $exactSearch[$i]) !== false; } - + + // Iterate over keywords, if keyword is not found, + // no need to check for the others. We want all or nothing. + for ($i = 0; $i < count($andSearch) && $found; $i++) { + $found = strpos($content, $andSearch[$i]) !== false; + } + + // Exclude terms. + for ($i = 0; $i < count($excludeSearch) && $found; $i++) { + $found = strpos($content, $excludeSearch[$i]) === false; + } + if ($found) { - $filtered[$link['linkdate']] = $link; + $filtered[$id] = $link; } } - krsort($filtered); return $filtered; } + /** + * generate a regex fragment out of a tag + * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard + * @return string generated regex fragment + */ + private static function tag2regex($tag) + { + $len = strlen($tag); + if (!$len || $tag === "-" || $tag === "*") { + // nothing to search, return empty regex + return ''; + } + if ($tag[0] === "-") { + // query is negated + $i = 1; // use offset to start after '-' character + $regex = '(?!'; // create negative lookahead + } else { + $i = 0; // start at first character + $regex = '(?='; // use positive lookahead + } + $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning + // iterate over string, separating it into placeholder and content + for (; $i < $len; $i++) { + if ($tag[$i] === '*') { + // placeholder found + $regex .= '[^ ]*?'; + } else { + // regular characters + $offset = strpos($tag, '*', $i); + if ($offset === false) { + // no placeholder found, set offset to end of string + $offset = $len; + } + // subtract one, as we want to get before the placeholder or end of string + $offset -= 1; + // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. + $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); + // move $i on + $i = $offset; + } + } + $regex .= '(?:$| ))'; // after the tag may only be a space or the end + return $regex; + } + /** * Returns the list of links associated with a given list of tags * @@ -203,42 +303,94 @@ class LinkFilter * * @param string $tags list of tags separated by commas or blank spaces. * @param bool $casesensitive ignore case if false. - * @param bool $privateonly returns private links only. + * @param string $visibility Optional: return only all/private/public links. * * @return array filtered links. */ - public function filterTags($tags, $casesensitive = false, $privateonly = false) + public function filterTags($tags, $casesensitive = false, $visibility = 'all') { - $searchtags = self::tagsStrToArray($tags, $casesensitive); - $filtered = array(); - if (empty($searchtags)) { - return $filtered; + // get single tags (we may get passed an array, even though the docs say different) + $inputTags = $tags; + if (!is_array($tags)) { + // we got an input string, split tags + $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); } - foreach ($this->links as $link) { - // ignore non private links when 'privatonly' is on. - if (! $link['private'] && $privateonly === true) { + if (!count($inputTags)) { + // no input tags + return $this->noFilter($visibility); + } + + // build regex from all tags + $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + if (!$casesensitive) { + // make regex case insensitive + $re .= 'i'; + } + + // create resulting array + $filtered = array(); + + // iterate over each link + foreach ($this->links as $key => $link) { + // check level of visibility + // ignore non private links when 'privateonly' is on. + if ($visibility !== 'all') { + if (! $link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + $search = $link['tags']; // build search string, start with tags of current link + if (strlen(trim($link['description'])) && strpos($link['description'], '#') !== false) { + // description given and at least one possible tag found + $descTags = array(); + // find all tags in the form of #tag in the description + preg_match_all( + '/(?links as $key => $link) { + if ($visibility !== 'all') { + if (! $link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; } } - if ($found) { - $filtered[$link['linkdate']] = $link; + if (empty(trim($link['tags']))) { + $filtered[$key] = $link; } } - krsort($filtered); + return $filtered; } @@ -261,13 +413,14 @@ class LinkFilter } $filtered = array(); - foreach ($this->links as $l) { - if (startsWith($l['linkdate'], $day)) { - $filtered[$l['linkdate']] = $l; + foreach ($this->links as $key => $l) { + if ($l['created']->format('Ymd') == $day) { + $filtered[$key] = $l; } } - ksort($filtered); - return $filtered; + + // sort by date ASC + return array_reverse($filtered, true); } /** @@ -279,13 +432,24 @@ class LinkFilter * @param bool $casesensitive will convert everything to lowercase if false. * * @return array filtered tags string. - */ + */ public static function tagsStrToArray($tags, $casesensitive) { // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); $tagsOut = str_replace(',', ' ', $tagsOut); - return array_filter(explode(' ', trim($tagsOut)), 'strlen'); + return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); + } +} + +class LinkNotFoundException extends Exception +{ + /** + * LinkNotFoundException constructor. + */ + public function __construct() + { + $this->message = t('The link you are trying to reach does not exist or has been deleted.'); } }