X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FLinkFilter.php;h=8f147974e9d5cf232f3cf3f509b3401fcf5e6b84;hb=1004742f09b55ff781c13745781b9a7e90986faa;hp=e693b28428ba3951d2a6898b75941e0c5bd50572;hpb=92a381f51737de1e4a03c482fe7c43da311ad556;p=github%2Fshaarli%2FShaarli.git diff --git a/application/LinkFilter.php b/application/LinkFilter.php index e693b284..8f147974 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php @@ -28,12 +28,17 @@ class LinkFilter public static $FILTER_DAY = 'FILTER_DAY'; /** - * @var array all available links. + * @var string Allowed characters for hashtags (regex syntax). + */ + public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}'; + + /** + * @var LinkDB all available links. */ private $links; /** - * @param array $links initialization. + * @param LinkDB $links initialization. */ public function __construct($links) { @@ -46,61 +51,77 @@ class LinkFilter * @param string $type Type of filter (eg. tags, permalink, etc.). * @param mixed $request Filter content. * @param bool $casesensitive Optional: Perform case sensitive filter if true. - * @param bool $privateonly Optional: Only returns private links if true. + * @param string $visibility Optional: return only all/private/public links + * @param string $untaggedonly Optional: return only untagged links. Applies only if $type includes FILTER_TAG * * @return array filtered link list. */ - public function filter($type, $request, $casesensitive = false, $privateonly = false) + public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false) { - switch($type) { + if (! in_array($visibility, ['all', 'public', 'private'])) { + $visibility = 'all'; + } + + switch ($type) { case self::$FILTER_HASH: return $this->filterSmallHash($request); - case self::$FILTER_TAG | self::$FILTER_TEXT: - if (!empty($request)) { - $filtered = $this->links; - if (isset($request[0])) { - $filtered = $this->filterTags($request[0], $casesensitive, $privateonly); + case self::$FILTER_TAG | self::$FILTER_TEXT: // == "vuotext" + $noRequest = empty($request) || (empty($request[0]) && empty($request[1])); + if ($noRequest) { + if ($untaggedonly) { + return $this->filterUntagged($visibility); } - if (isset($request[1])) { - $lf = new LinkFilter($filtered); - $filtered = $lf->filterFulltext($request[1], $privateonly); - } - return $filtered; + return $this->noFilter($visibility); } - return $this->noFilter($privateonly); + if ($untaggedonly) { + $filtered = $this->filterUntagged($visibility); + } else { + $filtered = $this->links; + } + if (!empty($request[0])) { + $filtered = (new LinkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); + } + if (!empty($request[1])) { + $filtered = (new LinkFilter($filtered))->filterFulltext($request[1], $visibility); + } + return $filtered; case self::$FILTER_TEXT: - return $this->filterFulltext($request, $privateonly); + return $this->filterFulltext($request, $visibility); case self::$FILTER_TAG: - return $this->filterTags($request, $casesensitive, $privateonly); + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } else { + return $this->filterTags($request, $casesensitive, $visibility); + } case self::$FILTER_DAY: return $this->filterDay($request); default: - return $this->noFilter($privateonly); + return $this->noFilter($visibility); } } /** * Unknown filter, but handle private only. * - * @param bool $privateonly returns private link only if true. + * @param string $visibility Optional: return only all/private/public links * * @return array filtered links. */ - private function noFilter($privateonly = false) + private function noFilter($visibility = 'all') { - if (! $privateonly) { - krsort($this->links); + if ($visibility === 'all') { return $this->links; } $out = array(); - foreach ($this->links as $value) { - if ($value['private']) { - $out[$value['linkdate']] = $value; + foreach ($this->links as $key => $value) { + if ($value['private'] && $visibility === 'private') { + $out[$key] = $value; + } elseif (! $value['private'] && $visibility === 'public') { + $out[$key] = $value; } } - krsort($out); return $out; } @@ -116,10 +137,10 @@ class LinkFilter private function filterSmallHash($smallHash) { $filtered = array(); - foreach ($this->links as $l) { - if ($smallHash == smallHash($l['linkdate'])) { + foreach ($this->links as $key => $l) { + if ($smallHash == $l['shorturl']) { // Yes, this is ugly and slow - $filtered[$l['linkdate']] = $l; + $filtered[$key] = $l; return $filtered; } } @@ -148,14 +169,14 @@ class LinkFilter * - see https://github.com/shaarli/Shaarli/issues/75 for examples * * @param string $searchterms search query. - * @param bool $privateonly return only private links if true. + * @param string $visibility Optional: return only all/private/public links. * * @return array search results. */ - private function filterFulltext($searchterms, $privateonly = false) + private function filterFulltext($searchterms, $visibility = 'all') { if (empty($searchterms)) { - return $this->links; + return $this->noFilter($visibility); } $filtered = array(); @@ -183,11 +204,14 @@ class LinkFilter $keys = array('title', 'description', 'url', 'tags'); // Iterate over every stored link. - foreach ($this->links as $link) { - + foreach ($this->links as $id => $link) { // ignore non private links when 'privatonly' is on. - if (! $link['private'] && $privateonly === true) { - continue; + if ($visibility !== 'all') { + if (! $link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } } // Concatenate link fields to search across fields. @@ -217,14 +241,58 @@ class LinkFilter } if ($found) { - $filtered[$link['linkdate']] = $link; + $filtered[$id] = $link; } } - krsort($filtered); return $filtered; } + /** + * generate a regex fragment out of a tag + * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard + * @return string generated regex fragment + */ + private static function tag2regex($tag) + { + $len = strlen($tag); + if (!$len || $tag === "-" || $tag === "*") { + // nothing to search, return empty regex + return ''; + } + if ($tag[0] === "-") { + // query is negated + $i = 1; // use offset to start after '-' character + $regex = '(?!'; // create negative lookahead + } else { + $i = 0; // start at first character + $regex = '(?='; // use positive lookahead + } + $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning + // iterate over string, separating it into placeholder and content + for (; $i < $len; $i++) { + if ($tag[$i] === '*') { + // placeholder found + $regex .= '[^ ]*?'; + } else { + // regular characters + $offset = strpos($tag, '*', $i); + if ($offset === false) { + // no placeholder found, set offset to end of string + $offset = $len; + } + // subtract one, as we want to get before the placeholder or end of string + $offset -= 1; + // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. + $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); + // move $i on + $i = $offset; + } + } + $regex .= '(?:$| ))'; // after the tag may only be a space or the end + return $regex; + } + /** * Returns the list of links associated with a given list of tags * @@ -233,48 +301,94 @@ class LinkFilter * * @param string $tags list of tags separated by commas or blank spaces. * @param bool $casesensitive ignore case if false. - * @param bool $privateonly returns private links only. + * @param string $visibility Optional: return only all/private/public links. * * @return array filtered links. */ - public function filterTags($tags, $casesensitive = false, $privateonly = false) + public function filterTags($tags, $casesensitive = false, $visibility = 'all') { - // Implode if array for clean up. - $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags; - if (empty($tags)) { - return $this->links; + // get single tags (we may get passed an array, even though the docs say different) + $inputTags = $tags; + if (!is_array($tags)) { + // we got an input string, split tags + $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); } - $searchtags = self::tagsStrToArray($tags, $casesensitive); - $filtered = array(); - if (empty($searchtags)) { - return $filtered; + if (!count($inputTags)) { + // no input tags + return $this->noFilter($visibility); } - foreach ($this->links as $link) { - // ignore non private links when 'privatonly' is on. - if (! $link['private'] && $privateonly === true) { + // build regex from all tags + $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + if (!$casesensitive) { + // make regex case insensitive + $re .= 'i'; + } + + // create resulting array + $filtered = array(); + + // iterate over each link + foreach ($this->links as $key => $link) { + // check level of visibility + // ignore non private links when 'privateonly' is on. + if ($visibility !== 'all') { + if (! $link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + $search = $link['tags']; // build search string, start with tags of current link + if (strlen(trim($link['description'])) && strpos($link['description'], '#') !== false) { + // description given and at least one possible tag found + $descTags = array(); + // find all tags in the form of #tag in the description + preg_match_all( + '/(?links as $key => $link) { + if ($visibility !== 'all') { + if (! $link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; } } - if ($found) { - $filtered[$link['linkdate']] = $link; + if (empty(trim($link['tags']))) { + $filtered[$key] = $link; } } - krsort($filtered); + return $filtered; } @@ -297,13 +411,14 @@ class LinkFilter } $filtered = array(); - foreach ($this->links as $l) { - if (startsWith($l['linkdate'], $day)) { - $filtered[$l['linkdate']] = $l; + foreach ($this->links as $key => $l) { + if ($l['created']->format('Ymd') == $day) { + $filtered[$key] = $l; } } - ksort($filtered); - return $filtered; + + // sort by date ASC + return array_reverse($filtered, true); } /** @@ -315,18 +430,24 @@ class LinkFilter * @param bool $casesensitive will convert everything to lowercase if false. * * @return array filtered tags string. - */ + */ public static function tagsStrToArray($tags, $casesensitive) { // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); $tagsOut = str_replace(',', ' ', $tagsOut); - return array_values(array_filter(explode(' ', trim($tagsOut)), 'strlen')); + return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); } } class LinkNotFoundException extends Exception { - protected $message = 'The link you are trying to reach does not exist or has been deleted.'; + /** + * LinkNotFoundException constructor. + */ + public function __construct() + { + $this->message = t('The link you are trying to reach does not exist or has been deleted.'); + } }