X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=application%2FLinkFilter.php;h=daa6d9cc26a8ed139f34581ff619d98969415073;hb=d592daea8343bb4dfecff5d97e93699581ccc58c;hp=ceb47d16c21133e670c20066d126aa23019ed816;hpb=21979ff11ceee0042642ac17147858a4155d54c5;p=github%2Fshaarli%2FShaarli.git diff --git a/application/LinkFilter.php b/application/LinkFilter.php index ceb47d16..daa6d9cc 100644 --- a/application/LinkFilter.php +++ b/application/LinkFilter.php @@ -28,12 +28,17 @@ class LinkFilter public static $FILTER_DAY = 'FILTER_DAY'; /** - * @var array all available links. + * @var string Allowed characters for hashtags (regex syntax). + */ + public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}'; + + /** + * @var LinkDB all available links. */ private $links; /** - * @param array $links initialization. + * @param LinkDB $links initialization. */ public function __construct($links) { @@ -44,7 +49,7 @@ class LinkFilter * Filter links according to parameters. * * @param string $type Type of filter (eg. tags, permalink, etc.). - * @param string $request Filter content. + * @param mixed $request Filter content. * @param bool $casesensitive Optional: Perform case sensitive filter if true. * @param bool $privateonly Optional: Only returns private links if true. * @@ -55,16 +60,25 @@ class LinkFilter switch($type) { case self::$FILTER_HASH: return $this->filterSmallHash($request); - break; + case self::$FILTER_TAG | self::$FILTER_TEXT: + if (!empty($request)) { + $filtered = $this->links; + if (isset($request[0])) { + $filtered = $this->filterTags($request[0], $casesensitive, $privateonly); + } + if (isset($request[1])) { + $lf = new LinkFilter($filtered); + $filtered = $lf->filterFulltext($request[1], $privateonly); + } + return $filtered; + } + return $this->noFilter($privateonly); case self::$FILTER_TEXT: return $this->filterFulltext($request, $privateonly); - break; case self::$FILTER_TAG: return $this->filterTags($request, $casesensitive, $privateonly); - break; case self::$FILTER_DAY: return $this->filterDay($request); - break; default: return $this->noFilter($privateonly); } @@ -80,18 +94,16 @@ class LinkFilter private function noFilter($privateonly = false) { if (! $privateonly) { - krsort($this->links); return $this->links; } $out = array(); - foreach ($this->links as $value) { + foreach ($this->links as $key => $value) { if ($value['private']) { - $out[$value['linkdate']] = $value; + $out[$key] = $value; } } - krsort($out); return $out; } @@ -101,17 +113,24 @@ class LinkFilter * @param string $smallHash permalink hash. * * @return array $filtered array containing permalink data. + * + * @throws LinkNotFoundException if the smallhash doesn't match any link. */ private function filterSmallHash($smallHash) { $filtered = array(); - foreach ($this->links as $l) { - if ($smallHash == smallHash($l['linkdate'])) { + foreach ($this->links as $key => $l) { + if ($smallHash == $l['shorturl']) { // Yes, this is ugly and slow - $filtered[$l['linkdate']] = $l; + $filtered[$key] = $l; return $filtered; } } + + if (empty($filtered)) { + throw new LinkNotFoundException(); + } + return $filtered; } @@ -120,7 +139,9 @@ class LinkFilter * * Searches: * - in the URLs, title and description; - * - are case-insensitive. + * - are case-insensitive; + * - terms surrounded by quotes " are exact terms search. + * - terms starting with a dash - are excluded (except exact terms). * * Example: * print_r($mydb->filterFulltext('hollandais')); @@ -136,62 +157,73 @@ class LinkFilter */ private function filterFulltext($searchterms, $privateonly = false) { + if (empty($searchterms)) { + return $this->links; + } + + $filtered = array(); $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); - $explodedSearch = explode(' ', trim($search)); - $keys = array('title', 'description', 'url', 'tags'); - $found = true; - $searchExactPhrase = false; - - // Check if we're using double-quotes to search for the exact string - if ($search[0] == '"' && $search[strlen($search) - 1] == '"') { - $searchExactPhrase = true; - - // Remove the double-quotes as they are not what we search for - $search = substr($search, 1, -1); + $exactRegex = '/"([^"]+)"/'; + // Retrieve exact search terms. + preg_match_all($exactRegex, $search, $exactSearch); + $exactSearch = array_values(array_filter($exactSearch[1])); + + // Remove exact search terms to get AND terms search. + $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); + $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); + + // Filter excluding terms and update andSearch. + $excludeSearch = array(); + $andSearch = array(); + foreach ($explodedSearchAnd as $needle) { + if ($needle[0] == '-' && strlen($needle) > 1) { + $excludeSearch[] = substr($needle, 1); + } else { + $andSearch[] = $needle; + } } + + $keys = array('title', 'description', 'url', 'tags'); + // Iterate over every stored link. - foreach ($this->links as $link) { + foreach ($this->links as $id => $link) { // ignore non private links when 'privatonly' is on. if (! $link['private'] && $privateonly === true) { continue; } - // Iterate over searchable link fields. + // Concatenate link fields to search across fields. + // Adds a '\' separator for exact search terms. + $content = ''; foreach ($keys as $key) { - // Be optimistic - $found = true; - - // FIXME: Find a better word for where you're searching in - $haystack = mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8'); - - // When searching for the phrase, check if it's in the haystack... - if ( $searchExactPhrase && strpos($haystack, $search) !== false) { - break; - } - else { - // Iterate over keywords, if keyword is not found, - // no need to check for the others. We want all or nothing. - foreach($explodedSearch as $keyword) { - if(strpos($haystack, $keyword) === false) { - $found = false; - break; - } - } - } - - // One of the fields of the link matches, no need to check the other. - if ($found) { - break; - } + $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; + } + + // Be optimistic + $found = true; + + // First, we look for exact term search + for ($i = 0; $i < count($exactSearch) && $found; $i++) { + $found = strpos($content, $exactSearch[$i]) !== false; + } + + // Iterate over keywords, if keyword is not found, + // no need to check for the others. We want all or nothing. + for ($i = 0; $i < count($andSearch) && $found; $i++) { + $found = strpos($content, $andSearch[$i]) !== false; + } + + // Exclude terms. + for ($i = 0; $i < count($excludeSearch) && $found; $i++) { + $found = strpos($content, $excludeSearch[$i]) === false; } - + if ($found) { - $filtered[$link['linkdate']] = $link; + $filtered[$id] = $link; } } - krsort($filtered); return $filtered; } @@ -209,13 +241,19 @@ class LinkFilter */ public function filterTags($tags, $casesensitive = false, $privateonly = false) { + // Implode if array for clean up. + $tags = is_array($tags) ? trim(implode(' ', $tags)) : $tags; + if (empty($tags)) { + return $this->links; + } + $searchtags = self::tagsStrToArray($tags, $casesensitive); $filtered = array(); if (empty($searchtags)) { return $filtered; } - foreach ($this->links as $link) { + foreach ($this->links as $key => $link) { // ignore non private links when 'privatonly' is on. if (! $link['private'] && $privateonly === true) { continue; @@ -227,18 +265,19 @@ class LinkFilter for ($i = 0 ; $i < count($searchtags) && $found; $i++) { // Exclusive search, quit if tag found. // Or, tag not found in the link, quit. - if (($searchtags[$i][0] == '-' && in_array(substr($searchtags[$i], 1), $linktags)) - || ($searchtags[$i][0] != '-') && ! in_array($searchtags[$i], $linktags) + if (($searchtags[$i][0] == '-' + && $this->searchTagAndHashTag(substr($searchtags[$i], 1), $linktags, $link['description'])) + || ($searchtags[$i][0] != '-') + && ! $this->searchTagAndHashTag($searchtags[$i], $linktags, $link['description']) ) { $found = false; } } if ($found) { - $filtered[$link['linkdate']] = $link; + $filtered[$key] = $link; } } - krsort($filtered); return $filtered; } @@ -261,13 +300,36 @@ class LinkFilter } $filtered = array(); - foreach ($this->links as $l) { - if (startsWith($l['linkdate'], $day)) { - $filtered[$l['linkdate']] = $l; + foreach ($this->links as $key => $l) { + if ($l['created']->format('Ymd') == $day) { + $filtered[$key] = $l; } } - ksort($filtered); - return $filtered; + + // sort by date ASC + return array_reverse($filtered, true); + } + + /** + * Check if a tag is found in the taglist, or as an hashtag in the link description. + * + * @param string $tag Tag to search. + * @param array $taglist List of tags for the current link. + * @param string $description Link description. + * + * @return bool True if found, false otherwise. + */ + protected function searchTagAndHashTag($tag, $taglist, $description) + { + if (in_array($tag, $taglist)) { + return true; + } + + if (preg_match('/(^| )#'. $tag .'([^'. self::$HASHTAG_CHARS .']|$)/mui', $description) > 0) { + return true; + } + + return false; } /** @@ -286,6 +348,11 @@ class LinkFilter $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); $tagsOut = str_replace(',', ' ', $tagsOut); - return array_filter(explode(' ', trim($tagsOut)), 'strlen'); + return array_values(array_filter(explode(' ', trim($tagsOut)), 'strlen')); } } + +class LinkNotFoundException extends Exception +{ + protected $message = 'The link you are trying to reach does not exist or has been deleted.'; +}