3 declare(strict_types
=1);
5 namespace Shaarli\Bookmark
;
8 use Shaarli\Bookmark\Exception\BookmarkNotFoundException
;
9 use Shaarli\Config\ConfigManager
;
14 * Perform search and filter operation on link data list.
19 * @var string permalinks.
21 public static $FILTER_HASH = 'permalink';
24 * @var string text search.
26 public static $FILTER_TEXT = 'fulltext';
29 * @var string tag filter.
31 public static $FILTER_TAG = 'tags';
34 * @var string filter by day.
36 public static $FILTER_DAY = 'FILTER_DAY';
39 * @var string default filter type: no filtering is applied.
41 public static $DEFAULT = 'NO_FILTER';
43 /** @var string Visibility: all */
44 public static $ALL = 'all';
46 /** @var string Visibility: public */
47 public static $PUBLIC = 'public';
49 /** @var string Visibility: private */
50 public static $PRIVATE = 'private';
53 * @var string Allowed characters for hashtags (regex syntax).
55 public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
58 * @var Bookmark[] all available bookmarks.
62 /** @var ConfigManager */
/**
 * Keep a reference to the bookmark list to filter and to the configuration manager.
 *
 * @param Bookmark[]    $bookmarks Bookmarks every filter method of this instance works on.
 * @param ConfigManager $conf      Configuration manager (read for `general.tags_separator`).
 */
public function __construct($bookmarks, ConfigManager $conf)
{
    $this->conf = $conf;
    $this->bookmarks = $bookmarks;
}
/**
 * Dispatch to the filter matching $type and return the resulting bookmarks.
 *
 * @param string $type          Type of filter (one of the static $FILTER_* values; anything else means no filter).
 * @param mixed  $request       Filter content. For the combined tag + text type, index 0 holds the tags
 *                              and index 1 the full-text query.
 * @param bool   $casesensitive Optional: perform a case sensitive tag filter if true.
 * @param string $visibility    Optional: return only all/private/public bookmarks. Any other value is treated as 'all'.
 * @param bool   $untaggedonly  Optional: return only untagged bookmarks. Applies only if $type includes FILTER_TAG.
 *
 * @return Bookmark[] filtered bookmark list.
 *
 * @throws BookmarkNotFoundException
 */
public function filter(
    string $type,
    $request,
    bool $casesensitive = false,
    string $visibility = 'all',
    bool $untaggedonly = false
) {
    // Unknown visibility values fall back to the widest scope.
    $visibility = in_array($visibility, ['all', 'public', 'private']) ? $visibility : 'all';

    switch ($type) {
        case self::$FILTER_HASH:
            return $this->filterSmallHash($request);

        case self::$FILTER_TAG | self::$FILTER_TEXT: // == "vuotext"
            // The case label is the bitwise OR of both type strings: tags and text are searched together.
            $hasTags = !empty($request) && !empty($request[0]);
            $hasText = !empty($request) && !empty($request[1]);

            if (!$hasTags && !$hasText) {
                return $untaggedonly
                    ? $this->filterUntagged($visibility)
                    : $this->noFilter($visibility);
            }

            $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->bookmarks;

            // Each criterion is applied by a fresh filter working on the previous result.
            if ($hasTags) {
                $tagFilter = new BookmarkFilter($filtered, $this->conf);
                $filtered = $tagFilter->filterTags($request[0], $casesensitive, $visibility);
            }
            if ($hasText) {
                $textFilter = new BookmarkFilter($filtered, $this->conf);
                $filtered = $textFilter->filterFulltext($request[1], $visibility);
            }

            return $filtered;

        case self::$FILTER_TEXT:
            return $this->filterFulltext($request, $visibility);

        case self::$FILTER_TAG:
            return $untaggedonly
                ? $this->filterUntagged($visibility)
                : $this->filterTags($request, $casesensitive, $visibility);

        case self::$FILTER_DAY:
            return $this->filterDay($request, $visibility);

        default:
            return $this->noFilter($visibility);
    }
}
/**
 * No content filter: only the visibility restriction is applied.
 *
 * @param string $visibility Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] the stored bookmarks as is for 'all', otherwise the ones matching the visibility.
 */
private function noFilter(string $visibility = 'all')
{
    if ($visibility === 'all') {
        return $this->bookmarks;
    }

    $selected = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        // A private bookmark is kept for 'private', a non-private one for 'public'.
        $keep = $bookmark->isPrivate()
            ? $visibility === 'private'
            : $visibility === 'public';

        if ($keep) {
            $selected[$id] = $bookmark;
        }
    }

    return $selected;
}
/**
 * Returns the shaare corresponding to a smallHash.
 *
 * @param string $smallHash permalink hash.
 *
 * @return Bookmark[] single-entry array (original key => bookmark) for the first bookmark
 *                    whose short URL is identical to $smallHash.
 *
 * @throws BookmarkNotFoundException if the smallhash doesn't match any link.
 */
private function filterSmallHash(string $smallHash)
{
    // Linear scan over every stored bookmark.
    foreach ($this->bookmarks as $key => $bookmark) {
        // Strict comparison on purpose: with `==`, two numeric-looking strings are compared
        // as numbers (e.g. "0e1234" == "0e5678"), which could return the wrong bookmark.
        if ($smallHash === $bookmark->getShortUrl()) {
            return [$key => $bookmark];
        }
    }

    throw new BookmarkNotFoundException();
}
187 * Returns the list of bookmarks corresponding to a full-text search
190 * - in the URLs, title and description;
191 * - are case-insensitive;
192 * - terms surrounded by quotes " are exact terms search.
193 * - terms starting with a dash - are excluded (except exact terms).
196 * print_r($mydb->filterFulltext('hollandais'));
198 * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
199 * - allows to perform searches on Unicode text
200 * - see https://github.com/shaarli/Shaarli/issues/75 for examples
202 * @param string $searchterms search query.
203 * @param string $visibility Optional: return only all/private/public bookmarks.
205 * @return Bookmark[] search results.
207 private function filterFulltext(string $searchterms, string $visibility = 'all')
209 if (empty($searchterms)) {
210 return $this->noFilter($visibility);
214 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER
, 'UTF-8');
215 $exactRegex = '/"([^"]+)"/';
216 // Retrieve exact search terms.
217 preg_match_all($exactRegex, $search, $exactSearch);
218 $exactSearch = array_values(array_filter($exactSearch[1]));
220 // Remove exact search terms to get AND terms search.
221 $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search)));
222 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
224 // Filter excluding terms and update andSearch.
227 foreach ($explodedSearchAnd as $needle) {
228 if ($needle[0] == '-' && strlen($needle) > 1) {
229 $excludeSearch[] = substr($needle, 1);
231 $andSearch[] = $needle;
235 // Iterate over every stored link.
236 foreach ($this->bookmarks
as $id => $link) {
237 // ignore non-private bookmarks when visibility is 'private', and private ones when it is 'public'.
238 if ($visibility !== 'all') {
239 if (!$link->isPrivate() && $visibility === 'private') {
241 } elseif ($link->isPrivate() && $visibility === 'public') {
247 $content = $this->buildFullTextSearchableLink($link, $lengths);
251 $foundPositions = [];
253 // First, we look for exact term search
254 // Then iterate over keywords, if keyword is not found,
255 // no need to check for the others. We want all or nothing.
256 foreach ([$exactSearch, $andSearch] as $search) {
257 for ($i = 0; $i < count($search) && $found !== false; $i++
) {
258 $found = mb_strpos($content, $search[$i]);
259 if ($found === false) {
263 $foundPositions[] = ['start' => $found, 'end' => $found +
mb_strlen($search[$i])];
268 for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++
) {
269 $found = strpos($content, $excludeSearch[$i]) === false;
272 if ($found !== false) {
273 $link->addAdditionalContentEntry(
275 $this->postProcessFoundPositions($lengths, $foundPositions)
278 $filtered[$id] = $link;
/**
 * Generate a regex fragment (a lookahead group) out of a tag.
 *
 * @param string $tag tag to generate the regex from. May start with '-' to negate and contain '*' as wildcard.
 *
 * @return string generated regex fragment, or an empty string when there is nothing to search
 *                (empty tag, lone '-' or lone '*').
 */
protected function tag2regex(string $tag): string
{
    $len = strlen($tag);
    if (!$len || $tag === "-" || $tag === "*") {
        // nothing to search, return empty regex
        return '';
    }

    // The separator comes from configuration, so it may contain regex metacharacters
    // ('|', '.', ']', '/', ...). Quote it before embedding it in the pattern and in a character class.
    $separator = preg_quote((string) $this->conf->get('general.tags_separator', ' '), '/');

    if ($tag[0] === "-") {
        // query is negated: skip the '-' and use a negative lookahead
        $i = 1;
        $regex = '(?!';
    } else {
        // start at first character and use a positive lookahead
        $i = 0;
        $regex = '(?=';
    }

    // before tag may only be the separator or the beginning
    $regex .= '.*(?:^|' . $separator . ')';

    // iterate over string, separating it into placeholders and literal content
    for (; $i < $len; $i++) {
        if ($tag[$i] === '*') {
            // placeholder: any run of non-separator characters
            $regex .= '[^' . $separator . ']*?';
            continue;
        }

        // literal run: everything up to the next placeholder, or to the end of the tag
        $next = strpos($tag, '*', $i);
        if ($next === false) {
            $next = $len;
        }

        // escape any regex characters of the tag name to prevent conflicts
        $regex .= preg_quote(substr($tag, $i, $next - $i), '/');

        // resume right before the placeholder (the loop increment moves onto it)
        $i = $next - 1;
    }

    // after the tag may only be the separator or the end
    $regex .= '(?:$|' . $separator . '))';

    return $regex;
}
/**
 * Returns the list of bookmarks associated with a given list of tags.
 *
 * Every requested tag is turned into a lookahead by tag2regex() and the resulting pattern is matched
 * against the bookmark tag string, extended with the #hashtags found in its description.
 * Example:
 *   print_r($mydb->filterTags('linux programming'));
 *
 * @param string|array $tags          list of tags; a string is split with tags_str2array() and the configured separator.
 * @param bool         $casesensitive ignore case if false.
 * @param string       $visibility    Optional: return only all/private/public bookmarks.
 *
 * @return Bookmark[] filtered bookmarks.
 */
public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
{
    $separator = $this->conf->get('general.tags_separator', ' ');

    // We may get passed an array, even though the docs say different.
    $wanted = is_array($tags) ? $tags : tags_str2array($tags, $separator);

    if (count($wanted) === 0) {
        // no input tags
        return $this->noFilter($visibility);
    }

    // With public visibility only, tags starting with '.' cannot be searched.
    if ($visibility === self::$PUBLIC) {
        $searchable = [];
        foreach ($wanted as $tag) {
            if (!startsWith($tag, '.')) {
                $searchable[] = $tag;
            }
        }
        $wanted = $searchable;

        if (empty($wanted)) {
            return [];
        }
    }

    // One lookahead per tag, all anchored at the start of the search string.
    $pattern = '/^' . implode(array_map([$this, 'tag2regex'], $wanted)) . '.*$/';
    if (!$casesensitive) {
        $pattern .= 'i';
    }

    $matching = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        // visibility restriction
        if ($visibility === 'private' && !$bookmark->isPrivate()) {
            continue;
        }
        if ($visibility === 'public' && $bookmark->isPrivate()) {
            continue;
        }

        // search string starts with the tags of the current bookmark
        $haystack = $bookmark->getTagsString($separator);

        $description = $bookmark->getDescription();
        if (strlen(trim($description)) && strpos($description, '#') !== false) {
            // a description is given and holds at least one possible hashtag
            $hashtags = [];
            preg_match_all(
                '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
                $description,
                $hashtags
            );
            if (count($hashtags[1])) {
                // append the hashtags of the description to the search string
                $haystack .= $separator . tags_array2str($hashtags[1], $separator);
            }
        }

        if (preg_match($pattern, $haystack)) {
            $matching[$id] = $bookmark;
        }
    }

    return $matching;
}
/**
 * Return only the bookmarks whose tag list is empty.
 *
 * @param string $visibility return only all/private/public bookmarks.
 *
 * @return Bookmark[] untagged bookmarks, indexed by their original key.
 */
public function filterUntagged(string $visibility)
{
    $privateOnly = $visibility === 'private';
    $publicOnly = $visibility === 'public';

    $untagged = [];
    foreach ($this->bookmarks as $id => $bookmark) {
        if ($privateOnly && !$bookmark->isPrivate()) {
            continue;
        }
        if ($publicOnly && $bookmark->isPrivate()) {
            continue;
        }
        if (!empty($bookmark->getTags())) {
            continue;
        }

        $untagged[$id] = $bookmark;
    }

    return $untagged;
}
/**
 * Returns the list of bookmarks created on a given day.
 *
 * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
 *   print_r($mydb->filterDay('20120125'));
 *
 * Matches are collected in storage order and returned in reverse order, keys preserved.
 *
 * @param string $day        day to filter.
 * @param string $visibility return only all/private/public bookmarks.
 *
 * @return Bookmark[] all bookmarks matching given day.
 *
 * @throws Exception if date format is invalid.
 */
public function filterDay(string $day, string $visibility)
{
    if (!checkDateFormat('Ymd', $day)) {
        throw new Exception('Invalid date format');
    }

    $filtered = [];
    foreach ($this->bookmarks as $key => $bookmark) {
        if ($visibility === static::$PUBLIC && $bookmark->isPrivate()) {
            continue;
        }
        // Honour 'private' as well, like the other filters of this class do:
        // previously public bookmarks were returned for a private-only request.
        if ($visibility === static::$PRIVATE && !$bookmark->isPrivate()) {
            continue;
        }

        if ($bookmark->getCreated()->format('Ymd') == $day) {
            $filtered[$key] = $bookmark;
        }
    }

    // Reverse the matches while keeping their keys.
    return array_reverse($filtered, true);
}
/**
 * Convert a list of tags (str) to an array. Also
 *   - handles case sensitivity (lowercases the input unless $casesensitive is true),
 *   - accepts whitespace and commas as separators.
 *
 * @param string $tags          string containing a list of tags.
 * @param bool   $casesensitive will convert everything to lowercase if false.
 *
 * @return string[] non-empty tags, in input order.
 */
public static function tagsStrToArray(string $tags, bool $casesensitive): array
{
    if (!$casesensitive) {
        // UTF-8 aware conversion to handle various scripts (e.g. Cyrillic or Greek).
        $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
    }

    // Commas become spaces so that a single whitespace split covers both separators.
    $normalized = strtr($tags, [',' => ' ']);

    return preg_split('/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY);
}
/**
 * Finalize the content of the foundPositions array: sort the matches, drop the ones starting
 * before the end of the previously kept match, and attach every remaining match to the
 * bookmark field it starts in, with offsets made relative to that field.
 *
 * @param array $fieldLengths   Start and end positions of every bookmark field in the aggregated bookmark content
 *                              (field name => ['start' => int, 'end' => int]).
 * @param array $foundPositions Positions where the search results were found in the aggregated content
 *                              (list of ['start' => int, 'end' => int]).
 *
 * @return array Updated $foundPositions, by bookmark field.
 */
protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
{
    // Sort results by starting position ASC. The spaceship operator returns 0 for equal starts,
    // which keeps the comparator consistent (the previous one answered -1 for both argument orders).
    usort($foundPositions, function (array $entryA, array $entryB): int {
        return $entryA['start'] <=> $entryB['start'];
    });

    $out = [];
    $currentMax = -1;
    foreach ($foundPositions as $foundPosition) {
        // we do not allow overlapping highlights
        if ($foundPosition['start'] < $currentMax) {
            continue;
        }

        $currentMax = $foundPosition['end'];
        foreach ($fieldLengths as $part => $length) {
            // not inside this field: try the next one
            if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) {
                continue;
            }

            $out[$part][] = [
                'start' => $foundPosition['start'] - $length['start'],
                'end' => $foundPosition['end'] - $length['start'],
            ];
            // a match belongs to a single field
            break;
        }
    }

    return $out;
}
/**
 * Concatenate the lowercased title, description, URL and tag string of a bookmark, each followed
 * by a backslash separator, so that a search can run across fields.
 * Also fill $lengths with the start and end position of every field inside the concatenated content;
 * positions are computed with mb_strlen() on the original (not lowercased) field values.
 *
 * @param Bookmark $link
 * @param array    $lengths (by reference) receives the 'title', 'description', 'url' and 'tags' entries.
 *
 * @return string Lowercase concatenated fields content.
 */
protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
{
    $fields = [
        'title' => $link->getTitle(),
        'description' => $link->getDescription(),
        'url' => $link->getUrl(),
        'tags' => $link->getTagsString($this->conf->get('general.tags_separator', ' ')),
    ];

    $content = '';
    $cursor = 0;
    foreach ($fields as $name => $value) {
        $content .= mb_convert_case($value, MB_CASE_LOWER, 'UTF-8') . '\\';

        $end = $cursor + mb_strlen($value);
        $lengths[$name] = ['start' => $cursor, 'end' => $end];

        // skip the separator character before the next field
        $cursor = $end + 1;
    }

    return $content;
}