3 declare(strict_types
=1);
5 namespace Shaarli\Bookmark
;
7 use Shaarli\Bookmark\Exception\BookmarkNotFoundException
;
8 use Shaarli\Config\ConfigManager
;
9 use Shaarli\Plugin\PluginManager
;
14 * Perform search and filter operation on link data list.
19 * @var string permalinks.
21 public static $FILTER_HASH = 'permalink';
24 * @var string text search.
26 public static $FILTER_TEXT = 'fulltext';
29 * @var string tag filter.
31 public static $FILTER_TAG = 'tags';
34 * @var string default filter type: no specific filter is applied.
36 public static $DEFAULT = 'NO_FILTER';
38 /** @var string Visibility: all */
39 public static $ALL = 'all';
41 /** @var string Visibility: public */
42 public static $PUBLIC = 'public';
44 /** @var string Visibility: private */
45 public static $PRIVATE = 'private';
48 * @var string Allowed characters for hashtags (regex syntax).
50 public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
53 * @var Bookmark[] all available bookmarks.
57 /** @var ConfigManager */
60 /** @var PluginManager */
61 protected $pluginManager;
/**
 * Keep references to the bookmark list and to the collaborators used while filtering.
 *
 * @param Bookmark[]    $bookmarks     initialization.
 * @param ConfigManager $conf          Configuration; read for the `general.tags_separator` setting.
 * @param PluginManager $pluginManager Plugin manager; its filterSearchEntry() is consulted for every entry.
 */
public function __construct($bookmarks, ConfigManager $conf, PluginManager $pluginManager)
{
    $this->pluginManager = $pluginManager;
    $this->conf = $conf;
    $this->bookmarks = $bookmarks;
}
/**
 * Dispatch a search request to the filter implementation matching $type.
 *
 * @param string $type          Type of filter (eg. tags, permalink, etc.).
 * @param mixed  $request       Filter content. For the combined tag + text filter it is indexed:
 *                              [0] is handed to the tag filter, [1] to the full-text filter.
 * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
 * @param string $visibility    Optional: return only all/private/public bookmarks.
 *                              Any other value falls back to 'all'.
 * @param bool   $untaggedonly  Optional: return only untagged bookmarks. Applies only if $type includes FILTER_TAG
 *
 * @return Bookmark[] filtered bookmark list.
 *
 * @throws BookmarkNotFoundException
 */
public function filter(
    string $type,
    $request,
    bool $casesensitive = false,
    string $visibility = 'all',
    bool $untaggedonly = false
) {
    $visibility = in_array($visibility, ['all', 'public', 'private']) ? $visibility : 'all';

    if ($type === self::$FILTER_HASH) {
        return $this->filterSmallHash($request);
    }

    // Combined search: the type is the bitwise OR of the tag and text filter strings.
    if ($type === (self::$FILTER_TAG | self::$FILTER_TEXT)) {
        if (empty($request) || (empty($request[0]) && empty($request[1]))) {
            return $untaggedonly
                ? $this->filterUntagged($visibility)
                : $this->noFilter($visibility);
        }

        // Each stage narrows the result of the previous one through a fresh filter instance.
        $matches = $untaggedonly ? $this->filterUntagged($visibility) : $this->bookmarks;

        if (!empty($request[0])) {
            $tagStage = new BookmarkFilter($matches, $this->conf, $this->pluginManager);
            $matches = $tagStage->filterTags($request[0], $casesensitive, $visibility);
        }

        if (!empty($request[1])) {
            $textStage = new BookmarkFilter($matches, $this->conf, $this->pluginManager);
            $matches = $textStage->filterFulltext($request[1], $visibility);
        }

        return $matches;
    }

    if ($type === self::$FILTER_TEXT) {
        return $this->filterFulltext($request, $visibility);
    }

    if ($type === self::$FILTER_TAG) {
        return $untaggedonly
            ? $this->filterUntagged($visibility)
            : $this->filterTags($request, $casesensitive, $visibility);
    }

    return $this->noFilter($visibility);
}
/**
 * No search criteria: keep every bookmark accepted by the plugins and matching the visibility.
 *
 * @param string $visibility Optional: return only all/private/public bookmarks
 *
 * @return Bookmark[] filtered bookmarks.
 */
private function noFilter(string $visibility = 'all')
{
    $kept = [];

    foreach ($this->bookmarks as $id => $bookmark) {
        $context = ['source' => 'no_filter', 'visibility' => $visibility];
        if (!$this->pluginManager->filterSearchEntry($bookmark, $context)) {
            continue;
        }

        $visible = $visibility === 'all'
            || ($visibility === 'private' && $bookmark->isPrivate())
            || ($visibility === 'public' && !$bookmark->isPrivate());

        if ($visible) {
            $kept[$id] = $bookmark;
        }
    }

    return $kept;
}
/**
 * Look up the single bookmark whose short URL equals the given small hash.
 *
 * @param string $smallHash permalink hash.
 *
 * @return Bookmark[] single-entry array (original key preserved) containing the matching bookmark.
 *
 * @throws BookmarkNotFoundException if the smallhash doesn't match any link.
 */
private function filterSmallHash(string $smallHash)
{
    // Linear scan over every bookmark until the first match.
    foreach ($this->bookmarks as $id => $bookmark) {
        if ($smallHash != $bookmark->getShortUrl()) {
            continue;
        }

        return [$id => $bookmark];
    }

    throw new BookmarkNotFoundException();
}
191 * Returns the list of bookmarks corresponding to a full-text search
194 * - in the URLs, title and description;
195 * - are case-insensitive;
196 * - terms surrounded by quotes " are exact terms search.
197 * - terms starting with a dash - are excluded (except exact terms).
200 * print_r($mydb->filterFulltext('hollandais'));
202 * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
203 * - allows to perform searches on Unicode text
204 * - see https://github.com/shaarli/Shaarli/issues/75 for examples
206 * @param string $searchterms search query.
207 * @param string $visibility Optional: return only all/private/public bookmarks.
209 * @return Bookmark[] search results.
211 private function filterFulltext(string $searchterms, string $visibility = 'all')
213 if (empty($searchterms)) {
214 return $this->noFilter($visibility);
218 $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER
, 'UTF-8');
219 $exactRegex = '/"([^"]+)"/';
220 // Retrieve exact search terms.
221 preg_match_all($exactRegex, $search, $exactSearch);
222 $exactSearch = array_values(array_filter($exactSearch[1]));
224 // Remove exact search terms to get AND terms search.
225 $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search)));
226 $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
228 // Filter excluding terms and update andSearch.
231 foreach ($explodedSearchAnd as $needle) {
232 if ($needle[0] == '-' && strlen($needle) > 1) {
233 $excludeSearch[] = substr($needle, 1);
235 $andSearch[] = $needle;
239 // Iterate over every stored link.
240 foreach ($this->bookmarks
as $id => $bookmark) {
242 !$this->pluginManager
->filterSearchEntry(
245 'source' => 'fulltext',
246 'searchterms' => $searchterms,
247 'andSearch' => $andSearch,
248 'exactSearch' => $exactSearch,
249 'excludeSearch' => $excludeSearch,
250 'visibility' => $visibility
257 // ignore non private bookmarks when 'privateonly' is on.
258 if ($visibility !== 'all') {
259 if (!$bookmark->isPrivate() && $visibility === 'private') {
261 } elseif ($bookmark->isPrivate() && $visibility === 'public') {
267 $content = $this->buildFullTextSearchableLink($bookmark, $lengths);
271 $foundPositions = [];
273 // First, we look for exact term search
274 // Then iterate over keywords, if keyword is not found,
275 // no need to check for the others. We want all or nothing.
276 foreach ([$exactSearch, $andSearch] as $search) {
277 for ($i = 0; $i < count($search) && $found !== false; $i++
) {
278 $found = mb_strpos($content, $search[$i]);
279 if ($found === false) {
283 $foundPositions[] = ['start' => $found, 'end' => $found +
mb_strlen($search[$i])];
288 for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++
) {
289 $found = strpos($content, $excludeSearch[$i]) === false;
292 if ($found !== false) {
293 $bookmark->addAdditionalContentEntry(
295 $this->postProcessFoundPositions($lengths, $foundPositions)
298 $filtered[$id] = $bookmark;
306 * Returns the list of bookmarks associated with a given list of tags
308 * You can specify one or more tags, separated by space or a comma, e.g.
309 * print_r($mydb->filterTags('linux programming'));
311 * @param string|array $tags list of tags, separated by commas or blank spaces if passed as string.
312 * @param bool $casesensitive ignore case if false.
313 * @param string $visibility Optional: return only all/private/public bookmarks.
315 * @return Bookmark[] filtered bookmarks.
317 public function filterTags($tags, bool $casesensitive = false, string $visibility = 'all')
319 $tagsSeparator = $this->conf
->get('general.tags_separator', ' ');
320 // get single tags (we may get passed an array, even though the docs say different)
322 if (!is_array($tags)) {
323 // we got an input string, split tags
324 $inputTags = tags_str2array($inputTags, $tagsSeparator);
327 if (count($inputTags) === 0) {
329 return $this->noFilter($visibility);
332 // If we only have public visibility, we can't look for hidden tags
333 if ($visibility === self
::$PUBLIC) {
334 $inputTags = array_values(array_filter($inputTags, function ($tag) {
335 return ! startsWith($tag, '.');
338 if (empty($inputTags)) {
343 // build regex from all tags
344 $re = '/^' . implode(array_map([$this, 'tag2regex'], $inputTags)) . '.*$/';
345 if (!$casesensitive) {
346 // make regex case insensitive
350 // create resulting array
353 // iterate over each link
354 foreach ($this->bookmarks
as $key => $bookmark) {
356 !$this->pluginManager
->filterSearchEntry(
361 'casesensitive' => $casesensitive,
362 'visibility' => $visibility
369 // check level of visibility
370 // ignore non private bookmarks when 'privateonly' is on.
371 if ($visibility !== 'all') {
372 if (!$bookmark->isPrivate() && $visibility === 'private') {
374 } elseif ($bookmark->isPrivate() && $visibility === 'public') {
378 // build search string, start with tags of current link
379 $search = $bookmark->getTagsString($tagsSeparator);
380 if (strlen(trim($bookmark->getDescription())) && strpos($bookmark->getDescription(), '#') !== false) {
381 // description given and at least one possible tag found
383 // find all tags in the form of #tag in the description
385 '/(?<![' . self
::$HASHTAG_CHARS . '])#([' . self
::$HASHTAG_CHARS . ']+?)\b/sm',
386 $bookmark->getDescription(),
389 if (count($descTags[1])) {
390 // there were some tags in the description, add them to the search string
391 $search .= $tagsSeparator . tags_array2str($descTags[1], $tagsSeparator);
394 // match regular expression with search string
395 if (!preg_match($re, $search)) {
396 // this entry does _not_ match our regex
399 $filtered[$key] = $bookmark;
/**
 * Keep only the bookmarks that carry no tag at all.
 *
 * Entries rejected by the plugin manager or not matching the requested visibility are skipped.
 *
 * @param string $visibility return only all/private/public bookmarks.
 *
 * @return Bookmark[] filtered bookmarks.
 */
public function filterUntagged(string $visibility)
{
    $untagged = [];

    foreach ($this->bookmarks as $id => $entry) {
        $accepted = $this->pluginManager->filterSearchEntry(
            $entry,
            ['source' => 'untagged', 'visibility' => $visibility]
        );
        if (!$accepted) {
            continue;
        }

        // Visibility guards: drop public entries in 'private' mode and private entries in 'public' mode.
        if ($visibility === 'private' && !$entry->isPrivate()) {
            continue;
        }
        if ($visibility === 'public' && $entry->isPrivate()) {
            continue;
        }

        if (empty($entry->getTags())) {
            $untagged[$id] = $entry;
        }
    }

    return $untagged;
}
/**
 * Split a string of tags into a list of individual tags.
 *
 * - Commas and any run of whitespace both act as separators.
 * - Empty items are discarded.
 * - Unless $casesensitive is true, the tags are lowercased first.
 *
 * @param string $tags          string containing a list of tags.
 * @param bool   $casesensitive will convert everything to lowercase if false.
 *
 * @return string[] list of tags.
 */
public static function tagsStrToArray(string $tags, bool $casesensitive): array
{
    if (!$casesensitive) {
        // Multibyte-aware lowercasing, so non-ASCII scripts (i.e. cyrillic, or greek) are handled too.
        $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
    }

    // Turn commas into blanks so that a single whitespace split covers both separators.
    $normalized = strtr($tags, ',', ' ');

    return preg_split('/\s+/', $normalized, -1, PREG_SPLIT_NO_EMPTY);
}
/**
 * Generate a regex lookahead fragment out of a single tag.
 *
 * The fragment asserts that the tag appears (or, when negated, does not appear) as a whole item
 * of a separator-delimited tag string: it must be preceded by the beginning of the string or the
 * separator, and followed by the end of the string or the separator.
 *
 * @param string $tag Tag to generate the regex from. May start with '-' to negate,
 *                    and contain '*' as a wildcard.
 *
 * @return string generated regex fragment, or an empty string when there is nothing to search
 *                (empty tag, lone '-' or lone '*').
 */
protected function tag2regex(string $tag): string
{
    if ($tag === '' || $tag === '-' || $tag === '*') {
        // nothing to search, return empty regex
        return '';
    }

    // The separator is a configuration value, not a pattern: quote it so that characters
    // such as '|' or '.' are matched literally instead of altering the generated regex.
    $separator = preg_quote((string) $this->conf->get('general.tags_separator', ' '), '/');

    // A leading '-' negates the tag: negative lookahead, and the dash is not part of the name.
    $negated = $tag[0] === '-';
    $pattern = $negated ? substr($tag, 1) : $tag;

    // Literal pieces between wildcards; regex characters are escaped to prevent conflicts.
    $literals = array_map(
        static function (string $literal): string {
            return preg_quote($literal, '/');
        },
        explode('*', $pattern)
    );

    return ($negated ? '(?!' : '(?=')
        // before tag may only be the separator or the beginning
        . '.*(?:^|' . $separator . ')'
        // every '*' matches any run of characters not found in the separator
        . implode('[^' . $separator . ']*?', $literals)
        // after the tag may only be the separator or the end
        . '(?:$|' . $separator . '))';
}
/**
 * Finalize the content of the foundPositions array: every search result is attached to the
 * bookmark field it starts in, with positions made relative to that field, and results
 * overlapping an already kept one are dropped.
 *
 * @param array $fieldLengths   Start and end positions of every bookmark field in the aggregated bookmark content.
 * @param array $foundPositions Positions where the search results were found in the aggregated content.
 *
 * @return array Updated $foundPositions, by bookmark field.
 */
protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
{
    // Sort results by starting position ASC. The spaceship operator returns 0 for equal starts,
    // as the usort() comparator contract requires, so ties are ordered consistently.
    usort($foundPositions, function (array $entryA, array $entryB): int {
        return $entryA['start'] <=> $entryB['start'];
    });

    $currentMax = -1;
    $positions = [];
    foreach ($foundPositions as $foundPosition) {
        // we do not allow overlapping highlights
        if ($foundPosition['start'] < $currentMax) {
            continue;
        }

        $currentMax = $foundPosition['end'];
        foreach ($fieldLengths as $part => $length) {
            if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] > $length['end']) {
                continue;
            }

            // Translate absolute offsets into offsets relative to the beginning of the field.
            $positions[$part][] = [
                'start' => $foundPosition['start'] - $length['start'],
                'end' => $foundPosition['end'] - $length['start'],
            ];
            break;
        }
    }

    return $positions;
}
553 * Concatenate link fields to search across fields. Adds a '\' separator
for exact search terms
.
554 * Also populate
$length array with starting
and ending positions of every bookmark field
555 * inside concatenated content
.
557 * @param Bookmark
$link
558 * @param
array $lengths (by reference
)
560 * @return string Lowercase concatenated fields content
.
562 protected function buildFullTextSearchableLink(Bookmark
$link, array &$lengths): string
564 $tagString = $link->getTagsString($this->conf
->get('general.tags_separator', ' '));
565 $content = mb_convert_case($link->getTitle(), MB_CASE_LOWER
, 'UTF-8') . '\\';
566 $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER
, 'UTF-8') . '\\';
567 $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER
, 'UTF-8') . '\\';
568 $content .= mb_convert_case($tagString, MB_CASE_LOWER
, 'UTF-8') . '\\';
570 $lengths['title'] = ['start' => 0, 'end' => mb_strlen($link->getTitle())];
571 $nextField = $lengths['title']['end'] +
1;
572 $lengths['description'] = ['start' => $nextField, 'end' => $nextField +
mb_strlen($link->getDescription())];
573 $nextField = $lengths['description']['end'] +
1;
574 $lengths['url'] = ['start' => $nextField, 'end' => $nextField +
mb_strlen($link->getUrl())];
575 $nextField = $lengths['url']['end'] +
1;
576 $lengths['tags'] = ['start' => $nextField, 'end' => $nextField +
mb_strlen($tagString)];