]> git.immae.eu Git - github/shaarli/Shaarli.git/commitdiff
Feature: highlight fulltext search results
authorArthurHoaro <arthur@hoa.ro>
Mon, 12 Oct 2020 09:35:55 +0000 (11:35 +0200)
committerArthurHoaro <arthur@hoa.ro>
Fri, 16 Oct 2020 18:31:12 +0000 (20:31 +0200)
How it works:

  1. when a fulltext search is made, Shaarli looks for the first
occurrence position of every term matching the search. No change here,
but we store these positions in an array, in Bookmark's additionalContent.
  2. when formatting bookmarks (through BookmarkFormatter
implementation):
    1. first we insert specific tokens at every search result position
    2. we format the content (escape HTML, apply markdown, etc.)
    3. as a last step, we replace our token with displayable span
elements

Cons: this tightens coupling between search filters and formatters
Pros: it was absolutely necessary not to perform the
search twice. This solution has close to no impact on performance.

Fixes #205

application/bookmark/Bookmark.php
application/bookmark/BookmarkFilter.php
application/formatter/BookmarkDefaultFormatter.php
application/formatter/BookmarkFormatter.php
application/formatter/BookmarkMarkdownFormatter.php
assets/default/scss/shaarli.scss
tpl/default/linklist.html

index fa45d2fc04282a40e061dccfc4eaedbcdead75a1..ea565d1f689d0068b7327211025581546189a4d2 100644 (file)
@@ -54,6 +54,9 @@ class Bookmark
     /** @var bool True if the bookmark can only be seen while logged in */
     protected $private;
 
+    /** @var mixed[] Available to store any additional content for a bookmark. Currently used for search highlight. */
+    protected $additionalContent = [];
+
     /**
      * Initialize a link from array data. Especially useful to create a Bookmark from former link storage format.
      *
@@ -95,6 +98,8 @@ class Bookmark
      *   - the URL with the permalink
      *   - the title with the URL
      *
+     * Also make sure that we do not save search highlights in the datastore.
+     *
      * @throws InvalidBookmarkException
      */
     public function validate(): void
@@ -112,6 +117,9 @@ class Bookmark
         if (empty($this->title)) {
             $this->title = $this->url;
         }
+        if (array_key_exists('search_highlight', $this->additionalContent)) {
+            unset($this->additionalContent['search_highlight']);
+        }
     }
 
     /**
@@ -435,6 +443,44 @@ class Bookmark
         return $this;
     }
 
+    /**
+     * Get the entire additionalContent array.
+     *
+     * @return mixed[] Every additional content entry, indexed by the key it was stored under.
+     */
+    public function getAdditionalContent(): array
+    {
+        return $this->additionalContent;
+    }
+
+    /**
+     * Set a single entry in additionalContent, by key.
+     * An entry already stored under the same key is overwritten.
+     *
+     * @param string     $key   Key under which the value is stored.
+     * @param mixed|null $value Any type of value can be set.
+     *
+     * @return $this
+     */
+    public function addAdditionalContentEntry(string $key, $value): self
+    {
+        $this->additionalContent[$key] = $value;
+
+        return $this;
+    }
+
+    /**
+     * Retrieve one additionalContent entry from its key.
+     *
+     * The key lookup relies on array_key_exists(), therefore an entry explicitly
+     * stored as null is returned as null, and not replaced by $default.
+     *
+     * @param string     $key     Key of the requested entry.
+     * @param mixed|null $default Value returned when no entry exists for $key.
+     *
+     * @return mixed|null The stored value (any type, possibly null), or $default.
+     */
+    public function getAdditionalContentEntry(string $key, $default = null)
+    {
+        if (!array_key_exists($key, $this->additionalContent)) {
+            return $default;
+        }
+
+        return $this->additionalContent[$key];
+    }
+
     /**
      * Rename a tag in tags list.
      *
index 4232f11471148758f83fcd4f148c0b8cefcf9434..c79386ea7ba750db4d1d7d7974ea7564154e943a 100644 (file)
@@ -201,7 +201,7 @@ class BookmarkFilter
             return $this->noFilter($visibility);
         }
 
-        $filtered = array();
+        $filtered = [];
         $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
         $exactRegex = '/"([^"]+)"/';
         // Retrieve exact search terms.
@@ -213,8 +213,8 @@ class BookmarkFilter
         $explodedSearchAnd = array_values(array_filter($explodedSearchAnd));
 
         // Filter excluding terms and update andSearch.
-        $excludeSearch = array();
-        $andSearch = array();
+        $excludeSearch = [];
+        $andSearch = [];
         foreach ($explodedSearchAnd as $needle) {
             if ($needle[0] == '-' && strlen($needle) > 1) {
                 $excludeSearch[] = substr($needle, 1);
@@ -234,33 +234,38 @@ class BookmarkFilter
                 }
             }
 
-            // Concatenate link fields to search across fields.
-            // Adds a '\' separator for exact search terms.
-            $content  = mb_convert_case($link->getTitle(), MB_CASE_LOWER, 'UTF-8') .'\\';
-            $content .= mb_convert_case($link->getDescription(), MB_CASE_LOWER, 'UTF-8') .'\\';
-            $content .= mb_convert_case($link->getUrl(), MB_CASE_LOWER, 'UTF-8') .'\\';
-            $content .= mb_convert_case($link->getTagsString(), MB_CASE_LOWER, 'UTF-8') .'\\';
+            $lengths = [];
+            $content = $this->buildFullTextSearchableLink($link, $lengths);
 
             // Be optimistic
             $found = true;
+            $foundPositions = [];
 
             // First, we look for exact term search
-            for ($i = 0; $i < count($exactSearch) && $found; $i++) {
-                $found = strpos($content, $exactSearch[$i]) !== false;
-            }
-
-            // Iterate over keywords, if keyword is not found,
+            // Then iterate over keywords, if keyword is not found,
             // no need to check for the others. We want all or nothing.
-            for ($i = 0; $i < count($andSearch) && $found; $i++) {
-                $found = strpos($content, $andSearch[$i]) !== false;
+            foreach ([$exactSearch, $andSearch] as $search) {
+                for ($i = 0; $i < count($search) && $found !== false; $i++) {
+                    $found = mb_strpos($content, $search[$i]);
+                    if ($found === false) {
+                        break;
+                    }
+
+                    $foundPositions[] = ['start' => $found, 'end' => $found + mb_strlen($search[$i])];
+                }
             }
 
             // Exclude terms.
-            for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
+            for ($i = 0; $i < count($excludeSearch) && $found !== false; $i++) {
                 $found = strpos($content, $excludeSearch[$i]) === false;
             }
 
-            if ($found) {
+            if ($found !== false) {
+                $link->addAdditionalContentEntry(
+                    'search_highlight',
+                    $this->postProcessFoundPositions($lengths, $foundPositions)
+                );
+
                 $filtered[$id] = $link;
             }
         }
@@ -477,4 +482,74 @@ class BookmarkFilter
 
         return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY);
     }
+
+    /**
+     * Finalize the content of the foundPositions array: sort the search results by
+     * starting position, drop the ones overlapping a previous result, and associate
+     * every remaining result with the bookmark field in which it starts.
+     * Positions in the returned array are relative to the beginning of their field.
+     *
+     * A result which does not start inside any field (e.g. it starts on a field separator)
+     * is not returned.
+     *
+     * @param array $fieldLengths   Start and end positions of every bookmark field in the aggregated bookmark content.
+     * @param array $foundPositions Positions where the search results were found in the aggregated content.
+     *
+     * @return array Updated $foundPositions, by bookmark field.
+     */
+    protected function postProcessFoundPositions(array $fieldLengths, array $foundPositions): array
+    {
+        // Sort results by starting position ASC.
+        // The spaceship operator returns 0 for equal positions, as usort() comparators are expected to.
+        usort($foundPositions, function (array $entryA, array $entryB): int {
+            return $entryA['start'] <=> $entryB['start'];
+        });
+
+        $out = [];
+        $currentMax = -1;
+        foreach ($foundPositions as $foundPosition) {
+            // we do not allow overlapping highlights
+            if ($foundPosition['start'] < $currentMax) {
+                continue;
+            }
+
+            $currentMax = $foundPosition['end'];
+            foreach ($fieldLengths as $part => $length) {
+                // A field's 'end' is the position of the separator following it,
+                // so a result starting exactly there does not belong to the field.
+                if ($foundPosition['start'] < $length['start'] || $foundPosition['start'] >= $length['end']) {
+                    continue;
+                }
+
+                $out[$part][] = [
+                    'start' => $foundPosition['start'] - $length['start'],
+                    'end' => $foundPosition['end'] - $length['start'],
+                ];
+                break;
+            }
+        }
+
+        return $out;
+    }
+
+    /**
+     * Concatenate link fields to search across fields. Adds a '\' separator for exact search terms.
+     * Also populate $lengths array with starting and ending positions of every bookmark field
+     * inside concatenated content.
+     *
+     * Positions are measured on the lowercase version of every field, because this is the content
+     * the search is performed on, and case conversion can change the number of characters.
+     * A field with a null value is handled as an empty string.
+     *
+     * @param Bookmark $link
+     * @param array    $lengths (by reference) Filled with ['start' => int, 'end' => int] for the keys
+     *                          'title', 'description', 'url' and 'tags'. 'end' is the position
+     *                          of the separator following the field.
+     *
+     * @return string Lowercase concatenated fields content.
+     */
+    protected function buildFullTextSearchableLink(Bookmark $link, array &$lengths): string
+    {
+        $fields = [
+            'title' => $link->getTitle(),
+            'description' => $link->getDescription(),
+            'url' => $link->getUrl(),
+            'tags' => $link->getTagsString(),
+        ];
+
+        $content = '';
+        $nextField = 0;
+        foreach ($fields as $name => $value) {
+            $lowercase = mb_convert_case($value ?? '', MB_CASE_LOWER, 'UTF-8');
+            $end = $nextField + mb_strlen($lowercase);
+
+            $lengths[$name] = ['start' => $nextField, 'end' => $end];
+            $content .= $lowercase . '\\';
+
+            // Skip the separator.
+            $nextField = $end + 1;
+        }
+
+        return $content;
+    }
 }
index 9d4a0fa0235c591be9a29d7ad3714d15b97c1e49..d58a5e39dde46ca5f5f0c71ac80e1f60fde8e55b 100644 (file)
@@ -12,10 +12,13 @@ namespace Shaarli\Formatter;
  */
 class BookmarkDefaultFormatter extends BookmarkFormatter
 {
+    const SEARCH_HIGHLIGHT_OPEN = '|@@HIGHLIGHT';
+    const SEARCH_HIGHLIGHT_CLOSE = 'HIGHLIGHT@@|';
+
     /**
      * @inheritdoc
      */
-    public function formatTitle($bookmark)
+    protected function formatTitle($bookmark)
     {
         return escape($bookmark->getTitle());
     }
@@ -23,10 +26,28 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
     /**
      * @inheritdoc
      */
-    public function formatDescription($bookmark)
+    protected function formatTitleHtml($bookmark)
+    {
+        // Search result positions stored for the title, if any.
+        $highlights = $bookmark->getAdditionalContentEntry('search_highlight')['title'] ?? [];
+        $tokenized = $this->tokenizeSearchHighlightField($bookmark->getTitle() ?? '', $highlights);
+
+        // Tokens are replaced after escaping, so that the highlight markup is not escaped itself.
+        return $this->replaceTokens(escape($tokenized));
+    }
+
+    /**
+     * @inheritdoc
+     */
+    protected function formatDescription($bookmark)
     {
         $indexUrl = ! empty($this->contextData['index_url']) ? $this->contextData['index_url'] : '';
-        return format_description(escape($bookmark->getDescription()), $indexUrl);
+        $description = $this->tokenizeSearchHighlightField(
+            $bookmark->getDescription() ?? '',
+            $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
+        );
+
+        return $this->replaceTokens(format_description(escape($description), $indexUrl));
     }
 
     /**
@@ -40,7 +61,27 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
     /**
      * @inheritdoc
      */
-    public function formatTagString($bookmark)
+    protected function formatTagListHtml($bookmark)
+    {
+        $highlights = $bookmark->getAdditionalContentEntry('search_highlight')['tags'] ?? null;
+        if (empty($highlights)) {
+            // Nothing to highlight: same output as the regular tag list.
+            return $this->formatTagList($bookmark);
+        }
+
+        // Tokens are inserted in the tags string, which is then split into a list of tags.
+        $tokenized = $this->tokenizeSearchHighlightField($bookmark->getTagsString(), $highlights);
+        $tagList = $this->filterTagList(explode(' ', $tokenized));
+
+        // Escape first, then turn the tokens into highlight markup.
+        return $this->replaceTokensArray(escape($tagList));
+    }
+
+    /**
+     * @inheritdoc
+     */
+    protected function formatTagString($bookmark)
     {
         return implode(' ', $this->formatTagList($bookmark));
     }
@@ -48,7 +89,7 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
     /**
      * @inheritdoc
      */
-    public function formatUrl($bookmark)
+    protected function formatUrl($bookmark)
     {
         if ($bookmark->isNote() && isset($this->contextData['index_url'])) {
             return rtrim($this->contextData['index_url'], '/') . '/' . escape(ltrim($bookmark->getUrl(), '/'));
@@ -77,6 +118,19 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
         return escape($bookmark->getUrl());
     }
 
+    /**
+     * @inheritdoc
+     */
+    protected function formatUrlHtml($bookmark)
+    {
+        // Search result positions stored for the URL, if any.
+        $highlights = $bookmark->getAdditionalContentEntry('search_highlight')['url'] ?? [];
+        $tokenized = $this->tokenizeSearchHighlightField($bookmark->getUrl() ?? '', $highlights);
+
+        // Tokens are replaced after escaping, so that the highlight markup is not escaped itself.
+        return $this->replaceTokens(escape($tokenized));
+    }
+
     /**
      * @inheritdoc
      */
@@ -84,4 +138,72 @@ class BookmarkDefaultFormatter extends BookmarkFormatter
     {
         return escape($bookmark->getThumbnail());
     }
+
+    /**
+     * Insert search highlight tokens in provided field content based on a list of search result positions.
+     *
+     * The result is built in a single pass over the original content, so positions never have
+     * to be shifted by the length of previously inserted tokens.
+     *
+     * @param string     $fieldContent
+     * @param array|null $positions    List of search results with 'start' and 'end' positions (in characters),
+     *                                 expected to be sorted by 'start' and not to overlap.
+     *                                 An entry starting before the end of the previous one is ignored.
+     *
+     * @return string Updated $fieldContent.
+     */
+    protected function tokenizeSearchHighlightField(string $fieldContent, ?array $positions): string
+    {
+        if (empty($positions)) {
+            return $fieldContent;
+        }
+
+        $content = '';
+        // Position in $fieldContent up to which the content has already been copied.
+        $cursor = 0;
+        foreach ($positions as $position) {
+            if ($position['start'] < $cursor) {
+                continue;
+            }
+
+            $content .= mb_substr($fieldContent, $cursor, $position['start'] - $cursor);
+            $content .= static::SEARCH_HIGHLIGHT_OPEN;
+            $content .= mb_substr($fieldContent, $position['start'], $position['end'] - $position['start']);
+            $content .= static::SEARCH_HIGHLIGHT_CLOSE;
+
+            $cursor = $position['end'];
+        }
+
+        // Remaining content after the last highlight.
+        return $content . mb_substr($fieldContent, $cursor);
+    }
+
+    /**
+     * Turn search highlight tokens into the HTML span used to display highlighted content.
+     *
+     * @param string $fieldContent Content in which highlight tokens have been inserted.
+     *
+     * @return string Content with every open/close token replaced by a span opening/closing tag.
+     */
+    protected function replaceTokens(string $fieldContent): string
+    {
+        $tokens = [static::SEARCH_HIGHLIGHT_OPEN, static::SEARCH_HIGHLIGHT_CLOSE];
+        $markup = ['<span class="search-highlight">', '</span>'];
+
+        return str_replace($tokens, $markup, $fieldContent);
+    }
+
+    /**
+     * Run replaceTokens() on every entry of a list of content strings.
+     *
+     * @param string[] $fieldContents
+     *
+     * @return array Same keys as $fieldContents, with tokens replaced in every entry.
+     */
+    protected function replaceTokensArray(array $fieldContents): array
+    {
+        return array_map(function ($entry) {
+            return $this->replaceTokens($entry);
+        }, $fieldContents);
+    }
 }
index 0042dafe402958905b892cdb2617e767dfdb8d11..e1b7f705e29b0e87ee8841c9b2e298a807c4cc2c 100644 (file)
@@ -2,7 +2,7 @@
 
 namespace Shaarli\Formatter;
 
-use DateTime;
+use DateTimeInterface;
 use Shaarli\Bookmark\Bookmark;
 use Shaarli\Config\ConfigManager;
 
@@ -11,6 +11,29 @@ use Shaarli\Config\ConfigManager;
  *
  * Abstract class processing all bookmark attributes through methods designed to be overridden.
  *
+ * List of available formatted fields:
+ *   - id                 ID
+ *   - shorturl           Unique identifier, used in permalinks
+ *   - url                URL, can be altered in some way, e.g. passing through an HTTP reverse proxy
+ *   - real_url           (legacy) same as `url`
+ *   - url_html           URL to be displayed in HTML content (it can contain HTML tags)
+ *   - title              Title
+ *   - title_html         Title to be displayed in HTML content (it can contain HTML tags)
+ *   - description        Description content. It most likely contains HTML tags
+ *   - thumbnail          Thumbnail: path to local cache file, false if there is none, null if hasn't been retrieved
+ *   - taglist            List of tags (array)
+ *   - taglist_urlencoded List of tags (array) URL encoded: it must be used to create a link to a URL containing a tag
+ *   - taglist_html       List of tags (array) to be displayed in HTML content (it can contain HTML tags)
+ *   - tags               Tags separated by a single whitespace
+ *   - tags_urlencoded    Tags separated by a single whitespace, URL encoded: must be used to create a link
+ *   - sticky             Is sticky (bool)
+ *   - private            Is private (bool)
+ *   - class              Additional CSS class
+ *   - created            Creation DateTime
+ *   - updated            Last edit DateTime
+ *   - timestamp          Creation timestamp
+ *   - updated_timestamp  Last edit timestamp
+ *
  * @package Shaarli\Formatter
  */
 abstract class BookmarkFormatter
@@ -55,13 +78,16 @@ abstract class BookmarkFormatter
         $out['shorturl'] = $this->formatShortUrl($bookmark);
         $out['url'] = $this->formatUrl($bookmark);
         $out['real_url'] = $this->formatRealUrl($bookmark);
+        $out['url_html'] = $this->formatUrlHtml($bookmark);
         $out['title'] = $this->formatTitle($bookmark);
+        $out['title_html'] = $this->formatTitleHtml($bookmark);
         $out['description'] = $this->formatDescription($bookmark);
         $out['thumbnail'] = $this->formatThumbnail($bookmark);
-        $out['urlencoded_taglist'] = $this->formatUrlEncodedTagList($bookmark);
         $out['taglist'] = $this->formatTagList($bookmark);
-        $out['urlencoded_tags'] = $this->formatUrlEncodedTagString($bookmark);
+        $out['taglist_urlencoded'] = $this->formatTagListUrlEncoded($bookmark);
+        $out['taglist_html'] = $this->formatTagListHtml($bookmark);
         $out['tags'] = $this->formatTagString($bookmark);
+        $out['tags_urlencoded'] = $this->formatTagStringUrlEncoded($bookmark);
         $out['sticky'] = $bookmark->isSticky();
         $out['private'] = $bookmark->isPrivate();
         $out['class'] = $this->formatClass($bookmark);
@@ -69,6 +95,7 @@ abstract class BookmarkFormatter
         $out['updated'] = $this->formatUpdated($bookmark);
         $out['timestamp'] = $this->formatCreatedTimestamp($bookmark);
         $out['updated_timestamp'] = $this->formatUpdatedTimestamp($bookmark);
+
         return $out;
     }
 
@@ -135,6 +162,18 @@ abstract class BookmarkFormatter
         return $this->formatUrl($bookmark);
     }
 
+    /**
+     * Format Url Html: to be displayed in HTML content, it can contain HTML tags.
+     * This default implementation returns the same value as formatUrl().
+     *
+     * @param Bookmark $bookmark instance
+     *
+     * @return string formatted Url HTML
+     */
+    protected function formatUrlHtml($bookmark)
+    {
+        return $this->formatUrl($bookmark);
+    }
+
     /**
      * Format Title
      *
@@ -147,6 +186,18 @@ abstract class BookmarkFormatter
         return $bookmark->getTitle();
     }
 
+    /**
+     * Format Title HTML: to be displayed in HTML content, it can contain HTML tags.
+     *
+     * This default implementation delegates to formatTitle(), like the other *Html formatting
+     * methods delegate to their plain counterpart: a formatter overriding only formatTitle()
+     * gets the same processing applied to its HTML title.
+     *
+     * @param Bookmark $bookmark instance
+     *
+     * @return string formatted Title
+     */
+    protected function formatTitleHtml($bookmark)
+    {
+        return $this->formatTitle($bookmark);
+    }
+
     /**
      * Format Description
      *
@@ -190,11 +241,23 @@ abstract class BookmarkFormatter
      *
      * @return array formatted Tags
      */
-    protected function formatUrlEncodedTagList($bookmark)
+    protected function formatTagListUrlEncoded($bookmark)
     {
         return array_map('urlencode', $this->filterTagList($bookmark->getTags()));
     }
 
+    /**
+     * Format Tags HTML: to be displayed in HTML content, it can contain HTML tags.
+     * This default implementation returns the same value as formatTagList().
+     *
+     * @param Bookmark $bookmark instance
+     *
+     * @return array formatted Tags
+     */
+    protected function formatTagListHtml($bookmark)
+    {
+        return $this->formatTagList($bookmark);
+    }
+
     /**
      * Format TagString
      *
@@ -214,9 +277,9 @@ abstract class BookmarkFormatter
      *
      * @return string formatted TagString
      */
-    protected function formatUrlEncodedTagString($bookmark)
+    protected function formatTagStringUrlEncoded($bookmark)
     {
-        return implode(' ', $this->formatUrlEncodedTagList($bookmark));
+        return implode(' ', $this->formatTagListUrlEncoded($bookmark));
     }
 
     /**
@@ -237,7 +300,7 @@ abstract class BookmarkFormatter
      *
      * @param Bookmark $bookmark instance
      *
-     * @return DateTime instance
+     * @return DateTimeInterface instance
      */
     protected function formatCreated(Bookmark $bookmark)
     {
@@ -249,7 +312,7 @@ abstract class BookmarkFormatter
      *
      * @param Bookmark $bookmark instance
      *
-     * @return DateTime instance
+     * @return DateTimeInterface instance
      */
     protected function formatUpdated(Bookmark $bookmark)
     {
index 5d244d4c92de249721f0c1c6e18ab79ba2222752..f7714be9ed34df27a70f971aa28aeef8e9c33b48 100644 (file)
@@ -56,7 +56,10 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
             return parent::formatDescription($bookmark);
         }
 
-        $processedDescription = $bookmark->getDescription();
+        $processedDescription = $this->tokenizeSearchHighlightField(
+            $bookmark->getDescription() ?? '',
+            $bookmark->getAdditionalContentEntry('search_highlight')['description'] ?? []
+        );
         $processedDescription = $this->filterProtocols($processedDescription);
         $processedDescription = $this->formatHashTags($processedDescription);
         $processedDescription = $this->reverseEscapedHtml($processedDescription);
@@ -65,6 +68,7 @@ class BookmarkMarkdownFormatter extends BookmarkDefaultFormatter
             ->setBreaksEnabled(true)
             ->text($processedDescription);
         $processedDescription = $this->sanitizeHtml($processedDescription);
+        $processedDescription = $this->replaceTokens($processedDescription);
 
         if (!empty($processedDescription)) {
             $processedDescription = '<div class="markdown">'. $processedDescription . '</div>';
index a528adb0dbe7931ea79c0cf0875b85951a3038c8..2f49bbd21d50690d0e070553b6b3b9e131d5e74c 100644 (file)
@@ -671,6 +671,10 @@ body,
       content: '';
     }
   }
+
+  .search-highlight {
+    background-color: yellow;
+  }
 }
 
 .linklist-item-buttons {
index e1fb54dd463d2626c99f83f5e0f2f061c7b6fc51..beab0eac81ba8d0912d09ec329dba5246a87b8b1 100644 (file)
                   <i class="fa fa-sticky-note" aria-hidden="true"></i>
                 {/if}
 
-                <span class="linklist-link">{$value.title}</span>
+                <span class="linklist-link">{$value.title_html}</span>
               </a>
             </h2>
           </div>
                 {$tag_counter=count($value.taglist)}
                 {loop="value.taglist"}
                   <span class="label label-tag" title="{$strAddTag}">
-                    <a href="{$base_path}/add-tag/{$value1.urlencoded_taglist.$key2}">{$value}</a>
+                    <a href="{$base_path}/add-tag/{$value1.taglist_urlencoded.$key2}">{$value1.taglist_html.$key2}</a>
                   </span>
                   {if="$tag_counter - 1 != $counter"}&middot;{/if}
                 {/loop}
                 {ignore}do not add space or line break between these div - Firefox issue{/ignore}
                 class="linklist-item-infos-url pure-u-lg-5-12 pure-u-1">
                 <a href="{$value.real_url}" aria-label="{$value.title}" title="{$value.title}">
-                  <i class="fa fa-link" aria-hidden="true"></i> {$value.url}
+                  <i class="fa fa-link" aria-hidden="true"></i> {$value.url_html}
                 </a>
                 <div class="linklist-item-buttons pure-u-0 pure-u-lg-visible">
                   <a href="#" aria-label="{$strFold}" title="{$strFold}" class="fold-button"><i class="fa fa-chevron-up" aria-hidden="true"></i></a>