From f24896b237e40718fb6eaa2869592eb0855a47fd Mon Sep 17 00:00:00 2001
From: VirtualTam <virtualtam@flibidi.net>
Date: Mon, 3 Dec 2018 01:10:39 +0100
Subject: namespacing: \Shaarli\Bookmark\LinkDB

Signed-off-by: VirtualTam <virtualtam@flibidi.net>
---
 application/bookmark/LinkDB.php | 601 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 601 insertions(+)
 create mode 100644 application/bookmark/LinkDB.php

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
new file mode 100644
index 00000000..3b77422a
--- /dev/null
+++ b/application/bookmark/LinkDB.php
@@ -0,0 +1,601 @@
+<?php
+
+namespace Shaarli\Bookmark;
+
+use ArrayAccess;
+use Countable;
+use DateTime;
+use Iterator;
+use LinkFilter;
+use LinkNotFoundException;
+use Shaarli\Exceptions\IOException;
+use Shaarli\FileUtils;
+
+/**
+ * Data storage for links.
+ *
+ * This object behaves like an associative array.
+ *
+ * Example:
+ *    $myLinks = new LinkDB();
+ *    echo $myLinks[350]['title'];
+ *    foreach ($myLinks as $link)
+ *       echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
+ *
+ * Available keys:
+ *  - id:       primary key, incremental integer identifier (persistent)
+ *  - description: description of the entry
+ *  - created:  creation date of this entry, DateTime object.
+ *  - updated:  last modification date of this entry, DateTime object.
+ *  - private:  Is this link private? 0=no, other value=yes
+ *  - tags:     tags attached to this entry (separated by spaces)
+ *  - title     Title of the link
+ *  - url       URL of the link. Used for displayable links (no redirector, relative, etc.).
+ *              Can be absolute or relative.
+ *              Relative URLs are permalinks (e.g.'?m-ukcw')
+ *  - real_url  Absolute processed URL.
+ *  - shorturl  Permalink smallhash
+ *
+ * Implements 3 interfaces:
+ *  - ArrayAccess: behaves like an associative array;
+ *  - Countable:   there is a count() method;
+ *  - Iterator:    usable in foreach () loops.
+ *
+ * ID mechanism:
+ *   ArrayAccess is implemented in a way that will allow to access a link
+ *   with the unique identifier ID directly with $link[ID].
+ *   Note that it's not the real key of the link array attribute.
+ *   This mechanism is in place to have persistent link IDs,
+ *   even though the internal array is reordered by date.
+ *   Example:
+ *     - DB: link #1 (2010-01-01) link #2 (2016-01-01)
+ *     - Order: #2 #1
+ *     - Import links containing: link #3 (2013-01-01)
+ *     - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
+ *     - Real order: #2 #3 #1
+ */
+class LinkDB implements Iterator, Countable, ArrayAccess
+{
+    // Links are stored as a PHP serialized string
+    private $datastore;
+
+    // Link date storage format
+    const LINK_DATE_FORMAT = 'Ymd_His';
+
+    // List of links (array)
+    //  - key:   array offset (not the persistent link ID; $ids maps IDs to offsets),
+    //  - value: associative array (keys: title, description...)
+    private $links;
+
+    // List of all recorded URLs (key=url, value=link offset)
+    // for fast reverse search (url-->link offset)
+    private $urls;
+
+    /**
+     * @var array List of all links IDS mapped with their array offset.
+     *            Map: id->offset.
+     */
+    protected $ids;
+
+    // List of offset keys (for the Iterator interface implementation)
+    private $keys;
+
+    // Position in the $this->keys array (for the Iterator interface)
+    private $position;
+
+    // Is the user logged in? (used to filter private links)
+    private $loggedIn;
+
+    // Hide public links
+    private $hidePublicLinks;
+
+    // link redirector set in user settings.
+    private $redirector;
+
+    /**
+     * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
+     *
+     * Example:
+     *   anonym.to needs clean URL while dereferer.org needs urlencoded URL.
+     *
+     * @var boolean $redirectorEncode parameter: true or false
+     */
+    private $redirectorEncode;
+
+    /**
+     * Creates a new LinkDB
+     *
+     * Checks if the datastore exists; else, attempts to create a dummy one.
+     *
+     * @param string $datastore datastore file path.
+     * @param boolean $isLoggedIn is the user logged in?
+     * @param boolean $hidePublicLinks if true all links are private.
+     * @param string $redirector link redirector set in user settings.
+     * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
+     */
+    public function __construct(
+        $datastore,
+        $isLoggedIn,
+        $hidePublicLinks,
+        $redirector = '',
+        $redirectorEncode = true
+    ) {
+        // Only a strict boolean true turns on URL encoding behind the redirector.
+        $this->redirectorEncode = ($redirectorEncode === true);
+        $this->redirector = $redirector;
+        $this->hidePublicLinks = $hidePublicLinks;
+        $this->loggedIn = $isLoggedIn;
+        $this->datastore = $datastore;
+        $this->check(); // creates a dummy datastore when the file is missing
+        $this->read();
+    }
+
+    /**
+     * Countable - Counts elements of an object
+     */
+    public function count()
+    {
+        return count($this->links); // number of links currently held in memory
+    }
+
+    /**
+     * ArrayAccess - Assigns a value to the specified offset
+     */
+    public function offsetSet($offset, $value)
+    {
+        // TODO: use exceptions instead of "die"
+        if (!$this->loggedIn) {
+            die(t('You are not authorized to add a link.'));
+        }
+        if (!isset($value['id']) || empty($value['url'])) {
+            die(t('Internal Error: A link should always have an id and URL.'));
+        }
+        if (($offset !== null && !is_int($offset)) || !is_int($value['id'])) {
+            die(t('You must specify an integer as a key.'));
+        }
+        if ($offset !== null && $offset !== $value['id']) {
+            die(t('Array offset and link ID must be equal.'));
+        }
+
+        // Reuse the real offset of an existing link; otherwise reserve a brand new key.
+        $realOffset = $this->getLinkOffset($offset);
+        if ($realOffset === null) {
+            $this->links[] = null; // count() could equal a key still in use after an unset
+            end($this->links);
+            $realOffset = key($this->links);
+        }
+        $this->links[$realOffset] = $value;
+        $this->urls[$value['url']] = $realOffset;
+        $this->ids[$value['id']] = $realOffset;
+    }
+
+    /**
+     * ArrayAccess - Whether or not an offset exists
+     */
+    public function offsetExists($offset)
+    {
+        return array_key_exists($this->getLinkOffset($offset), $this->links); // offset is null for an unknown ID
+    }
+
+    /**
+     * ArrayAccess - Unsets an offset
+     */
+    public function offsetUnset($offset)
+    {
+        if (!$this->loggedIn) {
+            die('You are not authorized to delete a link.'); // TODO: raise an exception
+        }
+        $realOffset = $this->getLinkOffset($offset);
+        if ($realOffset === null) {
+            return; // unknown ID: nothing to delete
+        }
+        // $urls is keyed by URL, $ids by link ID (not by offset), $links by real offset.
+        unset($this->urls[$this->links[$realOffset]['url']], $this->ids[$offset], $this->links[$realOffset]);
+    }
+
+    /**
+     * ArrayAccess - Returns the value at specified offset
+     */
+    public function offsetGet($offset)
+    {
+        $position = $this->getLinkOffset($offset); // null when the ID is unknown
+        return !isset($this->links[$position]) ? null : $this->links[$position];
+    }
+
+    /**
+     * Iterator - Returns the current element
+     */
+    public function current()
+    {
+        return $this[$this->keys[$this->position]]; // $keys holds link IDs, resolved through offsetGet()
+    }
+
+    /**
+     * Iterator - Returns the key of the current element
+     */
+    public function key()
+    {
+        return $this->keys[$this->position]; // a link ID: rewind() fills $keys from the ID map
+    }
+
+    /**
+     * Iterator - Moves forward to next element
+     */
+    public function next()
+    {
+        $this->position += 1; // validity of the new position is checked by valid()
+    }
+
+    /**
+     * Iterator - Rewinds the Iterator to the first element
+     *
+     * Entries are sorted by date (latest first)
+     */
+    public function rewind()
+    {
+        $this->keys = array_keys($this->ids); // snapshot of the link IDs, in the order of the ID map
+        $this->position = 0;
+    }
+
+    /**
+     * Iterator - Checks if current position is valid
+     */
+    public function valid()
+    {
+        return isset($this->keys[$this->position]); // false once the position runs past the last key
+    }
+
+    /**
+     * Checks if the DB directory and file exist
+     *
+     * If no DB file is found, creates a dummy DB.
+     */
+    private function check()
+    {
+        if (file_exists($this->datastore)) {
+            return;
+        }
+
+        // No datastore file yet: seed one with a public and a private example link.
+        $this->links = array();
+        $link = array(
+            'id' => 1,
+            'title' => t('The personal, minimalist, super-fast, database free, bookmarking service'),
+            'url' => 'https://shaarli.readthedocs.io',
+            'description' => t(
+                'Welcome to Shaarli! This is your first public bookmark. '
+                . 'To edit or delete me, you must first login.
+
+To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page.
+
+You use the community supported version of the original Shaarli project, by Sebastien Sauvage.'
+            ),
+            'private' => 0,
+            'created' => new DateTime(),
+            'tags' => 'opensource software'
+        );
+        $link['shorturl'] = link_small_hash($link['created'], $link['id']);
+        $this->links[1] = $link;
+
+        $link = array(
+            'id' => 0,
+            'title' => t('My secret stuff... - Pastebin.com'),
+            'url' => 'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
+            'description' => t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'),
+            'private' => 1,
+            'created' => new DateTime('1 minute ago'), // one minute older than the public example link
+            'tags' => 'secretstuff',
+        );
+        $link['shorturl'] = link_small_hash($link['created'], $link['id']);
+        $this->links[0] = $link;
+
+        // Persist the seed links; write() also sorts them and rebuilds the lookup maps.
+        $this->write();
+    }
+
+    /**
+     * Reads database from disk to memory
+     */
+    private function read()
+    {
+        // Reset the lookup maps first so they are arrays even when the early return below is taken.
+        $this->urls = [];
+        $this->ids = [];
+
+        // Public links are hidden and user not logged in => nothing to show
+        if ($this->hidePublicLinks && !$this->loggedIn) {
+            $this->links = array();
+            return;
+        }
+
+        $this->links = FileUtils::readFlatDB($this->datastore, []);
+        foreach ($this->links as $key => &$link) {
+            if (!$this->loggedIn && $link['private'] != 0) {
+                // Visitors must never see private links: drop them from memory.
+                unset($this->links[$key]);
+                continue;
+            }
+
+            // Sanitize data fields.
+            sanitizeLink($link);
+
+            // Remove private tags if the user is not logged in.
+            if (!$this->loggedIn) {
+                $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']);
+            }
+
+            // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
+            if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
+                $link['real_url'] = $this->redirector;
+                if ($this->redirectorEncode) {
+                    $link['real_url'] .= urlencode(unescape($link['url']));
+                } else {
+                    $link['real_url'] .= $link['url'];
+                }
+            } else {
+                $link['real_url'] = $link['url'];
+            }
+
+            // To be able to load links before running the update, and prepare the update
+            if (!isset($link['created'])) {
+                $link['id'] = $link['linkdate'];
+                $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
+                if (!empty($link['updated'])) {
+                    $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
+                }
+                $link['shorturl'] = smallHash($link['linkdate']);
+            }
+
+            $this->urls[$link['url']] = $key;
+            $this->ids[$link['id']] = $key;
+        }
+    }
+
+    /**
+     * Saves the database from memory to disk
+     *
+     * @throws IOException the datastore is not writable
+     */
+    private function write()
+    {
+        $this->reorder(); // sort newest first and rebuild the URL/ID lookup maps before writing
+        FileUtils::writeFlatDB($this->datastore, $this->links);
+    }
+
+    /**
+     * Saves the database from memory to disk
+     *
+     * @param string $pageCacheDir page cache directory
+     */
+    public function save($pageCacheDir)
+    {
+        if (!$this->loggedIn) {
+            // TODO: raise an Exception instead
+            die('You are not authorized to change the database.');
+        }
+
+        $this->write();
+        // NOTE(review): invalidateCaches() is defined elsewhere; presumably it purges the page cache - confirm.
+        invalidateCaches($pageCacheDir);
+    }
+
+    /**
+     * Returns the link for a given URL, or False if it does not exist.
+     *
+     * @param string $url URL to search for
+     *
+     * @return mixed the existing link if it exists, else 'false'
+     */
+    public function getLinkFromUrl($url)
+    {
+        if (!isset($this->urls[$url])) {
+            return false; // URL not recorded
+        }
+        return $this->links[$this->urls[$url]];
+    }
+
+    /**
+     * Returns the shaare corresponding to a smallHash.
+     *
+     * @param string $request QUERY_STRING server parameter.
+     *
+     * @return array $filtered array containing permalink data.
+     *
+     * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
+     */
+    public function filterHash($request)
+    {
+        // Only the first six characters of the query string are used as the small hash.
+        $smallHash = substr($request, 0, 6);
+        return (new LinkFilter($this->links))->filter(LinkFilter::$FILTER_HASH, $smallHash);
+    }
+
+    /**
+     * Returns the list of articles for a given day.
+     *
+     * @param string $request day to filter. Format: YYYYMMDD.
+     *
+     * @return array list of shaare found.
+     */
+    public function filterDay($request)
+    {
+        // Delegate to the day filter, working on the raw link array.
+        return (new LinkFilter($this->links))->filter(LinkFilter::$FILTER_DAY, $request);
+    }
+
+    /**
+     * Filter links according to search parameters.
+     *
+     * @param array $filterRequest Search request content. Supported keys:
+     *                                - searchtags: list of tags
+     *                                - searchterm: term search
+     * @param bool $casesensitive Optional: Perform case sensitive filter
+     * @param string $visibility return only all/private/public links
+     * @param string $untaggedonly return only untagged links
+     *
+     * @return array filtered links, all links if no suitable filter was provided.
+     */
+    public function filterSearch(
+        $filterRequest = array(),
+        $casesensitive = false,
+        $visibility = 'all',
+        $untaggedonly = false
+    ) {
+        // Missing search keys fall back to an empty string; provided ones are escaped.
+        $tags = '';
+        $term = '';
+        if (isset($filterRequest['searchtags'])) {
+            $tags = escape($filterRequest['searchtags']);
+        }
+        if (isset($filterRequest['searchterm'])) {
+            $term = escape($filterRequest['searchterm']);
+        }
+        $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext"
+        return (new LinkFilter($this))->filter($type, [$tags, $term], $casesensitive, $visibility, $untaggedonly);
+    }
+
+    /**
+     * Returns the list tags appearing in the links with the given tags
+     *
+     * @param array $filteringTags tags selecting the links to consider
+     * @param string $visibility process only all/private/public links
+     *
+     * @return array tag => linksCount
+     */
+    public function linksCountPerTag($filteringTags = [], $visibility = 'all')
+    {
+        $links = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility);
+        $tags = [];
+        $caseMapping = [];
+        foreach ($links as $link) {
+            // PREG_SPLIT_NO_EMPTY already discards empty chunks; no empty() test here,
+            // because empty('0') is true and would silently drop the tag "0".
+            foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) {
+                $lower = strtolower($tag);
+                // The first case found will be displayed.
+                if (!isset($caseMapping[$lower])) {
+                    $caseMapping[$lower] = $tag;
+                    $tags[$tag] = 0;
+                }
+                $tags[$caseMapping[$lower]]++;
+            }
+        }
+
+        /*
+         * Sort tags by DESC occurrences, then ASC alphabetically for equal values.
+         *
+         * arsort() leaves the order of equal values undefined, and array_multisort()
+         * re-indexes integer keys, which would rename numeric tags ("2018") to 0, 1, 2...
+         * uksort() keeps every key, so it is used with an explicit two-level comparison.
+         * @see https://github.com/shaarli/Shaarli/issues/1142
+         */
+        uksort($tags, function ($a, $b) use ($tags) {
+            $byCount = $tags[$b] - $tags[$a];
+            return $byCount !== 0 ? $byCount : strcmp((string) $a, (string) $b);
+        });
+        return $tags;
+    }
+
+    /**
+     * Rename or delete a tag across all links.
+     *
+     * @param string $from Tag to rename
+     * @param string $to New tag. If none is provided, the from tag will be deleted
+     *
+     * @return array|bool List of altered links or false on error
+     */
+    public function renameTag($from, $to)
+    {
+        if (empty($from)) {
+            return false;
+        }
+        $delete = empty($to);
+        $linksToAlter = $this->filterSearch(['searchtags' => $from], true); // true: case-sensitive tag search
+        foreach ($linksToAlter as $key => &$value) {
+            $tags = preg_split('/\s+/', trim($value['tags']));
+            // Strict search: a loose == would treat numeric-looking tags ('1e3', '1000') as equal.
+            if (($pos = array_search((string) $from, $tags, true)) !== false) {
+                if ($delete) {
+                    unset($tags[$pos]); // Remove tag.
+                } else {
+                    $tags[$pos] = trim($to);
+                }
+                $value['tags'] = trim(implode(' ', array_unique($tags)));
+                $this[$value['id']] = $value;
+            }
+        }
+        unset($value); // break the by-reference binding left behind by foreach
+        return $linksToAlter;
+    }
+
+    /**
+     * Returns the list of days containing articles (oldest first)
+     * Output: An array containing days (in format YYYYMMDD).
+     */
+    public function days()
+    {
+        // Use the day as array key so that each day is recorded only once.
+        $seen = array();
+        foreach ($this->links as $entry) {
+            $seen[$entry['created']->format('Ymd')] = 0;
+        }
+        $days = array_keys($seen);
+        sort($days);
+        return $days;
+    }
+
+    /**
+     * Reorder links by creation date (newest first).
+     *
+     * Also update the urls and ids mapping arrays.
+     *
+     * @param string $order ASC|DESC
+     */
+    public function reorder($order = 'DESC')
+    {
+        // +1 keeps the newest link first (DESC); -1 flips the date comparison for ASC.
+        $direction = ($order === 'ASC') ? -1 : 1;
+        usort($this->links, function ($a, $b) use ($direction) {
+            if (isset($a['sticky']) && isset($b['sticky']) && $a['sticky'] !== $b['sticky']) {
+                return $a['sticky'] ? -1 : 1;
+            }
+            return $a['created'] < $b['created'] ? $direction : -$direction;
+        });
+        // usort() renumbers the offsets, so both lookup maps are rebuilt from scratch.
+        $this->urls = [];
+        $this->ids = [];
+        foreach ($this->links as $offset => $entry) {
+            $this->ids[$entry['id']] = $offset;
+            $this->urls[$entry['url']] = $offset;
+        }
+    }
+
+    /**
+     * Return the next key for link creation.
+     * E.g. If the last ID is 597, the next will be 598.
+     *
+     * @return int next ID.
+     */
+    public function getNextId()
+    {
+        if (empty($this->ids)) {
+            return 0; // no ID recorded yet
+        }
+        return max(array_keys($this->ids)) + 1;
+    }
+
+    /**
+     * Returns a link offset in links array from its unique ID.
+     *
+     * @param int $id Persistent ID of a link.
+     *
+     * @return int Real offset in local array, or null if doesn't exist.
+     */
+    protected function getLinkOffset($id)
+    {
+        if (!isset($this->ids[$id])) {
+            return null; // unknown ID
+        }
+        return $this->ids[$id];
+    }
+}
-- 
cgit v1.2.3


From 6696729b88e67504fdd333cbaab43a63c3617d86 Mon Sep 17 00:00:00 2001
From: VirtualTam <virtualtam@flibidi.net>
Date: Mon, 3 Dec 2018 01:22:45 +0100
Subject: namespacing: \Shaarli\Bookmark\LinkFilter

Signed-off-by: VirtualTam <virtualtam@flibidi.net>
---
 application/bookmark/LinkDB.php                    |  26 +-
 application/bookmark/LinkFilter.php                | 449 +++++++++++++++++++++
 .../bookmark/exception/LinkNotFoundException.php   |  15 +
 3 files changed, 477 insertions(+), 13 deletions(-)
 create mode 100644 application/bookmark/LinkFilter.php
 create mode 100644 application/bookmark/exception/LinkNotFoundException.php

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
index 3b77422a..6041c088 100644
--- a/application/bookmark/LinkDB.php
+++ b/application/bookmark/LinkDB.php
@@ -6,8 +6,8 @@ use ArrayAccess;
 use Countable;
 use DateTime;
 use Iterator;
-use LinkFilter;
-use LinkNotFoundException;
+use Shaarli\Bookmark\LinkFilter;
+use Shaarli\Bookmark\Exception\LinkNotFoundException;
 use Shaarli\Exceptions\IOException;
 use Shaarli\FileUtils;
 
@@ -107,10 +107,10 @@ class LinkDB implements Iterator, Countable, ArrayAccess
      *
      * Checks if the datastore exists; else, attempts to create a dummy one.
      *
-     * @param string $datastore datastore file path.
-     * @param boolean $isLoggedIn is the user logged in?
-     * @param boolean $hidePublicLinks if true all links are private.
-     * @param string $redirector link redirector set in user settings.
+     * @param string  $datastore        datastore file path.
+     * @param boolean $isLoggedIn       is the user logged in?
+     * @param boolean $hidePublicLinks  if true all links are private.
+     * @param string  $redirector       link redirector set in user settings.
      * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
      */
     public function __construct(
@@ -426,12 +426,12 @@ You use the community supported version of the original Shaarli project, by Seba
     /**
      * Filter links according to search parameters.
      *
-     * @param array $filterRequest Search request content. Supported keys:
+     * @param array  $filterRequest  Search request content. Supported keys:
      *                                - searchtags: list of tags
      *                                - searchterm: term search
-     * @param bool $casesensitive Optional: Perform case sensitive filter
-     * @param string $visibility return only all/private/public links
-     * @param string $untaggedonly return only untagged links
+     * @param bool   $casesensitive  Optional: Perform case sensitive filter
+     * @param string $visibility     return only all/private/public links
+     * @param bool   $untaggedonly   return only untagged links
      *
      * @return array filtered links, all links if no suitable filter was provided.
      */
@@ -457,8 +457,8 @@ You use the community supported version of the original Shaarli project, by Seba
     /**
      * Returns the list tags appearing in the links with the given tags
      *
-     * @param array $filteringTags tags selecting the links to consider
-     * @param string $visibility process only all/private/public links
+     * @param array  $filteringTags tags selecting the links to consider
+     * @param string $visibility    process only all/private/public links
      *
      * @return array tag => linksCount
      */
@@ -500,7 +500,7 @@ You use the community supported version of the original Shaarli project, by Seba
      * Rename or delete a tag across all links.
      *
      * @param string $from Tag to rename
-     * @param string $to New tag. If none is provided, the from tag will be deleted
+     * @param string $to   New tag. If none is provided, the from tag will be deleted
      *
      * @return array|bool List of altered links or false on error
      */
diff --git a/application/bookmark/LinkFilter.php b/application/bookmark/LinkFilter.php
new file mode 100644
index 00000000..9b966307
--- /dev/null
+++ b/application/bookmark/LinkFilter.php
@@ -0,0 +1,449 @@
+<?php
+
+namespace Shaarli\Bookmark;
+
+use Exception;
+use Shaarli\Bookmark\Exception\LinkNotFoundException;
+
+/**
+ * Class LinkFilter.
+ *
+ * Perform search and filter operation on link data list.
+ */
+class LinkFilter
+{
+    /**
+     * @var string permalinks.
+     */
+    public static $FILTER_HASH = 'permalink';
+
+    /**
+     * @var string text search.
+     */
+    public static $FILTER_TEXT = 'fulltext';
+
+    /**
+     * @var string tag filter.
+     */
+    public static $FILTER_TAG = 'tags';
+
+    /**
+     * @var string filter by day.
+     */
+    public static $FILTER_DAY = 'FILTER_DAY';
+
+    /**
+     * @var string Allowed characters for hashtags (regex syntax).
+     */
+    public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
+
+    /**
+     * @var LinkDB all available links.
+     */
+    private $links;
+
+    /**
+     * @param LinkDB $links initialization.
+     */
+    public function __construct($links)
+    {
+        $this->links = $links; // callers pass either a LinkDB or a plain array of links
+    }
+
+    /**
+     * Filter links according to parameters.
+     *
+     * @param string $type          Type of filter (eg. tags, permalink, etc.).
+     * @param mixed  $request       Filter content.
+     * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
+     * @param string $visibility    Optional: return only all/private/public links
+     * @param bool   $untaggedonly  Optional: return only untagged links. Applies only if $type includes FILTER_TAG
+     *
+     * @return array filtered link list.
+     */
+    public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false)
+    {
+        // Strict check: with a loose in_array(), values such as 0 or true could pass as valid.
+        if (!in_array($visibility, ['all', 'public', 'private'], true)) {
+            $visibility = 'all';
+        }
+
+        switch ($type) {
+            case self::$FILTER_HASH:
+                return $this->filterSmallHash($request);
+            case self::$FILTER_TAG | self::$FILTER_TEXT: // == "vuotext"
+                // $request is a [tags, terms] pair; with both parts blank no search is applied.
+                $noRequest = empty($request) || (empty($request[0]) && empty($request[1]));
+                if ($noRequest) {
+                    if ($untaggedonly) {
+                        return $this->filterUntagged($visibility);
+                    }
+                    return $this->noFilter($visibility);
+                }
+                // Each requested filter narrows down the result of the previous one.
+                $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->links;
+                if (!empty($request[0])) {
+                    $filtered = (new LinkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility);
+                }
+                if (!empty($request[1])) {
+                    $filtered = (new LinkFilter($filtered))->filterFulltext($request[1], $visibility);
+                }
+                return $filtered;
+            case self::$FILTER_TEXT:
+                return $this->filterFulltext($request, $visibility);
+            case self::$FILTER_TAG:
+                if ($untaggedonly) {
+                    return $this->filterUntagged($visibility);
+                } else {
+                    return $this->filterTags($request, $casesensitive, $visibility);
+                }
+            case self::$FILTER_DAY:
+                return $this->filterDay($request);
+            default:
+                // Unknown filter type: only the visibility restriction applies.
+                return $this->noFilter($visibility);
+        }
+    }
+
+    /**
+     * Unknown filter, but handle private only.
+     *
+     * @param string $visibility Optional: return only all/private/public links
+     *
+     * @return array filtered links.
+     */
+    private function noFilter($visibility = 'all')
+    {
+        if ($visibility === 'all') {
+            return $this->links;
+        }
+
+        // Keep a link only when its privacy flag matches the requested visibility.
+        $kept = array();
+        foreach ($this->links as $id => $link) {
+            $wanted = $link['private'] ? $visibility === 'private' : $visibility === 'public';
+            if ($wanted) {
+                $kept[$id] = $link;
+            }
+        }
+
+        return $kept;
+    }
+
+    /**
+     * Returns the shaare corresponding to a smallHash.
+     *
+     * @param string $smallHash permalink hash.
+     *
+     * @return array $filtered array containing permalink data.
+     *
+     * @throws \Shaarli\Bookmark\Exception\LinkNotFoundException if the smallhash doesn't match any link.
+     */
+    private function filterSmallHash($smallHash)
+    {
+        foreach ($this->links as $key => $l) {
+            // Strict comparison on purpose: with a loose ==, PHP compares two
+            // numeric-looking strings as numbers, so hashes such as '0e1234'
+            // and '0e5678' would both evaluate to 0 and be considered equal,
+            // resolving a permalink to the wrong link.
+            if ($smallHash === $l['shorturl']) {
+                // A hash identifies a single link: stop at the first match.
+                return array($key => $l);
+            }
+        }
+
+        // Every match returns from inside the loop, so getting here
+        // means that no link carries the requested hash.
+        throw new LinkNotFoundException();
+    }
+
+    /**
+     * Returns the list of links corresponding to a full-text search
+     *
+     * Searches:
+     *  - in the URLs, title and description;
+     *  - are case-insensitive;
+     *  - terms surrounded by quotes " are exact terms search.
+     *  - terms starting with a dash - are excluded (except exact terms).
+     *
+     * Example:
+     *    print_r($mydb->filterFulltext('hollandais'));
+     *
+     * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
+     *  - allows to perform searches on Unicode text
+     *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
+     *
+     * @param string $searchterms search query.
+     * @param string $visibility  Optional: return only all/private/public links.
+     *
+     * @return array search results.
+     */
+    private function filterFulltext($searchterms, $visibility = 'all')
+    {
+        if (empty($searchterms)) {
+            return $this->noFilter($visibility);
+        }
+
+        $filtered = array();
+        $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');
+        $exactRegex = '/"([^"]+)"/';
+        // Retrieve exact search terms: the parts of the query enclosed in double quotes.
+        preg_match_all($exactRegex, $search, $exactSearch);
+        $exactSearch = array_values(array_filter($exactSearch[1]));
+
+        // Remove the quoted parts; what remains is split on spaces into AND terms.
+        $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search)));
+        $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); // also drops a lone "0" term
+
+        // A leading dash marks a term to exclude; all other terms must be present.
+        $excludeSearch = array();
+        $andSearch = array();
+        foreach ($explodedSearchAnd as $needle) {
+            if ($needle[0] == '-' && strlen($needle) > 1) {
+                $excludeSearch[] = substr($needle, 1);
+            } else {
+                $andSearch[] = $needle;
+            }
+        }
+
+        $keys = array('title', 'description', 'url', 'tags');
+
+        // Iterate over every stored link.
+        foreach ($this->links as $id => $link) {
+            // Skip links whose privacy flag does not match the requested visibility.
+            if ($visibility !== 'all') {
+                if (!$link['private'] && $visibility === 'private') {
+                    continue;
+                } elseif ($link['private'] && $visibility === 'public') {
+                    continue;
+                }
+            }
+
+            // Concatenate the lowercased link fields to search across fields.
+            // A '\' separator is appended after each field, for exact search terms.
+            $content = '';
+            foreach ($keys as $key) {
+                $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\';
+            }
+
+            // Be optimistic
+            $found = true;
+
+            // First, every exact (quoted) term must appear in the content.
+            for ($i = 0; $i < count($exactSearch) && $found; $i++) {
+                $found = strpos($content, $exactSearch[$i]) !== false;
+            }
+
+            // Iterate over keywords, if keyword is not found,
+            // no need to check for the others. We want all or nothing.
+            for ($i = 0; $i < count($andSearch) && $found; $i++) {
+                $found = strpos($content, $andSearch[$i]) !== false;
+            }
+
+            // Finally, none of the excluded terms may appear.
+            for ($i = 0; $i < count($excludeSearch) && $found; $i++) {
+                $found = strpos($content, $excludeSearch[$i]) === false;
+            }
+
+            if ($found) {
+                $filtered[$id] = $link;
+            }
+        }
+
+        return $filtered;
+    }
+
+    /**
+     * Generate a regex lookahead fragment out of a single tag.
+     *
+     * @param string $tag tag to generate the regex from; may start with '-' to negate, contain '*' as wildcard
+     *
+     * @return string generated regex fragment, or an empty string for '', '-' and '*'
+     */
+    private static function tag2regex($tag)
+    {
+        $len = strlen($tag);
+        if (!$len || $tag === "-" || $tag === "*") {
+            // nothing to search for, return an empty fragment
+            return '';
+        }
+        if ($tag[0] === "-") {
+            // query is negated
+            $i = 1; // use offset to start after '-' character
+            $regex = '(?!'; // create negative lookahead
+        } else {
+            $i = 0; // start at first character
+            $regex = '(?='; // use positive lookahead
+        }
+        $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning
+        // walk the tag, alternating between '*' placeholders and literal runs
+        for (; $i < $len; $i++) {
+            if ($tag[$i] === '*') {
+                // placeholder found: lazily match any run of non-space characters
+                $regex .= '[^ ]*?';
+            } else {
+                // literal run: find where it stops (next '*' or end of the tag)
+                $offset = strpos($tag, '*', $i);
+                if ($offset === false) {
+                    // no placeholder found, set offset to end of string
+                    $offset = $len;
+                }
+                // subtract one so $offset is the index of the last literal character
+                $offset -= 1;
+                // append the literal run, escaping regex metacharacters and the '/' delimiter
+                $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/');
+                // jump to the end of the run; the loop's $i++ then moves past it
+                $i = $offset;
+            }
+        }
+        $regex .= '(?:$| ))'; // after the tag may only be a space or the end
+        return $regex;
+    }
+
+    /**
+     * Returns the list of links associated with a given list of tags
+     *
+     * Tags are separated by spaces or commas, e.g. filterTags('linux programming'); a tag may start
+     * with '-' to exclude it or contain '*' as a wildcard. #hashtags in descriptions are searched too.
+     *
+     * @param string|array $tags          tags separated by commas or blank spaces, or an array of tags.
+     * @param bool         $casesensitive ignore case if false.
+     * @param string       $visibility    Optional: return only all/private/public links.
+     *
+     * @return array filtered links.
+     */
+    public function filterTags($tags, $casesensitive = false, $visibility = 'all')
+    {
+        // get single tags (callers may pass an array instead of a string)
+        $inputTags = $tags;
+        if (!is_array($tags)) {
+            // we got an input string, split tags
+            $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
+        }
+
+        if (!count($inputTags)) {
+            // no input tags
+            return $this->noFilter($visibility);
+        }
+
+        // build regex from all tags (array callable: "self::..." strings are deprecated since PHP 8.2)
+        $re = '/^' . implode(array_map(array(__CLASS__, 'tag2regex'), $inputTags)) . '.*$/';
+        if (!$casesensitive) {
+            // make regex case insensitive
+            $re .= 'i';
+        }
+
+        // create resulting array
+        $filtered = array();
+
+        // iterate over each link
+        foreach ($this->links as $key => $link) {
+            // check level of visibility
+            // skip links that do not match a 'private' or 'public' request.
+            if ($visibility !== 'all') {
+                if (!$link['private'] && $visibility === 'private') {
+                    continue;
+                } elseif ($link['private'] && $visibility === 'public') {
+                    continue;
+                }
+            }
+            $search = $link['tags']; // build search string, start with tags of current link
+            if (strlen(trim($link['description'])) && strpos($link['description'], '#') !== false) {
+                // description given and at least one possible tag found
+                $descTags = array();
+                // find all tags in the form of #tag in the description
+                preg_match_all(
+                    '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
+                    $link['description'],
+                    $descTags
+                );
+                if (count($descTags[1])) {
+                    // there were some tags in the description, add them to the search string
+                    $search .= ' ' . implode(' ', $descTags[1]);
+                }
+            }
+            // match regular expression with search string
+            if (!preg_match($re, $search)) {
+                // this entry does _not_ match our regex
+                continue;
+            }
+            $filtered[$key] = $link;
+        }
+        return $filtered;
+    }
+
+    /**
+     * Return only links without any tag.
+     *
+     * @param string $visibility return only all/private/public links.
+     *
+     * @return array links whose tag string is empty or whitespace-only, original keys preserved.
+     */
+    public function filterUntagged($visibility)
+    {
+        $filtered = [];
+        foreach ($this->links as $key => $link) {
+            if ($visibility !== 'all') {
+                if (!$link['private'] && $visibility === 'private') {
+                    continue;
+                } elseif ($link['private'] && $visibility === 'public') {
+                    continue;
+                }
+            }
+            // Strict comparison on purpose: empty() would also treat the lone tag '0' as "no tag".
+            if (trim($link['tags']) === '') {
+                $filtered[$key] = $link;
+            }
+        }
+
+        return $filtered;
+    }
+
+    /**
+     * Return the links created on a given day, in reverse storage order.
+     *
+     * The day must use the 'YYYYMMDD' form (e.g. '20120125'), e.g.
+     *  print_r($mydb->filterDay('20120125'));
+     *
+     * @param string $day day to filter, as 'YYYYMMDD'.
+     *
+     * @return array links whose creation date matches $day, keys preserved.
+     *
+     * @throws Exception if the date format is invalid.
+     */
+    public function filterDay($day)
+    {
+        if (!checkDateFormat('Ymd', $day)) {
+            throw new Exception('Invalid date format');
+        }
+
+        $sameDay = array();
+        foreach ($this->links as $id => $entry) {
+            if ($entry['created']->format('Ymd') != $day) {
+                continue;
+            }
+            $sameDay[$id] = $entry;
+        }
+        // Flip the stored order while keeping each link's key.
+        return array_reverse($sameDay, true);
+    }
+
+    /**
+     * Split a tag string into an array of tags.
+     * - lowercases the tags (UTF-8 aware) unless $casesensitive is true;
+     * - accepts both whitespace and commas as separators.
+     *
+     * @param string $tags          string containing a list of tags.
+     * @param bool   $casesensitive keep the original case if true, lowercase everything if false.
+     *
+     * @return array list of tags, without empty entries.
+     */
+    public static function tagsStrToArray($tags, $casesensitive)
+    {
+        if (!$casesensitive) {
+            // Multibyte-aware lowercasing, so non-ASCII scripts (Cyrillic, Greek, ...) fold too.
+            $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
+        }
+        return preg_split('/\s+/', str_replace(',', ' ', $tags), -1, PREG_SPLIT_NO_EMPTY);
+    }
+}
diff --git a/application/bookmark/exception/LinkNotFoundException.php b/application/bookmark/exception/LinkNotFoundException.php
new file mode 100644
index 00000000..f9414428
--- /dev/null
+++ b/application/bookmark/exception/LinkNotFoundException.php
@@ -0,0 +1,15 @@
+<?php
+namespace Shaarli\Bookmark\Exception;
+
+use Exception;
+
+class LinkNotFoundException extends Exception
+{
+    /**
+     * Build the exception with its translated, user-facing message.
+     */
+    public function __construct()
+    {
+        parent::__construct(t('The link you are trying to reach does not exist or has been deleted.'));
+    }
+}
-- 
cgit v1.2.3


From fe3713d2e5c91e2d07af72b39f321521d3dd470c Mon Sep 17 00:00:00 2001
From: VirtualTam <virtualtam@flibidi.net>
Date: Mon, 3 Dec 2018 01:35:14 +0100
Subject: namespacing: move LinkUtils along \Shaarli\Bookmark classes

Signed-off-by: VirtualTam <virtualtam@flibidi.net>
---
 application/bookmark/LinkUtils.php | 222 +++++++++++++++++++++++++++++++++++++
 1 file changed, 222 insertions(+)
 create mode 100644 application/bookmark/LinkUtils.php

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
new file mode 100644
index 00000000..de5b61cb
--- /dev/null
+++ b/application/bookmark/LinkUtils.php
@@ -0,0 +1,222 @@
+<?php
+
+use Shaarli\Bookmark\LinkDB;
+
+/**
+ * Build the cURL write callback (CURLOPT_WRITEFUNCTION) that extracts a page's charset and title.
+ *
+ * @param string $charset     receives the charset extracted from the downloaded page (reference)
+ * @param string $title       receives the title extracted from the downloaded page (reference)
+ * @param string $curlGetInfo Optionally overrides curl_getinfo function
+ *
+ * @return Closure callback taking the cURL handle and the chunk of data being downloaded
+ */
+function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
+{
+    $isRedirected = false;
+    /**
+     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
+     *
+     * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
+     * Then we extract the title and the charset and stop the download when it's done.
+     *
+     * @param resource $ch   cURL resource
+     * @param string   $data chunk of data being downloaded
+     *
+     * @return int|bool length of $data to keep downloading, or false if we need to stop the download
+     */
+    return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
+        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
+        if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
+            $isRedirected = true;
+            return strlen($data);
+        }
+        if (!empty($responseCode) && $responseCode !== 200) {
+            return false;
+        }
+        // After a redirection, the content type will keep the previous request value
+        // until it finds the next content-type header, so it is only read once one shows up.
+        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
+            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
+        }
+        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
+            return false;
+        }
+        if (!empty($contentType) && empty($charset)) {
+            $charset = header_extract_charset($contentType);
+        }
+        if (empty($charset)) {
+            $charset = html_extract_charset($data);
+        }
+        if (empty($title)) {
+            $title = html_extract_title($data);
+        }
+        // Response code, content type, charset and title are all known: stop the download.
+        if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
+            return false;
+        }
+
+        return strlen($data);
+    };
+}
+
+/**
+ * Extract the content of the first <title> element of an HTML document.
+ *
+ * @param string $html HTML content where to look for a title.
+ *
+ * @return bool|string Trimmed title with line feeds removed, false when no <title> is found.
+ */
+function html_extract_title($html)
+{
+    if (!preg_match('!<title.*?>(.*?)</title>!is', $html, $found)) {
+        return false;
+    }
+    return trim(str_replace("\n", '', $found[1]));
+}
+
+/**
+ * Extract charset from HTTP header if it's defined.
+ *
+ * @param string $header HTTP header Content-Type line.
+ *
+ * @return bool|string Charset string if found (lowercase, without quotes), false otherwise.
+ */
+function header_extract_charset($header)
+{
+    // The value may be a quoted-string (charset="utf-8"); keep both quotes out of the capture.
+    preg_match('/charset="?([^;" ]+)/i', $header, $match);
+    if (! empty($match[1])) {
+        return strtolower(trim($match[1]));
+    }
+    return false;
+}
+
+/**
+ * Extract the charset declared in an HTML <meta> tag.
+ *
+ * @param string $html HTML content where to look for charset.
+ *
+ * @return bool|string Lowercased charset string if found, false otherwise.
+ */
+function html_extract_charset($html)
+{
+    // Look for a charset declaration inside a <meta> tag.
+    $pattern = '#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi';
+    preg_match($pattern, $html, $captured);
+    if (empty($captured[1])) {
+        return false;
+    }
+    return strtolower($captured[1]);
+}
+
+/**
+ * Count the links flagged as private in a link list.
+ *
+ * @param array|Countable $links Linklist.
+ *
+ * @return int Number of links whose 'private' entry is truthy.
+ */
+function count_private($links)
+{
+    $privateCount = 0;
+    foreach ($links as $entry) {
+        if (!$entry['private']) {
+            continue;
+        }
+        $privateCount++;
+    }
+    return $privateCount;
+}
+
+/**
+ * Turn the URLs found in a string into clickable HTML links.
+ *
+ * @param string $text       input string.
+ * @param string $redirector if a redirector is set, use it to generate links.
+ * @param bool   $urlEncode  whether to `urlencode()` the URL appended to the redirector.
+ *
+ * @return string $text with every recognised URL wrapped in an <a> element.
+ *
+ * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
+ */
+function text2clickable($text, $redirector = '', $urlEncode = true)
+{
+    $urlPattern = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
+    if (empty($redirector)) {
+        // No redirector: the anchor points straight at the matched URL.
+        return preg_replace($urlPattern, '<a href="$1">$1</a>', $text);
+    }
+    // Redirector set: prefix it to the (optionally URL-encoded) target.
+    $wrap = function ($hit) use ($redirector, $urlEncode) {
+        $target = $hit[1];
+        if ($urlEncode) {
+            $target = urlencode($target);
+        }
+        return '<a href="' . $redirector . $target . '">' . $hit[1] . '</a>';
+    };
+    return preg_replace_callback($urlPattern, $wrap, $text);
+}
+
+/**
+ * Turn the #hashtags of a description into links adding that tag to the search.
+ *
+ * @param string $description Given description.
+ * @param string $indexUrl    Root URL, prepended to the generated '?addtag=' query.
+ *
+ * @return string Description with auto-linked hashtags.
+ */
+function hashtag_autolink($description, $indexUrl = '')
+{
+    /*
+     * Unicode-aware character classes (http://stackoverflow.com/a/35498078/1484919):
+     * \p{Pc} - connector punctuation, i.e. the underscore
+     * \p{N}  - numeric character in any script
+     * \p{L}  - letter from any language
+     * \p{Mn} - non-spacing mark (accents, umlauts, etc)
+     */
+    $anchor = '$1<a href="' . $indexUrl . '?addtag=$2" title="Hashtag $2">#$2</a>';
+
+    return preg_replace('/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui', $anchor, $description);
+}
+
+/**
+ * Insert &nbsp; so that runs of spaces survive HTML rendering outside <pre> (keeps description formatting).
+ *
+ * @param string $text input text.
+ *
+ * @return string text where a space following a line start or another space is replaced by &nbsp;.
+ */
+function space2nbsp($text)
+{
+    $leadingOrRepeatedSpace = '/(^| ) /m';
+    return preg_replace($leadingOrRepeatedSpace, '$1&nbsp;', $text);
+}
+
+/**
+ * Format a description for HTML display: linkify URLs, link hashtags, keep spacing and line breaks.
+ *
+ * @param string $description shaare's description.
+ * @param string $redirector  if a redirector is set, use it to generate links.
+ * @param bool   $urlEncode   Use `urlencode()` on the URL after the redirector or not.
+ * @param string $indexUrl    URL to Shaarli's index.
+ * @return string formatted description.
+ */
+function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '')
+{
+    $linked = hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl);
+    return nl2br(space2nbsp($linked));
+}
+
+/**
+ * Generate a small hash for a link from its creation date and its ID.
+ *
+ * @param DateTime $date Link creation date.
+ * @param int      $id   Link ID.
+ * @return string the small hash generated from link data.
+ */
+function link_small_hash($date, $id)
+{
+    $seed = $date->format(LinkDB::LINK_DATE_FORMAT) . $id;
+    return smallHash($seed);
+}
-- 
cgit v1.2.3


From dea72c711ff740b3b829d238fcf85648465143a0 Mon Sep 17 00:00:00 2001
From: VirtualTam <virtualtam@flibidi.net>
Date: Sat, 12 Jan 2019 23:55:38 +0100
Subject: Optimize and cleanup imports

Signed-off-by: VirtualTam <virtualtam@flibidi.net>
---
 application/bookmark/LinkDB.php | 1 -
 1 file changed, 1 deletion(-)

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
index 6041c088..c13a1141 100644
--- a/application/bookmark/LinkDB.php
+++ b/application/bookmark/LinkDB.php
@@ -6,7 +6,6 @@ use ArrayAccess;
 use Countable;
 use DateTime;
 use Iterator;
-use Shaarli\Bookmark\LinkFilter;
 use Shaarli\Bookmark\Exception\LinkNotFoundException;
 use Shaarli\Exceptions\IOException;
 use Shaarli\FileUtils;
-- 
cgit v1.2.3