From f24896b237e40718fb6eaa2869592eb0855a47fd Mon Sep 17 00:00:00 2001 From: VirtualTam <virtualtam@flibidi.net> Date: Mon, 3 Dec 2018 01:10:39 +0100 Subject: namespacing: \Shaarli\Bookmark\LinkDB Signed-off-by: VirtualTam <virtualtam@flibidi.net> --- application/bookmark/LinkDB.php | 601 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 601 insertions(+) create mode 100644 application/bookmark/LinkDB.php (limited to 'application/bookmark') diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php new file mode 100644 index 00000000..3b77422a --- /dev/null +++ b/application/bookmark/LinkDB.php @@ -0,0 +1,601 @@ +<?php + +namespace Shaarli\Bookmark; + +use ArrayAccess; +use Countable; +use DateTime; +use Iterator; +use LinkFilter; +use LinkNotFoundException; +use Shaarli\Exceptions\IOException; +use Shaarli\FileUtils; + +/** + * Data storage for links. + * + * This object behaves like an associative array. + * + * Example: + * $myLinks = new LinkDB(); + * echo $myLinks[350]['title']; + * foreach ($myLinks as $link) + * echo $link['title'].' at url '.$link['url'].'; description:'.$link['description']; + * + * Available keys: + * - id: primary key, incremental integer identifier (persistent) + * - description: description of the entry + * - created: creation date of this entry, DateTime object. + * - updated: last modification date of this entry, DateTime object. + * - private: Is this link private? 0=no, other value=yes + * - tags: tags attached to this entry (separated by spaces) + * - title Title of the link + * - url URL of the link. Used for displayable links (no redirector, relative, etc.). + * Can be absolute or relative. + * Relative URLs are permalinks (e.g.'?m-ukcw') + * - real_url Absolute processed URL. + * - shorturl Permalink smallhash + * + * Implements 3 interfaces: + * - ArrayAccess: behaves like an associative array; + * - Countable: there is a count() method; + * - Iterator: usable in foreach () loops. + * + * ID mechanism: + * ArrayAccess is implemented in a way that will allow to access a link + * with the unique identifier ID directly with $link[ID]. + * Note that it's not the real key of the link array attribute. + * This mechanism is in place to have persistent link IDs, + * even though the internal array is reordered by date. + * Example: + * - DB: link #1 (2010-01-01) link #2 (2016-01-01) + * - Order: #2 #1 + * - Import links containing: link #3 (2013-01-01) + * - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01) + * - Real order: #2 #3 #1 + */ +class LinkDB implements Iterator, Countable, ArrayAccess +{ + // Links are stored as a PHP serialized string + private $datastore; + + // Link date storage format + const LINK_DATE_FORMAT = 'Ymd_His'; + + // List of links (associative array) + // - key: link date (e.g. "20110823_124546"), + // - value: associative array (keys: title, description...) + private $links; + + // List of all recorded URLs (key=url, value=link offset) + // for fast reserve search (url-->link offset) + private $urls; + + /** + * @var array List of all links IDS mapped with their array offset. + * Map: id->offset. + */ + protected $ids; + + // List of offset keys (for the Iterator interface implementation) + private $keys; + + // Position in the $this->keys array (for the Iterator interface) + private $position; + + // Is the user logged in? (used to filter private links) + private $loggedIn; + + // Hide public links + private $hidePublicLinks; + + // link redirector set in user settings. + private $redirector; + + /** + * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched. + * + * Example: + * anonym.to needs clean URL while dereferer.org needs urlencoded URL. + * + * @var boolean $redirectorEncode parameter: true or false + */ + private $redirectorEncode; + + /** + * Creates a new LinkDB + * + * Checks if the datastore exists; else, attempts to create a dummy one. + * + * @param string $datastore datastore file path. + * @param boolean $isLoggedIn is the user logged in? + * @param boolean $hidePublicLinks if true all links are private. + * @param string $redirector link redirector set in user settings. + * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true). + */ + public function __construct( + $datastore, + $isLoggedIn, + $hidePublicLinks, + $redirector = '', + $redirectorEncode = true + ) { + + $this->datastore = $datastore; + $this->loggedIn = $isLoggedIn; + $this->hidePublicLinks = $hidePublicLinks; + $this->redirector = $redirector; + $this->redirectorEncode = $redirectorEncode === true; + $this->check(); + $this->read(); + } + + /** + * Countable - Counts elements of an object + */ + public function count() + { + return count($this->links); + } + + /** + * ArrayAccess - Assigns a value to the specified offset + */ + public function offsetSet($offset, $value) + { + // TODO: use exceptions instead of "die" + if (!$this->loggedIn) { + die(t('You are not authorized to add a link.')); + } + if (!isset($value['id']) || empty($value['url'])) { + die(t('Internal Error: A link should always have an id and URL.')); + } + if (($offset !== null && !is_int($offset)) || !is_int($value['id'])) { + die(t('You must specify an integer as a key.')); + } + if ($offset !== null && $offset !== $value['id']) { + die(t('Array offset and link ID must be equal.')); + } + + // If the link exists, we reuse the real offset, otherwise new entry + $existing = $this->getLinkOffset($offset); + if ($existing !== null) { + $offset = $existing; + } else { + $offset = count($this->links); + } + $this->links[$offset] = $value; + $this->urls[$value['url']] = $offset; + $this->ids[$value['id']] = $offset; + } + + /** + * ArrayAccess - Whether or not an offset exists + */ + public function offsetExists($offset) + { + return array_key_exists($this->getLinkOffset($offset), $this->links); + } + + /** + * ArrayAccess - Unsets an offset + */ + public function offsetUnset($offset) + { + if (!$this->loggedIn) { + // TODO: raise an exception + die('You are not authorized to delete a link.'); + } + $realOffset = $this->getLinkOffset($offset); + $url = $this->links[$realOffset]['url']; + unset($this->urls[$url]); + unset($this->ids[$realOffset]); + unset($this->links[$realOffset]); + } + + /** + * ArrayAccess - Returns the value at specified offset + */ + public function offsetGet($offset) + { + $realOffset = $this->getLinkOffset($offset); + return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null; + } + + /** + * Iterator - Returns the current element + */ + public function current() + { + return $this[$this->keys[$this->position]]; + } + + /** + * Iterator - Returns the key of the current element + */ + public function key() + { + return $this->keys[$this->position]; + } + + /** + * Iterator - Moves forward to next element + */ + public function next() + { + ++$this->position; + } + + /** + * Iterator - Rewinds the Iterator to the first element + * + * Entries are sorted by date (latest first) + */ + public function rewind() + { + $this->keys = array_keys($this->ids); + $this->position = 0; + } + + /** + * Iterator - Checks if current position is valid + */ + public function valid() + { + return isset($this->keys[$this->position]); + } + + /** + * Checks if the DB directory and file exist + * + * If no DB file is found, creates a dummy DB. + */ + private function check() + { + if (file_exists($this->datastore)) { + return; + } + + // Create a dummy database for example + $this->links = array(); + $link = array( + 'id' => 1, + 'title' => t('The personal, minimalist, super-fast, database free, bookmarking service'), + 'url' => 'https://shaarli.readthedocs.io', + 'description' => t( + 'Welcome to Shaarli! This is your first public bookmark. ' + . 'To edit or delete me, you must first login. + +To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page. + +You use the community supported version of the original Shaarli project, by Sebastien Sauvage.' + ), + 'private' => 0, + 'created' => new DateTime(), + 'tags' => 'opensource software' + ); + $link['shorturl'] = link_small_hash($link['created'], $link['id']); + $this->links[1] = $link; + + $link = array( + 'id' => 0, + 'title' => t('My secret stuff... - Pastebin.com'), + 'url' => 'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=', + 'description' => t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'), + 'private' => 1, + 'created' => new DateTime('1 minute ago'), + 'tags' => 'secretstuff', + ); + $link['shorturl'] = link_small_hash($link['created'], $link['id']); + $this->links[0] = $link; + + // Write database to disk + $this->write(); + } + + /** + * Reads database from disk to memory + */ + private function read() + { + // Public links are hidden and user not logged in => nothing to show + if ($this->hidePublicLinks && !$this->loggedIn) { + $this->links = array(); + return; + } + + $this->urls = []; + $this->ids = []; + $this->links = FileUtils::readFlatDB($this->datastore, []); + + $toremove = array(); + foreach ($this->links as $key => &$link) { + if (!$this->loggedIn && $link['private'] != 0) { + // Transition for not upgraded databases. + unset($this->links[$key]); + continue; + } + + // Sanitize data fields. + sanitizeLink($link); + + // Remove private tags if the user is not logged in. + if (!$this->loggedIn) { + $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']); + } + + // Do not use the redirector for internal links (Shaarli note URL starting with a '?'). + if (!empty($this->redirector) && !startsWith($link['url'], '?')) { + $link['real_url'] = $this->redirector; + if ($this->redirectorEncode) { + $link['real_url'] .= urlencode(unescape($link['url'])); + } else { + $link['real_url'] .= $link['url']; + } + } else { + $link['real_url'] = $link['url']; + } + + // To be able to load links before running the update, and prepare the update + if (!isset($link['created'])) { + $link['id'] = $link['linkdate']; + $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']); + if (!empty($link['updated'])) { + $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']); + } + $link['shorturl'] = smallHash($link['linkdate']); + } + + $this->urls[$link['url']] = $key; + $this->ids[$link['id']] = $key; + } + } + + /** + * Saves the database from memory to disk + * + * @throws IOException the datastore is not writable + */ + private function write() + { + $this->reorder(); + FileUtils::writeFlatDB($this->datastore, $this->links); + } + + /** + * Saves the database from memory to disk + * + * @param string $pageCacheDir page cache directory + */ + public function save($pageCacheDir) + { + if (!$this->loggedIn) { + // TODO: raise an Exception instead + die('You are not authorized to change the database.'); + } + + $this->write(); + + invalidateCaches($pageCacheDir); + } + + /** + * Returns the link for a given URL, or False if it does not exist. + * + * @param string $url URL to search for + * + * @return mixed the existing link if it exists, else 'false' + */ + public function getLinkFromUrl($url) + { + if (isset($this->urls[$url])) { + return $this->links[$this->urls[$url]]; + } + return false; + } + + /** + * Returns the shaare corresponding to a smallHash. + * + * @param string $request QUERY_STRING server parameter. + * + * @return array $filtered array containing permalink data. + * + * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link. + */ + public function filterHash($request) + { + $request = substr($request, 0, 6); + $linkFilter = new LinkFilter($this->links); + return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request); + } + + /** + * Returns the list of articles for a given day. + * + * @param string $request day to filter. Format: YYYYMMDD. + * + * @return array list of shaare found. + */ + public function filterDay($request) + { + $linkFilter = new LinkFilter($this->links); + return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request); + } + + /** + * Filter links according to search parameters. + * + * @param array $filterRequest Search request content. Supported keys: + * - searchtags: list of tags + * - searchterm: term search + * @param bool $casesensitive Optional: Perform case sensitive filter + * @param string $visibility return only all/private/public links + * @param string $untaggedonly return only untagged links + * + * @return array filtered links, all links if no suitable filter was provided. + */ + public function filterSearch( + $filterRequest = array(), + $casesensitive = false, + $visibility = 'all', + $untaggedonly = false + ) { + + // Filter link database according to parameters. + $searchtags = isset($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : ''; + $searchterm = isset($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : ''; + + // Search tags + fullsearch - blank string parameter will return all links. + $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext" + $request = [$searchtags, $searchterm]; + + $linkFilter = new LinkFilter($this); + return $linkFilter->filter($type, $request, $casesensitive, $visibility, $untaggedonly); + } + + /** + * Returns the list tags appearing in the links with the given tags + * + * @param array $filteringTags tags selecting the links to consider + * @param string $visibility process only all/private/public links + * + * @return array tag => linksCount + */ + public function linksCountPerTag($filteringTags = [], $visibility = 'all') + { + $links = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility); + $tags = []; + $caseMapping = []; + foreach ($links as $link) { + foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) { + if (empty($tag)) { + continue; + } + // The first case found will be displayed. + if (!isset($caseMapping[strtolower($tag)])) { + $caseMapping[strtolower($tag)] = $tag; + $tags[$caseMapping[strtolower($tag)]] = 0; + } + $tags[$caseMapping[strtolower($tag)]]++; + } + } + + /* + * Formerly used arsort(), which doesn't define the sort behaviour for equal values. + * Also, this function doesn't produce the same result between PHP 5.6 and 7. + * + * So we now use array_multisort() to sort tags by DESC occurrences, + * then ASC alphabetically for equal values. + * + * @see https://github.com/shaarli/Shaarli/issues/1142 + */ + $keys = array_keys($tags); + $tmpTags = array_combine($keys, $keys); + array_multisort($tags, SORT_DESC, $tmpTags, SORT_ASC, $tags); + return $tags; + } + + /** + * Rename or delete a tag across all links. + * + * @param string $from Tag to rename + * @param string $to New tag. If none is provided, the from tag will be deleted + * + * @return array|bool List of altered links or false on error + */ + public function renameTag($from, $to) + { + if (empty($from)) { + return false; + } + $delete = empty($to); + // True for case-sensitive tag search. + $linksToAlter = $this->filterSearch(['searchtags' => $from], true); + foreach ($linksToAlter as $key => &$value) { + $tags = preg_split('/\s+/', trim($value['tags'])); + if (($pos = array_search($from, $tags)) !== false) { + if ($delete) { + unset($tags[$pos]); // Remove tag. + } else { + $tags[$pos] = trim($to); + } + $value['tags'] = trim(implode(' ', array_unique($tags))); + $this[$value['id']] = $value; + } + } + + return $linksToAlter; + } + + /** + * Returns the list of days containing articles (oldest first) + * Output: An array containing days (in format YYYYMMDD). + */ + public function days() + { + $linkDays = array(); + foreach ($this->links as $link) { + $linkDays[$link['created']->format('Ymd')] = 0; + } + $linkDays = array_keys($linkDays); + sort($linkDays); + + return $linkDays; + } + + /** + * Reorder links by creation date (newest first). + * + * Also update the urls and ids mapping arrays. + * + * @param string $order ASC|DESC + */ + public function reorder($order = 'DESC') + { + $order = $order === 'ASC' ? -1 : 1; + // Reorder array by dates. + usort($this->links, function ($a, $b) use ($order) { + if (isset($a['sticky']) && isset($b['sticky']) && $a['sticky'] !== $b['sticky']) { + return $a['sticky'] ? -1 : 1; + } + return $a['created'] < $b['created'] ? 1 * $order : -1 * $order; + }); + + $this->urls = []; + $this->ids = []; + foreach ($this->links as $key => $link) { + $this->urls[$link['url']] = $key; + $this->ids[$link['id']] = $key; + } + } + + /** + * Return the next key for link creation. + * E.g. If the last ID is 597, the next will be 598. + * + * @return int next ID. + */ + public function getNextId() + { + if (!empty($this->ids)) { + return max(array_keys($this->ids)) + 1; + } + return 0; + } + + /** + * Returns a link offset in links array from its unique ID. + * + * @param int $id Persistent ID of a link. + * + * @return int Real offset in local array, or null if doesn't exist. + */ + protected function getLinkOffset($id) + { + if (isset($this->ids[$id])) { + return $this->ids[$id]; + } + return null; + } +} -- cgit v1.2.3 From 6696729b88e67504fdd333cbaab43a63c3617d86 Mon Sep 17 00:00:00 2001 From: VirtualTam <virtualtam@flibidi.net> Date: Mon, 3 Dec 2018 01:22:45 +0100 Subject: namespacing: \Shaarli\Bookmark\LinkFilter Signed-off-by: VirtualTam <virtualtam@flibidi.net> --- application/bookmark/LinkDB.php | 26 +- application/bookmark/LinkFilter.php | 449 +++++++++++++++++++++ .../bookmark/exception/LinkNotFoundException.php | 15 + 3 files changed, 477 insertions(+), 13 deletions(-) create mode 100644 application/bookmark/LinkFilter.php create mode 100644 application/bookmark/exception/LinkNotFoundException.php (limited to 'application/bookmark') diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php index 3b77422a..6041c088 100644 --- a/application/bookmark/LinkDB.php +++ b/application/bookmark/LinkDB.php @@ -6,8 +6,8 @@ use ArrayAccess; use Countable; use DateTime; use Iterator; -use LinkFilter; -use LinkNotFoundException; +use Shaarli\Bookmark\LinkFilter; +use Shaarli\Bookmark\Exception\LinkNotFoundException; use Shaarli\Exceptions\IOException; use Shaarli\FileUtils; @@ -107,10 +107,10 @@ class LinkDB implements Iterator, Countable, ArrayAccess * * Checks if the datastore exists; else, attempts to create a dummy one. * - * @param string $datastore datastore file path. - * @param boolean $isLoggedIn is the user logged in? - * @param boolean $hidePublicLinks if true all links are private. - * @param string $redirector link redirector set in user settings. + * @param string $datastore datastore file path. + * @param boolean $isLoggedIn is the user logged in? + * @param boolean $hidePublicLinks if true all links are private. + * @param string $redirector link redirector set in user settings. * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true). */ public function __construct( @@ -426,12 +426,12 @@ You use the community supported version of the original Shaarli project, by Seba /** * Filter links according to search parameters. * - * @param array $filterRequest Search request content. Supported keys: + * @param array $filterRequest Search request content. Supported keys: * - searchtags: list of tags * - searchterm: term search - * @param bool $casesensitive Optional: Perform case sensitive filter - * @param string $visibility return only all/private/public links - * @param string $untaggedonly return only untagged links + * @param bool $casesensitive Optional: Perform case sensitive filter + * @param string $visibility return only all/private/public links + * @param bool $untaggedonly return only untagged links * * @return array filtered links, all links if no suitable filter was provided. */ @@ -457,8 +457,8 @@ You use the community supported version of the original Shaarli project, by Seba /** * Returns the list tags appearing in the links with the given tags * - * @param array $filteringTags tags selecting the links to consider - * @param string $visibility process only all/private/public links + * @param array $filteringTags tags selecting the links to consider + * @param string $visibility process only all/private/public links * * @return array tag => linksCount */ @@ -500,7 +500,7 @@ You use the community supported version of the original Shaarli project, by Seba * Rename or delete a tag across all links. * * @param string $from Tag to rename - * @param string $to New tag. If none is provided, the from tag will be deleted + * @param string $to New tag. If none is provided, the from tag will be deleted * * @return array|bool List of altered links or false on error */ diff --git a/application/bookmark/LinkFilter.php b/application/bookmark/LinkFilter.php new file mode 100644 index 00000000..9b966307 --- /dev/null +++ b/application/bookmark/LinkFilter.php @@ -0,0 +1,449 @@ +<?php + +namespace Shaarli\Bookmark; + +use Exception; +use Shaarli\Bookmark\Exception\LinkNotFoundException; + +/** + * Class LinkFilter. + * + * Perform search and filter operation on link data list. + */ +class LinkFilter +{ + /** + * @var string permalinks. + */ + public static $FILTER_HASH = 'permalink'; + + /** + * @var string text search. + */ + public static $FILTER_TEXT = 'fulltext'; + + /** + * @var string tag filter. + */ + public static $FILTER_TAG = 'tags'; + + /** + * @var string filter by day. + */ + public static $FILTER_DAY = 'FILTER_DAY'; + + /** + * @var string Allowed characters for hashtags (regex syntax). + */ + public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}'; + + /** + * @var LinkDB all available links. + */ + private $links; + + /** + * @param LinkDB $links initialization. + */ + public function __construct($links) + { + $this->links = $links; + } + + /** + * Filter links according to parameters. + * + * @param string $type Type of filter (eg. tags, permalink, etc.). + * @param mixed $request Filter content. + * @param bool $casesensitive Optional: Perform case sensitive filter if true. + * @param string $visibility Optional: return only all/private/public links + * @param string $untaggedonly Optional: return only untagged links. Applies only if $type includes FILTER_TAG + * + * @return array filtered link list. + */ + public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false) + { + if (!in_array($visibility, ['all', 'public', 'private'])) { + $visibility = 'all'; + } + + switch ($type) { + case self::$FILTER_HASH: + return $this->filterSmallHash($request); + case self::$FILTER_TAG | self::$FILTER_TEXT: // == "vuotext" + $noRequest = empty($request) || (empty($request[0]) && empty($request[1])); + if ($noRequest) { + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } + return $this->noFilter($visibility); + } + if ($untaggedonly) { + $filtered = $this->filterUntagged($visibility); + } else { + $filtered = $this->links; + } + if (!empty($request[0])) { + $filtered = (new LinkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); + } + if (!empty($request[1])) { + $filtered = (new LinkFilter($filtered))->filterFulltext($request[1], $visibility); + } + return $filtered; + case self::$FILTER_TEXT: + return $this->filterFulltext($request, $visibility); + case self::$FILTER_TAG: + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } else { + return $this->filterTags($request, $casesensitive, $visibility); + } + case self::$FILTER_DAY: + return $this->filterDay($request); + default: + return $this->noFilter($visibility); + } + } + + /** + * Unknown filter, but handle private only. + * + * @param string $visibility Optional: return only all/private/public links + * + * @return array filtered links. + */ + private function noFilter($visibility = 'all') + { + if ($visibility === 'all') { + return $this->links; + } + + $out = array(); + foreach ($this->links as $key => $value) { + if ($value['private'] && $visibility === 'private') { + $out[$key] = $value; + } elseif (!$value['private'] && $visibility === 'public') { + $out[$key] = $value; + } + } + + return $out; + } + + /** + * Returns the shaare corresponding to a smallHash. + * + * @param string $smallHash permalink hash. + * + * @return array $filtered array containing permalink data. + * + * @throws \Shaarli\Bookmark\Exception\LinkNotFoundException if the smallhash doesn't match any link. + */ + private function filterSmallHash($smallHash) + { + $filtered = array(); + foreach ($this->links as $key => $l) { + if ($smallHash == $l['shorturl']) { + // Yes, this is ugly and slow + $filtered[$key] = $l; + return $filtered; + } + } + + if (empty($filtered)) { + throw new LinkNotFoundException(); + } + + return $filtered; + } + + /** + * Returns the list of links corresponding to a full-text search + * + * Searches: + * - in the URLs, title and description; + * - are case-insensitive; + * - terms surrounded by quotes " are exact terms search. + * - terms starting with a dash - are excluded (except exact terms). + * + * Example: + * print_r($mydb->filterFulltext('hollandais')); + * + * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8') + * - allows to perform searches on Unicode text + * - see https://github.com/shaarli/Shaarli/issues/75 for examples + * + * @param string $searchterms search query. + * @param string $visibility Optional: return only all/private/public links. + * + * @return array search results. + */ + private function filterFulltext($searchterms, $visibility = 'all') + { + if (empty($searchterms)) { + return $this->noFilter($visibility); + } + + $filtered = array(); + $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); + $exactRegex = '/"([^"]+)"/'; + // Retrieve exact search terms. + preg_match_all($exactRegex, $search, $exactSearch); + $exactSearch = array_values(array_filter($exactSearch[1])); + + // Remove exact search terms to get AND terms search. + $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); + $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); + + // Filter excluding terms and update andSearch. + $excludeSearch = array(); + $andSearch = array(); + foreach ($explodedSearchAnd as $needle) { + if ($needle[0] == '-' && strlen($needle) > 1) { + $excludeSearch[] = substr($needle, 1); + } else { + $andSearch[] = $needle; + } + } + + $keys = array('title', 'description', 'url', 'tags'); + + // Iterate over every stored link. + foreach ($this->links as $id => $link) { + // ignore non private links when 'privatonly' is on. + if ($visibility !== 'all') { + if (!$link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + + // Concatenate link fields to search across fields. + // Adds a '\' separator for exact search terms. + $content = ''; + foreach ($keys as $key) { + $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; + } + + // Be optimistic + $found = true; + + // First, we look for exact term search + for ($i = 0; $i < count($exactSearch) && $found; $i++) { + $found = strpos($content, $exactSearch[$i]) !== false; + } + + // Iterate over keywords, if keyword is not found, + // no need to check for the others. We want all or nothing. + for ($i = 0; $i < count($andSearch) && $found; $i++) { + $found = strpos($content, $andSearch[$i]) !== false; + } + + // Exclude terms. + for ($i = 0; $i < count($excludeSearch) && $found; $i++) { + $found = strpos($content, $excludeSearch[$i]) === false; + } + + if ($found) { + $filtered[$id] = $link; + } + } + + return $filtered; + } + + /** + * generate a regex fragment out of a tag + * + * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard + * + * @return string generated regex fragment + */ + private static function tag2regex($tag) + { + $len = strlen($tag); + if (!$len || $tag === "-" || $tag === "*") { + // nothing to search, return empty regex + return ''; + } + if ($tag[0] === "-") { + // query is negated + $i = 1; // use offset to start after '-' character + $regex = '(?!'; // create negative lookahead + } else { + $i = 0; // start at first character + $regex = '(?='; // use positive lookahead + } + $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning + // iterate over string, separating it into placeholder and content + for (; $i < $len; $i++) { + if ($tag[$i] === '*') { + // placeholder found + $regex .= '[^ ]*?'; + } else { + // regular characters + $offset = strpos($tag, '*', $i); + if ($offset === false) { + // no placeholder found, set offset to end of string + $offset = $len; + } + // subtract one, as we want to get before the placeholder or end of string + $offset -= 1; + // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. + $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); + // move $i on + $i = $offset; + } + } + $regex .= '(?:$| ))'; // after the tag may only be a space or the end + return $regex; + } + + /** + * Returns the list of links associated with a given list of tags + * + * You can specify one or more tags, separated by space or a comma, e.g. + * print_r($mydb->filterTags('linux programming')); + * + * @param string $tags list of tags separated by commas or blank spaces. + * @param bool $casesensitive ignore case if false. + * @param string $visibility Optional: return only all/private/public links. + * + * @return array filtered links. + */ + public function filterTags($tags, $casesensitive = false, $visibility = 'all') + { + // get single tags (we may get passed an array, even though the docs say different) + $inputTags = $tags; + if (!is_array($tags)) { + // we got an input string, split tags + $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); + } + + if (!count($inputTags)) { + // no input tags + return $this->noFilter($visibility); + } + + // build regex from all tags + $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + if (!$casesensitive) { + // make regex case insensitive + $re .= 'i'; + } + + // create resulting array + $filtered = array(); + + // iterate over each link + foreach ($this->links as $key => $link) { + // check level of visibility + // ignore non private links when 'privateonly' is on. + if ($visibility !== 'all') { + if (!$link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + $search = $link['tags']; // build search string, start with tags of current link + if (strlen(trim($link['description'])) && strpos($link['description'], '#') !== false) { + // description given and at least one possible tag found + $descTags = array(); + // find all tags in the form of #tag in the description + preg_match_all( + '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm', + $link['description'], + $descTags + ); + if (count($descTags[1])) { + // there were some tags in the description, add them to the search string + $search .= ' ' . implode(' ', $descTags[1]); + } + }; + // match regular expression with search string + if (!preg_match($re, $search)) { + // this entry does _not_ match our regex + continue; + } + $filtered[$key] = $link; + } + return $filtered; + } + + /** + * Return only links without any tag. + * + * @param string $visibility return only all/private/public links. + * + * @return array filtered links. + */ + public function filterUntagged($visibility) + { + $filtered = []; + foreach ($this->links as $key => $link) { + if ($visibility !== 'all') { + if (!$link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + + if (empty(trim($link['tags']))) { + $filtered[$key] = $link; + } + } + + return $filtered; + } + + /** + * Returns the list of articles for a given day, chronologically sorted + * + * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g. + * print_r($mydb->filterDay('20120125')); + * + * @param string $day day to filter. + * + * @return array all link matching given day. + * + * @throws Exception if date format is invalid. + */ + public function filterDay($day) + { + if (!checkDateFormat('Ymd', $day)) { + throw new Exception('Invalid date format'); + } + + $filtered = array(); + foreach ($this->links as $key => $l) { + if ($l['created']->format('Ymd') == $day) { + $filtered[$key] = $l; + } + } + + // sort by date ASC + return array_reverse($filtered, true); + } + + /** + * Convert a list of tags (str) to an array. Also + * - handle case sensitivity. + * - accepts spaces commas as separator. + * + * @param string $tags string containing a list of tags. + * @param bool $casesensitive will convert everything to lowercase if false. + * + * @return array filtered tags string. + */ + public static function tagsStrToArray($tags, $casesensitive) + { + // We use UTF-8 conversion to handle various graphemes (i.e. cyrillic, or greek) + $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); + $tagsOut = str_replace(',', ' ', $tagsOut); + + return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); + } +} diff --git a/application/bookmark/exception/LinkNotFoundException.php b/application/bookmark/exception/LinkNotFoundException.php new file mode 100644 index 00000000..f9414428 --- /dev/null +++ b/application/bookmark/exception/LinkNotFoundException.php @@ -0,0 +1,15 @@ +<?php +namespace Shaarli\Bookmark\Exception; + +use Exception; + +class LinkNotFoundException extends Exception +{ + /** + * LinkNotFoundException constructor. + */ + public function __construct() + { + $this->message = t('The link you are trying to reach does not exist or has been deleted.'); + } +} -- cgit v1.2.3 From fe3713d2e5c91e2d07af72b39f321521d3dd470c Mon Sep 17 00:00:00 2001 From: VirtualTam <virtualtam@flibidi.net> Date: Mon, 3 Dec 2018 01:35:14 +0100 Subject: namespacing: move LinkUtils along \Shaarli\Bookmark classes Signed-off-by: VirtualTam <virtualtam@flibidi.net> --- application/bookmark/LinkUtils.php | 222 +++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 application/bookmark/LinkUtils.php (limited to 'application/bookmark') diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php new file mode 100644 index 00000000..de5b61cb --- /dev/null +++ b/application/bookmark/LinkUtils.php @@ -0,0 +1,222 @@ +<?php + +use Shaarli\Bookmark\LinkDB; + +/** + * Get cURL callback function for CURLOPT_WRITEFUNCTION + * + * @param string $charset to extract from the downloaded page (reference) + * @param string $title to extract from the downloaded page (reference) + * @param string $curlGetInfo Optionally overrides curl_getinfo function + * + * @return Closure + */ +function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo') +{ + $isRedirected = false; + /** + * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download). + * + * While downloading the remote page, we check that the HTTP code is 200 and content type is 'html/text' + * Then we extract the title and the charset and stop the download when it's done. + * + * @param resource $ch cURL resource + * @param string $data chunk of data being downloaded + * + * @return int|bool length of $data or false if we need to stop the download + */ + return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) { + $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE); + if (!empty($responseCode) && in_array($responseCode, [301, 302])) { + $isRedirected = true; + return strlen($data); + } + if (!empty($responseCode) && $responseCode !== 200) { + return false; + } + // After a redirection, the content type will keep the previous request value + // until it finds the next content-type header. + if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) { + $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE); + } + if (!empty($contentType) && strpos($contentType, 'text/html') === false) { + return false; + } + if (!empty($contentType) && empty($charset)) { + $charset = header_extract_charset($contentType); + } + if (empty($charset)) { + $charset = html_extract_charset($data); + } + if (empty($title)) { + $title = html_extract_title($data); + } + // We got everything we want, stop the download. + if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) { + return false; + } + + return strlen($data); + }; +} + +/** + * Extract title from an HTML document. + * + * @param string $html HTML content where to look for a title. + * + * @return bool|string Extracted title if found, false otherwise. + */ +function html_extract_title($html) +{ + if (preg_match('!<title.*?>(.*?)</title>!is', $html, $matches)) { + return trim(str_replace("\n", '', $matches[1])); + } + return false; +} + +/** + * Extract charset from HTTP header if it's defined. + * + * @param string $header HTTP header Content-Type line. + * + * @return bool|string Charset string if found (lowercase), false otherwise. + */ +function header_extract_charset($header) +{ + preg_match('/charset="?([^; ]+)/i', $header, $match); + if (! empty($match[1])) { + return strtolower(trim($match[1])); + } + + return false; +} + +/** + * Extract charset HTML content (tag <meta charset>). + * + * @param string $html HTML content where to look for charset. + * + * @return bool|string Charset string if found, false otherwise. + */ +function html_extract_charset($html) +{ + // Get encoding specified in HTML header. + preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $enc); + if (!empty($enc[1])) { + return strtolower($enc[1]); + } + + return false; +} + +/** + * Count private links in given linklist. + * + * @param array|Countable $links Linklist. + * + * @return int Number of private links. + */ +function count_private($links) +{ + $cpt = 0; + foreach ($links as $link) { + if ($link['private']) { + $cpt += 1; + } + } + + return $cpt; +} + +/** + * In a string, converts URLs to clickable links. + * + * @param string $text input string. + * @param string $redirector if a redirector is set, use it to gerenate links. + * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not. + * + * @return string returns $text with all links converted to HTML links. + * + * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722 + */ +function text2clickable($text, $redirector = '', $urlEncode = true) +{ + $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si'; + + if (empty($redirector)) { + return preg_replace($regex, '<a href="$1">$1</a>', $text); + } + // Redirector is set, urlencode the final URL. + return preg_replace_callback( + $regex, + function ($matches) use ($redirector, $urlEncode) { + $url = $urlEncode ? urlencode($matches[1]) : $matches[1]; + return '<a href="' . $redirector . $url .'">'. $matches[1] .'</a>'; + }, + $text + ); +} + +/** + * Auto-link hashtags. + * + * @param string $description Given description. + * @param string $indexUrl Root URL. + * + * @return string Description with auto-linked hashtags. + */ +function hashtag_autolink($description, $indexUrl = '') +{ + /* + * To support unicode: http://stackoverflow.com/a/35498078/1484919 + * \p{Pc} - to match underscore + * \p{N} - numeric character in any script + * \p{L} - letter from any language + * \p{Mn} - any non marking space (accents, umlauts, etc) + */ + $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui'; + $replacement = '$1<a href="'. $indexUrl .'?addtag=$2" title="Hashtag $2">#$2</a>'; + return preg_replace($regex, $replacement, $description); +} + +/** + * This function inserts where relevant so that multiple spaces are properly displayed in HTML + * even in the absence of <pre> (This is used in description to keep text formatting). + * + * @param string $text input text. + * + * @return string formatted text. + */ +function space2nbsp($text) +{ + return preg_replace('/(^| ) /m', '$1 ', $text); +} + +/** + * Format Shaarli's description + * + * @param string $description shaare's description. + * @param string $redirector if a redirector is set, use it to gerenate links. + * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not. + * @param string $indexUrl URL to Shaarli's index. + + * @return string formatted description. + */ +function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '') +{ + return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl))); +} + +/** + * Generate a small hash for a link. + * + * @param DateTime $date Link creation date. + * @param int $id Link ID. + * + * @return string the small hash generated from link data. + */ +function link_small_hash($date, $id) +{ + return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id); +} -- cgit v1.2.3 From dea72c711ff740b3b829d238fcf85648465143a0 Mon Sep 17 00:00:00 2001 From: VirtualTam <virtualtam@flibidi.net> Date: Sat, 12 Jan 2019 23:55:38 +0100 Subject: Optimize and cleanup imports Signed-off-by: VirtualTam <virtualtam@flibidi.net> --- application/bookmark/LinkDB.php | 1 - 1 file changed, 1 deletion(-) (limited to 'application/bookmark') diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php index 6041c088..c13a1141 100644 --- a/application/bookmark/LinkDB.php +++ b/application/bookmark/LinkDB.php @@ -6,7 +6,6 @@ use ArrayAccess; use Countable; use DateTime; use Iterator; -use Shaarli\Bookmark\LinkFilter; use Shaarli\Bookmark\Exception\LinkNotFoundException; use Shaarli\Exceptions\IOException; use Shaarli\FileUtils; -- cgit v1.2.3