From f24896b237e40718fb6eaa2869592eb0855a47fd Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Mon, 3 Dec 2018 01:10:39 +0100 Subject: namespacing: \Shaarli\Bookmark\LinkDB Signed-off-by: VirtualTam --- application/bookmark/LinkDB.php | 601 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 601 insertions(+) create mode 100644 application/bookmark/LinkDB.php (limited to 'application/bookmark') diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php new file mode 100644 index 00000000..3b77422a --- /dev/null +++ b/application/bookmark/LinkDB.php @@ -0,0 +1,601 @@ +link offset) + private $urls; + + /** + * @var array List of all links IDS mapped with their array offset. + * Map: id->offset. + */ + protected $ids; + + // List of offset keys (for the Iterator interface implementation) + private $keys; + + // Position in the $this->keys array (for the Iterator interface) + private $position; + + // Is the user logged in? (used to filter private links) + private $loggedIn; + + // Hide public links + private $hidePublicLinks; + + // link redirector set in user settings. + private $redirector; + + /** + * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched. + * + * Example: + * anonym.to needs clean URL while dereferer.org needs urlencoded URL. + * + * @var boolean $redirectorEncode parameter: true or false + */ + private $redirectorEncode; + + /** + * Creates a new LinkDB + * + * Checks if the datastore exists; else, attempts to create a dummy one. + * + * @param string $datastore datastore file path. + * @param boolean $isLoggedIn is the user logged in? + * @param boolean $hidePublicLinks if true all links are private. + * @param string $redirector link redirector set in user settings. + * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true). 
+ */ + public function __construct( + $datastore, + $isLoggedIn, + $hidePublicLinks, + $redirector = '', + $redirectorEncode = true + ) { + + $this->datastore = $datastore; + $this->loggedIn = $isLoggedIn; + $this->hidePublicLinks = $hidePublicLinks; + $this->redirector = $redirector; + $this->redirectorEncode = $redirectorEncode === true; + $this->check(); + $this->read(); + } + + /** + * Countable - Counts elements of an object + */ + public function count() + { + return count($this->links); + } + + /** + * ArrayAccess - Assigns a value to the specified offset + */ + public function offsetSet($offset, $value) + { + // TODO: use exceptions instead of "die" + if (!$this->loggedIn) { + die(t('You are not authorized to add a link.')); + } + if (!isset($value['id']) || empty($value['url'])) { + die(t('Internal Error: A link should always have an id and URL.')); + } + if (($offset !== null && !is_int($offset)) || !is_int($value['id'])) { + die(t('You must specify an integer as a key.')); + } + if ($offset !== null && $offset !== $value['id']) { + die(t('Array offset and link ID must be equal.')); + } + + // If the link exists, we reuse the real offset, otherwise new entry + $existing = $this->getLinkOffset($offset); + if ($existing !== null) { + $offset = $existing; + } else { + $offset = count($this->links); + } + $this->links[$offset] = $value; + $this->urls[$value['url']] = $offset; + $this->ids[$value['id']] = $offset; + } + + /** + * ArrayAccess - Whether or not an offset exists + */ + public function offsetExists($offset) + { + return array_key_exists($this->getLinkOffset($offset), $this->links); + } + + /** + * ArrayAccess - Unsets an offset + */ + public function offsetUnset($offset) + { + if (!$this->loggedIn) { + // TODO: raise an exception + die('You are not authorized to delete a link.'); + } + $realOffset = $this->getLinkOffset($offset); + $url = $this->links[$realOffset]['url']; + unset($this->urls[$url]); + unset($this->ids[$realOffset]); + 
unset($this->links[$realOffset]); + } + + /** + * ArrayAccess - Returns the value at specified offset + */ + public function offsetGet($offset) + { + $realOffset = $this->getLinkOffset($offset); + return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null; + } + + /** + * Iterator - Returns the current element + */ + public function current() + { + return $this[$this->keys[$this->position]]; + } + + /** + * Iterator - Returns the key of the current element + */ + public function key() + { + return $this->keys[$this->position]; + } + + /** + * Iterator - Moves forward to next element + */ + public function next() + { + ++$this->position; + } + + /** + * Iterator - Rewinds the Iterator to the first element + * + * Entries are sorted by date (latest first) + */ + public function rewind() + { + $this->keys = array_keys($this->ids); + $this->position = 0; + } + + /** + * Iterator - Checks if current position is valid + */ + public function valid() + { + return isset($this->keys[$this->position]); + } + + /** + * Checks if the DB directory and file exist + * + * If no DB file is found, creates a dummy DB. + */ + private function check() + { + if (file_exists($this->datastore)) { + return; + } + + // Create a dummy database for example + $this->links = array(); + $link = array( + 'id' => 1, + 'title' => t('The personal, minimalist, super-fast, database free, bookmarking service'), + 'url' => 'https://shaarli.readthedocs.io', + 'description' => t( + 'Welcome to Shaarli! This is your first public bookmark. ' + . 'To edit or delete me, you must first login. + +To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page. + +You use the community supported version of the original Shaarli project, by Sebastien Sauvage.' 
+ ), + 'private' => 0, + 'created' => new DateTime(), + 'tags' => 'opensource software' + ); + $link['shorturl'] = link_small_hash($link['created'], $link['id']); + $this->links[1] = $link; + + $link = array( + 'id' => 0, + 'title' => t('My secret stuff... - Pastebin.com'), + 'url' => 'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=', + 'description' => t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'), + 'private' => 1, + 'created' => new DateTime('1 minute ago'), + 'tags' => 'secretstuff', + ); + $link['shorturl'] = link_small_hash($link['created'], $link['id']); + $this->links[0] = $link; + + // Write database to disk + $this->write(); + } + + /** + * Reads database from disk to memory + */ + private function read() + { + // Public links are hidden and user not logged in => nothing to show + if ($this->hidePublicLinks && !$this->loggedIn) { + $this->links = array(); + return; + } + + $this->urls = []; + $this->ids = []; + $this->links = FileUtils::readFlatDB($this->datastore, []); + + $toremove = array(); + foreach ($this->links as $key => &$link) { + if (!$this->loggedIn && $link['private'] != 0) { + // Transition for not upgraded databases. + unset($this->links[$key]); + continue; + } + + // Sanitize data fields. + sanitizeLink($link); + + // Remove private tags if the user is not logged in. + if (!$this->loggedIn) { + $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']); + } + + // Do not use the redirector for internal links (Shaarli note URL starting with a '?'). 
+ if (!empty($this->redirector) && !startsWith($link['url'], '?')) { + $link['real_url'] = $this->redirector; + if ($this->redirectorEncode) { + $link['real_url'] .= urlencode(unescape($link['url'])); + } else { + $link['real_url'] .= $link['url']; + } + } else { + $link['real_url'] = $link['url']; + } + + // To be able to load links before running the update, and prepare the update + if (!isset($link['created'])) { + $link['id'] = $link['linkdate']; + $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']); + if (!empty($link['updated'])) { + $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']); + } + $link['shorturl'] = smallHash($link['linkdate']); + } + + $this->urls[$link['url']] = $key; + $this->ids[$link['id']] = $key; + } + } + + /** + * Saves the database from memory to disk + * + * @throws IOException the datastore is not writable + */ + private function write() + { + $this->reorder(); + FileUtils::writeFlatDB($this->datastore, $this->links); + } + + /** + * Saves the database from memory to disk + * + * @param string $pageCacheDir page cache directory + */ + public function save($pageCacheDir) + { + if (!$this->loggedIn) { + // TODO: raise an Exception instead + die('You are not authorized to change the database.'); + } + + $this->write(); + + invalidateCaches($pageCacheDir); + } + + /** + * Returns the link for a given URL, or False if it does not exist. + * + * @param string $url URL to search for + * + * @return mixed the existing link if it exists, else 'false' + */ + public function getLinkFromUrl($url) + { + if (isset($this->urls[$url])) { + return $this->links[$this->urls[$url]]; + } + return false; + } + + /** + * Returns the shaare corresponding to a smallHash. + * + * @param string $request QUERY_STRING server parameter. + * + * @return array $filtered array containing permalink data. + * + * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link. 
+ */ + public function filterHash($request) + { + $request = substr($request, 0, 6); + $linkFilter = new LinkFilter($this->links); + return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request); + } + + /** + * Returns the list of articles for a given day. + * + * @param string $request day to filter. Format: YYYYMMDD. + * + * @return array list of shaare found. + */ + public function filterDay($request) + { + $linkFilter = new LinkFilter($this->links); + return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request); + } + + /** + * Filter links according to search parameters. + * + * @param array $filterRequest Search request content. Supported keys: + * - searchtags: list of tags + * - searchterm: term search + * @param bool $casesensitive Optional: Perform case sensitive filter + * @param string $visibility return only all/private/public links + * @param string $untaggedonly return only untagged links + * + * @return array filtered links, all links if no suitable filter was provided. + */ + public function filterSearch( + $filterRequest = array(), + $casesensitive = false, + $visibility = 'all', + $untaggedonly = false + ) { + + // Filter link database according to parameters. + $searchtags = isset($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : ''; + $searchterm = isset($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : ''; + + // Search tags + fullsearch - blank string parameter will return all links. 
+ $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext" + $request = [$searchtags, $searchterm]; + + $linkFilter = new LinkFilter($this); + return $linkFilter->filter($type, $request, $casesensitive, $visibility, $untaggedonly); + } + + /** + * Returns the list tags appearing in the links with the given tags + * + * @param array $filteringTags tags selecting the links to consider + * @param string $visibility process only all/private/public links + * + * @return array tag => linksCount + */ + public function linksCountPerTag($filteringTags = [], $visibility = 'all') + { + $links = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility); + $tags = []; + $caseMapping = []; + foreach ($links as $link) { + foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) { + if (empty($tag)) { + continue; + } + // The first case found will be displayed. + if (!isset($caseMapping[strtolower($tag)])) { + $caseMapping[strtolower($tag)] = $tag; + $tags[$caseMapping[strtolower($tag)]] = 0; + } + $tags[$caseMapping[strtolower($tag)]]++; + } + } + + /* + * Formerly used arsort(), which doesn't define the sort behaviour for equal values. + * Also, this function doesn't produce the same result between PHP 5.6 and 7. + * + * So we now use array_multisort() to sort tags by DESC occurrences, + * then ASC alphabetically for equal values. + * + * @see https://github.com/shaarli/Shaarli/issues/1142 + */ + $keys = array_keys($tags); + $tmpTags = array_combine($keys, $keys); + array_multisort($tags, SORT_DESC, $tmpTags, SORT_ASC, $tags); + return $tags; + } + + /** + * Rename or delete a tag across all links. + * + * @param string $from Tag to rename + * @param string $to New tag. 
If none is provided, the from tag will be deleted + * + * @return array|bool List of altered links or false on error + */ + public function renameTag($from, $to) + { + if (empty($from)) { + return false; + } + $delete = empty($to); + // True for case-sensitive tag search. + $linksToAlter = $this->filterSearch(['searchtags' => $from], true); + foreach ($linksToAlter as $key => &$value) { + $tags = preg_split('/\s+/', trim($value['tags'])); + if (($pos = array_search($from, $tags)) !== false) { + if ($delete) { + unset($tags[$pos]); // Remove tag. + } else { + $tags[$pos] = trim($to); + } + $value['tags'] = trim(implode(' ', array_unique($tags))); + $this[$value['id']] = $value; + } + } + + return $linksToAlter; + } + + /** + * Returns the list of days containing articles (oldest first) + * Output: An array containing days (in format YYYYMMDD). + */ + public function days() + { + $linkDays = array(); + foreach ($this->links as $link) { + $linkDays[$link['created']->format('Ymd')] = 0; + } + $linkDays = array_keys($linkDays); + sort($linkDays); + + return $linkDays; + } + + /** + * Reorder links by creation date (newest first). + * + * Also update the urls and ids mapping arrays. + * + * @param string $order ASC|DESC + */ + public function reorder($order = 'DESC') + { + $order = $order === 'ASC' ? -1 : 1; + // Reorder array by dates. + usort($this->links, function ($a, $b) use ($order) { + if (isset($a['sticky']) && isset($b['sticky']) && $a['sticky'] !== $b['sticky']) { + return $a['sticky'] ? -1 : 1; + } + return $a['created'] < $b['created'] ? 1 * $order : -1 * $order; + }); + + $this->urls = []; + $this->ids = []; + foreach ($this->links as $key => $link) { + $this->urls[$link['url']] = $key; + $this->ids[$link['id']] = $key; + } + } + + /** + * Return the next key for link creation. + * E.g. If the last ID is 597, the next will be 598. + * + * @return int next ID. 
+ */ + public function getNextId() + { + if (!empty($this->ids)) { + return max(array_keys($this->ids)) + 1; + } + return 0; + } + + /** + * Returns a link offset in links array from its unique ID. + * + * @param int $id Persistent ID of a link. + * + * @return int Real offset in local array, or null if doesn't exist. + */ + protected function getLinkOffset($id) + { + if (isset($this->ids[$id])) { + return $this->ids[$id]; + } + return null; + } +} -- cgit v1.2.3 From 6696729b88e67504fdd333cbaab43a63c3617d86 Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Mon, 3 Dec 2018 01:22:45 +0100 Subject: namespacing: \Shaarli\Bookmark\LinkFilter Signed-off-by: VirtualTam --- application/bookmark/LinkDB.php | 26 +- application/bookmark/LinkFilter.php | 449 +++++++++++++++++++++ .../bookmark/exception/LinkNotFoundException.php | 15 + 3 files changed, 477 insertions(+), 13 deletions(-) create mode 100644 application/bookmark/LinkFilter.php create mode 100644 application/bookmark/exception/LinkNotFoundException.php (limited to 'application/bookmark') diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php index 3b77422a..6041c088 100644 --- a/application/bookmark/LinkDB.php +++ b/application/bookmark/LinkDB.php @@ -6,8 +6,8 @@ use ArrayAccess; use Countable; use DateTime; use Iterator; -use LinkFilter; -use LinkNotFoundException; +use Shaarli\Bookmark\LinkFilter; +use Shaarli\Bookmark\Exception\LinkNotFoundException; use Shaarli\Exceptions\IOException; use Shaarli\FileUtils; @@ -107,10 +107,10 @@ class LinkDB implements Iterator, Countable, ArrayAccess * * Checks if the datastore exists; else, attempts to create a dummy one. * - * @param string $datastore datastore file path. - * @param boolean $isLoggedIn is the user logged in? - * @param boolean $hidePublicLinks if true all links are private. - * @param string $redirector link redirector set in user settings. + * @param string $datastore datastore file path. 
+ * @param boolean $isLoggedIn is the user logged in? + * @param boolean $hidePublicLinks if true all links are private. + * @param string $redirector link redirector set in user settings. * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true). */ public function __construct( @@ -426,12 +426,12 @@ You use the community supported version of the original Shaarli project, by Seba /** * Filter links according to search parameters. * - * @param array $filterRequest Search request content. Supported keys: + * @param array $filterRequest Search request content. Supported keys: * - searchtags: list of tags * - searchterm: term search - * @param bool $casesensitive Optional: Perform case sensitive filter - * @param string $visibility return only all/private/public links - * @param string $untaggedonly return only untagged links + * @param bool $casesensitive Optional: Perform case sensitive filter + * @param string $visibility return only all/private/public links + * @param bool $untaggedonly return only untagged links * * @return array filtered links, all links if no suitable filter was provided. */ @@ -457,8 +457,8 @@ You use the community supported version of the original Shaarli project, by Seba /** * Returns the list tags appearing in the links with the given tags * - * @param array $filteringTags tags selecting the links to consider - * @param string $visibility process only all/private/public links + * @param array $filteringTags tags selecting the links to consider + * @param string $visibility process only all/private/public links * * @return array tag => linksCount */ @@ -500,7 +500,7 @@ You use the community supported version of the original Shaarli project, by Seba * Rename or delete a tag across all links. * * @param string $from Tag to rename - * @param string $to New tag. If none is provided, the from tag will be deleted + * @param string $to New tag. 
If none is provided, the from tag will be deleted * * @return array|bool List of altered links or false on error */ diff --git a/application/bookmark/LinkFilter.php b/application/bookmark/LinkFilter.php new file mode 100644 index 00000000..9b966307 --- /dev/null +++ b/application/bookmark/LinkFilter.php @@ -0,0 +1,449 @@ +links = $links; + } + + /** + * Filter links according to parameters. + * + * @param string $type Type of filter (eg. tags, permalink, etc.). + * @param mixed $request Filter content. + * @param bool $casesensitive Optional: Perform case sensitive filter if true. + * @param string $visibility Optional: return only all/private/public links + * @param string $untaggedonly Optional: return only untagged links. Applies only if $type includes FILTER_TAG + * + * @return array filtered link list. + */ + public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false) + { + if (!in_array($visibility, ['all', 'public', 'private'])) { + $visibility = 'all'; + } + + switch ($type) { + case self::$FILTER_HASH: + return $this->filterSmallHash($request); + case self::$FILTER_TAG | self::$FILTER_TEXT: // == "vuotext" + $noRequest = empty($request) || (empty($request[0]) && empty($request[1])); + if ($noRequest) { + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } + return $this->noFilter($visibility); + } + if ($untaggedonly) { + $filtered = $this->filterUntagged($visibility); + } else { + $filtered = $this->links; + } + if (!empty($request[0])) { + $filtered = (new LinkFilter($filtered))->filterTags($request[0], $casesensitive, $visibility); + } + if (!empty($request[1])) { + $filtered = (new LinkFilter($filtered))->filterFulltext($request[1], $visibility); + } + return $filtered; + case self::$FILTER_TEXT: + return $this->filterFulltext($request, $visibility); + case self::$FILTER_TAG: + if ($untaggedonly) { + return $this->filterUntagged($visibility); + } else { + return 
$this->filterTags($request, $casesensitive, $visibility); + } + case self::$FILTER_DAY: + return $this->filterDay($request); + default: + return $this->noFilter($visibility); + } + } + + /** + * Unknown filter, but handle private only. + * + * @param string $visibility Optional: return only all/private/public links + * + * @return array filtered links. + */ + private function noFilter($visibility = 'all') + { + if ($visibility === 'all') { + return $this->links; + } + + $out = array(); + foreach ($this->links as $key => $value) { + if ($value['private'] && $visibility === 'private') { + $out[$key] = $value; + } elseif (!$value['private'] && $visibility === 'public') { + $out[$key] = $value; + } + } + + return $out; + } + + /** + * Returns the shaare corresponding to a smallHash. + * + * @param string $smallHash permalink hash. + * + * @return array $filtered array containing permalink data. + * + * @throws \Shaarli\Bookmark\Exception\LinkNotFoundException if the smallhash doesn't match any link. + */ + private function filterSmallHash($smallHash) + { + $filtered = array(); + foreach ($this->links as $key => $l) { + if ($smallHash == $l['shorturl']) { + // Yes, this is ugly and slow + $filtered[$key] = $l; + return $filtered; + } + } + + if (empty($filtered)) { + throw new LinkNotFoundException(); + } + + return $filtered; + } + + /** + * Returns the list of links corresponding to a full-text search + * + * Searches: + * - in the URLs, title and description; + * - are case-insensitive; + * - terms surrounded by quotes " are exact terms search. + * - terms starting with a dash - are excluded (except exact terms). + * + * Example: + * print_r($mydb->filterFulltext('hollandais')); + * + * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8') + * - allows to perform searches on Unicode text + * - see https://github.com/shaarli/Shaarli/issues/75 for examples + * + * @param string $searchterms search query. 
+ * @param string $visibility Optional: return only all/private/public links. + * + * @return array search results. + */ + private function filterFulltext($searchterms, $visibility = 'all') + { + if (empty($searchterms)) { + return $this->noFilter($visibility); + } + + $filtered = array(); + $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8'); + $exactRegex = '/"([^"]+)"/'; + // Retrieve exact search terms. + preg_match_all($exactRegex, $search, $exactSearch); + $exactSearch = array_values(array_filter($exactSearch[1])); + + // Remove exact search terms to get AND terms search. + $explodedSearchAnd = explode(' ', trim(preg_replace($exactRegex, '', $search))); + $explodedSearchAnd = array_values(array_filter($explodedSearchAnd)); + + // Filter excluding terms and update andSearch. + $excludeSearch = array(); + $andSearch = array(); + foreach ($explodedSearchAnd as $needle) { + if ($needle[0] == '-' && strlen($needle) > 1) { + $excludeSearch[] = substr($needle, 1); + } else { + $andSearch[] = $needle; + } + } + + $keys = array('title', 'description', 'url', 'tags'); + + // Iterate over every stored link. + foreach ($this->links as $id => $link) { + // ignore non private links when 'privatonly' is on. + if ($visibility !== 'all') { + if (!$link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + + // Concatenate link fields to search across fields. + // Adds a '\' separator for exact search terms. + $content = ''; + foreach ($keys as $key) { + $content .= mb_convert_case($link[$key], MB_CASE_LOWER, 'UTF-8') . '\\'; + } + + // Be optimistic + $found = true; + + // First, we look for exact term search + for ($i = 0; $i < count($exactSearch) && $found; $i++) { + $found = strpos($content, $exactSearch[$i]) !== false; + } + + // Iterate over keywords, if keyword is not found, + // no need to check for the others. We want all or nothing. 
+ for ($i = 0; $i < count($andSearch) && $found; $i++) { + $found = strpos($content, $andSearch[$i]) !== false; + } + + // Exclude terms. + for ($i = 0; $i < count($excludeSearch) && $found; $i++) { + $found = strpos($content, $excludeSearch[$i]) === false; + } + + if ($found) { + $filtered[$id] = $link; + } + } + + return $filtered; + } + + /** + * generate a regex fragment out of a tag + * + * @param string $tag to to generate regexs from. may start with '-' to negate, contain '*' as wildcard + * + * @return string generated regex fragment + */ + private static function tag2regex($tag) + { + $len = strlen($tag); + if (!$len || $tag === "-" || $tag === "*") { + // nothing to search, return empty regex + return ''; + } + if ($tag[0] === "-") { + // query is negated + $i = 1; // use offset to start after '-' character + $regex = '(?!'; // create negative lookahead + } else { + $i = 0; // start at first character + $regex = '(?='; // use positive lookahead + } + $regex .= '.*(?:^| )'; // before tag may only be a space or the beginning + // iterate over string, separating it into placeholder and content + for (; $i < $len; $i++) { + if ($tag[$i] === '*') { + // placeholder found + $regex .= '[^ ]*?'; + } else { + // regular characters + $offset = strpos($tag, '*', $i); + if ($offset === false) { + // no placeholder found, set offset to end of string + $offset = $len; + } + // subtract one, as we want to get before the placeholder or end of string + $offset -= 1; + // we got a tag name that we want to search for. escape any regex characters to prevent conflicts. + $regex .= preg_quote(substr($tag, $i, $offset - $i + 1), '/'); + // move $i on + $i = $offset; + } + } + $regex .= '(?:$| ))'; // after the tag may only be a space or the end + return $regex; + } + + /** + * Returns the list of links associated with a given list of tags + * + * You can specify one or more tags, separated by space or a comma, e.g. 
+ * print_r($mydb->filterTags('linux programming')); + * + * @param string $tags list of tags separated by commas or blank spaces. + * @param bool $casesensitive ignore case if false. + * @param string $visibility Optional: return only all/private/public links. + * + * @return array filtered links. + */ + public function filterTags($tags, $casesensitive = false, $visibility = 'all') + { + // get single tags (we may get passed an array, even though the docs say different) + $inputTags = $tags; + if (!is_array($tags)) { + // we got an input string, split tags + $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY); + } + + if (!count($inputTags)) { + // no input tags + return $this->noFilter($visibility); + } + + // build regex from all tags + $re = '/^' . implode(array_map("self::tag2regex", $inputTags)) . '.*$/'; + if (!$casesensitive) { + // make regex case insensitive + $re .= 'i'; + } + + // create resulting array + $filtered = array(); + + // iterate over each link + foreach ($this->links as $key => $link) { + // check level of visibility + // ignore non private links when 'privateonly' is on. 
+ if ($visibility !== 'all') { + if (!$link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + $search = $link['tags']; // build search string, start with tags of current link + if (strlen(trim($link['description'])) && strpos($link['description'], '#') !== false) { + // description given and at least one possible tag found + $descTags = array(); + // find all tags in the form of #tag in the description + preg_match_all( + '/(?links as $key => $link) { + if ($visibility !== 'all') { + if (!$link['private'] && $visibility === 'private') { + continue; + } elseif ($link['private'] && $visibility === 'public') { + continue; + } + } + + if (empty(trim($link['tags']))) { + $filtered[$key] = $link; + } + } + + return $filtered; + } + + /** + * Returns the list of articles for a given day, chronologically sorted + * + * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g. + * print_r($mydb->filterDay('20120125')); + * + * @param string $day day to filter. + * + * @return array all link matching given day. + * + * @throws Exception if date format is invalid. + */ + public function filterDay($day) + { + if (!checkDateFormat('Ymd', $day)) { + throw new Exception('Invalid date format'); + } + + $filtered = array(); + foreach ($this->links as $key => $l) { + if ($l['created']->format('Ymd') == $day) { + $filtered[$key] = $l; + } + } + + // sort by date ASC + return array_reverse($filtered, true); + } + + /** + * Convert a list of tags (str) to an array. Also + * - handle case sensitivity. + * - accepts spaces commas as separator. + * + * @param string $tags string containing a list of tags. + * @param bool $casesensitive will convert everything to lowercase if false. + * + * @return array filtered tags string. + */ + public static function tagsStrToArray($tags, $casesensitive) + { + // We use UTF-8 conversion to handle various graphemes (i.e. 
cyrillic, or greek) + $tagsOut = $casesensitive ? $tags : mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8'); + $tagsOut = str_replace(',', ' ', $tagsOut); + + return preg_split('/\s+/', $tagsOut, -1, PREG_SPLIT_NO_EMPTY); + } +} diff --git a/application/bookmark/exception/LinkNotFoundException.php b/application/bookmark/exception/LinkNotFoundException.php new file mode 100644 index 00000000..f9414428 --- /dev/null +++ b/application/bookmark/exception/LinkNotFoundException.php @@ -0,0 +1,15 @@ +message = t('The link you are trying to reach does not exist or has been deleted.'); + } +} -- cgit v1.2.3 From fe3713d2e5c91e2d07af72b39f321521d3dd470c Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Mon, 3 Dec 2018 01:35:14 +0100 Subject: namespacing: move LinkUtils along \Shaarli\Bookmark classes Signed-off-by: VirtualTam --- application/bookmark/LinkUtils.php | 222 +++++++++++++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 application/bookmark/LinkUtils.php (limited to 'application/bookmark') diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php new file mode 100644 index 00000000..de5b61cb --- /dev/null +++ b/application/bookmark/LinkUtils.php @@ -0,0 +1,222 @@ +(.*?)!is', $html, $matches)) { + return trim(str_replace("\n", '', $matches[1])); + } + return false; +} + +/** + * Extract charset from HTTP header if it's defined. + * + * @param string $header HTTP header Content-Type line. + * + * @return bool|string Charset string if found (lowercase), false otherwise. + */ +function header_extract_charset($header) +{ + preg_match('/charset="?([^; ]+)/i', $header, $match); + if (! empty($match[1])) { + return strtolower(trim($match[1])); + } + + return false; +} + +/** + * Extract charset HTML content (tag ). + * + * @param string $html HTML content where to look for charset. + * + * @return bool|string Charset string if found, false otherwise. 
+ */ +function html_extract_charset($html) +{ + // Get encoding specified in HTML header. + preg_match('#/]+)["\']? */?>#Usi', $html, $enc); + if (!empty($enc[1])) { + return strtolower($enc[1]); + } + + return false; +} + +/** + * Count private links in given linklist. + * + * @param array|Countable $links Linklist. + * + * @return int Number of private links. + */ +function count_private($links) +{ + $cpt = 0; + foreach ($links as $link) { + if ($link['private']) { + $cpt += 1; + } + } + + return $cpt; +} + +/** + * In a string, converts URLs to clickable links. + * + * @param string $text input string. + * @param string $redirector if a redirector is set, use it to gerenate links. + * @param bool $urlEncode Use `urlencode()` on the URL after the redirector or not. + * + * @return string returns $text with all links converted to HTML links. + * + * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722 + */ +function text2clickable($text, $redirector = '', $urlEncode = true) +{ + $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si'; + + if (empty($redirector)) { + return preg_replace($regex, '$1', $text); + } + // Redirector is set, urlencode the final URL. + return preg_replace_callback( + $regex, + function ($matches) use ($redirector, $urlEncode) { + $url = $urlEncode ? urlencode($matches[1]) : $matches[1]; + return ''. $matches[1] .''; + }, + $text + ); +} + +/** + * Auto-link hashtags. + * + * @param string $description Given description. + * @param string $indexUrl Root URL. + * + * @return string Description with auto-linked hashtags. 
+ */ +function hashtag_autolink($description, $indexUrl = '') +{ + /* + * To support unicode: http://stackoverflow.com/a/35498078/1484919 + * \p{Pc} - to match underscore + * \p{N} - numeric character in any script + * \p{L} - letter from any language + * \p{Mn} - any non marking space (accents, umlauts, etc) + */ + $regex = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui'; + $replacement = '$1#$2'; + return preg_replace($regex, $replacement, $description); +} + +/** + * This function inserts   where relevant so that multiple spaces are properly displayed in HTML + * even in the absence of
  (This is used in description to keep text formatting).
+ *
+ * @param string $text input text.
+ *
+ * @return string formatted text.
+ */
+function space2nbsp($text)
+{
+    // Matches a space that follows either the start of a line (the `m` modifier
+    // makes `^` match after every newline) or another space. The captured
+    // prefix ($1) is kept and the space after it is replaced.
+    //
+    // The replacement is spelled as the explicit HTML entity: a raw whitespace
+    // character here is indistinguishable from a normal space when reading the
+    // source, and a normal space would turn this function into a no-op.
+    return preg_replace('/(^| ) /m', '$1&nbsp;', $text);
+}
+
+/**
+ * Turn a shaare description into its displayable form.
+ *
+ * The description goes through four steps, in this order: URLs are made
+ * clickable, hashtags are auto-linked, repeated spaces are preserved, and
+ * finally nl2br() is applied to the result.
+ *
+ * @param string $description shaare's description.
+ * @param string $redirector  if a redirector is set, use it to generate links.
+ * @param bool   $urlEncode   Use `urlencode()` on the URL after the redirector or not.
+ * @param string $indexUrl    URL to Shaarli's index.
+ *
+ * @return string formatted description.
+ */
+function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '')
+{
+    $withLinks = text2clickable($description, $redirector, $urlEncode);
+    $withHashtags = hashtag_autolink($withLinks, $indexUrl);
+    $withSpaces = space2nbsp($withHashtags);
+
+    return nl2br($withSpaces);
+}
+
+/**
+ * Compute the small hash of a link.
+ *
+ * The hashed value is the creation date, formatted with
+ * LinkDB::LINK_DATE_FORMAT, immediately followed by the link ID.
+ *
+ * @param DateTime $date Link creation date.
+ * @param int      $id   Link ID.
+ *
+ * @return string the small hash generated from link data.
+ */
+function link_small_hash($date, $id)
+{
+    $formattedDate = $date->format(LinkDB::LINK_DATE_FORMAT);
+
+    return smallHash($formattedDate . $id);
+}
-- 
cgit v1.2.3


From dea72c711ff740b3b829d238fcf85648465143a0 Mon Sep 17 00:00:00 2001
From: VirtualTam 
Date: Sat, 12 Jan 2019 23:55:38 +0100
Subject: Optimize and cleanup imports

Signed-off-by: VirtualTam 
---
 application/bookmark/LinkDB.php | 1 -
 1 file changed, 1 deletion(-)

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
index 6041c088..c13a1141 100644
--- a/application/bookmark/LinkDB.php
+++ b/application/bookmark/LinkDB.php
@@ -6,7 +6,6 @@ use ArrayAccess;
 use Countable;
 use DateTime;
 use Iterator;
-use Shaarli\Bookmark\LinkFilter;
 use Shaarli\Bookmark\Exception\LinkNotFoundException;
 use Shaarli\Exceptions\IOException;
 use Shaarli\FileUtils;
-- 
cgit v1.2.3


From 520d29578c57e476ece3bdd20c286d196b7b61b4 Mon Sep 17 00:00:00 2001
From: ArthurHoaro 
Date: Sat, 9 Feb 2019 13:52:12 +0100
Subject: Remove the redirector setting

Fixes #1239
---
 application/bookmark/LinkDB.php    | 41 ++++++--------------------------------
 application/bookmark/LinkUtils.php | 24 ++++------------------
 2 files changed, 10 insertions(+), 55 deletions(-)

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
index c13a1141..266632e3 100644
--- a/application/bookmark/LinkDB.php
+++ b/application/bookmark/LinkDB.php
@@ -29,10 +29,10 @@ use Shaarli\FileUtils;
  *  - private:  Is this link private? 0=no, other value=yes
  *  - tags:     tags attached to this entry (separated by spaces)
  *  - title     Title of the link
- *  - url       URL of the link. Used for displayable links (no redirector, relative, etc.).
- *              Can be absolute or relative.
- *              Relative URLs are permalinks (e.g.'?m-ukcw')
- *  - real_url  Absolute processed URL.
+ *  - url       URL of the link. Used for displayable links.
+ *              Can be absolute or relative in the database but the relative links
+ *              will be converted to absolute ones in templates.
+ *  - real_url  Raw URL in stored in the DB (absolute or relative).
  *  - shorturl  Permalink smallhash
  *
  * Implements 3 interfaces:
@@ -88,19 +88,6 @@ class LinkDB implements Iterator, Countable, ArrayAccess
     // Hide public links
     private $hidePublicLinks;
 
-    // link redirector set in user settings.
-    private $redirector;
-
-    /**
-     * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
-     *
-     * Example:
-     *   anonym.to needs clean URL while dereferer.org needs urlencoded URL.
-     *
-     * @var boolean $redirectorEncode parameter: true or false
-     */
-    private $redirectorEncode;
-
     /**
      * Creates a new LinkDB
      *
@@ -109,22 +96,16 @@ class LinkDB implements Iterator, Countable, ArrayAccess
      * @param string  $datastore        datastore file path.
      * @param boolean $isLoggedIn       is the user logged in?
      * @param boolean $hidePublicLinks  if true all links are private.
-     * @param string  $redirector       link redirector set in user settings.
-     * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
      */
     public function __construct(
         $datastore,
         $isLoggedIn,
-        $hidePublicLinks,
-        $redirector = '',
-        $redirectorEncode = true
+        $hidePublicLinks
     ) {
     
         $this->datastore = $datastore;
         $this->loggedIn = $isLoggedIn;
         $this->hidePublicLinks = $hidePublicLinks;
-        $this->redirector = $redirector;
-        $this->redirectorEncode = $redirectorEncode === true;
         $this->check();
         $this->read();
     }
@@ -323,17 +304,7 @@ You use the community supported version of the original Shaarli project, by Seba
                 $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']);
             }
 
-            // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
-            if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
-                $link['real_url'] = $this->redirector;
-                if ($this->redirectorEncode) {
-                    $link['real_url'] .= urlencode(unescape($link['url']));
-                } else {
-                    $link['real_url'] .= $link['url'];
-                }
-            } else {
-                $link['real_url'] = $link['url'];
-            }
+            $link['real_url'] = $link['url'];
 
             // To be able to load links before running the update, and prepare the update
             if (!isset($link['created'])) {
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index de5b61cb..988970bd 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -133,29 +133,15 @@ function count_private($links)
  * In a string, converts URLs to clickable links.
  *
  * @param string $text       input string.
- * @param string $redirector if a redirector is set, use it to gerenate links.
- * @param bool   $urlEncode  Use `urlencode()` on the URL after the redirector or not.
  *
  * @return string returns $text with all links converted to HTML links.
  *
  * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
  */
-function text2clickable($text, $redirector = '', $urlEncode = true)
+function text2clickable($text)
 {
     $regex = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
-
-    if (empty($redirector)) {
-        return preg_replace($regex, '$1', $text);
-    }
-    // Redirector is set, urlencode the final URL.
-    return preg_replace_callback(
-        $regex,
-        function ($matches) use ($redirector, $urlEncode) {
-            $url = $urlEncode ? urlencode($matches[1]) : $matches[1];
-            return ''. $matches[1] .'';
-        },
-        $text
-    );
+    return preg_replace($regex, '$1', $text);
 }
 
 /**
@@ -197,15 +183,13 @@ function space2nbsp($text)
  * Format Shaarli's description
  *
  * @param string $description shaare's description.
- * @param string $redirector  if a redirector is set, use it to gerenate links.
- * @param bool   $urlEncode   Use `urlencode()` on the URL after the redirector or not.
  * @param string $indexUrl    URL to Shaarli's index.
 
  * @return string formatted description.
  */
-function format_description($description, $redirector = '', $urlEncode = true, $indexUrl = '')
+function format_description($description, $indexUrl = '')
 {
-    return nl2br(space2nbsp(hashtag_autolink(text2clickable($description, $redirector, $urlEncode), $indexUrl)));
+    return nl2br(space2nbsp(hashtag_autolink(text2clickable($description), $indexUrl)));
 }
 
 /**
-- 
cgit v1.2.3


From b790f900c937d0d8f6eccc15d2b4c26023f3d276 Mon Sep 17 00:00:00 2001
From: ArthurHoaro 
Date: Sat, 9 Feb 2019 14:04:16 +0100
Subject: Fix a warning if links sticky status isn't set

  - initialize its status to false when the link is created
  - if not defined, initialize its status to false (can happen if the updater hasn't run)
---
 application/bookmark/LinkDB.php | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
index c13a1141..41d5591f 100644
--- a/application/bookmark/LinkDB.php
+++ b/application/bookmark/LinkDB.php
@@ -271,7 +271,8 @@ You use the community supported version of the original Shaarli project, by Seba
             ),
             'private' => 0,
             'created' => new DateTime(),
-            'tags' => 'opensource software'
+            'tags' => 'opensource software',
+            'sticky' => false,
         );
         $link['shorturl'] = link_small_hash($link['created'], $link['id']);
         $this->links[1] = $link;
@@ -284,6 +285,7 @@ You use the community supported version of the original Shaarli project, by Seba
             'private' => 1,
             'created' => new DateTime('1 minute ago'),
             'tags' => 'secretstuff',
+            'sticky' => false,
         );
         $link['shorturl'] = link_small_hash($link['created'], $link['id']);
         $this->links[0] = $link;
@@ -335,6 +337,8 @@ You use the community supported version of the original Shaarli project, by Seba
                 $link['real_url'] = $link['url'];
             }
 
+            $link['sticky'] = isset($link['sticky']) ? $link['sticky'] : false;
+
             // To be able to load links before running the update, and prepare the update
             if (!isset($link['created'])) {
                 $link['id'] = $link['linkdate'];
-- 
cgit v1.2.3


From a8e7da01146455f13ef06b151a7dafedd3acf769 Mon Sep 17 00:00:00 2001
From: ArthurHoaro 
Date: Sat, 9 Feb 2019 14:13:08 +0100
Subject: Do not try to retrieve thumbnails for internal link

Also adds a helper function to determine if a link is a note and applies it across multiple files.
---
 application/bookmark/LinkUtils.php | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index de5b61cb..9e9d4f0a 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -220,3 +220,16 @@ function link_small_hash($date, $id)
 {
     return smallHash($date->format(LinkDB::LINK_DATE_FORMAT) . $id);
 }
+
+/**
+ * Returns whether or not the link is an internal note.
+ * Its URL starts by `?` because it's actually a permalink.
+ *
+ * @param string $linkUrl
+ *
+ * @return bool true if internal note, false otherwise.
+ */
+function is_note($linkUrl)
+{
+    return isset($linkUrl[0]) && $linkUrl[0] === '?';
+}
-- 
cgit v1.2.3


From 6a4872520cbbc012b5a8358cd50c78844afe8d07 Mon Sep 17 00:00:00 2001
From: ArthurHoaro 
Date: Sat, 8 Jun 2019 13:59:19 +0200
Subject: Automatically retrieve description for new bookmarks

If the option is enabled, Shaarli will try to find meta tags containing
the page description and keywords, just like it does for the page title.
It looks for either regular meta tags or OpenGraph ones.

The option is disabled by default.

Note that the keywords meta tag is mostly unused in practice.

In `configure` template, the variable associated with this setting
is `$retrieve_description`.

Fixes #1302
---
 application/bookmark/LinkUtils.php | 85 ++++++++++++++++++++++++++++++++++++--
 1 file changed, 81 insertions(+), 4 deletions(-)

(limited to 'application/bookmark')

diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
index 35a5b290..77eb2d95 100644
--- a/application/bookmark/LinkUtils.php
+++ b/application/bookmark/LinkUtils.php
@@ -7,13 +7,25 @@ use Shaarli\Bookmark\LinkDB;
  *
  * @param string $charset     to extract from the downloaded page (reference)
  * @param string $title       to extract from the downloaded page (reference)
+ * @param string $description to extract from the downloaded page (reference)
+ * @param string $keywords    to extract from the downloaded page (reference)
+ * @param bool   $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
  * @param string $curlGetInfo Optionally overrides curl_getinfo function
  *
  * @return Closure
  */
-function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_getinfo')
-{
+function get_curl_download_callback(
+    &$charset,
+    &$title,
+    &$description,
+    &$keywords,
+    $retrieveDescription,
+    $curlGetInfo = 'curl_getinfo'
+) {
     $isRedirected = false;
+    $currentChunk = 0;
+    $foundChunk = null;
+
     /**
      * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
      *
@@ -25,7 +37,18 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
      *
      * @return int|bool length of $data or false if we need to stop the download
      */
-    return function (&$ch, $data) use ($curlGetInfo, &$charset, &$title, &$isRedirected) {
+    return function (&$ch, $data) use (
+        $retrieveDescription,
+        $curlGetInfo,
+        &$charset,
+        &$title,
+        &$description,
+        &$keywords,
+        &$isRedirected,
+        &$currentChunk,
+        &$foundChunk
+    ) {
+        $currentChunk++;
         $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
         if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
             $isRedirected = true;
@@ -50,9 +73,34 @@ function get_curl_download_callback(&$charset, &$title, $curlGetInfo = 'curl_get
         }
         if (empty($title)) {
             $title = html_extract_title($data);
+            $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
+        }
+        if ($retrieveDescription && empty($description)) {
+            $description = html_extract_tag('description', $data);
+            $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
         }
+        if ($retrieveDescription && empty($keywords)) {
+            $keywords = html_extract_tag('keywords', $data);
+            if (! empty($keywords)) {
+                $foundChunk = $currentChunk;
+                // Keywords use the format tag1, tag2 multiple words, tag
+                // So we format them to match Shaarli's separator and glue multiple words with '-'
+                $keywords = implode(' ', array_map(function($keyword) {
+                    return implode('-', preg_split('/\s+/', trim($keyword)));
+                }, explode(',', $keywords)));
+            }
+        }
+
         // We got everything we want, stop the download.
-        if (!empty($responseCode) && !empty($contentType) && !empty($charset) && !empty($title)) {
+        // If we already found either the title, description or keywords,
+        // it's highly unlikely that we'll found the other metas further than
+        // in the same chunk of data or the next one. So we also stop the download after that.
+        if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
+            && (! $retrieveDescription
+                || $foundChunk < $currentChunk
+                || (!empty($title) && !empty($description) && !empty($keywords))
+            )
+        ) {
             return false;
         }
 
@@ -110,6 +158,35 @@ function html_extract_charset($html)
     return false;
 }
 
+/**
+ * Extract meta tag from HTML content in either:
+ *   - OpenGraph: 
+ *   - Meta tag: 
+ *
+ * @param string $tag  Name of the tag to retrieve.
+ * @param string $html HTML content where to look for charset.
+ *
+ * @return bool|string Charset string if found, false otherwise.
+ */
+function html_extract_tag($tag, $html)
+{
+    $propertiesKey = ['property', 'name', 'itemprop'];
+    $properties = implode('|', $propertiesKey);
+    // Try to retrieve OpenGraph image.
+    $ogRegex = '#]+(?:'. $properties .')=["\']?(?:og:)?'. $tag .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#';
+    // If the attributes are not in the order property => content (e.g. Github)
+    // New regex to keep this readable... more or less.
+    $ogRegexReverse = '#]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og)?:'. $tag .'["\'\s/>]#';
+
+    if (preg_match($ogRegex, $html, $matches) > 0
+        || preg_match($ogRegexReverse, $html, $matches) > 0
+    ) {
+        return $matches[1];
+    }
+
+    return false;
+}
+
 /**
  * Count private links in given linklist.
  *
-- 
cgit v1.2.3