aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/bookmark
diff options
context:
space:
mode:
Diffstat (limited to 'application/bookmark')
-rw-r--r--application/bookmark/LinkDB.php575
-rw-r--r--application/bookmark/LinkFilter.php449
-rw-r--r--application/bookmark/LinkUtils.php296
-rw-r--r--application/bookmark/exception/LinkNotFoundException.php15
4 files changed, 1335 insertions, 0 deletions
diff --git a/application/bookmark/LinkDB.php b/application/bookmark/LinkDB.php
new file mode 100644
index 00000000..76ba95f0
--- /dev/null
+++ b/application/bookmark/LinkDB.php
@@ -0,0 +1,575 @@
1<?php
2
3namespace Shaarli\Bookmark;
4
5use ArrayAccess;
6use Countable;
7use DateTime;
8use Iterator;
9use Shaarli\Bookmark\Exception\LinkNotFoundException;
10use Shaarli\Exceptions\IOException;
11use Shaarli\FileUtils;
12
13/**
14 * Data storage for links.
15 *
16 * This object behaves like an associative array.
17 *
18 * Example:
19 * $myLinks = new LinkDB();
20 * echo $myLinks[350]['title'];
21 * foreach ($myLinks as $link)
22 * echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
23 *
24 * Available keys:
25 * - id: primary key, incremental integer identifier (persistent)
26 * - description: description of the entry
27 * - created: creation date of this entry, DateTime object.
28 * - updated: last modification date of this entry, DateTime object.
29 * - private: Is this link private? 0=no, other value=yes
30 * - tags: tags attached to this entry (separated by spaces)
31 * - title Title of the link
32 * - url URL of the link. Used for displayable links.
33 * Can be absolute or relative in the database but the relative links
34 * will be converted to absolute ones in templates.
35 * - real_url Raw URL stored in the DB (absolute or relative).
36 * - shorturl Permalink smallhash
37 *
38 * Implements 3 interfaces:
39 * - ArrayAccess: behaves like an associative array;
40 * - Countable: there is a count() method;
41 * - Iterator: usable in foreach () loops.
42 *
43 * ID mechanism:
44 * ArrayAccess is implemented in a way that will allow to access a link
45 * with the unique identifier ID directly with $link[ID].
46 * Note that it's not the real key of the link array attribute.
47 * This mechanism is in place to have persistent link IDs,
48 * even though the internal array is reordered by date.
49 * Example:
50 * - DB: link #1 (2010-01-01) link #2 (2016-01-01)
51 * - Order: #2 #1
52 * - Import links containing: link #3 (2013-01-01)
53 * - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
54 * - Real order: #2 #3 #1
55 */
56class LinkDB implements Iterator, Countable, ArrayAccess
57{
58 // Links are stored as a PHP serialized string
59 private $datastore;
60
61 // Link date storage format
62 const LINK_DATE_FORMAT = 'Ymd_His';
63
64 // List of links (associative array)
65 // - key: link date (e.g. "20110823_124546"),
66 // - value: associative array (keys: title, description...)
67 private $links;
68
69 // List of all recorded URLs (key=url, value=link offset)
70 // for fast reverse search (url-->link offset)
71 private $urls;
72
73 /**
74 * @var array List of all links IDS mapped with their array offset.
75 * Map: id->offset.
76 */
77 protected $ids;
78
79 // List of offset keys (for the Iterator interface implementation)
80 private $keys;
81
82 // Position in the $this->keys array (for the Iterator interface)
83 private $position;
84
85 // Is the user logged in? (used to filter private links)
86 private $loggedIn;
87
88 // Hide public links
89 private $hidePublicLinks;
90
/**
 * Builds a LinkDB bound to a datastore file.
 *
 * The three arguments are stored on the instance, then check() runs
 * (it writes a dummy datastore when the file is missing) followed by
 * read(), which loads the links into memory.
 *
 * @param string  $datastore       datastore file path.
 * @param boolean $isLoggedIn      is the user logged in?
 * @param boolean $hidePublicLinks if true all links are private.
 */
public function __construct($datastore, $isLoggedIn, $hidePublicLinks)
{
    $this->hidePublicLinks = $hidePublicLinks;
    $this->loggedIn = $isLoggedIn;
    $this->datastore = $datastore;

    // The datastore must exist before it can be loaded.
    $this->check();
    $this->read();
}
112
/**
 * Countable - Number of links currently held in memory.
 *
 * @return int size of the internal links array.
 */
public function count()
{
    $total = count($this->links);

    return $total;
}
120
/**
 * ArrayAccess - Assigns a value to the specified offset
 *
 * The link is stored under its persistent ID: an existing link with the
 * same ID is replaced in place, otherwise the link is appended under a
 * free internal offset. The url and id lookup tables are kept in sync.
 *
 * @param int|null $offset Link ID, or null when appending ($db[] = $link).
 *                         When given, it must equal $value['id'].
 * @param array    $value  Link data; requires an integer 'id' and a non-empty 'url'.
 */
public function offsetSet($offset, $value)
{
    // TODO: use exceptions instead of "die"
    if (!$this->loggedIn) {
        die(t('You are not authorized to add a link.'));
    }
    if (!isset($value['id']) || empty($value['url'])) {
        die(t('Internal Error: A link should always have an id and URL.'));
    }
    if (($offset !== null && !is_int($offset)) || !is_int($value['id'])) {
        die(t('You must specify an integer as a key.'));
    }
    if ($offset !== null && $offset !== $value['id']) {
        die(t('Array offset and link ID must be equal.'));
    }

    // Resolve through the link ID rather than $offset: both are equal when
    // $offset is given, and this also finds the existing entry on append.
    $realOffset = $this->getLinkOffset($value['id']);
    if ($realOffset === null) {
        // New entry: start from count() and skip offsets still in use, since
        // a previous offsetUnset() may have left holes in the array.
        $realOffset = count($this->links);
        while (array_key_exists($realOffset, $this->links)) {
            $realOffset++;
        }
    } elseif (isset($this->links[$realOffset]['url'])) {
        // Existing entry whose URL changed: drop the stale reverse mapping,
        // but only if it still points at this link.
        $previousUrl = $this->links[$realOffset]['url'];
        if ($previousUrl !== $value['url']
            && isset($this->urls[$previousUrl])
            && $this->urls[$previousUrl] === $realOffset
        ) {
            unset($this->urls[$previousUrl]);
        }
    }

    $this->links[$realOffset] = $value;
    $this->urls[$value['url']] = $realOffset;
    $this->ids[$value['id']] = $realOffset;
}
151
/**
 * ArrayAccess - Whether or not an offset exists
 *
 * @param int $offset Persistent link ID.
 *
 * @return bool true if a link is stored for this ID.
 */
public function offsetExists($offset)
{
    $realOffset = $this->getLinkOffset($offset);
    if ($realOffset === null) {
        // Unknown ID: do not hand a null key to array_key_exists().
        return false;
    }

    return array_key_exists($realOffset, $this->links);
}
159
/**
 * ArrayAccess - Unsets an offset
 *
 * Removes the link and its entries in the url and id lookup tables.
 * Unsetting an unknown ID is a no-op.
 *
 * @param int $offset Persistent link ID.
 */
public function offsetUnset($offset)
{
    if (!$this->loggedIn) {
        // TODO: raise an exception
        die('You are not authorized to delete a link.');
    }

    $realOffset = $this->getLinkOffset($offset);
    if ($realOffset === null) {
        return;
    }

    if (isset($this->links[$realOffset]['url'])) {
        unset($this->urls[$this->links[$realOffset]['url']]);
    }
    // $this->ids maps id -> offset, so it must be cleared by ID, not by offset.
    unset($this->ids[$offset]);
    unset($this->links[$realOffset]);
}
175
/**
 * ArrayAccess - Returns the value at specified offset
 *
 * @param int $offset Persistent link ID.
 *
 * @return array|null the link stored for this ID, or null if there is none.
 */
public function offsetGet($offset)
{
    $realOffset = $this->getLinkOffset($offset);
    if (!isset($this->links[$realOffset])) {
        return null;
    }

    return $this->links[$realOffset];
}
184
/**
 * Iterator - Returns the current element
 *
 * @return array|null link matching the ID at the current position.
 */
public function current()
{
    $currentId = $this->keys[$this->position];

    return $this->offsetGet($currentId);
}
192
/**
 * Iterator - Returns the key of the current element
 *
 * @return int link ID found at the current position.
 */
public function key()
{
    $currentId = $this->keys[$this->position];

    return $currentId;
}
200
/**
 * Iterator - Moves forward to next element
 */
public function next()
{
    $this->position += 1;
}
208
/**
 * Iterator - Rewinds the Iterator to the first element
 *
 * Iteration follows the order of the id lookup table, which reorder()
 * rebuilds after sorting the links.
 */
public function rewind()
{
    // The id table is left unset when read() returns early (public links
    // hidden and user not logged in); iterate over nothing in that case.
    $this->keys = is_array($this->ids) ? array_keys($this->ids) : [];
    $this->position = 0;
}
219
/**
 * Iterator - Checks if current position is valid
 *
 * @return bool true while the position points at an existing key.
 */
public function valid()
{
    $keys = $this->keys;

    return isset($keys[$this->position]);
}
227
/**
 * Makes sure a datastore file exists.
 *
 * When the file is already there nothing happens. Otherwise two example
 * links (one public, one private) are created and written to disk.
 */
private function check()
{
    if (!file_exists($this->datastore)) {
        $this->links = array();

        // Public welcome link.
        $welcomeLink = array(
            'id' => 1,
            'title' => t('The personal, minimalist, super-fast, database free, bookmarking service'),
            'url' => 'https://shaarli.readthedocs.io',
            'description' => t(
                'Welcome to Shaarli! This is your first public bookmark. '
                . 'To edit or delete me, you must first login.

To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page.

You use the community supported version of the original Shaarli project, by Sebastien Sauvage.'
            ),
            'private' => 0,
            'created' => new DateTime(),
            'tags' => 'opensource software',
            'sticky' => false,
        );
        $welcomeLink['shorturl'] = link_small_hash($welcomeLink['created'], $welcomeLink['id']);
        $this->links[1] = $welcomeLink;

        // Private example link, dated one minute earlier.
        $secretLink = array(
            'id' => 0,
            'title' => t('My secret stuff... - Pastebin.com'),
            'url' => 'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
            'description' => t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'),
            'private' => 1,
            'created' => new DateTime('1 minute ago'),
            'tags' => 'secretstuff',
            'sticky' => false,
        );
        $secretLink['shorturl'] = link_small_hash($secretLink['created'], $secretLink['id']);
        $this->links[0] = $secretLink;

        // Persist the example database.
        $this->write();
    }
}
277
/**
 * Reads database from disk to memory
 *
 * Fills $this->links and rebuilds the url and id lookup tables.
 * When public links are hidden and the user is not logged in, nothing
 * is loaded and all three structures are left empty.
 * For visitors, private links are dropped and tags starting with a dot
 * are stripped.
 */
private function read()
{
    // Reset the lookup tables first so they are arrays on every path,
    // including the early return below.
    $this->urls = [];
    $this->ids = [];

    // Public links are hidden and user not logged in => nothing to show
    if ($this->hidePublicLinks && !$this->loggedIn) {
        $this->links = array();
        return;
    }

    $this->links = FileUtils::readFlatDB($this->datastore, []);

    foreach ($this->links as $key => &$link) {
        // Visitors never get to see private links.
        if (!$this->loggedIn && $link['private'] != 0) {
            unset($this->links[$key]);
            continue;
        }

        // Sanitize data fields.
        sanitizeLink($link);

        // Remove private tags if the user is not logged in.
        if (!$this->loggedIn) {
            $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']);
        }

        $link['real_url'] = $link['url'];

        $link['sticky'] = isset($link['sticky']) ? $link['sticky'] : false;

        // Transition for not upgraded databases: to be able to load links
        // before running the update, and prepare the update.
        if (!isset($link['created'])) {
            $link['id'] = $link['linkdate'];
            $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
            if (!empty($link['updated'])) {
                $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
            }
            $link['shorturl'] = smallHash($link['linkdate']);
        }

        $this->urls[$link['url']] = $key;
        $this->ids[$link['id']] = $key;
    }
    // Release the reference so the last element cannot be aliased by accident.
    unset($link);
}
327
/**
 * Sorts the links, then dumps them to the datastore file.
 *
 * reorder() runs first, so the links are stored in their display order.
 *
 * @throws IOException the datastore is not writable
 */
private function write()
{
    $this->reorder();

    $target = $this->datastore;
    FileUtils::writeFlatDB($target, $this->links);
}
338
/**
 * Persists the in-memory database and invalidates the caches.
 *
 * Execution stops with an error message when the user is not logged in.
 *
 * @param string $pageCacheDir page cache directory
 */
public function save($pageCacheDir)
{
    $authorized = (bool) $this->loggedIn;
    if (!$authorized) {
        // TODO: raise an Exception instead
        die('You are not authorized to change the database.');
    }

    $this->write();
    invalidateCaches($pageCacheDir);
}
355
/**
 * Looks a link up by its URL through the url lookup table.
 *
 * @param string $url URL to search for
 *
 * @return mixed the existing link if it exists, else 'false'
 */
public function getLinkFromUrl($url)
{
    if (!isset($this->urls[$url])) {
        return false;
    }

    $offset = $this->urls[$url];

    return $this->links[$offset];
}
370
/**
 * Returns the shaare corresponding to a smallHash.
 *
 * Only the first 6 characters of the request are used as the hash.
 *
 * @param string $request QUERY_STRING server parameter.
 *
 * @return array $filtered array containing permalink data.
 *
 * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
 */
public function filterHash($request)
{
    $smallHash = substr($request, 0, 6);
    $hashFilter = new LinkFilter($this->links);

    return $hashFilter->filter(LinkFilter::$FILTER_HASH, $smallHash);
}
386
/**
 * Returns the list of articles for a given day.
 *
 * Delegates to LinkFilter with the FILTER_DAY filter type.
 *
 * @param string $request day to filter. Format: YYYYMMDD.
 *
 * @return array list of shaare found.
 */
public function filterDay($request)
{
    $dayFilter = new LinkFilter($this->links);
    $found = $dayFilter->filter(LinkFilter::$FILTER_DAY, $request);

    return $found;
}
399
/**
 * Filter links according to search parameters.
 *
 * Both request values are passed through escape() and handed to
 * LinkFilter as a combined tag + text search; blank values return all links.
 *
 * @param array  $filterRequest Search request content. Supported keys:
 *                                - searchtags: list of tags
 *                                - searchterm: term search
 * @param bool   $casesensitive Optional: Perform case sensitive filter
 * @param string $visibility    return only all/private/public links
 * @param bool   $untaggedonly  return only untagged links
 *
 * @return array filtered links, all links if no suitable filter was provided.
 */
public function filterSearch(
    $filterRequest = array(),
    $casesensitive = false,
    $visibility = 'all',
    $untaggedonly = false
) {
    $searchtags = '';
    if (isset($filterRequest['searchtags'])) {
        $searchtags = escape($filterRequest['searchtags']);
    }

    $searchterm = '';
    if (isset($filterRequest['searchterm'])) {
        $searchterm = escape($filterRequest['searchterm']);
    }

    // Combined tag + fulltext filter type.
    $combinedType = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT;

    $linkFilter = new LinkFilter($this);

    return $linkFilter->filter(
        $combinedType,
        [$searchtags, $searchterm],
        $casesensitive,
        $visibility,
        $untaggedonly
    );
}
430
/**
 * Counts how often each tag appears in the links matching the given tags.
 *
 * Tags differing only by case are merged; the first spelling found is
 * the one used as key in the result.
 *
 * @param array  $filteringTags tags selecting the links to consider
 * @param string $visibility    process only all/private/public links
 *
 * @return array tag => linksCount
 */
public function linksCountPerTag($filteringTags = [], $visibility = 'all')
{
    $counts = [];
    $displayCase = [];

    $matching = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility);
    foreach ($matching as $link) {
        $linkTags = preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY);
        foreach ($linkTags as $tag) {
            if (empty($tag)) {
                continue;
            }
            $lowered = strtolower($tag);
            // The first case found will be displayed.
            if (!isset($displayCase[$lowered])) {
                $displayCase[$lowered] = $tag;
                $counts[$tag] = 0;
            }
            $counts[$displayCase[$lowered]]++;
        }
    }

    /*
     * arsort() leaves the order of equal values undefined and behaves
     * differently between PHP 5.6 and 7, so array_multisort() is used:
     * DESC by occurrences, then ASC alphabetically for equal values.
     *
     * @see https://github.com/shaarli/Shaarli/issues/1142
     */
    $keys = array_keys($counts);
    $tmpTags = array_combine($keys, $keys);
    array_multisort($counts, SORT_DESC, $tmpTags, SORT_ASC, $counts);

    return $counts;
}
472
/**
 * Rename or delete a tag across all links.
 *
 * Links are selected with a case-sensitive tag search. Every exact
 * occurrence of the tag in a link's tag list is renamed (or removed),
 * duplicates are collapsed, and the link is stored back in the database.
 * Selected links whose tag list does not contain the tag are returned
 * unchanged.
 *
 * @param string $from Tag to rename
 * @param string $to   New tag. If none is provided, the from tag will be deleted
 *
 * @return array|bool List of altered links or false on error
 */
public function renameTag($from, $to)
{
    if (empty($from)) {
        return false;
    }
    $delete = empty($to);
    // True for case-sensitive tag search.
    $linksToAlter = $this->filterSearch(['searchtags' => $from], true);

    // Iterate by value: the filter may hand back an Iterator, which cannot
    // be traversed by reference.
    foreach ($linksToAlter as $key => $value) {
        $tags = preg_split('/\s+/', trim($value['tags']));

        // Strict matching: loose comparison would treat numeric-looking
        // tags such as "10" and "1e1" as equal.
        $positions = array_keys($tags, (string) $from, true);
        if (empty($positions)) {
            continue;
        }

        foreach ($positions as $pos) {
            if ($delete) {
                unset($tags[$pos]); // Remove tag.
            } else {
                $tags[$pos] = trim($to);
            }
        }

        $value['tags'] = trim(implode(' ', array_unique($tags)));
        $this[$value['id']] = $value;
        $linksToAlter[$key] = $value;
    }

    return $linksToAlter;
}
504
/**
 * Lists the distinct days on which links were created, oldest first.
 *
 * Each day is derived from the link creation date using the 'Ymd' format.
 *
 * @return array days (in format YYYYMMDD).
 */
public function days()
{
    // Days are collected as array keys so that duplicates collapse.
    $seenDays = array();
    foreach ($this->links as $link) {
        $day = $link['created']->format('Ymd');
        $seenDays[$day] = 0;
    }

    $sortedDays = array_keys($seenDays);
    sort($sortedDays);

    return $sortedDays;
}
520
/**
 * Reorder links by creation date (newest first by default).
 *
 * When both links carry a 'sticky' flag and the flags differ, the sticky
 * link comes first regardless of the requested order.
 * The urls and ids mapping arrays are rebuilt afterwards.
 *
 * @param string $order ASC|DESC
 */
public function reorder($order = 'DESC')
{
    $direction = ($order === 'ASC') ? -1 : 1;

    usort($this->links, function ($first, $second) use ($direction) {
        $bothFlagged = isset($first['sticky']) && isset($second['sticky']);
        if ($bothFlagged && $first['sticky'] !== $second['sticky']) {
            return $first['sticky'] ? -1 : 1;
        }
        if ($first['created'] < $second['created']) {
            return $direction;
        }
        return -$direction;
    });

    // usort() reindexed the links: refresh both lookup tables.
    $this->urls = [];
    $this->ids = [];
    foreach ($this->links as $position => $entry) {
        $this->urls[$entry['url']] = $position;
        $this->ids[$entry['id']] = $position;
    }
}
546
/**
 * Return the next key for link creation.
 * E.g. If the last ID is 597, the next will be 598.
 *
 * @return int next ID, 0 when no ID is registered yet.
 */
public function getNextId()
{
    if (empty($this->ids)) {
        return 0;
    }

    $highestId = max(array_keys($this->ids));

    return $highestId + 1;
}
560
/**
 * Translates a persistent link ID into its offset in the links array.
 *
 * @param int $id Persistent ID of a link.
 *
 * @return int Real offset in local array, or null if doesn't exist.
 */
protected function getLinkOffset($id)
{
    return isset($this->ids[$id]) ? $this->ids[$id] : null;
}
575}
diff --git a/application/bookmark/LinkFilter.php b/application/bookmark/LinkFilter.php
new file mode 100644
index 00000000..9b966307
--- /dev/null
+++ b/application/bookmark/LinkFilter.php
@@ -0,0 +1,449 @@
1<?php
2
3namespace Shaarli\Bookmark;
4
5use Exception;
6use Shaarli\Bookmark\Exception\LinkNotFoundException;
7
8/**
9 * Class LinkFilter.
10 *
11 * Perform search and filter operation on link data list.
12 */
13class LinkFilter
14{
15 /**
16 * @var string permalinks.
17 */
18 public static $FILTER_HASH = 'permalink';
19
20 /**
21 * @var string text search.
22 */
23 public static $FILTER_TEXT = 'fulltext';
24
25 /**
26 * @var string tag filter.
27 */
28 public static $FILTER_TAG = 'tags';
29
30 /**
31 * @var string filter by day.
32 */
33 public static $FILTER_DAY = 'FILTER_DAY';
34
35 /**
36 * @var string Allowed characters for hashtags (regex syntax).
37 */
38 public static $HASHTAG_CHARS = '\p{Pc}\p{N}\p{L}\p{Mn}';
39
40 /**
41 * @var LinkDB all available links.
42 */
43 private $links;
44
/**
 * Stores the link collection the filters will run on.
 *
 * @param LinkDB|array $links links to filter; every filter method only
 *                            iterates over it with foreach.
 */
public function __construct($links)
{
    $this->links = $links;
}
52
/**
 * Filter links according to parameters.
 *
 * Dispatches to the filter matching $type; an unknown type only applies
 * the visibility restriction. An unsupported $visibility falls back to 'all'.
 *
 * @param string $type          Type of filter (eg. tags, permalink, etc.).
 * @param mixed  $request       Filter content. For the combined tag + text type,
 *                              an array: [0] => tags, [1] => search terms.
 * @param bool   $casesensitive Optional: Perform case sensitive filter if true.
 * @param string $visibility    Optional: return only all/private/public links
 * @param string $untaggedonly  Optional: return only untagged links. Applies only if $type includes FILTER_TAG
 *
 * @return array filtered link list.
 */
public function filter($type, $request, $casesensitive = false, $visibility = 'all', $untaggedonly = false)
{
    if (!in_array($visibility, ['all', 'public', 'private'])) {
        $visibility = 'all';
    }

    if ($type == self::$FILTER_HASH) {
        return $this->filterSmallHash($request);
    }

    // Combined tag + fulltext search.
    if ($type == (self::$FILTER_TAG | self::$FILTER_TEXT)) {
        $noRequest = empty($request) || (empty($request[0]) && empty($request[1]));
        if ($noRequest) {
            return $untaggedonly
                ? $this->filterUntagged($visibility)
                : $this->noFilter($visibility);
        }

        $filtered = $untaggedonly ? $this->filterUntagged($visibility) : $this->links;

        // Each step narrows down the result of the previous one.
        if (!empty($request[0])) {
            $tagFilter = new LinkFilter($filtered);
            $filtered = $tagFilter->filterTags($request[0], $casesensitive, $visibility);
        }
        if (!empty($request[1])) {
            $textFilter = new LinkFilter($filtered);
            $filtered = $textFilter->filterFulltext($request[1], $visibility);
        }

        return $filtered;
    }

    if ($type == self::$FILTER_TEXT) {
        return $this->filterFulltext($request, $visibility);
    }

    if ($type == self::$FILTER_TAG) {
        if ($untaggedonly) {
            return $this->filterUntagged($visibility);
        }
        return $this->filterTags($request, $casesensitive, $visibility);
    }

    if ($type == self::$FILTER_DAY) {
        return $this->filterDay($request);
    }

    return $this->noFilter($visibility);
}
107
/**
 * Applies the visibility restriction only.
 *
 * With 'all' the stored collection is returned untouched; otherwise the
 * links whose 'private' flag matches the requested visibility are kept.
 *
 * @param string $visibility Optional: return only all/private/public links
 *
 * @return array filtered links.
 */
private function noFilter($visibility = 'all')
{
    if ($visibility === 'all') {
        return $this->links;
    }

    $kept = array();
    foreach ($this->links as $key => $link) {
        $linkVisibility = $link['private'] ? 'private' : 'public';
        if ($visibility === $linkVisibility) {
            $kept[$key] = $link;
        }
    }

    return $kept;
}
132
/**
 * Returns the shaare corresponding to a smallHash.
 *
 * The links are scanned in order and the first one whose 'shorturl'
 * is identical to the hash is returned.
 *
 * @param string $smallHash permalink hash.
 *
 * @return array $filtered array containing permalink data (a single key => link entry).
 *
 * @throws \Shaarli\Bookmark\Exception\LinkNotFoundException if the smallhash doesn't match any link.
 */
private function filterSmallHash($smallHash)
{
    $wanted = (string) $smallHash;
    foreach ($this->links as $key => $l) {
        // Compare as strings: loose comparison treats numeric-looking
        // hashes (e.g. "0e1234" and "000000") as equal.
        if ($wanted === (string) $l['shorturl']) {
            return array($key => $l);
        }
    }

    throw new LinkNotFoundException();
}
159
/**
 * Returns the list of links corresponding to a full-text search
 *
 * Searches:
 *  - in the title, description, URL and tags;
 *  - are case-insensitive;
 *  - terms surrounded by quotes " are exact terms search.
 *  - terms starting with a dash - are excluded (except exact terms).
 *
 * Example:
 *    print_r($mydb->filterFulltext('hollandais'));
 *
 * mb_convert_case($val, MB_CASE_LOWER, 'UTF-8')
 *  - allows to perform searches on Unicode text
 *  - see https://github.com/shaarli/Shaarli/issues/75 for examples
 *
 * @param string $searchterms search query.
 * @param string $visibility  Optional: return only all/private/public links.
 *
 * @return array search results.
 */
private function filterFulltext($searchterms, $visibility = 'all')
{
    if (empty($searchterms)) {
        return $this->noFilter($visibility);
    }

    $exactRegex = '/"([^"]+)"/';
    $search = mb_convert_case(html_entity_decode($searchterms), MB_CASE_LOWER, 'UTF-8');

    // Quoted terms must appear verbatim.
    preg_match_all($exactRegex, $search, $quoted);
    $exactSearch = array_values(array_filter($quoted[1]));

    // What remains once the quoted terms are removed is split on spaces.
    $unquoted = trim(preg_replace($exactRegex, '', $search));
    $looseTerms = array_values(array_filter(explode(' ', $unquoted)));

    // Terms with a leading dash are exclusions, the others are required.
    $andSearch = array();
    $excludeSearch = array();
    foreach ($looseTerms as $needle) {
        $isExclusion = $needle[0] == '-' && strlen($needle) > 1;
        if ($isExclusion) {
            $excludeSearch[] = substr($needle, 1);
        } else {
            $andSearch[] = $needle;
        }
    }

    $results = array();
    foreach ($this->links as $id => $link) {
        // Skip links that do not match the requested visibility.
        $hiddenByVisibility = ($visibility === 'private' && !$link['private'])
            || ($visibility === 'public' && $link['private']);
        if ($hiddenByVisibility) {
            continue;
        }

        // Concatenate link fields to search across fields.
        // Adds a '\' separator for exact search terms.
        $content = '';
        foreach (array('title', 'description', 'url', 'tags') as $field) {
            $content .= mb_convert_case($link[$field], MB_CASE_LOWER, 'UTF-8') . '\\';
        }

        // All or nothing: stop at the first failing term.
        $found = true;
        foreach ($exactSearch as $term) {
            if (strpos($content, $term) === false) {
                $found = false;
                break;
            }
        }
        if ($found) {
            foreach ($andSearch as $term) {
                if (strpos($content, $term) === false) {
                    $found = false;
                    break;
                }
            }
        }
        if ($found) {
            foreach ($excludeSearch as $term) {
                if (strpos($content, $term) !== false) {
                    $found = false;
                    break;
                }
            }
        }

        if ($found) {
            $results[$id] = $link;
        }
    }

    return $results;
}
255
/**
 * Turns a single tag query into a lookahead regex fragment.
 *
 * A leading '-' produces a negative lookahead, anything else a positive
 * one. Each '*' becomes a lazy run of non-space characters; the text in
 * between is escaped with preg_quote(). The tag has to be delimited by a
 * space or by the beginning/end of the subject.
 *
 * @param string $tag tag to generate the regex from. May start with '-' to negate, contain '*' as wildcard
 *
 * @return string generated regex fragment, empty for '', '-' and '*'.
 */
private static function tag2regex($tag)
{
    if (strlen($tag) === 0 || $tag === '-' || $tag === '*') {
        // nothing to search, return empty regex
        return '';
    }

    $negated = $tag[0] === '-';
    $pattern = $negated ? substr($tag, 1) : $tag;

    // Literal pieces sit between the wildcards; escape each one on its own.
    $pieces = array();
    foreach (explode('*', $pattern) as $literal) {
        $pieces[] = preg_quote($literal, '/');
    }

    $lookahead = $negated ? '(?!' : '(?=';

    return $lookahead
        . '.*(?:^| )'
        . implode('[^ ]*?', $pieces)
        . '(?:$| ))';
}
302
/**
 * Returns the list of links associated with a given list of tags
 *
 * You can specify one or more tags, separated by space or a comma, e.g.
 *  print_r($mydb->filterTags('linux programming'));
 *
 * Every tag is turned into a lookahead by tag2regex(), so all of them
 * must match. Hashtags (#tag) found in the description are searched too.
 *
 * @param string|array $tags          list of tags separated by commas or blank spaces,
 *                                    or an array of tags.
 * @param bool         $casesensitive ignore case if false.
 * @param string       $visibility    Optional: return only all/private/public links.
 *
 * @return array filtered links.
 */
public function filterTags($tags, $casesensitive = false, $visibility = 'all')
{
    // get single tags (we may get passed an array, even though the docs say different)
    $inputTags = $tags;
    if (!is_array($tags)) {
        // we got an input string, split tags
        $inputTags = preg_split('/(?:\s+)|,/', $inputTags, -1, PREG_SPLIT_NO_EMPTY);
    }

    if (!count($inputTags)) {
        // no input tags
        return $this->noFilter($visibility);
    }

    // build regex from all tags
    // (array callable: the "self::method" string form is deprecated since PHP 8.2)
    $re = '/^' . implode(array_map(array(__CLASS__, 'tag2regex'), $inputTags)) . '.*$/';
    if (!$casesensitive) {
        // make regex case insensitive
        $re .= 'i';
    }

    // create resulting array
    $filtered = array();

    // iterate over each link
    foreach ($this->links as $key => $link) {
        // check level of visibility
        // ignore non private links when 'privateonly' is on.
        if ($visibility !== 'all') {
            if (!$link['private'] && $visibility === 'private') {
                continue;
            } elseif ($link['private'] && $visibility === 'public') {
                continue;
            }
        }
        $search = $link['tags']; // build search string, start with tags of current link
        if (strlen(trim($link['description'])) && strpos($link['description'], '#') !== false) {
            // description given and at least one possible tag found
            $descTags = array();
            // find all tags in the form of #tag in the description
            preg_match_all(
                '/(?<![' . self::$HASHTAG_CHARS . '])#([' . self::$HASHTAG_CHARS . ']+?)\b/sm',
                $link['description'],
                $descTags
            );
            if (count($descTags[1])) {
                // there were some tags in the description, add them to the search string
                $search .= ' ' . implode(' ', $descTags[1]);
            }
        }
        // match regular expression with search string
        if (!preg_match($re, $search)) {
            // this entry does _not_ match our regex
            continue;
        }
        $filtered[$key] = $link;
    }
    return $filtered;
}
374
/**
 * Return only links without any tag.
 *
 * A link is untagged when its 'tags' string is empty once trimmed.
 *
 * @param string $visibility return only all/private/public links.
 *
 * @return array filtered links.
 */
public function filterUntagged($visibility)
{
    $filtered = [];
    foreach ($this->links as $key => $link) {
        if ($visibility !== 'all') {
            if (!$link['private'] && $visibility === 'private') {
                continue;
            } elseif ($link['private'] && $visibility === 'public') {
                continue;
            }
        }

        // Compare against '' explicitly: empty() would also report a link
        // whose only tag is "0" as untagged.
        if (trim($link['tags']) === '') {
            $filtered[$key] = $link;
        }
    }

    return $filtered;
}
401
/**
 * Returns the links created on a given day, in reverse stored order.
 *
 * Day must be in the form 'YYYYMMDD' (e.g. '20120125'), e.g.
 *  print_r($mydb->filterDay('20120125'));
 *
 * @param string $day day to filter.
 *
 * @return array all link matching given day; keys are preserved.
 *
 * @throws Exception if date format is invalid.
 */
public function filterDay($day)
{
    $validDay = checkDateFormat('Ymd', $day);
    if (!$validDay) {
        throw new Exception('Invalid date format');
    }

    $sameDay = array();
    foreach ($this->links as $key => $link) {
        $createdOn = $link['created']->format('Ymd');
        if ($createdOn == $day) {
            $sameDay[$key] = $link;
        }
    }

    // Reverse the stored order to get date ASC.
    return array_reverse($sameDay, true);
}
430
/**
 * Splits a tag string into an array of tags.
 *  - lowercases the input (UTF-8 aware) unless the search is case sensitive.
 *  - accepts blank spaces and commas as separators.
 *  - never returns empty tags.
 *
 * @param string $tags          string containing a list of tags.
 * @param bool   $casesensitive will convert everything to lowercase if false.
 *
 * @return array list of tags.
 */
public static function tagsStrToArray($tags, $casesensitive)
{
    if (!$casesensitive) {
        // UTF-8 conversion handles various graphemes (i.e. cyrillic, or greek)
        $tags = mb_convert_case($tags, MB_CASE_LOWER, 'UTF-8');
    }

    // Commas and whitespace are equivalent separators.
    return preg_split('/[\s,]+/', $tags, -1, PREG_SPLIT_NO_EMPTY);
}
449}
diff --git a/application/bookmark/LinkUtils.php b/application/bookmark/LinkUtils.php
new file mode 100644
index 00000000..77eb2d95
--- /dev/null
+++ b/application/bookmark/LinkUtils.php
@@ -0,0 +1,296 @@
1<?php
2
3use Shaarli\Bookmark\LinkDB;
4
/**
 * Get cURL callback function for CURLOPT_WRITEFUNCTION
 *
 * The extracted values are written into the variables passed by reference,
 * so the caller can read them once the download is over.
 *
 * @param string   $charset             to extract from the downloaded page (reference)
 * @param string   $title               to extract from the downloaded page (reference)
 * @param string   $description         to extract from the downloaded page (reference)
 * @param string   $keywords            to extract from the downloaded page (reference)
 * @param bool     $retrieveDescription Automatically tries to retrieve description and keywords from HTML content
 * @param callable $curlGetInfo         Optionally overrides curl_getinfo function
 *
 * @return Closure
 */
function get_curl_download_callback(
    &$charset,
    &$title,
    &$description,
    &$keywords,
    $retrieveDescription,
    $curlGetInfo = 'curl_getinfo'
) {
    $isRedirected = false;
    $currentChunk = 0;
    $foundChunk = null;

    /**
     * cURL callback function for CURLOPT_WRITEFUNCTION (called during the download).
     *
     * While downloading the remote page, we check that the HTTP code is 200 and content type is 'text/html'
     * Then we extract the title and the charset and stop the download when it's done.
     *
     * The cURL handle is taken by value: it is only read here, and a by-reference parameter
     * prevents the callback from being invoked with a non-variable argument.
     *
     * @param resource $ch   cURL resource
     * @param string   $data chunk of data being downloaded
     *
     * @return int|bool length of $data or false if we need to stop the download
     */
    return function ($ch, $data) use (
        $retrieveDescription,
        $curlGetInfo,
        &$charset,
        &$title,
        &$description,
        &$keywords,
        &$isRedirected,
        &$currentChunk,
        &$foundChunk
    ) {
        $currentChunk++;
        $responseCode = $curlGetInfo($ch, CURLINFO_RESPONSE_CODE);
        if (!empty($responseCode) && in_array($responseCode, [301, 302])) {
            // Keep downloading: the redirection target is what we are interested in.
            $isRedirected = true;
            return strlen($data);
        }
        if (!empty($responseCode) && $responseCode !== 200) {
            return false;
        }
        // After a redirection, the content type will keep the previous request value
        // until it finds the next content-type header.
        $contentType = null;
        if (! $isRedirected || strpos(strtolower($data), 'content-type') !== false) {
            $contentType = $curlGetInfo($ch, CURLINFO_CONTENT_TYPE);
        }
        if (!empty($contentType) && strpos($contentType, 'text/html') === false) {
            return false;
        }
        // The HTTP header charset is tried first, the HTML content is the fallback.
        if (!empty($contentType) && empty($charset)) {
            $charset = header_extract_charset($contentType);
        }
        if (empty($charset)) {
            $charset = html_extract_charset($data);
        }
        if (empty($title)) {
            $title = html_extract_title($data);
            $foundChunk = ! empty($title) ? $currentChunk : $foundChunk;
        }
        if ($retrieveDescription && empty($description)) {
            $description = html_extract_tag('description', $data);
            $foundChunk = ! empty($description) ? $currentChunk : $foundChunk;
        }
        if ($retrieveDescription && empty($keywords)) {
            $keywords = html_extract_tag('keywords', $data);
            if (! empty($keywords)) {
                $foundChunk = $currentChunk;
                // Keywords use the format tag1, tag2 multiple words, tag
                // So we format them to match Shaarli's separator and glue multiple words with '-'
                $keywords = implode(' ', array_map(static function ($keyword) {
                    return implode('-', preg_split('/\s+/', trim($keyword)));
                }, explode(',', $keywords)));
            }
        }

        // We got everything we want, stop the download.
        // If we already found either the title, description or keywords,
        // it's highly unlikely that we'll find the other metas further than
        // in the same chunk of data or the next one. So we also stop the download after that.
        if ((!empty($responseCode) && !empty($contentType) && !empty($charset)) && $foundChunk !== null
            && (! $retrieveDescription
                || $foundChunk < $currentChunk
                || (!empty($title) && !empty($description) && !empty($keywords))
            )
        ) {
            return false;
        }

        return strlen($data);
    };
}
110
/**
 * Extract the content of the first <title> element of an HTML document.
 *
 * Line feeds inside the title are removed and surrounding whitespace is trimmed.
 *
 * @param string $html HTML content where to look for a title.
 *
 * @return bool|string Extracted title if found, false otherwise.
 */
function html_extract_title($html)
{
    $found = preg_match('!<title.*?>(.*?)</title>!is', $html, $matches);
    if (!$found) {
        return false;
    }

    $singleLine = str_replace("\n", '', $matches[1]);

    return trim($singleLine);
}
125
/**
 * Extract charset from HTTP header if it's defined.
 *
 * The charset value may be bare (charset=utf-8) or quoted (charset="utf-8");
 * surrounding quotes are never part of the returned value.
 *
 * @param string $header HTTP header Content-Type line.
 *
 * @return bool|string Charset string if found (lowercase), false otherwise.
 */
function header_extract_charset($header)
{
    // Quotes are excluded from the captured value, otherwise a quoted charset
    // would be returned with its closing quote attached (e.g. `utf-8"`).
    preg_match('/charset=["\']?([^;"\' ]+)/i', $header, $match);
    if (! empty($match[1])) {
        return strtolower(trim($match[1]));
    }

    return false;
}
142
/**
 * Extract the charset declared in a <meta> tag of an HTML document.
 *
 * @param string $html HTML content where to look for charset.
 *
 * @return bool|string Charset string if found (lowercase), false otherwise.
 */
function html_extract_charset($html)
{
    $matches = [];
    // Look for a charset=... attribute or parameter inside a <meta> tag.
    preg_match('#<meta .*charset=["\']?([^";\'>/]+)["\']? */?>#Usi', $html, $matches);

    if (empty($matches[1])) {
        return false;
    }

    return strtolower($matches[1]);
}
160
/**
 * Extract the content of a meta tag from HTML content in either:
 *   - OpenGraph: <meta property="og:[tag]" ...>
 *   - Meta tag: <meta name="[tag]" ...>
 *
 * Both attribute orders are supported: identifying attribute before `content`, or after it.
 *
 * @param string $tag  Name of the tag to retrieve.
 * @param string $html HTML content where to look for the tag.
 *
 * @return bool|string Content of the tag if found, false otherwise.
 */
function html_extract_tag($tag, $html)
{
    $propertiesKey = ['property', 'name', 'itemprop'];
    $properties = implode('|', $propertiesKey);
    // The tag name is inserted in a regex: escape it so it is always matched literally.
    $quotedTag = preg_quote($tag, '#');
    // Identifying attribute first, then content.
    $ogRegex = '#<meta[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $quotedTag
        .'["\'\s][^>]*content=["\']?(.*?)["\'/>]#';
    // If the attributes are not in the order property => content (e.g. Github)
    // New regex to keep this readable... more or less.
    // The whole `og:` prefix is optional, so plain name="[tag]" attributes are matched too.
    $ogRegexReverse = '#<meta[^>]+content=["\']([^"\']+)[^>]+(?:'. $properties .')=["\']?(?:og:)?'. $quotedTag
        .'["\'\s/>]#';

    if (preg_match($ogRegex, $html, $matches) > 0
        || preg_match($ogRegexReverse, $html, $matches) > 0
    ) {
        return $matches[1];
    }

    return false;
}
189
/**
 * Count the links flagged as private in the given link list.
 *
 * @param array|Countable $links Linklist.
 *
 * @return int Number of private links.
 */
function count_private($links)
{
    $privateCount = 0;
    foreach ($links as $entry) {
        $privateCount += $entry['private'] ? 1 : 0;
    }

    return $privateCount;
}
208
/**
 * Wrap the URLs found in a string into HTML anchors.
 *
 * Matched URLs start with http(s)://, ftp://, file://, apt: or magnet:.
 *
 * @param string $text input string.
 *
 * @return string returns $text with all links converted to HTML links.
 *
 * @see Function inspired from http://www.php.net/manual/en/function.preg-replace.php#85722
 */
function text2clickable($text)
{
    $urlPattern = '!(((?:https?|ftp|file)://|apt:|magnet:)\S+[a-z0-9\(\)]/?)!si';
    $anchor = '<a href="$1">$1</a>';

    return preg_replace($urlPattern, $anchor, $text);
}
223
/**
 * Turn hashtags into links to the matching tag search.
 *
 * A hashtag is a '#' found at the beginning of a line or after a whitespace,
 * followed by one or more of the characters described below.
 *
 * @param string $description Given description.
 * @param string $indexUrl    Root URL.
 *
 * @return string Description with auto-linked hashtags.
 */
function hashtag_autolink($description, $indexUrl = '')
{
    /*
     * To support unicode: http://stackoverflow.com/a/35498078/1484919
     *   \p{Pc} - connector punctuation (matches underscore)
     *   \p{N} - numeric character in any script
     *   \p{L} - letter from any language
     *   \p{Mn} - non-spacing mark (accents, umlauts, etc)
     */
    $hashtagPattern = '/(^|\s)#([\p{Pc}\p{N}\p{L}\p{Mn}]+)/mui';
    $anchor = sprintf('$1<a href="%s?addtag=$2" title="Hashtag $2">#$2</a>', $indexUrl);

    return preg_replace($hashtagPattern, $anchor, $description);
}
245
/**
 * Replace a space by &nbsp; when it follows another space or starts a line,
 * so that multiple spaces are properly displayed in HTML even in the absence
 * of <pre> (This is used in description to keep text formatting).
 *
 * @param string $text input text.
 *
 * @return string formatted text.
 */
function space2nbsp($text)
{
    $pattern = '/(^| ) /m';
    $replacement = '$1&nbsp;';

    return preg_replace($pattern, $replacement, $text);
}
258
/**
 * Format Shaarli's description.
 *
 * Applies, in this order: clickable URLs, hashtag auto-linking,
 * space to &nbsp; conversion and new line to <br> conversion.
 *
 * @param string $description shaare's description.
 * @param string $indexUrl    URL to Shaarli's index.
 *
 * @return string formatted description.
 */
function format_description($description, $indexUrl = '')
{
    $withLinks = text2clickable($description);
    $withHashtags = hashtag_autolink($withLinks, $indexUrl);
    $withSpaces = space2nbsp($withHashtags);

    return nl2br($withSpaces);
}
271
/**
 * Generate a small hash for a link.
 *
 * The hash is computed from the creation date, formatted with
 * LinkDB::LINK_DATE_FORMAT, immediately followed by the link ID.
 *
 * @param DateTime $date Link creation date.
 * @param int      $id   Link ID.
 *
 * @return string the small hash generated from link data.
 */
function link_small_hash($date, $id)
{
    $formattedDate = $date->format(LinkDB::LINK_DATE_FORMAT);

    return smallHash($formattedDate . $id);
}
284
/**
 * Returns whether or not the link is an internal note.
 * Its URL starts by `?` because it's actually a permalink.
 *
 * @param string $linkUrl
 *
 * @return bool true if internal note, false otherwise.
 */
function is_note($linkUrl)
{
    // Nothing at the first offset: cannot be a note.
    if (!isset($linkUrl[0])) {
        return false;
    }

    return $linkUrl[0] === '?';
}
diff --git a/application/bookmark/exception/LinkNotFoundException.php b/application/bookmark/exception/LinkNotFoundException.php
new file mode 100644
index 00000000..f9414428
--- /dev/null
+++ b/application/bookmark/exception/LinkNotFoundException.php
@@ -0,0 +1,15 @@
1<?php
2namespace Shaarli\Bookmark\Exception;
3
4use Exception;
5
/**
 * Exception carrying a fixed, translated "link not found" message.
 */
class LinkNotFoundException extends Exception
{
    /**
     * LinkNotFoundException constructor.
     *
     * Sets the exception message to the translated error text.
     */
    public function __construct()
    {
        $translated = t('The link you are trying to reach does not exist or has been deleted.');
        $this->message = $translated;
    }
}