]> git.immae.eu Git - github/shaarli/Shaarli.git/blob - application/bookmark/LinkDB.php
namespacing: \Shaarli\FileUtils
[github/shaarli/Shaarli.git] / application / bookmark / LinkDB.php
1 <?php
2
3 namespace Shaarli\Bookmark;
4
5 use ArrayAccess;
6 use Countable;
7 use DateTime;
8 use Iterator;
9 use LinkFilter;
10 use LinkNotFoundException;
11 use Shaarli\Exceptions\IOException;
12 use Shaarli\FileUtils;
13
/**
 * Data storage for links.
 *
 * This object behaves like an associative array indexed by link ID.
 *
 * Example:
 *    $myLinks = new LinkDB();
 *    echo $myLinks[350]['title'];
 *    foreach ($myLinks as $link)
 *       echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
 *
 * Available keys:
 *  - id:          primary key, incremental integer identifier (persistent)
 *  - description: description of the entry
 *  - created:     creation date of this entry, DateTime object.
 *  - updated:     last modification date of this entry, DateTime object.
 *  - private:     Is this link private? 0=no, other value=yes
 *  - tags:        tags attached to this entry (separated by spaces)
 *  - title:       Title of the link
 *  - url:         URL of the link. Used for displayable links (no redirector, relative, etc.).
 *                 Can be absolute or relative.
 *                 Relative URLs are permalinks (e.g.'?m-ukcw')
 *  - real_url:    Absolute processed URL.
 *  - shorturl:    Permalink smallhash
 *
 * Implements 3 interfaces:
 *  - ArrayAccess: behaves like an associative array;
 *  - Countable:   there is a count() method;
 *  - Iterator:    usable in foreach () loops.
 *
 * ID mechanism:
 *  ArrayAccess is implemented in a way that allows accessing a link
 *  with its unique identifier directly with $link[ID].
 *  Note that the ID is not the real key of the internal link array.
 *  This mechanism is in place to have persistent link IDs,
 *  even though the internal array is reordered by date.
 *  Example:
 *    - DB: link #1 (2010-01-01) link #2 (2016-01-01)
 *    - Order: #2 #1
 *    - Import links containing: link #3 (2013-01-01)
 *    - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
 *    - Real order: #2 #3 #1
 */
class LinkDB implements Iterator, Countable, ArrayAccess
{
    /**
     * @var string Datastore file path (links are stored as a PHP serialized string).
     */
    private $datastore;

    // Link date storage format
    const LINK_DATE_FORMAT = 'Ymd_His';

    /**
     * @var array List of links.
     *            - key:   internal offset (NOT the link ID, see $ids),
     *            - value: associative array (keys: id, title, description...)
     */
    private $links;

    /**
     * @var array List of all recorded URLs, for fast reverse search.
     *            Map: url->offset.
     */
    private $urls;

    /**
     * @var array List of all links IDS mapped with their array offset.
     *            Map: id->offset.
     */
    protected $ids;

    /**
     * @var array List of link IDs being iterated (for the Iterator interface implementation).
     */
    private $keys;

    /**
     * @var int Position in the $this->keys array (for the Iterator interface).
     */
    private $position;

    /**
     * @var boolean Is the user logged in? (used to filter private links)
     */
    private $loggedIn;

    /**
     * @var boolean Hide public links from visitors who are not logged in.
     */
    private $hidePublicLinks;

    /**
     * @var string Link redirector set in user settings.
     */
    private $redirector;

    /**
     * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
     *
     * Example:
     *  anonym.to needs clean URL while dereferer.org needs urlencoded URL.
     *
     * @var boolean $redirectorEncode parameter: true or false
     */
    private $redirectorEncode;

    /**
     * Creates a new LinkDB
     *
     * Checks if the datastore exists; else, attempts to create a dummy one.
     * The datastore is then loaded in memory.
     *
     * @param string  $datastore        datastore file path.
     * @param boolean $isLoggedIn       is the user logged in?
     * @param boolean $hidePublicLinks  if true all links are private.
     * @param string  $redirector       link redirector set in user settings.
     * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
     */
    public function __construct(
        $datastore,
        $isLoggedIn,
        $hidePublicLinks,
        $redirector = '',
        $redirectorEncode = true
    ) {
        $this->datastore = $datastore;
        $this->loggedIn = $isLoggedIn;
        $this->hidePublicLinks = $hidePublicLinks;
        $this->redirector = $redirector;
        // Only a strict boolean `true` enables encoding.
        $this->redirectorEncode = $redirectorEncode === true;
        $this->check();
        $this->read();
    }

    /**
     * Countable - Counts elements of an object
     *
     * @return int number of links currently loaded in memory.
     */
    public function count()
    {
        return count($this->links);
    }

    /**
     * ArrayAccess - Assigns a value to the specified offset
     *
     * The link is stored under its ID: an existing link with the same ID is
     * replaced in place, otherwise the link is appended at a fresh offset.
     *
     * @param int|null $offset Link ID, or null (`$db[] = $link`); must equal $value['id'] when set.
     * @param array    $value  Link data; must contain an integer 'id' and a non-empty 'url'.
     */
    public function offsetSet($offset, $value)
    {
        // TODO: use exceptions instead of "die"
        if (!$this->loggedIn) {
            die(t('You are not authorized to add a link.'));
        }
        if (!isset($value['id']) || empty($value['url'])) {
            die(t('Internal Error: A link should always have an id and URL.'));
        }
        if (($offset !== null && !is_int($offset)) || !is_int($value['id'])) {
            die(t('You must specify an integer as a key.'));
        }
        if ($offset !== null && $offset !== $value['id']) {
            die(t('Array offset and link ID must be equal.'));
        }

        // Look the link up by its ID rather than by $offset: $offset is null for
        // `$db[] = $link`, and both are guaranteed equal otherwise.
        $realOffset = $this->getLinkOffset($value['id']);
        if ($realOffset !== null) {
            // Existing link: drop the reverse URL mapping if the URL has changed,
            // so that the old URL no longer resolves to this link.
            $previousUrl = isset($this->links[$realOffset]['url']) ? $this->links[$realOffset]['url'] : null;
            if ($previousUrl !== null
                && $previousUrl !== $value['url']
                && isset($this->urls[$previousUrl])
                && $this->urls[$previousUrl] === $realOffset
            ) {
                unset($this->urls[$previousUrl]);
            }
        } else {
            // New link: use an offset above every existing one. count() is not safe
            // here because deletions leave gaps in the offsets until the next reorder().
            $realOffset = empty($this->links) ? 0 : max(array_keys($this->links)) + 1;
        }

        $this->links[$realOffset] = $value;
        $this->urls[$value['url']] = $realOffset;
        $this->ids[$value['id']] = $realOffset;
    }

    /**
     * ArrayAccess - Whether or not an offset exists
     *
     * @param int $offset Link ID.
     *
     * @return bool true if a link with this ID is loaded.
     */
    public function offsetExists($offset)
    {
        $realOffset = $this->getLinkOffset($offset);
        return $realOffset !== null && array_key_exists($realOffset, $this->links);
    }

    /**
     * ArrayAccess - Unsets an offset
     *
     * Removes the link and its entries in the URL and ID lookup maps.
     * Unknown IDs are ignored.
     *
     * @param int $offset Link ID.
     */
    public function offsetUnset($offset)
    {
        if (!$this->loggedIn) {
            // TODO: raise an exception
            die('You are not authorized to delete a link.');
        }

        $realOffset = $this->getLinkOffset($offset);
        if ($realOffset === null) {
            return;
        }

        if (isset($this->links[$realOffset]['url'])) {
            $url = $this->links[$realOffset]['url'];
            // Only drop the reverse mapping if it points to the link being removed.
            if (isset($this->urls[$url]) && $this->urls[$url] === $realOffset) {
                unset($this->urls[$url]);
            }
        }
        // $ids is keyed by link ID, not by internal offset.
        unset($this->ids[$offset]);
        unset($this->links[$realOffset]);
    }

    /**
     * ArrayAccess - Returns the value at specified offset
     *
     * @param int $offset Link ID.
     *
     * @return array|null the link, or null if no link has this ID.
     */
    public function offsetGet($offset)
    {
        $realOffset = $this->getLinkOffset($offset);
        return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null;
    }

    /**
     * Iterator - Returns the current element
     *
     * @return array|null link at the current position.
     */
    public function current()
    {
        return $this[$this->keys[$this->position]];
    }

    /**
     * Iterator - Returns the key of the current element
     *
     * @return int ID of the link at the current position.
     */
    public function key()
    {
        return $this->keys[$this->position];
    }

    /**
     * Iterator - Moves forward to next element
     */
    public function next()
    {
        ++$this->position;
    }

    /**
     * Iterator - Rewinds the Iterator to the first element
     *
     * Links are iterated in the order of the ID map, which reorder() rebuilds
     * in sorted order (by default: latest first).
     */
    public function rewind()
    {
        $this->keys = array_keys($this->ids);
        $this->position = 0;
    }

    /**
     * Iterator - Checks if current position is valid
     *
     * @return bool true while there are links left to iterate over.
     */
    public function valid()
    {
        return isset($this->keys[$this->position]);
    }

    /**
     * Checks if the DB directory and file exist
     *
     * If no DB file is found, creates a dummy DB containing one public
     * and one private example link, and writes it to disk.
     */
    private function check()
    {
        if (file_exists($this->datastore)) {
            return;
        }

        // Create a dummy database for example
        $this->links = [];
        $link = [
            'id' => 1,
            'title' => t('The personal, minimalist, super-fast, database free, bookmarking service'),
            'url' => 'https://shaarli.readthedocs.io',
            'description' => t(
                'Welcome to Shaarli! This is your first public bookmark. '
                . 'To edit or delete me, you must first login.

To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page.

You use the community supported version of the original Shaarli project, by Sebastien Sauvage.'
            ),
            'private' => 0,
            'created' => new DateTime(),
            'tags' => 'opensource software'
        ];
        $link['shorturl'] = link_small_hash($link['created'], $link['id']);
        $this->links[1] = $link;

        $link = [
            'id' => 0,
            'title' => t('My secret stuff... - Pastebin.com'),
            'url' => 'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
            'description' => t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'),
            'private' => 1,
            'created' => new DateTime('1 minute ago'),
            'tags' => 'secretstuff',
        ];
        $link['shorturl'] = link_small_hash($link['created'], $link['id']);
        $this->links[0] = $link;

        // Write database to disk
        $this->write();
    }

    /**
     * Reads database from disk to memory
     *
     * Private links and private tags (tags starting with a dot) are dropped
     * when the user is not logged in. The URL and ID lookup maps are rebuilt.
     */
    private function read()
    {
        // Always start from empty lookup maps, so that the object stays usable
        // (iteration, getNextId()...) even when nothing is loaded below.
        $this->urls = [];
        $this->ids = [];

        // Public links are hidden and user not logged in => nothing to show
        if ($this->hidePublicLinks && !$this->loggedIn) {
            $this->links = [];
            return;
        }

        $this->links = FileUtils::readFlatDB($this->datastore, []);

        foreach ($this->links as $key => &$link) {
            // Private links are not loaded at all for anonymous visitors.
            if (!$this->loggedIn && $link['private'] != 0) {
                unset($this->links[$key]);
                continue;
            }

            // Sanitize data fields.
            sanitizeLink($link);

            // Remove private tags if the user is not logged in.
            // A private tag is a dot followed by any run of non-whitespace characters.
            if (!$this->loggedIn) {
                $link['tags'] = preg_replace('/(^|\s+)\.\S+\s*/', ' ', $link['tags']);
            }

            // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
            if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
                $link['real_url'] = $this->redirector;
                if ($this->redirectorEncode) {
                    $link['real_url'] .= urlencode(unescape($link['url']));
                } else {
                    $link['real_url'] .= $link['url'];
                }
            } else {
                $link['real_url'] = $link['url'];
            }

            // Transition for not upgraded databases:
            // to be able to load links before running the update, and prepare the update
            if (!isset($link['created'])) {
                $link['id'] = $link['linkdate'];
                $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
                if (!empty($link['updated'])) {
                    $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
                }
                $link['shorturl'] = smallHash($link['linkdate']);
            }

            $this->urls[$link['url']] = $key;
            $this->ids[$link['id']] = $key;
        }
        // Break the reference left by the by-reference foreach.
        unset($link);
    }

    /**
     * Saves the database from memory to disk
     *
     * Links are reordered before being written.
     *
     * @throws IOException the datastore is not writable
     */
    private function write()
    {
        $this->reorder();
        FileUtils::writeFlatDB($this->datastore, $this->links);
    }

    /**
     * Saves the database from memory to disk, then invalidates the caches.
     *
     * @param string $pageCacheDir page cache directory
     */
    public function save($pageCacheDir)
    {
        if (!$this->loggedIn) {
            // TODO: raise an Exception instead
            die('You are not authorized to change the database.');
        }

        $this->write();

        invalidateCaches($pageCacheDir);
    }

    /**
     * Returns the link for a given URL, or False if it does not exist.
     *
     * @param string $url URL to search for
     *
     * @return mixed the existing link if it exists, else 'false'
     */
    public function getLinkFromUrl($url)
    {
        if (isset($this->urls[$url])) {
            return $this->links[$this->urls[$url]];
        }
        return false;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * Only the first 6 characters of the request are used as the hash.
     *
     * @param string $request QUERY_STRING server parameter.
     *
     * @return array $filtered array containing permalink data.
     *
     * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
     */
    public function filterHash($request)
    {
        $request = substr($request, 0, 6);
        $linkFilter = new LinkFilter($this->links);
        return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request);
    }

    /**
     * Returns the list of articles for a given day.
     *
     * @param string $request day to filter. Format: YYYYMMDD.
     *
     * @return array list of shaare found.
     */
    public function filterDay($request)
    {
        $linkFilter = new LinkFilter($this->links);
        return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request);
    }

    /**
     * Filter links according to search parameters.
     *
     * @param array  $filterRequest Search request content. Supported keys:
     *                                - searchtags: list of tags
     *                                - searchterm: term search
     * @param bool   $casesensitive Optional: Perform case sensitive filter
     * @param string $visibility    return only all/private/public links
     * @param string $untaggedonly  return only untagged links
     *
     * @return array filtered links, all links if no suitable filter was provided.
     */
    public function filterSearch(
        $filterRequest = [],
        $casesensitive = false,
        $visibility = 'all',
        $untaggedonly = false
    ) {
        // Filter link database according to parameters.
        $searchtags = isset($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
        $searchterm = isset($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';

        // Search tags + fullsearch - blank string parameter will return all links.
        $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext"
        $request = [$searchtags, $searchterm];

        $linkFilter = new LinkFilter($this);
        return $linkFilter->filter($type, $request, $casesensitive, $visibility, $untaggedonly);
    }

    /**
     * Returns the list tags appearing in the links with the given tags
     *
     * Tags are counted case-insensitively; the first case found is the one returned.
     * The result is sorted by descending count, then ascending tag name.
     *
     * @param array  $filteringTags tags selecting the links to consider
     * @param string $visibility    process only all/private/public links
     *
     * @return array tag => linksCount
     */
    public function linksCountPerTag($filteringTags = [], $visibility = 'all')
    {
        $links = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility);
        $tags = [];
        $caseMapping = [];
        foreach ($links as $link) {
            // PREG_SPLIT_NO_EMPTY already discards empty pieces; no empty() check here,
            // as it would also discard the legitimate tag "0".
            foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) {
                $lowerTag = strtolower($tag);
                // The first case found will be displayed.
                if (!isset($caseMapping[$lowerTag])) {
                    $caseMapping[$lowerTag] = $tag;
                    $tags[$tag] = 0;
                }
                $tags[$caseMapping[$lowerTag]]++;
            }
        }

        if (empty($tags)) {
            return [];
        }

        /*
         * Formerly used arsort(), which doesn't define the sort behaviour for equal values.
         * Also, this function doesn't produce the same result between PHP 5.6 and 7.
         *
         * So we now use array_multisort() to sort tags by DESC occurrences,
         * then ASC alphabetically for equal values.
         *
         * array_multisort() re-indexes numeric keys, which would rename numeric tags
         * (e.g. "2018"); names and counts are therefore sorted as two parallel lists
         * and recombined afterwards.
         *
         * @see https://github.com/shaarli/Shaarli/issues/1142
         */
        $names = array_keys($tags);
        $counts = array_values($tags);
        array_multisort($counts, SORT_DESC, $names, SORT_ASC);
        return array_combine($names, $counts);
    }

    /**
     * Rename or delete a tag across all links.
     *
     * Altered links are updated in memory only; call save() to persist them.
     *
     * @param string $from Tag to rename
     * @param string $to   New tag. If none is provided, the from tag will be deleted
     *
     * @return array|bool List of altered links or false on error
     */
    public function renameTag($from, $to)
    {
        if (empty($from)) {
            return false;
        }
        $delete = empty($to);
        // True for case-sensitive tag search.
        $linksToAlter = $this->filterSearch(['searchtags' => $from], true);
        foreach ($linksToAlter as &$value) {
            $tags = preg_split('/\s+/', trim($value['tags']));
            // Strict search: tags are strings and must match exactly ("1e3" is not "1000").
            if (($pos = array_search((string) $from, $tags, true)) !== false) {
                if ($delete) {
                    unset($tags[$pos]); // Remove tag.
                } else {
                    $tags[$pos] = trim($to);
                }
                // array_unique() avoids a duplicate if the link already had the new tag.
                $value['tags'] = trim(implode(' ', array_unique($tags)));
                $this[$value['id']] = $value;
            }
        }
        // Break the reference left by the by-reference foreach.
        unset($value);

        return $linksToAlter;
    }

    /**
     * Returns the list of days containing articles (oldest first)
     *
     * @return array list of days (in format YYYYMMDD).
     */
    public function days()
    {
        $linkDays = [];
        foreach ($this->links as $link) {
            // Days are used as keys to get rid of duplicates.
            $linkDays[$link['created']->format('Ymd')] = 0;
        }
        $linkDays = array_keys($linkDays);
        sort($linkDays);

        return $linkDays;
    }

    /**
     * Reorder links by creation date (newest first).
     *
     * Sticky links always come first, whatever the order. Links created at the
     * same date are ordered by ID, so that the result is deterministic.
     *
     * Also update the urls and ids mapping arrays.
     *
     * @param string $order ASC|DESC
     */
    public function reorder($order = 'DESC')
    {
        $order = $order === 'ASC' ? -1 : 1;
        // Reorder array by dates.
        usort($this->links, function ($a, $b) use ($order) {
            // A missing 'sticky' key means the link is not sticky.
            $aSticky = !empty($a['sticky']);
            $bSticky = !empty($b['sticky']);
            if ($aSticky !== $bSticky) {
                return $aSticky ? -1 : 1;
            }
            if ($a['created'] == $b['created']) {
                if ($a['id'] == $b['id']) {
                    return 0;
                }
                return $a['id'] < $b['id'] ? $order : -$order;
            }
            return $a['created'] < $b['created'] ? $order : -$order;
        });

        // usort() re-indexes the array: rebuild both lookup maps.
        $this->urls = [];
        $this->ids = [];
        foreach ($this->links as $key => $link) {
            $this->urls[$link['url']] = $key;
            $this->ids[$link['id']] = $key;
        }
    }

    /**
     * Return the next key for link creation.
     * E.g. If the last ID is 597, the next will be 598.
     *
     * @return int next ID (0 if there is no link yet).
     */
    public function getNextId()
    {
        if (!empty($this->ids)) {
            return max(array_keys($this->ids)) + 1;
        }
        return 0;
    }

    /**
     * Returns a link offset in links array from its unique ID.
     *
     * @param int $id Persistent ID of a link.
     *
     * @return int Real offset in local array, or null if doesn't exist.
     */
    protected function getLinkOffset($id)
    {
        if (isset($this->ids[$id])) {
            return $this->ids[$id];
        }
        return null;
    }
}