aboutsummaryrefslogtreecommitdiffhomepage
path: root/application/bookmark/LinkDB.php
blob: 3b77422a25672dce722935df120115d2309db935 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
<?php

namespace Shaarli\Bookmark;

use ArrayAccess;
use Countable;
use DateTime;
use Iterator;
use LinkFilter;
use LinkNotFoundException;
use Shaarli\Exceptions\IOException;
use Shaarli\FileUtils;

/**
 * Data storage for links.
 *
 * This object behaves like an associative array.
 *
 * Example:
 *    $myLinks = new LinkDB();
 *    echo $myLinks[350]['title'];
 *    foreach ($myLinks as $link)
 *       echo $link['title'].' at url '.$link['url'].'; description:'.$link['description'];
 *
 * Available keys:
 *  - id:       primary key, incremental integer identifier (persistent)
 *  - description: description of the entry
 *  - created:  creation date of this entry, DateTime object.
 *  - updated:  last modification date of this entry, DateTime object.
 *  - private:  Is this link private? 0=no, other value=yes
 *  - tags:     tags attached to this entry (separated by spaces)
 *  - title     Title of the link
 *  - url       URL of the link. Used for displayable links (no redirector, relative, etc.).
 *              Can be absolute or relative.
 *              Relative URLs are permalinks (e.g.'?m-ukcw')
 *  - real_url  Absolute processed URL.
 *  - shorturl  Permalink smallhash
 *
 * Implements 3 interfaces:
 *  - ArrayAccess: behaves like an associative array;
 *  - Countable:   there is a count() method;
 *  - Iterator:    usable in foreach () loops.
 *
 * ID mechanism:
 *   ArrayAccess is implemented in a way that will allow to access a link
 *   with the unique identifier ID directly with $link[ID].
 *   Note that it's not the real key of the link array attribute.
 *   This mechanism is in place to have persistent link IDs,
 *   even though the internal array is reordered by date.
 *   Example:
 *     - DB: link #1 (2010-01-01) link #2 (2016-01-01)
 *     - Order: #2 #1
 *     - Import links containing: link #3 (2013-01-01)
 *     - New DB: link #1 (2010-01-01) link #2 (2016-01-01) link #3 (2013-01-01)
 *     - Real order: #2 #3 #1
 */
class LinkDB implements Iterator, Countable, ArrayAccess
{
    // Links are stored as a PHP serialized string
    private $datastore;

    // Link date storage format
    const LINK_DATE_FORMAT = 'Ymd_His';

    // List of links (associative array)
    //  - key:   link date (e.g. "20110823_124546"),
    //  - value: associative array (keys: title, description...)
    private $links;

    // List of all recorded URLs (key=url, value=link offset)
    // for fast reserve search (url-->link offset)
    private $urls;

    /**
     * @var array List of all links IDS mapped with their array offset.
     *            Map: id->offset.
     */
    protected $ids;

    // List of offset keys (for the Iterator interface implementation)
    private $keys;

    // Position in the $this->keys array (for the Iterator interface)
    private $position;

    // Is the user logged in? (used to filter private links)
    private $loggedIn;

    // Hide public links
    private $hidePublicLinks;

    // link redirector set in user settings.
    private $redirector;

    /**
     * Set this to `true` to urlencode link behind redirector link, `false` to leave it untouched.
     *
     * Example:
     *   anonym.to needs clean URL while dereferer.org needs urlencoded URL.
     *
     * @var boolean $redirectorEncode parameter: true or false
     */
    private $redirectorEncode;

    /**
     * Creates a new LinkDB
     *
     * Checks if the datastore exists; else, attempts to create a dummy one.
     *
     * @param string $datastore datastore file path.
     * @param boolean $isLoggedIn is the user logged in?
     * @param boolean $hidePublicLinks if true all links are private.
     * @param string $redirector link redirector set in user settings.
     * @param boolean $redirectorEncode Enable urlencode on redirected urls (default: true).
     */
    public function __construct(
        $datastore,
        $isLoggedIn,
        $hidePublicLinks,
        $redirector = '',
        $redirectorEncode = true
    ) {
    
        $this->datastore = $datastore;
        $this->loggedIn = $isLoggedIn;
        $this->hidePublicLinks = $hidePublicLinks;
        $this->redirector = $redirector;
        $this->redirectorEncode = $redirectorEncode === true;
        $this->check();
        $this->read();
    }

    /**
     * Countable - Counts elements of an object
     */
    public function count()
    {
        return count($this->links);
    }

    /**
     * ArrayAccess - Assigns a value to the specified offset
     */
    public function offsetSet($offset, $value)
    {
        // TODO: use exceptions instead of "die"
        if (!$this->loggedIn) {
            die(t('You are not authorized to add a link.'));
        }
        if (!isset($value['id']) || empty($value['url'])) {
            die(t('Internal Error: A link should always have an id and URL.'));
        }
        if (($offset !== null && !is_int($offset)) || !is_int($value['id'])) {
            die(t('You must specify an integer as a key.'));
        }
        if ($offset !== null && $offset !== $value['id']) {
            die(t('Array offset and link ID must be equal.'));
        }

        // If the link exists, we reuse the real offset, otherwise new entry
        $existing = $this->getLinkOffset($offset);
        if ($existing !== null) {
            $offset = $existing;
        } else {
            $offset = count($this->links);
        }
        $this->links[$offset] = $value;
        $this->urls[$value['url']] = $offset;
        $this->ids[$value['id']] = $offset;
    }

    /**
     * ArrayAccess - Whether or not an offset exists
     */
    public function offsetExists($offset)
    {
        return array_key_exists($this->getLinkOffset($offset), $this->links);
    }

    /**
     * ArrayAccess - Unsets an offset
     */
    public function offsetUnset($offset)
    {
        if (!$this->loggedIn) {
            // TODO: raise an exception
            die('You are not authorized to delete a link.');
        }
        $realOffset = $this->getLinkOffset($offset);
        $url = $this->links[$realOffset]['url'];
        unset($this->urls[$url]);
        unset($this->ids[$realOffset]);
        unset($this->links[$realOffset]);
    }

    /**
     * ArrayAccess - Returns the value at specified offset
     */
    public function offsetGet($offset)
    {
        $realOffset = $this->getLinkOffset($offset);
        return isset($this->links[$realOffset]) ? $this->links[$realOffset] : null;
    }

    /**
     * Iterator - Returns the current element
     */
    public function current()
    {
        return $this[$this->keys[$this->position]];
    }

    /**
     * Iterator - Returns the key of the current element
     */
    public function key()
    {
        return $this->keys[$this->position];
    }

    /**
     * Iterator - Moves forward to next element
     */
    public function next()
    {
        ++$this->position;
    }

    /**
     * Iterator - Rewinds the Iterator to the first element
     *
     * Entries are sorted by date (latest first)
     */
    public function rewind()
    {
        $this->keys = array_keys($this->ids);
        $this->position = 0;
    }

    /**
     * Iterator - Checks if current position is valid
     */
    public function valid()
    {
        return isset($this->keys[$this->position]);
    }

    /**
     * Checks if the DB directory and file exist
     *
     * If no DB file is found, creates a dummy DB.
     */
    private function check()
    {
        if (file_exists($this->datastore)) {
            return;
        }

        // Create a dummy database for example
        $this->links = array();
        $link = array(
            'id' => 1,
            'title' => t('The personal, minimalist, super-fast, database free, bookmarking service'),
            'url' => 'https://shaarli.readthedocs.io',
            'description' => t(
                'Welcome to Shaarli! This is your first public bookmark. '
                . 'To edit or delete me, you must first login.

To learn how to use Shaarli, consult the link "Documentation" at the bottom of this page.

You use the community supported version of the original Shaarli project, by Sebastien Sauvage.'
            ),
            'private' => 0,
            'created' => new DateTime(),
            'tags' => 'opensource software'
        );
        $link['shorturl'] = link_small_hash($link['created'], $link['id']);
        $this->links[1] = $link;

        $link = array(
            'id' => 0,
            'title' => t('My secret stuff... - Pastebin.com'),
            'url' => 'http://sebsauvage.net/paste/?8434b27936c09649#bR7XsXhoTiLcqCpQbmOpBi3rq2zzQUC5hBI7ZT1O3x8=',
            'description' => t('Shhhh! I\'m a private link only YOU can see. You can delete me too.'),
            'private' => 1,
            'created' => new DateTime('1 minute ago'),
            'tags' => 'secretstuff',
        );
        $link['shorturl'] = link_small_hash($link['created'], $link['id']);
        $this->links[0] = $link;

        // Write database to disk
        $this->write();
    }

    /**
     * Reads database from disk to memory
     */
    private function read()
    {
        // Public links are hidden and user not logged in => nothing to show
        if ($this->hidePublicLinks && !$this->loggedIn) {
            $this->links = array();
            return;
        }

        $this->urls = [];
        $this->ids = [];
        $this->links = FileUtils::readFlatDB($this->datastore, []);

        $toremove = array();
        foreach ($this->links as $key => &$link) {
            if (!$this->loggedIn && $link['private'] != 0) {
                // Transition for not upgraded databases.
                unset($this->links[$key]);
                continue;
            }

            // Sanitize data fields.
            sanitizeLink($link);

            // Remove private tags if the user is not logged in.
            if (!$this->loggedIn) {
                $link['tags'] = preg_replace('/(^|\s+)\.[^($|\s)]+\s*/', ' ', $link['tags']);
            }

            // Do not use the redirector for internal links (Shaarli note URL starting with a '?').
            if (!empty($this->redirector) && !startsWith($link['url'], '?')) {
                $link['real_url'] = $this->redirector;
                if ($this->redirectorEncode) {
                    $link['real_url'] .= urlencode(unescape($link['url']));
                } else {
                    $link['real_url'] .= $link['url'];
                }
            } else {
                $link['real_url'] = $link['url'];
            }

            // To be able to load links before running the update, and prepare the update
            if (!isset($link['created'])) {
                $link['id'] = $link['linkdate'];
                $link['created'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['linkdate']);
                if (!empty($link['updated'])) {
                    $link['updated'] = DateTime::createFromFormat(self::LINK_DATE_FORMAT, $link['updated']);
                }
                $link['shorturl'] = smallHash($link['linkdate']);
            }

            $this->urls[$link['url']] = $key;
            $this->ids[$link['id']] = $key;
        }
    }

    /**
     * Saves the database from memory to disk
     *
     * @throws IOException the datastore is not writable
     */
    private function write()
    {
        $this->reorder();
        FileUtils::writeFlatDB($this->datastore, $this->links);
    }

    /**
     * Saves the database from memory to disk
     *
     * @param string $pageCacheDir page cache directory
     */
    public function save($pageCacheDir)
    {
        if (!$this->loggedIn) {
            // TODO: raise an Exception instead
            die('You are not authorized to change the database.');
        }

        $this->write();

        invalidateCaches($pageCacheDir);
    }

    /**
     * Returns the link for a given URL, or False if it does not exist.
     *
     * @param string $url URL to search for
     *
     * @return mixed the existing link if it exists, else 'false'
     */
    public function getLinkFromUrl($url)
    {
        if (isset($this->urls[$url])) {
            return $this->links[$this->urls[$url]];
        }
        return false;
    }

    /**
     * Returns the shaare corresponding to a smallHash.
     *
     * @param string $request QUERY_STRING server parameter.
     *
     * @return array $filtered array containing permalink data.
     *
     * @throws LinkNotFoundException if the smallhash is malformed or doesn't match any link.
     */
    public function filterHash($request)
    {
        $request = substr($request, 0, 6);
        $linkFilter = new LinkFilter($this->links);
        return $linkFilter->filter(LinkFilter::$FILTER_HASH, $request);
    }

    /**
     * Returns the list of articles for a given day.
     *
     * @param string $request day to filter. Format: YYYYMMDD.
     *
     * @return array list of shaare found.
     */
    public function filterDay($request)
    {
        $linkFilter = new LinkFilter($this->links);
        return $linkFilter->filter(LinkFilter::$FILTER_DAY, $request);
    }

    /**
     * Filter links according to search parameters.
     *
     * @param array $filterRequest Search request content. Supported keys:
     *                                - searchtags: list of tags
     *                                - searchterm: term search
     * @param bool $casesensitive Optional: Perform case sensitive filter
     * @param string $visibility return only all/private/public links
     * @param string $untaggedonly return only untagged links
     *
     * @return array filtered links, all links if no suitable filter was provided.
     */
    public function filterSearch(
        $filterRequest = array(),
        $casesensitive = false,
        $visibility = 'all',
        $untaggedonly = false
    ) {
    
        // Filter link database according to parameters.
        $searchtags = isset($filterRequest['searchtags']) ? escape($filterRequest['searchtags']) : '';
        $searchterm = isset($filterRequest['searchterm']) ? escape($filterRequest['searchterm']) : '';

        // Search tags + fullsearch - blank string parameter will return all links.
        $type = LinkFilter::$FILTER_TAG | LinkFilter::$FILTER_TEXT; // == "vuotext"
        $request = [$searchtags, $searchterm];

        $linkFilter = new LinkFilter($this);
        return $linkFilter->filter($type, $request, $casesensitive, $visibility, $untaggedonly);
    }

    /**
     * Returns the list tags appearing in the links with the given tags
     *
     * @param array $filteringTags tags selecting the links to consider
     * @param string $visibility process only all/private/public links
     *
     * @return array tag => linksCount
     */
    public function linksCountPerTag($filteringTags = [], $visibility = 'all')
    {
        $links = $this->filterSearch(['searchtags' => $filteringTags], false, $visibility);
        $tags = [];
        $caseMapping = [];
        foreach ($links as $link) {
            foreach (preg_split('/\s+/', $link['tags'], 0, PREG_SPLIT_NO_EMPTY) as $tag) {
                if (empty($tag)) {
                    continue;
                }
                // The first case found will be displayed.
                if (!isset($caseMapping[strtolower($tag)])) {
                    $caseMapping[strtolower($tag)] = $tag;
                    $tags[$caseMapping[strtolower($tag)]] = 0;
                }
                $tags[$caseMapping[strtolower($tag)]]++;
            }
        }

        /*
         * Formerly used arsort(), which doesn't define the sort behaviour for equal values.
         * Also, this function doesn't produce the same result between PHP 5.6 and 7.
         *
         * So we now use array_multisort() to sort tags by DESC occurrences,
         * then ASC alphabetically for equal values.
         *
         * @see https://github.com/shaarli/Shaarli/issues/1142
         */
        $keys = array_keys($tags);
        $tmpTags = array_combine($keys, $keys);
        array_multisort($tags, SORT_DESC, $tmpTags, SORT_ASC, $tags);
        return $tags;
    }

    /**
     * Rename or delete a tag across all links.
     *
     * @param string $from Tag to rename
     * @param string $to New tag. If none is provided, the from tag will be deleted
     *
     * @return array|bool List of altered links or false on error
     */
    public function renameTag($from, $to)
    {
        if (empty($from)) {
            return false;
        }
        $delete = empty($to);
        // True for case-sensitive tag search.
        $linksToAlter = $this->filterSearch(['searchtags' => $from], true);
        foreach ($linksToAlter as $key => &$value) {
            $tags = preg_split('/\s+/', trim($value['tags']));
            if (($pos = array_search($from, $tags)) !== false) {
                if ($delete) {
                    unset($tags[$pos]); // Remove tag.
                } else {
                    $tags[$pos] = trim($to);
                }
                $value['tags'] = trim(implode(' ', array_unique($tags)));
                $this[$value['id']] = $value;
            }
        }

        return $linksToAlter;
    }

    /**
     * Returns the list of days containing articles (oldest first)
     * Output: An array containing days (in format YYYYMMDD).
     */
    public function days()
    {
        $linkDays = array();
        foreach ($this->links as $link) {
            $linkDays[$link['created']->format('Ymd')] = 0;
        }
        $linkDays = array_keys($linkDays);
        sort($linkDays);

        return $linkDays;
    }

    /**
     * Reorder links by creation date (newest first).
     *
     * Also update the urls and ids mapping arrays.
     *
     * @param string $order ASC|DESC
     */
    public function reorder($order = 'DESC')
    {
        $order = $order === 'ASC' ? -1 : 1;
        // Reorder array by dates.
        usort($this->links, function ($a, $b) use ($order) {
            if (isset($a['sticky']) && isset($b['sticky']) && $a['sticky'] !== $b['sticky']) {
                return $a['sticky'] ? -1 : 1;
            }
            return $a['created'] < $b['created'] ? 1 * $order : -1 * $order;
        });

        $this->urls = [];
        $this->ids = [];
        foreach ($this->links as $key => $link) {
            $this->urls[$link['url']] = $key;
            $this->ids[$link['id']] = $key;
        }
    }

    /**
     * Return the next key for link creation.
     * E.g. If the last ID is 597, the next will be 598.
     *
     * @return int next ID.
     */
    public function getNextId()
    {
        if (!empty($this->ids)) {
            return max(array_keys($this->ids)) + 1;
        }
        return 0;
    }

    /**
     * Returns a link offset in links array from its unique ID.
     *
     * @param int $id Persistent ID of a link.
     *
     * @return int Real offset in local array, or null if doesn't exist.
     */
    protected function getLinkOffset($id)
    {
        if (isset($this->ids[$id])) {
            return $this->ids[$id];
        }
        return null;
    }
}