From a973afeac7b7399d35b881920f0afc1947765ccd Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Thu, 28 Jul 2016 22:54:33 +0200 Subject: Refactor bookmark import using a generic Netscape parser Relates to #607 Relates to #608 Relates to #493 (abandoned) Additions: - use Composer's autoload to load 3rd-party dependencies under vendor/ Modifications: - [import] replace the current parser with a generic, stable parser - move code to application/NetscapeBookmarkUtils - improve status report after parsing - [router] use the same endpoint for both bookmark upload and import dialog - [template] update bookmark import options - allow adding tags to all imported links - allow selecting the visibility (privacy) of imported links - [tests] ensure bookmarks are properly parsed and imported in the LinkDB - reuse reference input from the parser's test data See: - https://github.com/shaarli/netscape-bookmark-parser - https://getcomposer.org/doc/01-basic-usage.md#autoloading Signed-off-by: VirtualTam --- application/NetscapeBookmarkUtils.php | 142 ++++++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) (limited to 'application/NetscapeBookmarkUtils.php') diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php index fdbb0ad7..b99a432e 100644 --- a/application/NetscapeBookmarkUtils.php +++ b/application/NetscapeBookmarkUtils.php @@ -51,4 +51,146 @@ class NetscapeBookmarkUtils return $bookmarkLinks; } + + /** + * Generates an import status summary + * + * @param string $filename name of the file to import + * @param int $filesize size of the file to import + * @param int $importCount how many links were imported + * @param int $overwriteCount how many links were overwritten + * @param int $skipCount how many links were skipped + * + * @return string Summary of the bookmark import status + */ + private static function importStatus( + $filename, + $filesize, + $importCount=0, + $overwriteCount=0, + $skipCount=0 + ) + { + $status = 'File '.$filename.' ('.$filesize.' bytes) '; + if ($importCount == 0 && $overwriteCount == 0 && $skipCount == 0) { + $status .= 'has an unknown file format. Nothing was imported.'; + } else { + $status .= 'was successfully processed: '.$importCount.' links imported, '; + $status .= $overwriteCount.' links overwritten, '; + $status .= $skipCount.' links skipped.'; + } + return $status; + } + + /** + * Imports Web bookmarks from an uploaded Netscape bookmark dump + * + * @param array $post Server $_POST parameters + * @param array $file Server $_FILES parameters + * @param LinkDB $linkDb Loaded LinkDB instance + * @param string $pagecache Page cache + * + * @return string Summary of the bookmark import status + */ + public static function import($post, $files, $linkDb, $pagecache) + { + $filename = $files['filetoupload']['name']; + $filesize = $files['filetoupload']['size']; + $data = file_get_contents($files['filetoupload']['tmp_name']); + + // Sniff file type + if (! startsWith($data, '')) { + return self::importStatus($filename, $filesize); + } + + // Overwrite existing links? + $overwrite = ! empty($post['overwrite']); + + // Add tags to all imported links? + if (empty($post['default_tags'])) { + $defaultTags = array(); + } else { + $defaultTags = preg_split( + '/[\s,]+/', + escape($post['default_tags']) + ); + } + + // links are imported as public by default + $defaultPrivacy = 0; + + $parser = new NetscapeBookmarkParser( + true, // nested tag support + $defaultTags, // additional user-specified tags + strval(1 - $defaultPrivacy) // defaultPub = 1 - defaultPrivacy + ); + $bookmarks = $parser->parseString($data); + + $importCount = 0; + $overwriteCount = 0; + $skipCount = 0; + + foreach ($bookmarks as $bkm) { + $private = $defaultPrivacy; + if (empty($post['privacy']) || $post['privacy'] == 'default') { + // use value from the imported file + $private = $bkm['pub'] == '1' ? 0 : 1; + } else if ($post['privacy'] == 'private') { + // all imported links are private + $private = 1; + } else if ($post['privacy'] == 'public') { + // all imported links are public + $private = 0; + } + + $newLink = array( + 'title' => $bkm['title'], + 'url' => $bkm['uri'], + 'description' => $bkm['note'], + 'private' => $private, + 'linkdate'=> '', + 'tags' => $bkm['tags'] + ); + + $existingLink = $linkDb->getLinkFromUrl($bkm['uri']); + + if ($existingLink !== false) { + if ($overwrite === false) { + // Do not overwrite an existing link + $skipCount++; + continue; + } + + // Overwrite an existing link, keep its date + $newLink['linkdate'] = $existingLink['linkdate']; + $linkDb[$existingLink['linkdate']] = $newLink; + $importCount++; + $overwriteCount++; + continue; + } + + // Add a new link + $newLinkDate = new DateTime('@'.strval($bkm['time'])); + while (!empty($linkDb[$newLinkDate->format(LinkDB::LINK_DATE_FORMAT)])) { + // Ensure the date/time is not already used + // - this hack is necessary as the date/time acts as a primary key + // - apply 1 second increments until an unused index is found + // See https://github.com/shaarli/Shaarli/issues/351 + $newLinkDate->add(new DateInterval('PT1S')); + } + $linkDbDate = $newLinkDate->format(LinkDB::LINK_DATE_FORMAT); + $newLink['linkdate'] = $linkDbDate; + $linkDb[$linkDbDate] = $newLink; + $importCount++; + } + + $linkDb->savedb($pagecache); + return self::importStatus( + $filename, + $filesize, + $importCount, + $overwriteCount, + $skipCount + ); + } } -- cgit v1.2.3 From f4ad7bde56b769cea0a2a26eb739e57500705555 Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Fri, 12 Aug 2016 23:22:15 +0200 Subject: Fix: ensure Internet Explorer bookmark dumps can be imported Relates to https://github.com/shaarli/Shaarli/issues/607 Modifications: - [application][tests] NetscapeBookmarkUtils: more permissive doctype detection The IE bookmark exports contain extra escape sequences, which can be observed by binary comparison of the reference input data used in tests: $ cmp -b -l -n 8 netscape_basic.htm internet_explorer_encoding.htm 1 74 < 357 M-o 2 41 ! 273 M-; 3 104 D 277 M-? 4 117 O 74 < 5 103 C 41 ! 6 124 T 104 D 7 131 Y 117 O 8 120 P 103 C Signed-off-by: VirtualTam --- application/NetscapeBookmarkUtils.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'application/NetscapeBookmarkUtils.php') diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php index b99a432e..c3181254 100644 --- a/application/NetscapeBookmarkUtils.php +++ b/application/NetscapeBookmarkUtils.php @@ -98,8 +98,7 @@ class NetscapeBookmarkUtils $filesize = $files['filetoupload']['size']; $data = file_get_contents($files['filetoupload']['tmp_name']); - // Sniff file type - if (! startsWith($data, '')) { + if (strpos($data, '') === false) { return self::importStatus($filename, $filesize); } -- cgit v1.2.3 From 7af9a41881ed0b9d44d18a0ce03a123a8441adf5 Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Thu, 20 Oct 2016 11:31:52 +0200 Subject: Minor code cleanup: PHPDoc, spelling, unused variables, etc. --- application/NetscapeBookmarkUtils.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'application/NetscapeBookmarkUtils.php') diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php index c3181254..d6840d37 100644 --- a/application/NetscapeBookmarkUtils.php +++ b/application/NetscapeBookmarkUtils.php @@ -86,7 +86,7 @@ class NetscapeBookmarkUtils * Imports Web bookmarks from an uploaded Netscape bookmark dump * * @param array $post Server $_POST parameters - * @param array $file Server $_FILES parameters + * @param array $files Server $_FILES parameters * @param LinkDB $linkDb Loaded LinkDB instance * @param string $pagecache Page cache * -- cgit v1.2.3 From f21abf329234ae4d5a1d56c5a9dd0bc11f80bac8 Mon Sep 17 00:00:00 2001 From: VirtualTam Date: Thu, 20 Oct 2016 21:19:51 +0200 Subject: LinkDB: update datastore method names Relates to https://github.com/shaarli/Shaarli/issues/95 Signed-off-by: VirtualTam --- application/NetscapeBookmarkUtils.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'application/NetscapeBookmarkUtils.php') diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php index d6840d37..dd21f05b 100644 --- a/application/NetscapeBookmarkUtils.php +++ b/application/NetscapeBookmarkUtils.php @@ -183,7 +183,7 @@ class NetscapeBookmarkUtils $importCount++; } - $linkDb->savedb($pagecache); + $linkDb->save($pagecache); return self::importStatus( $filename, $filesize, -- cgit v1.2.3 From 01878a75b93b9966f7366ea2937c118bbc3e459e Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Mon, 28 Nov 2016 16:16:44 +0100 Subject: Apply the new ID system accros the whole codebase --- application/NetscapeBookmarkUtils.php | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) (limited to 'application/NetscapeBookmarkUtils.php') diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php index dd21f05b..8a939adb 100644 --- a/application/NetscapeBookmarkUtils.php +++ b/application/NetscapeBookmarkUtils.php @@ -38,7 +38,7 @@ class NetscapeBookmarkUtils if ($link['private'] == 0 && $selection == 'private') { continue; } - $date = DateTime::createFromFormat(LinkDB::LINK_DATE_FORMAT, $link['linkdate']); + $date = $link['created']; $link['timestamp'] = $date->getTimestamp(); $link['taglist'] = str_replace(' ', ',', $link['tags']); @@ -147,7 +147,6 @@ class NetscapeBookmarkUtils 'url' => $bkm['uri'], 'description' => $bkm['note'], 'private' => $private, - 'linkdate'=> '', 'tags' => $bkm['tags'] ); @@ -161,25 +160,21 @@ class NetscapeBookmarkUtils } // Overwrite an existing link, keep its date - $newLink['linkdate'] = $existingLink['linkdate']; - $linkDb[$existingLink['linkdate']] = $newLink; + $newLink['id'] = $existingLink['id']; + $newLink['created'] = $existingLink['created']; + $newLink['updated'] = new DateTime(); + $linkDb[$existingLink['id']] = $newLink; $importCount++; $overwriteCount++; continue; } - // Add a new link + // Add a new link - @ used for UNIX timestamps $newLinkDate = new DateTime('@'.strval($bkm['time'])); - while (!empty($linkDb[$newLinkDate->format(LinkDB::LINK_DATE_FORMAT)])) { - // Ensure the date/time is not already used - // - this hack is necessary as the date/time acts as a primary key - // - apply 1 second increments until an unused index is found - // See https://github.com/shaarli/Shaarli/issues/351 - $newLinkDate->add(new DateInterval('PT1S')); - } - $linkDbDate = $newLinkDate->format(LinkDB::LINK_DATE_FORMAT); - $newLink['linkdate'] = $linkDbDate; - $linkDb[$linkDbDate] = $newLink; + $newLinkDate->setTimezone(new DateTimeZone(date_default_timezone_get())); + $newLink['created'] = $newLinkDate; + $newLink['id'] = $linkDb->getNextId(); + $linkDb[$newLink['id']] = $newLink; $importCount++; } -- cgit v1.2.3 From d592daea8343bb4dfecff5d97e93699581ccc58c Mon Sep 17 00:00:00 2001 From: ArthurHoaro Date: Mon, 28 Nov 2016 18:24:15 +0100 Subject: Add a persistent 'shorturl' key to all links All existing link will keep their permalinks. New links will have smallhash generated with date+id. The purpose of this is to avoid collision between links due to their creation date. --- application/NetscapeBookmarkUtils.php | 1 + 1 file changed, 1 insertion(+) (limited to 'application/NetscapeBookmarkUtils.php') diff --git a/application/NetscapeBookmarkUtils.php b/application/NetscapeBookmarkUtils.php index 8a939adb..e7148d00 100644 --- a/application/NetscapeBookmarkUtils.php +++ b/application/NetscapeBookmarkUtils.php @@ -174,6 +174,7 @@ class NetscapeBookmarkUtils $newLinkDate->setTimezone(new DateTimeZone(date_default_timezone_get())); $newLink['created'] = $newLinkDate; $newLink['id'] = $linkDb->getNextId(); + $newLink['shorturl'] = link_small_hash($newLink['created'], $newLink['id']); $linkDb[$newLink['id']] = $newLink; $importCount++; } -- cgit v1.2.3