diff options
author | VirtualTam <virtualtam@flibidi.net> | 2016-07-28 22:54:33 +0200 |
---|---|---|
committer | VirtualTam <virtualtam@flibidi.net> | 2016-08-10 01:42:44 +0200 |
commit | a973afeac7b7399d35b881920f0afc1947765ccd (patch) | |
tree | 4a13ddf32779cb8f02600d7cc20d23f692c9068c /index.php | |
parent | 085157c5cb6bd0df928c7875fb7997683418f1fb (diff) | |
download | Shaarli-a973afeac7b7399d35b881920f0afc1947765ccd.tar.gz Shaarli-a973afeac7b7399d35b881920f0afc1947765ccd.tar.zst Shaarli-a973afeac7b7399d35b881920f0afc1947765ccd.zip |
Refactor bookmark import using a generic Netscape parser
Relates to #607
Relates to #608
Relates to #493 (abandoned)
Additions:
- use Composer's autoload to load 3rd-party dependencies under vendor/
Modifications:
- [import] replace the current parser with a generic, stable parser
- move code to application/NetscapeBookmarkUtils
- improve status report after parsing
- [router] use the same endpoint for both bookmark upload and import dialog
- [template] update bookmark import options
- allow adding tags to all imported links
- allow selecting the visibility (privacy) of imported links
- [tests] ensure bookmarks are properly parsed and imported in the LinkDB
- reuse reference input from the parser's test data
See:
- https://github.com/shaarli/netscape-bookmark-parser
- https://getcomposer.org/doc/01-basic-usage.md#autoloading
Signed-off-by: VirtualTam <virtualtam@flibidi.net>
Diffstat (limited to 'index.php')
-rw-r--r-- | index.php | 139 |
1 file changed, 32 insertions, 107 deletions
@@ -44,6 +44,10 @@ error_reporting(E_ALL^E_WARNING); | |||
44 | //error_reporting(-1); | 44 | //error_reporting(-1); |
45 | 45 | ||
46 | 46 | ||
47 | // 3rd-party libraries | ||
48 | require_once 'inc/rain.tpl.class.php'; | ||
49 | require_once __DIR__ . '/vendor/autoload.php'; | ||
50 | |||
47 | // Shaarli library | 51 | // Shaarli library |
48 | require_once 'application/ApplicationUtils.php'; | 52 | require_once 'application/ApplicationUtils.php'; |
49 | require_once 'application/Cache.php'; | 53 | require_once 'application/Cache.php'; |
@@ -65,7 +69,6 @@ require_once 'application/Utils.php'; | |||
65 | require_once 'application/PluginManager.php'; | 69 | require_once 'application/PluginManager.php'; |
66 | require_once 'application/Router.php'; | 70 | require_once 'application/Router.php'; |
67 | require_once 'application/Updater.php'; | 71 | require_once 'application/Updater.php'; |
68 | require_once 'inc/rain.tpl.class.php'; | ||
69 | 72 | ||
70 | // Ensure the PHP version is supported | 73 | // Ensure the PHP version is supported |
71 | try { | 74 | try { |
@@ -1468,26 +1471,37 @@ function renderPage($conf, $pluginManager) | |||
1468 | exit; | 1471 | exit; |
1469 | } | 1472 | } |
1470 | 1473 | ||
1471 | // -------- User is uploading a file for import | 1474 | if ($targetPage == Router::$PAGE_IMPORT) { |
1472 | if (isset($_SERVER['QUERY_STRING']) && startsWith($_SERVER['QUERY_STRING'], 'do=upload')) | 1475 | // Upload a Netscape bookmark dump to import its contents |
1473 | { | 1476 | |
1474 | // If file is too big, some form field may be missing. | 1477 | if (! isset($_POST['token']) || ! isset($_FILES['filetoupload'])) { |
1475 | if (!isset($_POST['token']) || (!isset($_FILES)) || (isset($_FILES['filetoupload']['size']) && $_FILES['filetoupload']['size']==0)) | 1478 | // Show import dialog |
1476 | { | 1479 | $PAGE->assign('maxfilesize', getMaxFileSize()); |
1477 | $returnurl = ( empty($_SERVER['HTTP_REFERER']) ? '?' : $_SERVER['HTTP_REFERER'] ); | 1480 | $PAGE->renderPage('import'); |
1478 | echo '<script>alert("The file you are trying to upload is probably bigger than what this webserver can accept ('.getMaxFileSize().' bytes). Please upload in smaller chunks.");document.location=\''.escape($returnurl).'\';</script>'; | ||
1479 | exit; | 1481 | exit; |
1480 | } | 1482 | } |
1481 | if (!tokenOk($_POST['token'])) die('Wrong token.'); | ||
1482 | importFile($LINKSDB); | ||
1483 | exit; | ||
1484 | } | ||
1485 | 1483 | ||
1486 | // -------- Show upload/import dialog: | 1484 | // Import bookmarks from an uploaded file |
1487 | if ($targetPage == Router::$PAGE_IMPORT) | 1485 | if (isset($_FILES['filetoupload']['size']) && $_FILES['filetoupload']['size'] == 0) { |
1488 | { | 1486 | // The file is too big or some form field may be missing. |
1489 | $PAGE->assign('maxfilesize',getMaxFileSize()); | 1487 | echo '<script>alert("The file you are trying to upload is probably' |
1490 | $PAGE->renderPage('import'); | 1488 | .' bigger than what this webserver can accept (' |
1489 | .getMaxFileSize().' bytes).' | ||
1490 | .' Please upload in smaller chunks.");document.location=\'?do=' | ||
1491 | .Router::$PAGE_IMPORT .'\';</script>'; | ||
1492 | exit; | ||
1493 | } | ||
1494 | if (! tokenOk($_POST['token'])) { | ||
1495 | die('Wrong token.'); | ||
1496 | } | ||
1497 | $status = NetscapeBookmarkUtils::import( | ||
1498 | $_POST, | ||
1499 | $_FILES, | ||
1500 | $LINKSDB, | ||
1501 | $conf->get('resource.page_cache') | ||
1502 | ); | ||
1503 | echo '<script>alert("'.$status.'");document.location=\'?do=' | ||
1504 | .Router::$PAGE_IMPORT .'\';</script>'; | ||
1491 | exit; | 1505 | exit; |
1492 | } | 1506 | } |
1493 | 1507 | ||
@@ -1545,95 +1559,6 @@ function renderPage($conf, $pluginManager) | |||
1545 | } | 1559 | } |
1546 | 1560 | ||
1547 | /** | 1561 | /** |
1548 | * Process the import file form. | ||
1549 | * | ||
1550 | * @param LinkDB $LINKSDB Loaded LinkDB instance. | ||
1551 | * @param ConfigManager $conf Configuration Manager instance. | ||
1552 | */ | ||
1553 | function importFile($LINKSDB, $conf) | ||
1554 | { | ||
1555 | if (!isLoggedIn()) { die('Not allowed.'); } | ||
1556 | |||
1557 | $filename=$_FILES['filetoupload']['name']; | ||
1558 | $filesize=$_FILES['filetoupload']['size']; | ||
1559 | $data=file_get_contents($_FILES['filetoupload']['tmp_name']); | ||
1560 | $private = (empty($_POST['private']) ? 0 : 1); // Should the links be imported as private? | ||
1561 | $overwrite = !empty($_POST['overwrite']) ; // Should the imported links overwrite existing ones? | ||
1562 | $import_count=0; | ||
1563 | |||
1564 | // Sniff file type: | ||
1565 | $type='unknown'; | ||
1566 | if (startsWith($data,'<!DOCTYPE NETSCAPE-Bookmark-file-1>')) $type='netscape'; // Netscape bookmark file (aka Firefox). | ||
1567 | |||
1568 | // Then import the bookmarks. | ||
1569 | if ($type=='netscape') | ||
1570 | { | ||
1571 | // This is a standard Netscape-style bookmark file. | ||
1572 | // This format is supported by all browsers (except IE, of course), also Delicious, Diigo and others. | ||
1573 | foreach(explode('<DT>',$data) as $html) // explode is very fast | ||
1574 | { | ||
1575 | $link = array('linkdate'=>'','title'=>'','url'=>'','description'=>'','tags'=>'','private'=>0); | ||
1576 | $d = explode('<DD>',$html); | ||
1577 | if (startsWith($d[0], '<A ')) | ||
1578 | { | ||
1579 | $link['description'] = (isset($d[1]) ? html_entity_decode(trim($d[1]),ENT_QUOTES,'UTF-8') : ''); // Get description (optional) | ||
1580 | preg_match('!<A .*?>(.*?)</A>!i',$d[0],$matches); $link['title'] = (isset($matches[1]) ? trim($matches[1]) : ''); // Get title | ||
1581 | $link['title'] = html_entity_decode($link['title'],ENT_QUOTES,'UTF-8'); | ||
1582 | preg_match_all('! ([A-Z_]+)=\"(.*?)"!i',$html,$matches,PREG_SET_ORDER); // Get all other attributes | ||
1583 | $raw_add_date=0; | ||
1584 | foreach($matches as $m) | ||
1585 | { | ||
1586 | $attr=$m[1]; $value=$m[2]; | ||
1587 | if ($attr=='HREF') $link['url']=html_entity_decode($value,ENT_QUOTES,'UTF-8'); | ||
1588 | elseif ($attr=='ADD_DATE') | ||
1589 | { | ||
1590 | $raw_add_date=intval($value); | ||
1591 | if ($raw_add_date>30000000000) $raw_add_date/=1000; //If larger than year 2920, then was likely stored in milliseconds instead of seconds | ||
1592 | } | ||
1593 | elseif ($attr=='PRIVATE') $link['private']=($value=='0'?0:1); | ||
1594 | elseif ($attr=='TAGS') $link['tags']=html_entity_decode(str_replace(',',' ',$value),ENT_QUOTES,'UTF-8'); | ||
1595 | } | ||
1596 | if ($link['url']!='') | ||
1597 | { | ||
1598 | if ($private==1) $link['private']=1; | ||
1599 | $dblink = $LINKSDB->getLinkFromUrl($link['url']); // See if the link is already in database. | ||
1600 | if ($dblink==false) | ||
1601 | { // Link not in database, let's import it... | ||
1602 | if (empty($raw_add_date)) $raw_add_date=time(); // In case of shitty bookmark file with no ADD_DATE | ||
1603 | |||
1604 | // Make sure date/time is not already used by another link. | ||
1605 | // (Some bookmark files have several different links with the same ADD_DATE) | ||
1606 | // We increment date by 1 second until we find a date which is not used in DB. | ||
1607 | // (so that links that have the same date/time are more or less kept grouped by date, but do not conflict.) | ||
1608 | while (!empty($LINKSDB[date('Ymd_His',$raw_add_date)])) { $raw_add_date++; }// Yes, I know it's ugly. | ||
1609 | $link['linkdate']=date('Ymd_His',$raw_add_date); | ||
1610 | $LINKSDB[$link['linkdate']] = $link; | ||
1611 | $import_count++; | ||
1612 | } | ||
1613 | else // Link already present in database. | ||
1614 | { | ||
1615 | if ($overwrite) | ||
1616 | { // If overwrite is required, we import link data, except date/time. | ||
1617 | $link['linkdate']=$dblink['linkdate']; | ||
1618 | $LINKSDB[$link['linkdate']] = $link; | ||
1619 | $import_count++; | ||
1620 | } | ||
1621 | } | ||
1622 | |||
1623 | } | ||
1624 | } | ||
1625 | } | ||
1626 | $LINKSDB->savedb($conf->get('resource.page_cache')); | ||
1627 | |||
1628 | echo '<script>alert("File '.json_encode($filename).' ('.$filesize.' bytes) was successfully processed: '.$import_count.' links imported.");document.location=\'?\';</script>'; | ||
1629 | } | ||
1630 | else | ||
1631 | { | ||
1632 | echo '<script>alert("File '.json_encode($filename).' ('.$filesize.' bytes) has an unknown file format. Nothing was imported.");document.location=\'?\';</script>'; | ||
1633 | } | ||
1634 | } | ||
1635 | |||
1636 | /** | ||
1637 | * Template for the list of links (<div id="linklist">) | 1562 | * Template for the list of links (<div id="linklist">) |
1638 | * This function fills all the necessary fields in the $PAGE for the template 'linklist.html' | 1563 | * This function fills all the necessary fields in the $PAGE for the template 'linklist.html' |
1639 | * | 1564 | * |