]> git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
Add disableContentUpdate import option
authorJerome Charaoui <jerome@riseup.net>
Wed, 7 Dec 2016 20:16:49 +0000 (15:16 -0500)
committerJeremy Benoist <jbenoist@20minutes.fr>
Thu, 1 Jun 2017 07:48:14 +0000 (09:48 +0200)
This commit also decouples the "import" and "update" functions inside
ContentProxy. If a content array is available, it must be passed to the
new importEntry method.

src/Wallabag/CoreBundle/Helper/ContentProxy.php
src/Wallabag/ImportBundle/Command/ImportCommand.php
src/Wallabag/ImportBundle/Import/AbstractImport.php
tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
tests/Wallabag/ImportBundle/Import/ChromeImportTest.php
tests/Wallabag/ImportBundle/Import/FirefoxImportTest.php
tests/Wallabag/ImportBundle/Import/InstapaperImportTest.php
tests/Wallabag/ImportBundle/Import/PocketImportTest.php
tests/Wallabag/ImportBundle/Import/ReadabilityImportTest.php
tests/Wallabag/ImportBundle/Import/WallabagV1ImportTest.php
tests/Wallabag/ImportBundle/Import/WallabagV2ImportTest.php

index c73b8eafb7aa6de27d45b55794b28ac9dae464ab..88873bd5305bab6dba0f553b158c0a862468bc22 100644 (file)
@@ -7,6 +7,7 @@ use Psr\Log\LoggerInterface;
 use Wallabag\CoreBundle\Entity\Entry;
 use Wallabag\CoreBundle\Tools\Utils;
 use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
+use Symfony\Component\Config\Definition\Exception\Exception;
 
 /**
  * This kind of proxy class take care of getting the content from an url
@@ -31,34 +32,58 @@ class ContentProxy
     }
 
     /**
-     * Fetch content using graby and hydrate given $entry with results information.
-     * In case we couldn't find content, we'll try to use Open Graph data.
-     *
-     * We can also force the content, in case of an import from the v1 for example, so the function won't
-     * fetch the content from the website but rather use information given with the $content parameter.
+     * Update existing entry by fetching from URL using Graby.
      *
      * @param Entry  $entry   Entry to update
      * @param string $url     Url to grab content for
-     * @param array  $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
      */
-    public function updateEntry(Entry $entry, $url, array $content = [])
+    public function updateEntry(Entry $entry, $url)
     {
-        // ensure content is a bit cleaned up
-        if (!empty($content['html'])) {
-            $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
-        }
+        $content = $this->graby->fetchContent($url);
+
+        $this->stockEntry($entry, $content);
+    }
+
+    /**
+     * Import entry using either fetched or provided content.
+     *
+     * @param Entry  $entry                Entry to update
+     * @param array  $content              Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
+     * @param bool   $disableContentUpdate Whether to skip trying to fetch content using Graby
+     */
+    public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
+    {
+        $this->validateContent($content);
 
-        // do we have to fetch the content or the provided one is ok?
-        if (empty($content) || false === $this->validateContent($content)) {
-            $fetchedContent = $this->graby->fetchContent($url);
+        if (false === $disableContentUpdate) {
+            try {
+                $fetchedContent = $this->graby->fetchContent($content['url']);
+            } catch (\Exception $e) {
+                $this->logger->error('Error while trying to fetch content from URL.', [
+                    'entry_url' => $content['url'],
+                    'error_msg' => $e->getMessage(),
+                ]);
+            }
 
             // when content is imported, we have information in $content
             // in case fetching content goes bad, we'll keep the imported information instead of overriding them
-            if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
+            if ($fetchedContent['html'] !== $this->fetchingErrorMessage) {
                 $content = $fetchedContent;
             }
         }
 
+        $this->stockEntry($entry, $content);
+    }
+
+    /**
+     * Stock entry with fetched or imported content.
+     * Will fall back to OpenGraph data if available.
+     *
+     * @param Entry  $entry   Entry to stock
+     * @param array  $content Array with at least title and URL
+     */
+    private function stockEntry(Entry $entry, array $content)
+    {
         $title = $content['title'];
         if (!$title && !empty($content['open_graph']['og_title'])) {
             $title = $content['open_graph']['og_title'];
@@ -74,7 +99,7 @@ class ContentProxy
             }
         }
 
-        $entry->setUrl($content['url'] ?: $url);
+        $entry->setUrl($content['url']);
         $entry->setTitle($title);
         $entry->setContent($html);
         $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
@@ -124,22 +149,29 @@ class ContentProxy
             $this->tagger->tag($entry);
         } catch (\Exception $e) {
             $this->logger->error('Error while trying to automatically tag an entry.', [
-                'entry_url' => $url,
+                'entry_url' => $content['url'],
                 'error_msg' => $e->getMessage(),
             ]);
         }
     }
 
     /**
-     * Validate that the given content as enough value to be used
-     * instead of fetch the content from the url.
+     * Validate that the given content has at least a title, an html and a url.
      *
      * @param array $content
-     *
-     * @return bool true if valid otherwise false
      */
     private function validateContent(array $content)
     {
-        return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
+        if (!empty($content['title']))) {
+            throw new Exception('Missing title from imported entry!');
+        }
+
+        if (!empty($content['url']))) {
+            throw new Exception('Missing URL from imported entry!');
+        }
+
+        if (!empty($content['html']))) {
+            throw new Exception('Missing html from imported entry!');
+        }
     }
 }
index ce72837ad0e8807ca2d2ce98a8aa55accb805233..bca800e6cf23bb5d614c7396f959f0b6d7d7b67c 100644 (file)
@@ -5,6 +5,7 @@ namespace Wallabag\ImportBundle\Command;
 use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
 use Symfony\Component\Config\Definition\Exception\Exception;
 use Symfony\Component\Console\Input\InputArgument;
+use Symfony\Component\Console\Input\InputOption;
 use Symfony\Component\Console\Input\InputInterface;
 use Symfony\Component\Console\Output\OutputInterface;
 
@@ -19,7 +20,7 @@ class ImportCommand extends ContainerAwareCommand
             ->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
             ->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
             ->addOption('markAsRead', null, InputArgument::OPTIONAL, 'Mark all entries as read', false)
-            ->addOption('useUserId', null, InputArgument::OPTIONAL, 'Use user id instead of username to find account', false)
+            ->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
         ;
     }
 
@@ -69,6 +70,7 @@ class ImportCommand extends ContainerAwareCommand
         }
 
         $import->setMarkAsRead($input->getOption('markAsRead'));
+        $import->setDisableContentUpdate($input->getOption('disableContentUpdate'));
         $import->setUser($user);
 
         $res = $import
index fc462c4cda5cac8acc66629f60bb4f148a75e583..167853aaef541b8955c8cd6ab0a63e2198f81159 100644 (file)
@@ -24,6 +24,7 @@ abstract class AbstractImport implements ImportInterface
     protected $producer;
     protected $user;
     protected $markAsRead;
+    protected $disableContentUpdate;
     protected $skippedEntries = 0;
     protected $importedEntries = 0;
     protected $queuedEntries = 0;
@@ -84,6 +85,27 @@ abstract class AbstractImport implements ImportInterface
         return $this->markAsRead;
     }
 
+    /**
+     * Set whether articles should be fetched for updated content.
+     *
+     * @param bool $markAsRead
+     */
+    public function setDisableContentUpdate($disableContentUpdate)
+    {
+        $this->disableContentUpdate = $disableContentUpdate;
+
+        return $this;
+    }
+
+    /**
+     * Get whether articles should be fetched for updated content.
+     */
+    public function getDisableContentUpdate()
+    {
+        return $this->disableContentUpdate;
+    }
+
+
     /**
      * Fetch content from the ContentProxy (using graby).
      * If it fails return the given entry to be saved in all case (to avoid user to loose the content).
@@ -95,9 +117,12 @@ abstract class AbstractImport implements ImportInterface
     protected function fetchContent(Entry $entry, $url, array $content = [])
     {
         try {
-            $this->contentProxy->updateEntry($entry, $url, $content);
+            $this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate);
         } catch (\Exception $e) {
-            return $entry;
+            $this->logger->error('Error trying to import an entry.', [
+                'entry_url' => $content['url'],
+                'error_msg' => $e->getMessage(),
+            ]);
         }
     }
 
index 16643938715d62b5d1b9346e28e284c6e4f690d9..1ad21d147103d326f22c240fe4f252a848afe507 100644 (file)
@@ -257,9 +257,8 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
 
         $proxy = new ContentProxy((new Graby()), $tagger, $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
-        $proxy->updateEntry(
+        $proxy->importEntry(
             $entry,
-            'http://0.0.0.0',
             [
                 'html' => str_repeat('this is my content', 325),
                 'title' => 'this is my title',
@@ -294,7 +293,6 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $entry = new Entry(new User());
         $proxy->updateEntry(
             $entry,
-            'http://0.0.0.0',
             [
                 'html' => str_repeat('this is my content', 325),
                 'title' => 'this is my title',
@@ -334,13 +332,14 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
         $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
 
         $entry = new Entry(new User());
-        $proxy->updateEntry($entry, 'http://0.0.0.0', [
+        $content = array(
             'html' => str_repeat('this is my content', 325),
             'title' => 'this is my title',
             'url' => 'http://1.1.1.1',
             'content_type' => 'text/html',
             'language' => 'fr',
-        ]);
+        );
+        $proxy->importEntry($entry, $content, true);
 
         $this->assertCount(0, $entry->getTags());
     }
index cec1953411ca7a33b8ab899b6cf433cc6473f927..7a15e91879cac28f01d841e49ddd7c211fee75b4 100644 (file)
@@ -89,7 +89,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(1))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $res = $chromeImport->import();
@@ -118,7 +118,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(1))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         // check that every entry persisted are archived
@@ -158,7 +158,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -198,7 +198,7 @@ class ChromeImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
index c186c820222bce61caee1c2828298fab3e1fd0ee..09abac57eb20ead2bbefea70610d66476ad22adb 100644 (file)
@@ -89,7 +89,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(2))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $res = $firefoxImport->import();
@@ -118,7 +118,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(1))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         // check that every entry persisted are archived
@@ -158,7 +158,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -198,7 +198,7 @@ class FirefoxImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
index 9158c8a23277f411f3a41f28176aca93ed3b01e7..05844490d000b93235a2c043feec88a93b4573cc 100644 (file)
@@ -104,7 +104,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(4))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $res = $instapaperImport->import();
@@ -133,7 +133,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->once())
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         // check that every entry persisted are archived
@@ -173,7 +173,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -213,7 +213,7 @@ class InstapaperImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
index b81ebe15fc702bf4bf78b8363a6113fb28398e6b..f75e6bea0bc48ffb40639e2b6d4fe9625959e47d 100644 (file)
@@ -282,7 +282,7 @@ class PocketImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->once())
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $pocketImport->setClient($client);
@@ -377,7 +377,7 @@ class PocketImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(2))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $pocketImport->setClient($client);
@@ -450,7 +450,7 @@ JSON;
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -536,7 +536,7 @@ JSON;
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('ImportEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
@@ -621,7 +621,7 @@ JSON;
 
         $this->contentProxy
             ->expects($this->once())
-            ->method('updateEntry')
+            ->method('importEntry')
             ->will($this->throwException(new \Exception()));
 
         $pocketImport->setClient($client);
index 8f466d383a0406c69b9b9adf4f5ebc02a5f5404f..1b0daa92d1e8e698e8ab4b1a0e70a40b3fa7daff 100644 (file)
@@ -89,7 +89,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(3))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $res = $readabilityImport->import();
@@ -118,7 +118,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(1))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         // check that every entry persisted are archived
@@ -158,7 +158,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -198,7 +198,7 @@ class ReadabilityImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
index 7cbef637795053372a3436a370ad084dc020945a..f23cb74894f0c08d39ec9da96ab5176c9c00ebb9 100644 (file)
@@ -104,7 +104,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(1))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn($entry);
 
         $res = $wallabagV1Import->import();
@@ -133,7 +133,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(3))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         // check that every entry persisted are archived
@@ -173,7 +173,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -213,7 +213,7 @@ class WallabagV1ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
index 5cc04aa5929385506cc89758e5ea6275245b6ee8..e1acf569966f392360d42fa513ed5e8e4c9207c9 100644 (file)
@@ -100,7 +100,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(2))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         $res = $wallabagV2Import->import();
@@ -129,7 +129,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(2))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->willReturn(new Entry($this->user));
 
         // check that every entry persisted are archived
@@ -165,7 +165,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $producer = $this->getMockBuilder('OldSound\RabbitMqBundle\RabbitMq\Producer')
             ->disableOriginalConstructor()
@@ -201,7 +201,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->never())
-            ->method('updateEntry');
+            ->method('importEntry');
 
         $factory = new RedisMockFactory();
         $redisMock = $factory->getAdapter('Predis\Client', true);
@@ -278,7 +278,7 @@ class WallabagV2ImportTest extends \PHPUnit_Framework_TestCase
 
         $this->contentProxy
             ->expects($this->exactly(2))
-            ->method('updateEntry')
+            ->method('importEntry')
             ->will($this->throwException(new \Exception()));
 
         $res = $wallabagV2Import->import();