]> git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - src/Wallabag/ImportBundle/Import/BrowserImport.php
bring chrome and firefox as separate imports
[github/wallabag/wallabag.git] / src / Wallabag / ImportBundle / Import / BrowserImport.php
index e3457196e4e0f8b837ea0c98c3decc514ffdb006..ef7d6d955ef541c826d94b531fcdface43728be5 100644 (file)
@@ -9,69 +9,24 @@ use Wallabag\CoreBundle\Entity\Entry;
 use Wallabag\UserBundle\Entity\User;
 use Wallabag\CoreBundle\Helper\ContentProxy;
 
-class BrowserImport implements ImportInterface
+abstract class BrowserImport extends AbstractImport
 {
-    protected $user;
-    protected $em;
-    protected $logger;
-    protected $contentProxy;
-    protected $skippedEntries = 0;
-    protected $importedEntries = 0;
-    protected $totalEntries = 0;
     protected $filepath;
-    protected $markAsRead;
-    private $nbEntries;
-
-    public function __construct(EntityManager $em, ContentProxy $contentProxy)
-    {
-        $this->em = $em;
-        $this->logger = new NullLogger();
-        $this->contentProxy = $contentProxy;
-    }
-
-    public function setLogger(LoggerInterface $logger)
-    {
-        $this->logger = $logger;
-    }
-
-    /**
-     * We define the user in a custom call because on the import command there is no logged in user.
-     * So we can't retrieve user from the `security.token_storage` service.
-     *
-     * @param User $user
-     *
-     * @return $this
-     */
-    public function setUser(User $user)
-    {
-        $this->user = $user;
-
-        return $this;
-    }
 
     /**
      * {@inheritdoc}
      */
-    public function getName()
-    {
-        return 'Firefox & Google Chrome';
-    }
+    abstract public function getName();
 
     /**
      * {@inheritdoc}
      */
-    public function getUrl()
-    {
-        return 'import_browser';
-    }
+    abstract public function getUrl();
 
     /**
      * {@inheritdoc}
      */
-    public function getDescription()
-    {
-        return 'import.browser.description';
-    }
+    abstract public function getDescription();
 
     /**
      * {@inheritdoc}
@@ -96,124 +51,162 @@ class BrowserImport implements ImportInterface
             return false;
         }
 
-        $this->nbEntries = 1;
+        if ($this->producer) {
+            $this->parseEntriesForProducer($data);
+
+            return true;
+        }
+
         $this->parseEntries($data);
-        $this->em->flush();
 
         return true;
     }
 
-    private function parseEntries($data)
+    /**
+     * Set file path to the json file.
+     *
+     * @param string $filepath
+     */
+    public function setFilepath($filepath)
+    {
+        $this->filepath = $filepath;
+
+        return $this;
+    }
+
+    /**
+     * Parse and insert all given entries.
+     *
+     * @param $entries
+     */
+    protected function parseEntries($entries)
     {
-        foreach ($data as $importedEntry) {
-            $this->parseEntry($importedEntry);
+        $i = 1;
+
+        foreach ($entries as $importedEntry) {
+            if ((array) $importedEntry !== $importedEntry) {
+                continue;
+            }
+
+            $entry = $this->parseEntry($importedEntry);
+
+            if (null === $entry) {
+                continue;
+            }
+
+            // flush every 20 entries
+            if (($i % 20) === 0) {
+                $this->em->flush();
+
+                // clear only affected entities
+                $this->em->clear(Entry::class);
+                $this->em->clear(Tag::class);
+            }
+            ++$i;
         }
-        $this->totalEntries += count($data);
+
+        $this->em->flush();
     }
 
-    private function parseEntry($importedEntry)
+    /**
+     * Parse entries and send them to the queue.
+     * It should just be a simple loop over all items, with no call to the database,
+     * to speed up queuing.
+     *
+     * Faster entry parsing for the producer.
+     * We don't perform any checks at this point; they'll be done by the consumer.
+     *
+     * @param array $entries
+     */
+    protected function parseEntriesForProducer(array $entries)
     {
-        if (!is_array($importedEntry)) {
-            return;
+        foreach ($entries as $importedEntry) {
+
+            if ((array) $importedEntry !== $importedEntry) {
+                continue;
+            }
+
+            // set userId for the producer (it won't know which user is connected)
+            $importedEntry['userId'] = $this->user->getId();
+
+            if ($this->markAsRead) {
+                $importedEntry = $this->setEntryAsRead($importedEntry);
+            }
+
+            ++$this->queuedEntries;
+
+            $this->producer->publish(json_encode($importedEntry));
         }
+    }
 
-        /* Firefox uses guid while Chrome uses id */
+    /**
+     * {@inheritdoc}
+     */
+    public function parseEntry(array $importedEntry)
+    {
 
         if ((!key_exists('guid', $importedEntry) || (!key_exists('id', $importedEntry))) && is_array(reset($importedEntry))) {
             $this->parseEntries($importedEntry);
-
             return;
         }
+
         if (key_exists('children', $importedEntry)) {
             $this->parseEntries($importedEntry['children']);
-
             return;
         }
-        if (key_exists('uri', $importedEntry) || key_exists('url', $importedEntry)) {
-
-            /* Firefox uses uri while Chrome uses url */
 
-            $firefox = key_exists('uri', $importedEntry);
-
-            $existingEntry = $this->em
-                ->getRepository('WallabagCoreBundle:Entry')
-                ->findByUrlAndUserId(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], $this->user->getId());
-
-            if (false !== $existingEntry) {
-                ++$this->skippedEntries;
+        if (!key_exists('uri', $importedEntry) && !key_exists('url', $importedEntry)) {
+            return;
+        }
 
-                return;
-            }
+        $firefox = key_exists('uri', $importedEntry);
 
-            if (false === parse_url(($firefox) ? $importedEntry['uri'] : $importedEntry['url']) || false === filter_var(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], FILTER_VALIDATE_URL)) {
-                $this->logger->warning('Imported URL '.($firefox) ? $importedEntry['uri'] : $importedEntry['url'].' is not valid');
-                ++$this->skippedEntries;
+        $existingEntry = $this->em
+            ->getRepository('WallabagCoreBundle:Entry')
+            ->findByUrlAndUserId(($firefox) ? $importedEntry['uri'] : $importedEntry['url'], $this->user->getId());
 
-                return;
-            }
+        if (false !== $existingEntry) {
+            ++$this->skippedEntries;
 
-            try {
-                $entry = $this->contentProxy->updateEntry(
-                    new Entry($this->user),
-                    ($firefox) ? $importedEntry['uri'] : $importedEntry['url']
-                );
-            } catch (\Exception $e) {
-                $this->logger->warning('Error while saving '.($firefox) ? $importedEntry['uri'] : $importedEntry['url']);
-                ++$this->skippedEntries;
+            return;
+        }
 
-                return;
-            }
+        $data = $this->prepareEntry($importedEntry);
 
-            $entry->setArchived($this->markAsRead);
+        $entry = new Entry($this->user);
+        $entry->setUrl($data['url']);
+        $entry->setTitle($data['title']);
 
-            $this->em->persist($entry);
-            ++$this->importedEntries;
+        // update entry with content (in case fetching failed, the given entry will be returned)
+        $entry = $this->fetchContent($entry, $data['url'], $data);
 
-            // flush every 20 entries
-            if (($this->nbEntries % 20) === 0) {
-                $this->em->flush();
-                $this->em->clear($entry);
-            }
-            ++$this->nbEntries;
+        if (array_key_exists('tags', $data)) {
+            $this->contentProxy->assignTagsToEntry(
+                $entry,
+                $data['tags']
+            );
         }
-    }
 
-    /**
-     * Set whether articles must be all marked as read.
-     *
-     * @param bool $markAsRead
-     *
-     * @return $this
-     */
-    public function setMarkAsRead($markAsRead)
-    {
-        $this->markAsRead = $markAsRead;
+        $entry->setArchived($data['is_archived']);
 
-        return $this;
-    }
+        if (!empty($data['created_at'])) {
+            $dt = new \DateTime();
+            $entry->setCreatedAt($dt->setTimestamp($data['created_at']/1000));
+        }
 
-    /**
-     * Set file path to the json file.
-     *
-     * @param string $filepath
-     *
-     * @return $this
-     */
-    public function setFilepath($filepath)
-    {
-        $this->filepath = $filepath;
+        $this->em->persist($entry);
+        ++$this->importedEntries;
 
-        return $this;
+        return $entry;
     }
 
     /**
      * {@inheritdoc}
      */
-    public function getSummary()
+    protected function setEntryAsRead(array $importedEntry)
     {
-        return [
-            'skipped' => $this->skippedEntries,
-            'imported' => $this->importedEntries,
-        ];
+        $importedEntry['is_archived'] = 1;
+
+        return $importedEntry;
     }
 }