about summary refs log tree commit diff homepage
path: root/src/Wallabag
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 76
-rw-r--r-- src/Wallabag/ImportBundle/Command/ImportCommand.php 4
-rw-r--r-- src/Wallabag/ImportBundle/Import/AbstractImport.php 29
3 files changed, 84 insertions, 25 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index c73b8eaf..88873bd5 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -7,6 +7,7 @@ use Psr\Log\LoggerInterface;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Tools\Utils; 8use Wallabag\CoreBundle\Tools\Utils;
9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
10use Symfony\Component\Config\Definition\Exception\Exception;
10 11
11/** 12/**
12 * This kind of proxy class takes care of getting the content from a URL 13 * This kind of proxy class takes care of getting the content from a URL
@@ -31,34 +32,58 @@ class ContentProxy
31 } 32 }
32 33
33 /** 34 /**
34 * Fetch content using graby and hydrate given $entry with results information. 35 * Update existing entry by fetching from URL using Graby.
35 * In case we couldn't find content, we'll try to use Open Graph data.
36 *
37 * We can also force the content, in case of an import from the v1 for example, so the function won't
38 * fetch the content from the website but rather use information given with the $content parameter.
39 * 36 *
40 * @param Entry $entry Entry to update 37 * @param Entry $entry Entry to update
41 * @param string $url Url to grab content for 38 * @param string $url Url to grab content for
42 * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
43 */ 39 */
44 public function updateEntry(Entry $entry, $url, array $content = []) 40 public function updateEntry(Entry $entry, $url)
45 { 41 {
46 // ensure content is a bit cleaned up 42 $content = $this->graby->fetchContent($url);
47 if (!empty($content['html'])) { 43
48 $content['html'] = $this->graby->cleanupHtml($content['html'], $url); 44 $this->stockEntry($entry, $content);
49 } 45 }
46
47 /**
48 * Import entry using either fetched or provided content.
49 *
50 * @param Entry $entry Entry to update
51 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
52 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
53 */
54 public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
55 {
56 $this->validateContent($content);
50 57
51 // do we have to fetch the content or the provided one is ok? 58 if (false === $disableContentUpdate) {
52 if (empty($content) || false === $this->validateContent($content)) { 59 try {
53 $fetchedContent = $this->graby->fetchContent($url); 60 $fetchedContent = $this->graby->fetchContent($content['url']);
61 } catch (\Exception $e) {
62 $this->logger->error('Error while trying to fetch content from URL.', [
63 'entry_url' => $content['url'],
64 'error_msg' => $e->getMessage(),
65 ]);
66 }
54 67
55 // when content is imported, we have information in $content 68 // when content is imported, we have information in $content
56 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 69 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
57 if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) { 70 if ($fetchedContent['html'] !== $this->fetchingErrorMessage) {
58 $content = $fetchedContent; 71 $content = $fetchedContent;
59 } 72 }
60 } 73 }
61 74
75 $this->stockEntry($entry, $content);
76 }
77
78 /**
79 * Stock entry with fetched or imported content.
80 * Will fall back to OpenGraph data if available.
81 *
82 * @param Entry $entry Entry to stock
83 * @param array $content Array with at least title and URL
84 */
85 private function stockEntry(Entry $entry, array $content)
86 {
62 $title = $content['title']; 87 $title = $content['title'];
63 if (!$title && !empty($content['open_graph']['og_title'])) { 88 if (!$title && !empty($content['open_graph']['og_title'])) {
64 $title = $content['open_graph']['og_title']; 89 $title = $content['open_graph']['og_title'];
@@ -74,7 +99,7 @@ class ContentProxy
74 } 99 }
75 } 100 }
76 101
77 $entry->setUrl($content['url'] ?: $url); 102 $entry->setUrl($content['url']);
78 $entry->setTitle($title); 103 $entry->setTitle($title);
79 $entry->setContent($html); 104 $entry->setContent($html);
80 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); 105 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
@@ -124,22 +149,29 @@ class ContentProxy
124 $this->tagger->tag($entry); 149 $this->tagger->tag($entry);
125 } catch (\Exception $e) { 150 } catch (\Exception $e) {
126 $this->logger->error('Error while trying to automatically tag an entry.', [ 151 $this->logger->error('Error while trying to automatically tag an entry.', [
127 'entry_url' => $url, 152 'entry_url' => $content['url'],
128 'error_msg' => $e->getMessage(), 153 'error_msg' => $e->getMessage(),
129 ]); 154 ]);
130 } 155 }
131 } 156 }
132 157
133 /** 158 /**
134 * Validate that the given content as enough value to be used 159 * Validate that the given content has at least a title, an html and a url.
135 * instead of fetch the content from the url.
136 * 160 *
137 * @param array $content 161 * @param array $content
138 *
139 * @return bool true if valid otherwise false
140 */ 162 */
141 private function validateContent(array $content) 163 private function validateContent(array $content)
142 { 164 {
143 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); 165 if (!empty($content['title']))) {
166 throw new Exception('Missing title from imported entry!');
167 }
168
169 if (!empty($content['url']))) {
170 throw new Exception('Missing URL from imported entry!');
171 }
172
173 if (!empty($content['html']))) {
174 throw new Exception('Missing html from imported entry!');
175 }
144 } 176 }
145} 177}
diff --git a/src/Wallabag/ImportBundle/Command/ImportCommand.php b/src/Wallabag/ImportBundle/Command/ImportCommand.php
index ce72837a..bca800e6 100644
--- a/src/Wallabag/ImportBundle/Command/ImportCommand.php
+++ b/src/Wallabag/ImportBundle/Command/ImportCommand.php
@@ -5,6 +5,7 @@ namespace Wallabag\ImportBundle\Command;
5use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand; 5use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
6use Symfony\Component\Config\Definition\Exception\Exception; 6use Symfony\Component\Config\Definition\Exception\Exception;
7use Symfony\Component\Console\Input\InputArgument; 7use Symfony\Component\Console\Input\InputArgument;
8use Symfony\Component\Console\Input\InputOption;
8use Symfony\Component\Console\Input\InputInterface; 9use Symfony\Component\Console\Input\InputInterface;
9use Symfony\Component\Console\Output\OutputInterface; 10use Symfony\Component\Console\Output\OutputInterface;
10 11
@@ -19,7 +20,7 @@ class ImportCommand extends ContainerAwareCommand
19 ->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file') 20 ->addArgument('filepath', InputArgument::REQUIRED, 'Path to the JSON file')
20 ->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1') 21 ->addOption('importer', null, InputArgument::OPTIONAL, 'The importer to use: v1, v2, instapaper, pinboard, readability, firefox or chrome', 'v1')
21 ->addOption('markAsRead', null, InputArgument::OPTIONAL, 'Mark all entries as read', false) 22 ->addOption('markAsRead', null, InputArgument::OPTIONAL, 'Mark all entries as read', false)
22 ->addOption('useUserId', null, InputArgument::OPTIONAL, 'Use user id instead of username to find account', false) 23 ->addOption('disableContentUpdate', null, InputOption::VALUE_NONE, 'Disable fetching updated content from URL')
23 ; 24 ;
24 } 25 }
25 26
@@ -69,6 +70,7 @@ class ImportCommand extends ContainerAwareCommand
69 } 70 }
70 71
71 $import->setMarkAsRead($input->getOption('markAsRead')); 72 $import->setMarkAsRead($input->getOption('markAsRead'));
73 $import->setDisableContentUpdate($input->getOption('disableContentUpdate'));
72 $import->setUser($user); 74 $import->setUser($user);
73 75
74 $res = $import 76 $res = $import
diff --git a/src/Wallabag/ImportBundle/Import/AbstractImport.php b/src/Wallabag/ImportBundle/Import/AbstractImport.php
index fc462c4c..167853aa 100644
--- a/src/Wallabag/ImportBundle/Import/AbstractImport.php
+++ b/src/Wallabag/ImportBundle/Import/AbstractImport.php
@@ -24,6 +24,7 @@ abstract class AbstractImport implements ImportInterface
24 protected $producer; 24 protected $producer;
25 protected $user; 25 protected $user;
26 protected $markAsRead; 26 protected $markAsRead;
27 protected $disableContentUpdate;
27 protected $skippedEntries = 0; 28 protected $skippedEntries = 0;
28 protected $importedEntries = 0; 29 protected $importedEntries = 0;
29 protected $queuedEntries = 0; 30 protected $queuedEntries = 0;
@@ -85,6 +86,27 @@ abstract class AbstractImport implements ImportInterface
85 } 86 }
86 87
87 /** 88 /**
89 * Set whether fetching updated content for articles should be disabled.
90 *
91 * @param bool $disableContentUpdate
92 */
93 public function setDisableContentUpdate($disableContentUpdate)
94 {
95 $this->disableContentUpdate = $disableContentUpdate;
96
97 return $this;
98 }
99
100 /**
101 * Get whether fetching updated content for articles is disabled.
102 */
103 public function getDisableContentUpdate()
104 {
105 return $this->disableContentUpdate;
106 }
107
108
109 /**
88 * Fetch content from the ContentProxy (using graby). 110 * Fetch content from the ContentProxy (using graby).
89 * If it fails, return the given entry to be saved in all cases (to avoid the user losing the content). 111 * If it fails, return the given entry to be saved in all cases (to avoid the user losing the content).
90 * 112 *
@@ -95,9 +117,12 @@ abstract class AbstractImport implements ImportInterface
95 protected function fetchContent(Entry $entry, $url, array $content = []) 117 protected function fetchContent(Entry $entry, $url, array $content = [])
96 { 118 {
97 try { 119 try {
98 $this->contentProxy->updateEntry($entry, $url, $content); 120 $this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate);
99 } catch (\Exception $e) { 121 } catch (\Exception $e) {
100 return $entry; 122 $this->logger->error('Error trying to import an entry.', [
123 'entry_url' => $content['url'],
124 'error_msg' => $e->getMessage(),
125 ]);
101 } 126 }
102 } 127 }
103 128