aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
authorJeremy Benoist <jbenoist@20minutes.fr>2017-06-01 11:31:45 +0200
committerJeremy Benoist <jbenoist@20minutes.fr>2017-06-01 11:31:45 +0200
commit6acadf8e98cf6021a9019773df75bdb151865687 (patch)
treec517962bd405fff9863a7fc7ed5b7d821f68f90f /src
parent843182c7cf428b5f6b8a1ff7057adc703c1e816e (diff)
downloadwallabag-6acadf8e98cf6021a9019773df75bdb151865687.tar.gz
wallabag-6acadf8e98cf6021a9019773df75bdb151865687.tar.zst
wallabag-6acadf8e98cf6021a9019773df75bdb151865687.zip
Rewrote code & fix tests
Diffstat (limited to 'src')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php66
-rw-r--r--src/Wallabag/ImportBundle/Import/AbstractImport.php7
2 files changed, 22 insertions, 51 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index e6292744..3024fdc5 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -7,7 +7,6 @@ use Psr\Log\LoggerInterface;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Tools\Utils; 8use Wallabag\CoreBundle\Tools\Utils;
9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
10use Symfony\Component\Config\Definition\Exception\Exception;
11 10
12/** 11/**
13 * This kind of proxy class take care of getting the content from an url 12 * This kind of proxy class take care of getting the content from an url
@@ -32,57 +31,40 @@ class ContentProxy
32 } 31 }
33 32
34 /** 33 /**
35 * Update existing entry by fetching from URL using Graby. 34 * Update entry using either fetched or provided content.
36 * 35 *
37 * @param Entry $entry Entry to update 36 * @param Entry $entry Entry to update
38 * @param string $url Url to grab content for 37 * @param string $url Url of the content
38 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
39 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
39 */ 40 */
40 public function updateEntry(Entry $entry, $url) 41 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
41 { 42 {
42 $content = $this->graby->fetchContent($url); 43 if (!empty($content['html'])) {
43 44 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
44 // be sure to keep the url in case of error
45 // so we'll be able to refetch it in the future
46 $content['url'] = $content['url'] ?: $url;
47
48 $this->stockEntry($entry, $content);
49 }
50
51 /**
52 * Import entry using either fetched or provided content.
53 *
54 * @param Entry $entry Entry to update
55 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
56 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
57 */
58 public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
59 {
60 try {
61 $this->validateContent($content);
62 } catch (\Exception $e) {
63 // validation failed but do we want to disable updating content?
64 if (true === $disableContentUpdate) {
65 throw $e;
66 }
67 } 45 }
68 46
69 if (false === $disableContentUpdate) { 47 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
70 try { 48 try {
71 $fetchedContent = $this->graby->fetchContent($content['url']); 49 $fetchedContent = $this->graby->fetchContent($url);
72 } catch (\Exception $e) { 50 } catch (\Exception $e) {
73 $this->logger->error('Error while trying to fetch content from URL.', [ 51 $this->logger->error('Error while trying to fetch content from URL.', [
74 'entry_url' => $content['url'], 52 'entry_url' => $url,
75 'error_msg' => $e->getMessage(), 53 'error_msg' => $e->getMessage(),
76 ]); 54 ]);
77 } 55 }
78 56
79 // when content is imported, we have information in $content 57 // when content is imported, we have information in $content
80 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 58 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
81 if ($fetchedContent['html'] !== $this->fetchingErrorMessage) { 59 if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) {
82 $content = $fetchedContent; 60 $content = $fetchedContent;
83 } 61 }
84 } 62 }
85 63
64 // be sure to keep the url in case of error
65 // so we'll be able to refetch it in the future
66 $content['url'] = !empty($content['url']) ? $content['url'] : $url;
67
86 $this->stockEntry($entry, $content); 68 $this->stockEntry($entry, $content);
87 } 69 }
88 70
@@ -126,7 +108,7 @@ class ContentProxy
126 try { 108 try {
127 $entry->setPublishedAt(new \DateTime($date)); 109 $entry->setPublishedAt(new \DateTime($date));
128 } catch (\Exception $e) { 110 } catch (\Exception $e) {
129 $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); 111 $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]);
130 } 112 }
131 } 113 }
132 114
@@ -170,19 +152,11 @@ class ContentProxy
170 * Validate that the given content has at least a title, an html and a url. 152 * Validate that the given content has at least a title, an html and a url.
171 * 153 *
172 * @param array $content 154 * @param array $content
155 *
156 * @return bool true if valid otherwise false
173 */ 157 */
174 private function validateContent(array $content) 158 private function validateContent(array $content)
175 { 159 {
176 if (empty($content['title'])) { 160 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
177 throw new Exception('Missing title from imported entry!');
178 }
179
180 if (empty($content['url'])) {
181 throw new Exception('Missing URL from imported entry!');
182 }
183
184 if (empty($content['html'])) {
185 throw new Exception('Missing html from imported entry!');
186 }
187 } 161 }
188} 162}
diff --git a/src/Wallabag/ImportBundle/Import/AbstractImport.php b/src/Wallabag/ImportBundle/Import/AbstractImport.php
index bf568a1a..9f3d822a 100644
--- a/src/Wallabag/ImportBundle/Import/AbstractImport.php
+++ b/src/Wallabag/ImportBundle/Import/AbstractImport.php
@@ -115,14 +115,11 @@ abstract class AbstractImport implements ImportInterface
115 */ 115 */
116 protected function fetchContent(Entry $entry, $url, array $content = []) 116 protected function fetchContent(Entry $entry, $url, array $content = [])
117 { 117 {
118 // be sure to set at least the given url
119 $content['url'] = isset($content['url']) ? $content['url'] : $url;
120
121 try { 118 try {
122 $this->contentProxy->importEntry($entry, $content, $this->disableContentUpdate); 119 $this->contentProxy->updateEntry($entry, $url, $content, $this->disableContentUpdate);
123 } catch (\Exception $e) { 120 } catch (\Exception $e) {
124 $this->logger->error('Error trying to import an entry.', [ 121 $this->logger->error('Error trying to import an entry.', [
125 'entry_url' => $content['url'], 122 'entry_url' => $url,
126 'error_msg' => $e->getMessage(), 123 'error_msg' => $e->getMessage(),
127 ]); 124 ]);
128 } 125 }