about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Wallabag/CoreBundle/Helper/ContentProxy.php
diff options
context:
space:
mode:
author: Jerome Charaoui <jerome@riseup.net> 2016-12-07 15:16:49 -0500
committer: Jeremy Benoist <jbenoist@20minutes.fr> 2017-06-01 09:48:14 +0200
commit: d0e9b3d640acce49068d1a2c5603b92c1bda363e (patch)
tree: 1b992438ca153c18596f88ee7bec7d98b8984264 /src/Wallabag/CoreBundle/Helper/ContentProxy.php
parent: 1c5da417e4ddb14223f9af6e5cea6778e5c0fd08 (diff)
download: wallabag-d0e9b3d640acce49068d1a2c5603b92c1bda363e.tar.gz
wallabag-d0e9b3d640acce49068d1a2c5603b92c1bda363e.tar.zst
wallabag-d0e9b3d640acce49068d1a2c5603b92c1bda363e.zip
Add disableContentUpdate import option
This commit also decouples the "import" and "update" functions inside ContentProxy. If a content array is available, it must be passed to the new importEntry method.
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 76
1 files changed, 54 insertions, 22 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index c73b8eaf..88873bd5 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -7,6 +7,7 @@ use Psr\Log\LoggerInterface;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Tools\Utils; 8use Wallabag\CoreBundle\Tools\Utils;
9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
10use Symfony\Component\Config\Definition\Exception\Exception;
10 11
11/** 12/**
12 * This kind of proxy class take care of getting the content from an url 13 * This kind of proxy class take care of getting the content from an url
@@ -31,34 +32,58 @@ class ContentProxy
31 } 32 }
32 33
33 /** 34 /**
34 * Fetch content using graby and hydrate given $entry with results information. 35 * Update existing entry by fetching from URL using Graby.
35 * In case we couldn't find content, we'll try to use Open Graph data.
36 *
37 * We can also force the content, in case of an import from the v1 for example, so the function won't
38 * fetch the content from the website but rather use information given with the $content parameter.
39 * 36 *
40 * @param Entry $entry Entry to update 37 * @param Entry $entry Entry to update
41 * @param string $url Url to grab content for 38 * @param string $url Url to grab content for
42 * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
43 */ 39 */
44 public function updateEntry(Entry $entry, $url, array $content = []) 40 public function updateEntry(Entry $entry, $url)
45 { 41 {
46 // ensure content is a bit cleaned up 42 $content = $this->graby->fetchContent($url);
47 if (!empty($content['html'])) { 43
48 $content['html'] = $this->graby->cleanupHtml($content['html'], $url); 44 $this->stockEntry($entry, $content);
49 } 45 }
46
47 /**
48 * Import entry using either fetched or provided content.
49 *
50 * @param Entry $entry Entry to update
51 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
52 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
53 */
54 public function importEntry(Entry $entry, array $content, $disableContentUpdate = false)
55 {
56 $this->validateContent($content);
50 57
51 // do we have to fetch the content or the provided one is ok? 58 if (false === $disableContentUpdate) {
52 if (empty($content) || false === $this->validateContent($content)) { 59 try {
53 $fetchedContent = $this->graby->fetchContent($url); 60 $fetchedContent = $this->graby->fetchContent($content['url']);
61 } catch (\Exception $e) {
62 $this->logger->error('Error while trying to fetch content from URL.', [
63 'entry_url' => $content['url'],
64 'error_msg' => $e->getMessage(),
65 ]);
66 }
54 67
55 // when content is imported, we have information in $content 68 // when content is imported, we have information in $content
56 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 69 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
57 if (empty($content) || $fetchedContent['html'] !== $this->fetchingErrorMessage) { 70 if ($fetchedContent['html'] !== $this->fetchingErrorMessage) {
58 $content = $fetchedContent; 71 $content = $fetchedContent;
59 } 72 }
60 } 73 }
61 74
75 $this->stockEntry($entry, $content);
76 }
77
78 /**
79 * Stock entry with fetched or imported content.
80 * Will fall back to OpenGraph data if available.
81 *
82 * @param Entry $entry Entry to stock
83 * @param array $content Array with at least title and URL
84 */
85 private function stockEntry(Entry $entry, array $content)
86 {
62 $title = $content['title']; 87 $title = $content['title'];
63 if (!$title && !empty($content['open_graph']['og_title'])) { 88 if (!$title && !empty($content['open_graph']['og_title'])) {
64 $title = $content['open_graph']['og_title']; 89 $title = $content['open_graph']['og_title'];
@@ -74,7 +99,7 @@ class ContentProxy
74 } 99 }
75 } 100 }
76 101
77 $entry->setUrl($content['url'] ?: $url); 102 $entry->setUrl($content['url']);
78 $entry->setTitle($title); 103 $entry->setTitle($title);
79 $entry->setContent($html); 104 $entry->setContent($html);
80 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); 105 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
@@ -124,22 +149,29 @@ class ContentProxy
124 $this->tagger->tag($entry); 149 $this->tagger->tag($entry);
125 } catch (\Exception $e) { 150 } catch (\Exception $e) {
126 $this->logger->error('Error while trying to automatically tag an entry.', [ 151 $this->logger->error('Error while trying to automatically tag an entry.', [
127 'entry_url' => $url, 152 'entry_url' => $content['url'],
128 'error_msg' => $e->getMessage(), 153 'error_msg' => $e->getMessage(),
129 ]); 154 ]);
130 } 155 }
131 } 156 }
132 157
133 /** 158 /**
134 * Validate that the given content as enough value to be used 159 * Validate that the given content has at least a title, an html and a url.
135 * instead of fetch the content from the url.
136 * 160 *
137 * @param array $content 161 * @param array $content
138 *
139 * @return bool true if valid otherwise false
140 */ 162 */
141 private function validateContent(array $content) 163 private function validateContent(array $content)
142 { 164 {
143 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); 165 if (!empty($content['title']))) {
166 throw new Exception('Missing title from imported entry!');
167 }
168
169 if (!empty($content['url']))) {
170 throw new Exception('Missing URL from imported entry!');
171 }
172
173 if (!empty($content['html']))) {
174 throw new Exception('Missing html from imported entry!');
175 }
144 } 176 }
145} 177}