about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Wallabag/CoreBundle/Helper/ContentProxy.php
diff options
context:
space:
mode:
author: Jérémy Benoist <j0k3r@users.noreply.github.com> 2017-06-02 11:26:37 +0200
committer: GitHub <noreply@github.com> 2017-06-02 11:26:37 +0200
commit: a687c8d915276eee0c0494156700f7d0c0606735 (patch)
tree: 23178dc2407aa1b926b79281c34d8e05b6318d3b /src/Wallabag/CoreBundle/Helper/ContentProxy.php
parent: 14b8a7c950147d32d7c9782832b87bf2b18b4fd7 (diff)
parent: f5924e954730efdb7b9fadf23c0b73b3f5a0a434 (diff)
download: wallabag-a687c8d915276eee0c0494156700f7d0c0606735.tar.gz
wallabag-a687c8d915276eee0c0494156700f7d0c0606735.tar.zst
wallabag-a687c8d915276eee0c0494156700f7d0c0606735.zip
Merge pull request #2708 from jcharaoui/import-disablecontentupdate
Import disableContentUpdate
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 48
1 files changed, 27 insertions, 21 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index 8ba77ca9..bfaa1976 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -31,27 +31,20 @@ class ContentProxy
31 } 31 }
32 32
33 /** 33 /**
34 * Fetch content using graby and hydrate given $entry with results information. 34 * Update entry using either fetched or provided content.
35 * In case we couldn't find content, we'll try to use Open Graph data.
36 * 35 *
37 * We can also force the content, in case of an import from the v1 for example, so the function won't 36 * @param Entry $entry Entry to update
38 * fetch the content from the website but rather use information given with the $content parameter. 37 * @param string $url Url of the content
39 * 38 * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url
40 * @param Entry $entry Entry to update 39 * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby
41 * @param string $url Url to grab content for
42 * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url
43 *
44 * @return Entry
45 */ 40 */
46 public function updateEntry(Entry $entry, $url, array $content = []) 41 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
47 { 42 {
48 // ensure content is a bit cleaned up
49 if (!empty($content['html'])) { 43 if (!empty($content['html'])) {
50 $content['html'] = $this->graby->cleanupHtml($content['html'], $url); 44 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
51 } 45 }
52 46
53 // do we have to fetch the content or the provided one is ok? 47 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
54 if (empty($content) || false === $this->validateContent($content)) {
55 $fetchedContent = $this->graby->fetchContent($url); 48 $fetchedContent = $this->graby->fetchContent($url);
56 49
57 // when content is imported, we have information in $content 50 // when content is imported, we have information in $content
@@ -61,6 +54,22 @@ class ContentProxy
61 } 54 }
62 } 55 }
63 56
57 // be sure to keep the url in case of error
58 // so we'll be able to refetch it in the future
59 $content['url'] = !empty($content['url']) ? $content['url'] : $url;
60
61 $this->stockEntry($entry, $content);
62 }
63
64 /**
65 * Stock entry with fetched or imported content.
66 * Will fall back to OpenGraph data if available.
67 *
68 * @param Entry $entry Entry to stock
69 * @param array $content Array with at least title, url & html
70 */
71 private function stockEntry(Entry $entry, array $content)
72 {
64 $title = $content['title']; 73 $title = $content['title'];
65 if (!$title && !empty($content['open_graph']['og_title'])) { 74 if (!$title && !empty($content['open_graph']['og_title'])) {
66 $title = $content['open_graph']['og_title']; 75 $title = $content['open_graph']['og_title'];
@@ -76,7 +85,7 @@ class ContentProxy
76 } 85 }
77 } 86 }
78 87
79 $entry->setUrl($content['url'] ?: $url); 88 $entry->setUrl($content['url']);
80 $entry->setTitle($title); 89 $entry->setTitle($title);
81 $entry->setContent($html); 90 $entry->setContent($html);
82 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); 91 $entry->setHttpStatus(isset($content['status']) ? $content['status'] : '');
@@ -92,7 +101,7 @@ class ContentProxy
92 try { 101 try {
93 $entry->setPublishedAt(new \DateTime($date)); 102 $entry->setPublishedAt(new \DateTime($date));
94 } catch (\Exception $e) { 103 } catch (\Exception $e) {
95 $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); 104 $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]);
96 } 105 }
97 } 106 }
98 107
@@ -126,17 +135,14 @@ class ContentProxy
126 $this->tagger->tag($entry); 135 $this->tagger->tag($entry);
127 } catch (\Exception $e) { 136 } catch (\Exception $e) {
128 $this->logger->error('Error while trying to automatically tag an entry.', [ 137 $this->logger->error('Error while trying to automatically tag an entry.', [
129 'entry_url' => $url, 138 'entry_url' => $content['url'],
130 'error_msg' => $e->getMessage(), 139 'error_msg' => $e->getMessage(),
131 ]); 140 ]);
132 } 141 }
133
134 return $entry;
135 } 142 }
136 143
137 /** 144 /**
138 * Validate that the given content as enough value to be used 145 * Validate that the given content has at least a title, an html and a url.
139 * instead of fetch the content from the url.
140 * 146 *
141 * @param array $content 147 * @param array $content
142 * 148 *