diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 48 |
1 files changed, 27 insertions, 21 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 8ba77ca9..bfaa1976 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -31,27 +31,20 @@ class ContentProxy | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Fetch content using graby and hydrate given $entry with results information. | 34 | * Update entry using either fetched or provided content. |
35 | * In case we couldn't find content, we'll try to use Open Graph data. | ||
36 | * | 35 | * |
37 | * We can also force the content, in case of an import from the v1 for example, so the function won't | 36 | * @param Entry $entry Entry to update |
38 | * fetch the content from the website but rather use information given with the $content parameter. | 37 | * @param string $url Url of the content |
39 | * | 38 | * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url |
40 | * @param Entry $entry Entry to update | 39 | * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby |
41 | * @param string $url Url to grab content for | ||
42 | * @param array $content An array with AT LEAST keys title, html, url to skip the fetchContent from the url | ||
43 | * | ||
44 | * @return Entry | ||
45 | */ | 40 | */ |
46 | public function updateEntry(Entry $entry, $url, array $content = []) | 41 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) |
47 | { | 42 | { |
48 | // ensure content is a bit cleaned up | ||
49 | if (!empty($content['html'])) { | 43 | if (!empty($content['html'])) { |
50 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); | 44 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); |
51 | } | 45 | } |
52 | 46 | ||
53 | // do we have to fetch the content or the provided one is ok? | 47 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { |
54 | if (empty($content) || false === $this->validateContent($content)) { | ||
55 | $fetchedContent = $this->graby->fetchContent($url); | 48 | $fetchedContent = $this->graby->fetchContent($url); |
56 | 49 | ||
57 | // when content is imported, we have information in $content | 50 | // when content is imported, we have information in $content |
@@ -61,6 +54,22 @@ class ContentProxy | |||
61 | } | 54 | } |
62 | } | 55 | } |
63 | 56 | ||
57 | // be sure to keep the url in case of error | ||
58 | // so we'll be able to refetch it in the future | ||
59 | $content['url'] = !empty($content['url']) ? $content['url'] : $url; | ||
60 | |||
61 | $this->stockEntry($entry, $content); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * Stock entry with fetched or imported content. | ||
66 | * Will fall back to OpenGraph data if available. | ||
67 | * | ||
68 | * @param Entry $entry Entry to stock | ||
69 | * @param array $content Array with at least title, url & html | ||
70 | */ | ||
71 | private function stockEntry(Entry $entry, array $content) | ||
72 | { | ||
64 | $title = $content['title']; | 73 | $title = $content['title']; |
65 | if (!$title && !empty($content['open_graph']['og_title'])) { | 74 | if (!$title && !empty($content['open_graph']['og_title'])) { |
66 | $title = $content['open_graph']['og_title']; | 75 | $title = $content['open_graph']['og_title']; |
@@ -76,7 +85,7 @@ class ContentProxy | |||
76 | } | 85 | } |
77 | } | 86 | } |
78 | 87 | ||
79 | $entry->setUrl($content['url'] ?: $url); | 88 | $entry->setUrl($content['url']); |
80 | $entry->setTitle($title); | 89 | $entry->setTitle($title); |
81 | $entry->setContent($html); | 90 | $entry->setContent($html); |
82 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); | 91 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); |
@@ -92,7 +101,7 @@ class ContentProxy | |||
92 | try { | 101 | try { |
93 | $entry->setPublishedAt(new \DateTime($date)); | 102 | $entry->setPublishedAt(new \DateTime($date)); |
94 | } catch (\Exception $e) { | 103 | } catch (\Exception $e) { |
95 | $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $url, 'date' => $content['date']]); | 104 | $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]); |
96 | } | 105 | } |
97 | } | 106 | } |
98 | 107 | ||
@@ -126,17 +135,14 @@ class ContentProxy | |||
126 | $this->tagger->tag($entry); | 135 | $this->tagger->tag($entry); |
127 | } catch (\Exception $e) { | 136 | } catch (\Exception $e) { |
128 | $this->logger->error('Error while trying to automatically tag an entry.', [ | 137 | $this->logger->error('Error while trying to automatically tag an entry.', [ |
129 | 'entry_url' => $url, | 138 | 'entry_url' => $content['url'], |
130 | 'error_msg' => $e->getMessage(), | 139 | 'error_msg' => $e->getMessage(), |
131 | ]); | 140 | ]); |
132 | } | 141 | } |
133 | |||
134 | return $entry; | ||
135 | } | 142 | } |
136 | 143 | ||
137 | /** | 144 | /** |
138 | * Validate that the given content as enough value to be used | 145 | * Validate that the given content has at least a title, an html and a url. |
139 | * instead of fetch the content from the url. | ||
140 | * | 146 | * |
141 | * @param array $content | 147 | * @param array $content |
142 | * | 148 | * |