diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 74 |
1 files changed, 48 insertions, 26 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 4b3e6fbb..bfaa1976 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -31,22 +31,20 @@ class ContentProxy | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Fetch content using graby and hydrate given entry with results information. | 34 | * Update entry using either fetched or provided content. |
35 | * In case we couldn't find content, we'll try to use Open Graph data. | ||
36 | * | 35 | * |
37 | * We can also force the content, in case of an import from the v1 for example, so the function won't | 36 | * @param Entry $entry Entry to update |
38 | * fetch the content from the website but rather use information given with the $content parameter. | 37 | * @param string $url Url of the content |
39 | * | 38 | * @param array $content Array with content provided for import with AT LEAST keys title, html, url to skip the fetchContent from the url |
40 | * @param Entry $entry Entry to update | 39 | * @param bool $disableContentUpdate Whether to skip trying to fetch content using Graby |
41 | * @param string $url Url to grab content for | ||
42 | * @param array $content An array with AT LEAST keys title, html, url, language & content_type to skip the fetchContent from the url | ||
43 | * | ||
44 | * @return Entry | ||
45 | */ | 40 | */ |
46 | public function updateEntry(Entry $entry, $url, array $content = []) | 41 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) |
47 | { | 42 | { |
48 | // do we have to fetch the content or the provided one is ok? | 43 | if (!empty($content['html'])) { |
49 | if (empty($content) || false === $this->validateContent($content)) { | 44 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); |
45 | } | ||
46 | |||
47 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { | ||
50 | $fetchedContent = $this->graby->fetchContent($url); | 48 | $fetchedContent = $this->graby->fetchContent($url); |
51 | 49 | ||
52 | // when content is imported, we have information in $content | 50 | // when content is imported, we have information in $content |
@@ -56,8 +54,24 @@ class ContentProxy | |||
56 | } | 54 | } |
57 | } | 55 | } |
58 | 56 | ||
57 | // be sure to keep the url in case of error | ||
58 | // so we'll be able to refetch it in the future | ||
59 | $content['url'] = !empty($content['url']) ? $content['url'] : $url; | ||
60 | |||
61 | $this->stockEntry($entry, $content); | ||
62 | } | ||
63 | |||
64 | /** | ||
65 | * Stock entry with fetched or imported content. | ||
66 | * Will fall back to OpenGraph data if available. | ||
67 | * | ||
68 | * @param Entry $entry Entry to stock | ||
69 | * @param array $content Array with at least title, url & html | ||
70 | */ | ||
71 | private function stockEntry(Entry $entry, array $content) | ||
72 | { | ||
59 | $title = $content['title']; | 73 | $title = $content['title']; |
60 | if (!$title && isset($content['open_graph']['og_title'])) { | 74 | if (!$title && !empty($content['open_graph']['og_title'])) { |
61 | $title = $content['open_graph']['og_title']; | 75 | $title = $content['open_graph']['og_title']; |
62 | } | 76 | } |
63 | 77 | ||
@@ -65,19 +79,30 @@ class ContentProxy | |||
65 | if (false === $html) { | 79 | if (false === $html) { |
66 | $html = $this->fetchingErrorMessage; | 80 | $html = $this->fetchingErrorMessage; |
67 | 81 | ||
68 | if (isset($content['open_graph']['og_description'])) { | 82 | if (!empty($content['open_graph']['og_description'])) { |
69 | $html .= '<p><i>But we found a short description: </i></p>'; | 83 | $html .= '<p><i>But we found a short description: </i></p>'; |
70 | $html .= $content['open_graph']['og_description']; | 84 | $html .= $content['open_graph']['og_description']; |
71 | } | 85 | } |
72 | } | 86 | } |
73 | 87 | ||
74 | $entry->setUrl($content['url'] ?: $url); | 88 | $entry->setUrl($content['url']); |
75 | $entry->setTitle($title); | 89 | $entry->setTitle($title); |
76 | $entry->setContent($html); | 90 | $entry->setContent($html); |
77 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); | 91 | $entry->setHttpStatus(isset($content['status']) ? $content['status'] : ''); |
78 | 92 | ||
79 | if (isset($content['date']) && null !== $content['date'] && '' !== $content['date']) { | 93 | if (!empty($content['date'])) { |
80 | $entry->setPublishedAt(new \DateTime($content['date'])); | 94 | $date = $content['date']; |
95 | |||
96 | // is it a timestamp? | ||
97 | if (filter_var($date, FILTER_VALIDATE_INT) !== false) { | ||
98 | $date = '@'.$content['date']; | ||
99 | } | ||
100 | |||
101 | try { | ||
102 | $entry->setPublishedAt(new \DateTime($date)); | ||
103 | } catch (\Exception $e) { | ||
104 | $this->logger->warning('Error while defining date', ['e' => $e, 'url' => $content['url'], 'date' => $content['date']]); | ||
105 | } | ||
81 | } | 106 | } |
82 | 107 | ||
83 | if (!empty($content['authors'])) { | 108 | if (!empty($content['authors'])) { |
@@ -97,12 +122,12 @@ class ContentProxy | |||
97 | $entry->setDomainName($domainName); | 122 | $entry->setDomainName($domainName); |
98 | } | 123 | } |
99 | 124 | ||
100 | if (isset($content['open_graph']['og_image']) && $content['open_graph']['og_image']) { | 125 | if (!empty($content['open_graph']['og_image'])) { |
101 | $entry->setPreviewPicture($content['open_graph']['og_image']); | 126 | $entry->setPreviewPicture($content['open_graph']['og_image']); |
102 | } | 127 | } |
103 | 128 | ||
104 | // if content is an image define as a preview too | 129 | // if content is an image define as a preview too |
105 | if (isset($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 130 | if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
106 | $entry->setPreviewPicture($content['url']); | 131 | $entry->setPreviewPicture($content['url']); |
107 | } | 132 | } |
108 | 133 | ||
@@ -110,17 +135,14 @@ class ContentProxy | |||
110 | $this->tagger->tag($entry); | 135 | $this->tagger->tag($entry); |
111 | } catch (\Exception $e) { | 136 | } catch (\Exception $e) { |
112 | $this->logger->error('Error while trying to automatically tag an entry.', [ | 137 | $this->logger->error('Error while trying to automatically tag an entry.', [ |
113 | 'entry_url' => $url, | 138 | 'entry_url' => $content['url'], |
114 | 'error_msg' => $e->getMessage(), | 139 | 'error_msg' => $e->getMessage(), |
115 | ]); | 140 | ]); |
116 | } | 141 | } |
117 | |||
118 | return $entry; | ||
119 | } | 142 | } |
120 | 143 | ||
121 | /** | 144 | /** |
122 | * Validate that the given content as enough value to be used | 145 | * Validate that the given content has at least a title, an html and a url. |
123 | * instead of fetch the content from the url. | ||
124 | * | 146 | * |
125 | * @param array $content | 147 | * @param array $content |
126 | * | 148 | * |
@@ -128,6 +150,6 @@ class ContentProxy | |||
128 | */ | 150 | */ |
129 | private function validateContent(array $content) | 151 | private function validateContent(array $content) |
130 | { | 152 | { |
131 | return isset($content['title']) && isset($content['html']) && isset($content['url']) && isset($content['language']) && isset($content['content_type']); | 153 | return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); |
132 | } | 154 | } |
133 | } | 155 | } |