diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 73 |
1 file changed, 40 insertions, 33 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index d38811a2..9c6fa8db 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; | |||
12 | use Wallabag\CoreBundle\Tools\Utils; | 12 | use Wallabag\CoreBundle\Tools\Utils; |
13 | 13 | ||
14 | /** | 14 | /** |
15 | * This kind of proxy class take care of getting the content from an url | 15 | * This kind of proxy class takes care of getting the content from an url |
16 | * and update the entry with what it found. | 16 | * and updates the entry with what it found. |
17 | */ | 17 | */ |
18 | class ContentProxy | 18 | class ContentProxy |
19 | { | 19 | { |
@@ -47,13 +47,18 @@ class ContentProxy | |||
47 | */ | 47 | */ |
48 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) | 48 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) |
49 | { | 49 | { |
50 | $this->graby->toggleImgNoReferrer(true); | ||
50 | if (!empty($content['html'])) { | 51 | if (!empty($content['html'])) { |
51 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); | 52 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); |
52 | } | 53 | } |
53 | 54 | ||
54 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { | 55 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { |
55 | $fetchedContent = $this->graby->fetchContent($url); | 56 | $fetchedContent = $this->graby->fetchContent($url); |
56 | $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); | 57 | |
58 | $fetchedContent['title'] = $this->sanitizeContentTitle( | ||
59 | $fetchedContent['title'], | ||
60 | isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : '' | ||
61 | ); | ||
57 | 62 | ||
58 | // when content is imported, we have information in $content | 63 | // when content is imported, we have information in $content |
59 | // in case fetching content goes bad, we'll keep the imported information instead of overriding them | 64 | // in case fetching content goes bad, we'll keep the imported information instead of overriding them |
@@ -73,13 +78,14 @@ class ContentProxy | |||
73 | $entry->setUrl($url); | 78 | $entry->setUrl($url); |
74 | } | 79 | } |
75 | 80 | ||
81 | $entry->setGivenUrl($url); | ||
82 | |||
76 | $this->stockEntry($entry, $content); | 83 | $this->stockEntry($entry, $content); |
77 | } | 84 | } |
78 | 85 | ||
79 | /** | 86 | /** |
80 | * Use a Symfony validator to ensure the language is well formatted. | 87 | * Use a Symfony validator to ensure the language is well formatted. |
81 | * | 88 | * |
82 | * @param Entry $entry | ||
83 | * @param string $value Language to validate and save | 89 | * @param string $value Language to validate and save |
84 | */ | 90 | */ |
85 | public function updateLanguage(Entry $entry, $value) | 91 | public function updateLanguage(Entry $entry, $value) |
@@ -105,7 +111,6 @@ class ContentProxy | |||
105 | /** | 111 | /** |
106 | * Use a Symfony validator to ensure the preview picture is a real url. | 112 | * Use a Symfony validator to ensure the preview picture is a real url. |
107 | * | 113 | * |
108 | * @param Entry $entry | ||
109 | * @param string $value URL to validate and save | 114 | * @param string $value URL to validate and save |
110 | */ | 115 | */ |
111 | public function updatePreviewPicture(Entry $entry, $value) | 116 | public function updatePreviewPicture(Entry $entry, $value) |
@@ -127,7 +132,6 @@ class ContentProxy | |||
127 | /** | 132 | /** |
128 | * Update date. | 133 | * Update date. |
129 | * | 134 | * |
130 | * @param Entry $entry | ||
131 | * @param string $value Date to validate and save | 135 | * @param string $value Date to validate and save |
132 | */ | 136 | */ |
133 | public function updatePublishedAt(Entry $entry, $value) | 137 | public function updatePublishedAt(Entry $entry, $value) |
@@ -154,8 +158,6 @@ class ContentProxy | |||
154 | 158 | ||
155 | /** | 159 | /** |
156 | * Helper to extract and save host from entry url. | 160 | * Helper to extract and save host from entry url. |
157 | * | ||
158 | * @param Entry $entry | ||
159 | */ | 161 | */ |
160 | public function setEntryDomainName(Entry $entry) | 162 | public function setEntryDomainName(Entry $entry) |
161 | { | 163 | { |
@@ -169,8 +171,6 @@ class ContentProxy | |||
169 | * Helper to set a default title using: | 171 | * Helper to set a default title using: |
170 | * - url basename, if applicable | 172 | * - url basename, if applicable |
171 | * - hostname. | 173 | * - hostname. |
172 | * | ||
173 | * @param Entry $entry | ||
174 | */ | 174 | */ |
175 | public function setDefaultEntryTitle(Entry $entry) | 175 | public function setDefaultEntryTitle(Entry $entry) |
176 | { | 176 | { |
@@ -187,8 +187,8 @@ class ContentProxy | |||
187 | /** | 187 | /** |
188 | * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. | 188 | * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. |
189 | * | 189 | * |
190 | * @param $title | 190 | * @param string $title |
191 | * @param $contentType | 191 | * @param string $contentType |
192 | * | 192 | * |
193 | * @return string | 193 | * @return string |
194 | */ | 194 | */ |
@@ -252,22 +252,19 @@ class ContentProxy | |||
252 | 252 | ||
253 | if (!empty($content['title'])) { | 253 | if (!empty($content['title'])) { |
254 | $entry->setTitle($content['title']); | 254 | $entry->setTitle($content['title']); |
255 | } elseif (!empty($content['open_graph']['og_title'])) { | ||
256 | $entry->setTitle($content['open_graph']['og_title']); | ||
257 | } | 255 | } |
258 | 256 | ||
259 | $html = $content['html']; | 257 | if (empty($content['html'])) { |
260 | if (false === $html) { | 258 | $content['html'] = $this->fetchingErrorMessage; |
261 | $html = $this->fetchingErrorMessage; | ||
262 | 259 | ||
263 | if (!empty($content['open_graph']['og_description'])) { | 260 | if (!empty($content['description'])) { |
264 | $html .= '<p><i>But we found a short description: </i></p>'; | 261 | $content['html'] .= '<p><i>But we found a short description: </i></p>'; |
265 | $html .= $content['open_graph']['og_description']; | 262 | $content['html'] .= $content['description']; |
266 | } | 263 | } |
267 | } | 264 | } |
268 | 265 | ||
269 | $entry->setContent($html); | 266 | $entry->setContent($content['html']); |
270 | $entry->setReadingTime(Utils::getReadingTime($html)); | 267 | $entry->setReadingTime(Utils::getReadingTime($content['html'])); |
271 | 268 | ||
272 | if (!empty($content['status'])) { | 269 | if (!empty($content['status'])) { |
273 | $entry->setHttpStatus($content['status']); | 270 | $entry->setHttpStatus($content['status']); |
@@ -277,8 +274,8 @@ class ContentProxy | |||
277 | $entry->setPublishedBy($content['authors']); | 274 | $entry->setPublishedBy($content['authors']); |
278 | } | 275 | } |
279 | 276 | ||
280 | if (!empty($content['all_headers']) && $this->storeArticleHeaders) { | 277 | if (!empty($content['headers'])) { |
281 | $entry->setHeaders($content['all_headers']); | 278 | $entry->setHeaders($content['headers']); |
282 | } | 279 | } |
283 | 280 | ||
284 | if (!empty($content['date'])) { | 281 | if (!empty($content['date'])) { |
@@ -289,17 +286,30 @@ class ContentProxy | |||
289 | $this->updateLanguage($entry, $content['language']); | 286 | $this->updateLanguage($entry, $content['language']); |
290 | } | 287 | } |
291 | 288 | ||
292 | if (!empty($content['open_graph']['og_image'])) { | 289 | $previewPictureUrl = ''; |
293 | $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); | 290 | if (!empty($content['image'])) { |
291 | $previewPictureUrl = $content['image']; | ||
294 | } | 292 | } |
295 | 293 | ||
296 | // if content is an image, define it as a preview too | 294 | // if content is an image, define it as a preview too |
297 | if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 295 | if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
298 | $this->updatePreviewPicture($entry, $content['url']); | 296 | $previewPictureUrl = $content['url']; |
297 | } elseif (empty($previewPictureUrl)) { | ||
298 | $this->logger->debug('Extracting images from content to provide a default preview picture'); | ||
299 | $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); | ||
300 | $this->logger->debug(\count($imagesUrls) . ' pictures found'); | ||
301 | |||
302 | if (!empty($imagesUrls)) { | ||
303 | $previewPictureUrl = $imagesUrls[0]; | ||
304 | } | ||
305 | } | ||
306 | |||
307 | if (!empty($content['headers']['content-type'])) { | ||
308 | $entry->setMimetype($content['headers']['content-type']); | ||
299 | } | 309 | } |
300 | 310 | ||
301 | if (!empty($content['content_type'])) { | 311 | if (!empty($previewPictureUrl)) { |
302 | $entry->setMimetype($content['content_type']); | 312 | $this->updatePreviewPicture($entry, $previewPictureUrl); |
303 | } | 313 | } |
304 | 314 | ||
305 | try { | 315 | try { |
@@ -316,7 +326,6 @@ class ContentProxy | |||
316 | * Update the origin_url field when a redirection occurs | 326 | * Update the origin_url field when a redirection occurs |
317 | * This field is set if it is empty and new url does not match ignore list. | 327 | * This field is set if it is empty and new url does not match ignore list. |
318 | * | 328 | * |
319 | * @param Entry $entry | ||
320 | * @param string $url | 329 | * @param string $url |
321 | */ | 330 | */ |
322 | private function updateOriginUrl(Entry $entry, $url) | 331 | private function updateOriginUrl(Entry $entry, $url) |
@@ -424,8 +433,6 @@ class ContentProxy | |||
424 | /** | 433 | /** |
425 | * Validate that the given content has at least a title, an html and a url. | 434 | * Validate that the given content has at least a title, an html and a url. |
426 | * | 435 | * |
427 | * @param array $content | ||
428 | * | ||
429 | * @return bool true if valid otherwise false | 436 | * @return bool true if valid otherwise false |
430 | */ | 437 | */ |
431 | private function validateContent(array $content) | 438 | private function validateContent(array $content) |