diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 52 |
1 files changed, 35 insertions, 17 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 31953f12..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; | |||
12 | use Wallabag\CoreBundle\Tools\Utils; | 12 | use Wallabag\CoreBundle\Tools\Utils; |
13 | 13 | ||
14 | /** | 14 | /** |
15 | * This kind of proxy class take care of getting the content from an url | 15 | * This kind of proxy class takes care of getting the content from an url |
16 | * and update the entry with what it found. | 16 | * and updates the entry with what it found. |
17 | */ | 17 | */ |
18 | class ContentProxy | 18 | class ContentProxy |
19 | { | 19 | { |
@@ -47,13 +47,18 @@ class ContentProxy | |||
47 | */ | 47 | */ |
48 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) | 48 | public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) |
49 | { | 49 | { |
50 | $this->graby->toggleImgNoReferrer(true); | ||
50 | if (!empty($content['html'])) { | 51 | if (!empty($content['html'])) { |
51 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); | 52 | $content['html'] = $this->graby->cleanupHtml($content['html'], $url); |
52 | } | 53 | } |
53 | 54 | ||
54 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { | 55 | if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { |
55 | $fetchedContent = $this->graby->fetchContent($url); | 56 | $fetchedContent = $this->graby->fetchContent($url); |
56 | $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); | 57 | |
58 | $fetchedContent['title'] = $this->sanitizeContentTitle( | ||
59 | $fetchedContent['title'], | ||
60 | isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : '' | ||
61 | ); | ||
57 | 62 | ||
58 | // when content is imported, we have information in $content | 63 | // when content is imported, we have information in $content |
59 | // in case fetching content goes bad, we'll keep the imported information instead of overriding them | 64 | // in case fetching content goes bad, we'll keep the imported information instead of overriding them |
@@ -73,6 +78,8 @@ class ContentProxy | |||
73 | $entry->setUrl($url); | 78 | $entry->setUrl($url); |
74 | } | 79 | } |
75 | 80 | ||
81 | $entry->setGivenUrl($url); | ||
82 | |||
76 | $this->stockEntry($entry, $content); | 83 | $this->stockEntry($entry, $content); |
77 | } | 84 | } |
78 | 85 | ||
@@ -187,8 +194,8 @@ class ContentProxy | |||
187 | /** | 194 | /** |
188 | * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. | 195 | * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. |
189 | * | 196 | * |
190 | * @param $title | 197 | * @param string $title |
191 | * @param $contentType | 198 | * @param string $contentType |
192 | * | 199 | * |
193 | * @return string | 200 | * @return string |
194 | */ | 201 | */ |
@@ -252,16 +259,14 @@ class ContentProxy | |||
252 | 259 | ||
253 | if (!empty($content['title'])) { | 260 | if (!empty($content['title'])) { |
254 | $entry->setTitle($content['title']); | 261 | $entry->setTitle($content['title']); |
255 | } elseif (!empty($content['open_graph']['og_title'])) { | ||
256 | $entry->setTitle($content['open_graph']['og_title']); | ||
257 | } | 262 | } |
258 | 263 | ||
259 | if (empty($content['html'])) { | 264 | if (empty($content['html'])) { |
260 | $content['html'] = $this->fetchingErrorMessage; | 265 | $content['html'] = $this->fetchingErrorMessage; |
261 | 266 | ||
262 | if (!empty($content['open_graph']['og_description'])) { | 267 | if (!empty($content['description'])) { |
263 | $content['html'] .= '<p><i>But we found a short description: </i></p>'; | 268 | $content['html'] .= '<p><i>But we found a short description: </i></p>'; |
264 | $content['html'] .= $content['open_graph']['og_description']; | 269 | $content['html'] .= $content['description']; |
265 | } | 270 | } |
266 | } | 271 | } |
267 | 272 | ||
@@ -276,8 +281,8 @@ class ContentProxy | |||
276 | $entry->setPublishedBy($content['authors']); | 281 | $entry->setPublishedBy($content['authors']); |
277 | } | 282 | } |
278 | 283 | ||
279 | if (!empty($content['all_headers']) && $this->storeArticleHeaders) { | 284 | if (!empty($content['headers'])) { |
280 | $entry->setHeaders($content['all_headers']); | 285 | $entry->setHeaders($content['headers']); |
281 | } | 286 | } |
282 | 287 | ||
283 | if (!empty($content['date'])) { | 288 | if (!empty($content['date'])) { |
@@ -288,17 +293,30 @@ class ContentProxy | |||
288 | $this->updateLanguage($entry, $content['language']); | 293 | $this->updateLanguage($entry, $content['language']); |
289 | } | 294 | } |
290 | 295 | ||
291 | if (!empty($content['open_graph']['og_image'])) { | 296 | $previewPictureUrl = ''; |
292 | $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); | 297 | if (!empty($content['image'])) { |
298 | $previewPictureUrl = $content['image']; | ||
293 | } | 299 | } |
294 | 300 | ||
295 | // if content is an image, define it as a preview too | 301 | // if content is an image, define it as a preview too |
296 | if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 302 | if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
297 | $this->updatePreviewPicture($entry, $content['url']); | 303 | $previewPictureUrl = $content['url']; |
304 | } elseif (empty($previewPictureUrl)) { | ||
305 | $this->logger->debug('Extracting images from content to provide a default preview picture'); | ||
306 | $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); | ||
307 | $this->logger->debug(\count($imagesUrls) . ' pictures found'); | ||
308 | |||
309 | if (!empty($imagesUrls)) { | ||
310 | $previewPictureUrl = $imagesUrls[0]; | ||
311 | } | ||
312 | } | ||
313 | |||
314 | if (!empty($content['headers']['content-type'])) { | ||
315 | $entry->setMimetype($content['headers']['content-type']); | ||
298 | } | 316 | } |
299 | 317 | ||
300 | if (!empty($content['content_type'])) { | 318 | if (!empty($previewPictureUrl)) { |
301 | $entry->setMimetype($content['content_type']); | 319 | $this->updatePreviewPicture($entry, $previewPictureUrl); |
302 | } | 320 | } |
303 | 321 | ||
304 | try { | 322 | try { |