about summary refs log tree commit diff homepage
path: root/src/Wallabag/CoreBundle/Helper/ContentProxy.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 52
1 files changed, 35 insertions, 17 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index 31953f12..5901df8b 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -47,13 +47,18 @@ class ContentProxy
47 */ 47 */
48 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) 48 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
49 { 49 {
50 $this->graby->toggleImgNoReferrer(true);
50 if (!empty($content['html'])) { 51 if (!empty($content['html'])) {
51 $content['html'] = $this->graby->cleanupHtml($content['html'], $url); 52 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
52 } 53 }
53 54
54 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { 55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
55 $fetchedContent = $this->graby->fetchContent($url); 56 $fetchedContent = $this->graby->fetchContent($url);
56 $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); 57
58 $fetchedContent['title'] = $this->sanitizeContentTitle(
59 $fetchedContent['title'],
60 isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : ''
61 );
57 62
58 // when content is imported, we have information in $content 63 // when content is imported, we have information in $content
59 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 64 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
@@ -73,6 +78,8 @@ class ContentProxy
73 $entry->setUrl($url); 78 $entry->setUrl($url);
74 } 79 }
75 80
81 $entry->setGivenUrl($url);
82
76 $this->stockEntry($entry, $content); 83 $this->stockEntry($entry, $content);
77 } 84 }
78 85
@@ -187,8 +194,8 @@ class ContentProxy
187 /** 194 /**
188 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. 195 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character.
189 * 196 *
190 * @param $title 197 * @param string $title
191 * @param $contentType 198 * @param string $contentType
192 * 199 *
193 * @return string 200 * @return string
194 */ 201 */
@@ -252,16 +259,14 @@ class ContentProxy
252 259
253 if (!empty($content['title'])) { 260 if (!empty($content['title'])) {
254 $entry->setTitle($content['title']); 261 $entry->setTitle($content['title']);
255 } elseif (!empty($content['open_graph']['og_title'])) {
256 $entry->setTitle($content['open_graph']['og_title']);
257 } 262 }
258 263
259 if (empty($content['html'])) { 264 if (empty($content['html'])) {
260 $content['html'] = $this->fetchingErrorMessage; 265 $content['html'] = $this->fetchingErrorMessage;
261 266
262 if (!empty($content['open_graph']['og_description'])) { 267 if (!empty($content['description'])) {
263 $content['html'] .= '<p><i>But we found a short description: </i></p>'; 268 $content['html'] .= '<p><i>But we found a short description: </i></p>';
264 $content['html'] .= $content['open_graph']['og_description']; 269 $content['html'] .= $content['description'];
265 } 270 }
266 } 271 }
267 272
@@ -276,8 +281,8 @@ class ContentProxy
276 $entry->setPublishedBy($content['authors']); 281 $entry->setPublishedBy($content['authors']);
277 } 282 }
278 283
279 if (!empty($content['all_headers']) && $this->storeArticleHeaders) { 284 if (!empty($content['headers'])) {
280 $entry->setHeaders($content['all_headers']); 285 $entry->setHeaders($content['headers']);
281 } 286 }
282 287
283 if (!empty($content['date'])) { 288 if (!empty($content['date'])) {
@@ -288,17 +293,30 @@ class ContentProxy
288 $this->updateLanguage($entry, $content['language']); 293 $this->updateLanguage($entry, $content['language']);
289 } 294 }
290 295
291 if (!empty($content['open_graph']['og_image'])) { 296 $previewPictureUrl = '';
292 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 297 if (!empty($content['image'])) {
298 $previewPictureUrl = $content['image'];
293 } 299 }
294 300
295 // if content is an image, define it as a preview too 301 // if content is an image, define it as a preview too
296 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 302 if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
297 $this->updatePreviewPicture($entry, $content['url']); 303 $previewPictureUrl = $content['url'];
304 } elseif (empty($previewPictureUrl)) {
305 $this->logger->debug('Extracting images from content to provide a default preview picture');
306 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
307 $this->logger->debug(\count($imagesUrls) . ' pictures found');
308
309 if (!empty($imagesUrls)) {
310 $previewPictureUrl = $imagesUrls[0];
311 }
312 }
313
314 if (!empty($content['headers']['content-type'])) {
315 $entry->setMimetype($content['headers']['content-type']);
298 } 316 }
299 317
300 if (!empty($content['content_type'])) { 318 if (!empty($previewPictureUrl)) {
301 $entry->setMimetype($content['content_type']); 319 $this->updatePreviewPicture($entry, $previewPictureUrl);
302 } 320 }
303 321
304 try { 322 try {