about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Wallabag/CoreBundle/Helper/ContentProxy.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/ContentProxy.php')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 73
1 files changed, 40 insertions, 33 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index d38811a2..9c6fa8db 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -47,13 +47,18 @@ class ContentProxy
47 */ 47 */
48 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) 48 public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false)
49 { 49 {
50 $this->graby->toggleImgNoReferrer(true);
50 if (!empty($content['html'])) { 51 if (!empty($content['html'])) {
51 $content['html'] = $this->graby->cleanupHtml($content['html'], $url); 52 $content['html'] = $this->graby->cleanupHtml($content['html'], $url);
52 } 53 }
53 54
54 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { 55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
55 $fetchedContent = $this->graby->fetchContent($url); 56 $fetchedContent = $this->graby->fetchContent($url);
56 $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); 57
58 $fetchedContent['title'] = $this->sanitizeContentTitle(
59 $fetchedContent['title'],
60 isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : ''
61 );
57 62
58 // when content is imported, we have information in $content 63 // when content is imported, we have information in $content
59 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 64 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
@@ -73,13 +78,14 @@ class ContentProxy
73 $entry->setUrl($url); 78 $entry->setUrl($url);
74 } 79 }
75 80
81 $entry->setGivenUrl($url);
82
76 $this->stockEntry($entry, $content); 83 $this->stockEntry($entry, $content);
77 } 84 }
78 85
79 /** 86 /**
80 * Use a Symfony validator to ensure the language is well formatted. 87 * Use a Symfony validator to ensure the language is well formatted.
81 * 88 *
82 * @param Entry $entry
83 * @param string $value Language to validate and save 89 * @param string $value Language to validate and save
84 */ 90 */
85 public function updateLanguage(Entry $entry, $value) 91 public function updateLanguage(Entry $entry, $value)
@@ -105,7 +111,6 @@ class ContentProxy
105 /** 111 /**
106 * Use a Symfony validator to ensure the preview picture is a real url. 112 * Use a Symfony validator to ensure the preview picture is a real url.
107 * 113 *
108 * @param Entry $entry
109 * @param string $value URL to validate and save 114 * @param string $value URL to validate and save
110 */ 115 */
111 public function updatePreviewPicture(Entry $entry, $value) 116 public function updatePreviewPicture(Entry $entry, $value)
@@ -127,7 +132,6 @@ class ContentProxy
127 /** 132 /**
128 * Update date. 133 * Update date.
129 * 134 *
130 * @param Entry $entry
131 * @param string $value Date to validate and save 135 * @param string $value Date to validate and save
132 */ 136 */
133 public function updatePublishedAt(Entry $entry, $value) 137 public function updatePublishedAt(Entry $entry, $value)
@@ -154,8 +158,6 @@ class ContentProxy
154 158
155 /** 159 /**
156 * Helper to extract and save host from entry url. 160 * Helper to extract and save host from entry url.
157 *
158 * @param Entry $entry
159 */ 161 */
160 public function setEntryDomainName(Entry $entry) 162 public function setEntryDomainName(Entry $entry)
161 { 163 {
@@ -169,8 +171,6 @@ class ContentProxy
169 * Helper to set a default title using: 171 * Helper to set a default title using:
170 * - url basename, if applicable 172 * - url basename, if applicable
171 * - hostname. 173 * - hostname.
172 *
173 * @param Entry $entry
174 */ 174 */
175 public function setDefaultEntryTitle(Entry $entry) 175 public function setDefaultEntryTitle(Entry $entry)
176 { 176 {
@@ -187,8 +187,8 @@ class ContentProxy
187 /** 187 /**
188 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. 188 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character.
189 * 189 *
190 * @param $title 190 * @param string $title
191 * @param $contentType 191 * @param string $contentType
192 * 192 *
193 * @return string 193 * @return string
194 */ 194 */
@@ -252,22 +252,19 @@ class ContentProxy
252 252
253 if (!empty($content['title'])) { 253 if (!empty($content['title'])) {
254 $entry->setTitle($content['title']); 254 $entry->setTitle($content['title']);
255 } elseif (!empty($content['open_graph']['og_title'])) {
256 $entry->setTitle($content['open_graph']['og_title']);
257 } 255 }
258 256
259 $html = $content['html']; 257 if (empty($content['html'])) {
260 if (false === $html) { 258 $content['html'] = $this->fetchingErrorMessage;
261 $html = $this->fetchingErrorMessage;
262 259
263 if (!empty($content['open_graph']['og_description'])) { 260 if (!empty($content['description'])) {
264 $html .= '<p><i>But we found a short description: </i></p>'; 261 $content['html'] .= '<p><i>But we found a short description: </i></p>';
265 $html .= $content['open_graph']['og_description']; 262 $content['html'] .= $content['description'];
266 } 263 }
267 } 264 }
268 265
269 $entry->setContent($html); 266 $entry->setContent($content['html']);
270 $entry->setReadingTime(Utils::getReadingTime($html)); 267 $entry->setReadingTime(Utils::getReadingTime($content['html']));
271 268
272 if (!empty($content['status'])) { 269 if (!empty($content['status'])) {
273 $entry->setHttpStatus($content['status']); 270 $entry->setHttpStatus($content['status']);
@@ -277,8 +274,8 @@ class ContentProxy
277 $entry->setPublishedBy($content['authors']); 274 $entry->setPublishedBy($content['authors']);
278 } 275 }
279 276
280 if (!empty($content['all_headers']) && $this->storeArticleHeaders) { 277 if (!empty($content['headers'])) {
281 $entry->setHeaders($content['all_headers']); 278 $entry->setHeaders($content['headers']);
282 } 279 }
283 280
284 if (!empty($content['date'])) { 281 if (!empty($content['date'])) {
@@ -289,17 +286,30 @@ class ContentProxy
289 $this->updateLanguage($entry, $content['language']); 286 $this->updateLanguage($entry, $content['language']);
290 } 287 }
291 288
292 if (!empty($content['open_graph']['og_image'])) { 289 $previewPictureUrl = '';
293 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 290 if (!empty($content['image'])) {
291 $previewPictureUrl = $content['image'];
294 } 292 }
295 293
296 // if content is an image, define it as a preview too 294 // if content is an image, define it as a preview too
297 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 295 if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
298 $this->updatePreviewPicture($entry, $content['url']); 296 $previewPictureUrl = $content['url'];
297 } elseif (empty($previewPictureUrl)) {
298 $this->logger->debug('Extracting images from content to provide a default preview picture');
299 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
300 $this->logger->debug(\count($imagesUrls) . ' pictures found');
301
302 if (!empty($imagesUrls)) {
303 $previewPictureUrl = $imagesUrls[0];
304 }
305 }
306
307 if (!empty($content['headers']['content-type'])) {
308 $entry->setMimetype($content['headers']['content-type']);
299 } 309 }
300 310
301 if (!empty($content['content_type'])) { 311 if (!empty($previewPictureUrl)) {
302 $entry->setMimetype($content['content_type']); 312 $this->updatePreviewPicture($entry, $previewPictureUrl);
303 } 313 }
304 314
305 try { 315 try {
@@ -316,7 +326,6 @@ class ContentProxy
316 * Update the origin_url field when a redirection occurs 326 * Update the origin_url field when a redirection occurs
317 * This field is set if it is empty and new url does not match ignore list. 327 * This field is set if it is empty and new url does not match ignore list.
318 * 328 *
319 * @param Entry $entry
320 * @param string $url 329 * @param string $url
321 */ 330 */
322 private function updateOriginUrl(Entry $entry, $url) 331 private function updateOriginUrl(Entry $entry, $url)
@@ -424,8 +433,6 @@ class ContentProxy
424 /** 433 /**
425 * Validate that the given content has at least a title, an html and a url. 434 * Validate that the given content has at least a title, an html and a url.
426 * 435 *
427 * @param array $content
428 *
429 * @return bool true if valid otherwise false 436 * @return bool true if valid otherwise false
430 */ 437 */
431 private function validateContent(array $content) 438 private function validateContent(array $content)