X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FContentProxy.php;h=5901df8bdc0a456d8cf6c5ae1ac5c605cacb0027;hb=488e3d7a915dcdce37fe4af14f33293edcb4fbdb;hp=d38811a272caa13b0ed859a99ee394c64e93b831;hpb=a6e4e83809ab7abf51e5c06de503ef1b900bb219;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index d38811a2..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Tools\Utils; /** - * This kind of proxy class take care of getting the content from an url - * and update the entry with what it found. + * This kind of proxy class takes care of getting the content from an url + * and updates the entry with what it found. */ class ContentProxy { @@ -47,13 +47,18 @@ class ContentProxy */ public function updateEntry(Entry $entry, $url, array $content = [], $disableContentUpdate = false) { + $this->graby->toggleImgNoReferrer(true); if (!empty($content['html'])) { $content['html'] = $this->graby->cleanupHtml($content['html'], $url); } if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { $fetchedContent = $this->graby->fetchContent($url); - $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); + + $fetchedContent['title'] = $this->sanitizeContentTitle( + $fetchedContent['title'], + isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : '' + ); // when content is imported, we have information in $content // in case fetching content goes bad, we'll keep the imported information instead of overriding them @@ -73,6 +78,8 @@ class ContentProxy $entry->setUrl($url); } + $entry->setGivenUrl($url); + $this->stockEntry($entry, $content); } @@ -187,8 +194,8 @@ class ContentProxy /** * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. * - * @param $title - * @param $contentType + * @param string $title + * @param string $contentType * * @return string */ @@ -252,22 +259,19 @@ class ContentProxy if (!empty($content['title'])) { $entry->setTitle($content['title']); - } elseif (!empty($content['open_graph']['og_title'])) { - $entry->setTitle($content['open_graph']['og_title']); } - $html = $content['html']; - if (false === $html) { - $html = $this->fetchingErrorMessage; + if (empty($content['html'])) { + $content['html'] = $this->fetchingErrorMessage; - if (!empty($content['open_graph']['og_description'])) { - $html .= '

But we found a short description:

'; - $html .= $content['open_graph']['og_description']; + if (!empty($content['description'])) { + $content['html'] .= '

But we found a short description:

'; + $content['html'] .= $content['description']; } } - $entry->setContent($html); - $entry->setReadingTime(Utils::getReadingTime($html)); + $entry->setContent($content['html']); + $entry->setReadingTime(Utils::getReadingTime($content['html'])); if (!empty($content['status'])) { $entry->setHttpStatus($content['status']); @@ -277,8 +281,8 @@ class ContentProxy $entry->setPublishedBy($content['authors']); } - if (!empty($content['all_headers']) && $this->storeArticleHeaders) { - $entry->setHeaders($content['all_headers']); + if (!empty($content['headers'])) { + $entry->setHeaders($content['headers']); } if (!empty($content['date'])) { @@ -289,17 +293,30 @@ class ContentProxy $this->updateLanguage($entry, $content['language']); } - if (!empty($content['open_graph']['og_image'])) { - $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); + $previewPictureUrl = ''; + if (!empty($content['image'])) { + $previewPictureUrl = $content['image']; } // if content is an image, define it as a preview too - if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->updatePreviewPicture($entry, $content['url']); + if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { + $previewPictureUrl = $content['url']; + } elseif (empty($previewPictureUrl)) { + $this->logger->debug('Extracting images from content to provide a default preview picture'); + $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); + $this->logger->debug(\count($imagesUrls) . ' pictures found'); + + if (!empty($imagesUrls)) { + $previewPictureUrl = $imagesUrls[0]; + } + } + + if (!empty($content['headers']['content-type'])) { + $entry->setMimetype($content['headers']['content-type']); } - if (!empty($content['content_type'])) { - $entry->setMimetype($content['content_type']); + if (!empty($previewPictureUrl)) { + $this->updatePreviewPicture($entry, $previewPictureUrl); } try {