X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FContentProxy.php;h=5901df8bdc0a456d8cf6c5ae1ac5c605cacb0027;hb=488e3d7a915dcdce37fe4af14f33293edcb4fbdb;hp=bc257ffbc32ab1f8c468d3eb9cc170c67f3ffbc2;hpb=a73cb8a689431a035d1ffca10d9654e84de5b58c;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index bc257ffb..5901df8b 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Tools\Utils; /** - * This kind of proxy class take care of getting the content from an url - * and update the entry with what it found. + * This kind of proxy class takes care of getting the content from an url + * and updates the entry with what it found. */ class ContentProxy { @@ -54,7 +54,11 @@ class ContentProxy if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { $fetchedContent = $this->graby->fetchContent($url); - $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); + + $fetchedContent['title'] = $this->sanitizeContentTitle( + $fetchedContent['title'], + isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : '' + ); // when content is imported, we have information in $content // in case fetching content goes bad, we'll keep the imported information instead of overriding them @@ -74,6 +78,8 @@ class ContentProxy $entry->setUrl($url); } + $entry->setGivenUrl($url); + $this->stockEntry($entry, $content); } @@ -188,8 +194,8 @@ class ContentProxy /** * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. * - * @param $title - * @param $contentType + * @param string $title + * @param string $contentType * * @return string */ @@ -253,16 +259,14 @@ class ContentProxy if (!empty($content['title'])) { $entry->setTitle($content['title']); - } elseif (!empty($content['open_graph']['og_title'])) { - $entry->setTitle($content['open_graph']['og_title']); } if (empty($content['html'])) { $content['html'] = $this->fetchingErrorMessage; - if (!empty($content['open_graph']['og_description'])) { + if (!empty($content['description'])) { $content['html'] .= '

But we found a short description:

'; - $content['html'] .= $content['open_graph']['og_description']; + $content['html'] .= $content['description']; } } @@ -277,8 +281,8 @@ class ContentProxy $entry->setPublishedBy($content['authors']); } - if (!empty($content['all_headers']) && $this->storeArticleHeaders) { - $entry->setHeaders($content['all_headers']); + if (!empty($content['headers'])) { + $entry->setHeaders($content['headers']); } if (!empty($content['date'])) { @@ -289,17 +293,30 @@ class ContentProxy $this->updateLanguage($entry, $content['language']); } - if (!empty($content['open_graph']['og_image'])) { - $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); + $previewPictureUrl = ''; + if (!empty($content['image'])) { + $previewPictureUrl = $content['image']; } // if content is an image, define it as a preview too - if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->updatePreviewPicture($entry, $content['url']); + if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { + $previewPictureUrl = $content['url']; + } elseif (empty($previewPictureUrl)) { + $this->logger->debug('Extracting images from content to provide a default preview picture'); + $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); + $this->logger->debug(\count($imagesUrls) . ' pictures found'); + + if (!empty($imagesUrls)) { + $previewPictureUrl = $imagesUrls[0]; + } + } + + if (!empty($content['headers']['content-type'])) { + $entry->setMimetype($content['headers']['content-type']); } - if (!empty($content['content_type'])) { - $entry->setMimetype($content['content_type']); + if (!empty($previewPictureUrl)) { + $this->updatePreviewPicture($entry, $previewPictureUrl); } try {