From 423efadefc2459c7b4a2eabc32edaed918e1075d Mon Sep 17 00:00:00 2001 From: nicofrand Date: Fri, 10 May 2019 23:01:07 +0200 Subject: [PATCH] Set first picture as preview picture --- .../CoreBundle/Helper/ContentProxy.php | 20 ++++++++++--- .../CoreBundle/Helper/DownloadImages.php | 29 ++++++++++++++----- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index bc257ffb..ca01dec8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; use Wallabag\CoreBundle\Tools\Utils; /** - * This kind of proxy class take care of getting the content from an url - * and update the entry with what it found. + * This kind of proxy class takes care of getting the content from an url + * and updates the entry with what it found. */ class ContentProxy { @@ -289,13 +289,25 @@ class ContentProxy $this->updateLanguage($entry, $content['language']); } + $previewPictureUrl = ''; if (!empty($content['open_graph']['og_image'])) { - $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); + $previewPictureUrl = $content['open_graph']['og_image']; } // if content is an image, define it as a preview too if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->updatePreviewPicture($entry, $content['url']); + $previewPictureUrl = $content['url']; + } elseif (empty($previewPictureUrl)) { + $this->logger->debug('Extracting images from content to provide a default preview picture'); + $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); + $this->logger->debug(\count($imagesUrls) . ' pictures found'); + if (!empty($imagesUrls)) { + $previewPictureUrl = $imagesUrls[0]; + } + } + + if (!empty($previewPictureUrl)) { + $this->updatePreviewPicture($entry, $previewPictureUrl); } if (!empty($content['content_type'])) { diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 9a7e9828..c1645e45 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -31,23 +31,36 @@ class DownloadImages } /** - * Process the html and extract image from it, save them to local and return the updated html. + * Process the html and extract images URLs from it. * - * @param int $entryId ID of the entry * @param string $html - * @param string $url Used as a base path for relative image and folder * - * @return string + * @return string[] */ - public function processHtml($entryId, $html, $url) + public static function extractImagesUrlsFromHtml($html) { $crawler = new Crawler($html); $imagesCrawler = $crawler ->filterXpath('//img'); $imagesUrls = $imagesCrawler ->extract(['src']); - $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); - $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); + + return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + } + + /** + * Process the html and extract image from it, save them to local and return the updated html. + * + * @param int $entryId ID of the entry + * @param string $html + * @param string $url Used as a base path for relative image and folder + * + * @return string + */ + public function processHtml($entryId, $html, $url) + { + $imagesUrls = self::extractImagesUrlsFromHtml($html); $relativePath = $this->getRelativePath($entryId); @@ -199,7 +212,7 @@ class DownloadImages * * @return array An array of urls */ - private function getSrcsetUrls(Crawler $imagesCrawler) + private static function getSrcsetUrls(Crawler $imagesCrawler) { $urls = []; $iterator = $imagesCrawler -- 2.41.0