diff options
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 20 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 29 |
2 files changed, 37 insertions, 12 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index bc257ffb..ca01dec8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry; | |||
12 | use Wallabag\CoreBundle\Tools\Utils; | 12 | use Wallabag\CoreBundle\Tools\Utils; |
13 | 13 | ||
14 | /** | 14 | /** |
15 | * This kind of proxy class take care of getting the content from an url | 15 | * This kind of proxy class takes care of getting the content from an url |
16 | * and update the entry with what it found. | 16 | * and updates the entry with what it found. |
17 | */ | 17 | */ |
18 | class ContentProxy | 18 | class ContentProxy |
19 | { | 19 | { |
@@ -289,13 +289,25 @@ class ContentProxy | |||
289 | $this->updateLanguage($entry, $content['language']); | 289 | $this->updateLanguage($entry, $content['language']); |
290 | } | 290 | } |
291 | 291 | ||
292 | $previewPictureUrl = ''; | ||
292 | if (!empty($content['open_graph']['og_image'])) { | 293 | if (!empty($content['open_graph']['og_image'])) { |
293 | $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); | 294 | $previewPictureUrl = $content['open_graph']['og_image']; |
294 | } | 295 | } |
295 | 296 | ||
296 | // if content is an image, define it as a preview too | 297 | // if content is an image, define it as a preview too |
297 | if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | 298 | if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { |
298 | $this->updatePreviewPicture($entry, $content['url']); | 299 | $previewPictureUrl = $content['url']; |
300 | } elseif (empty($previewPictureUrl)) { | ||
301 | $this->logger->debug('Extracting images from content to provide a default preview picture'); | ||
302 | $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']); | ||
303 | $this->logger->debug(\count($imagesUrls) . ' pictures found'); | ||
304 | if (!empty($imagesUrls)) { | ||
305 | $previewPictureUrl = $imagesUrls[0]; | ||
306 | } | ||
307 | } | ||
308 | |||
309 | if (!empty($previewPictureUrl)) { | ||
310 | $this->updatePreviewPicture($entry, $previewPictureUrl); | ||
299 | } | 311 | } |
300 | 312 | ||
301 | if (!empty($content['content_type'])) { | 313 | if (!empty($content['content_type'])) { |
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 9a7e9828..c1645e45 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -31,23 +31,36 @@ class DownloadImages | |||
31 | } | 31 | } |
32 | 32 | ||
33 | /** | 33 | /** |
34 | * Process the html and extract image from it, save them to local and return the updated html. | 34 | * Process the html and extract images URLs from it. |
35 | * | 35 | * |
36 | * @param int $entryId ID of the entry | ||
37 | * @param string $html | 36 | * @param string $html |
38 | * @param string $url Used as a base path for relative image and folder | ||
39 | * | 37 | * |
40 | * @return string | 38 | * @return string[] |
41 | */ | 39 | */ |
42 | public function processHtml($entryId, $html, $url) | 40 | public static function extractImagesUrlsFromHtml($html) |
43 | { | 41 | { |
44 | $crawler = new Crawler($html); | 42 | $crawler = new Crawler($html); |
45 | $imagesCrawler = $crawler | 43 | $imagesCrawler = $crawler |
46 | ->filterXpath('//img'); | 44 | ->filterXpath('//img'); |
47 | $imagesUrls = $imagesCrawler | 45 | $imagesUrls = $imagesCrawler |
48 | ->extract(['src']); | 46 | ->extract(['src']); |
49 | $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); | 47 | $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); |
50 | $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | 48 | |
49 | return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | ||
50 | } | ||
51 | |||
52 | /** | ||
53 | * Process the html and extract image from it, save them to local and return the updated html. | ||
54 | * | ||
55 | * @param int $entryId ID of the entry | ||
56 | * @param string $html | ||
57 | * @param string $url Used as a base path for relative image and folder | ||
58 | * | ||
59 | * @return string | ||
60 | */ | ||
61 | public function processHtml($entryId, $html, $url) | ||
62 | { | ||
63 | $imagesUrls = self::extractImagesUrlsFromHtml($html); | ||
51 | 64 | ||
52 | $relativePath = $this->getRelativePath($entryId); | 65 | $relativePath = $this->getRelativePath($entryId); |
53 | 66 | ||
@@ -199,7 +212,7 @@ class DownloadImages | |||
199 | * | 212 | * |
200 | * @return array An array of urls | 213 | * @return array An array of urls |
201 | */ | 214 | */ |
202 | private function getSrcsetUrls(Crawler $imagesCrawler) | 215 | private static function getSrcsetUrls(Crawler $imagesCrawler) |
203 | { | 216 | { |
204 | $urls = []; | 217 | $urls = []; |
205 | $iterator = $imagesCrawler | 218 | $iterator = $imagesCrawler |