aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php20
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php29
2 files changed, 37 insertions, 12 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index bc257ffb..ca01dec8 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -289,13 +289,25 @@ class ContentProxy
289 $this->updateLanguage($entry, $content['language']); 289 $this->updateLanguage($entry, $content['language']);
290 } 290 }
291 291
292 $previewPictureUrl = '';
292 if (!empty($content['open_graph']['og_image'])) { 293 if (!empty($content['open_graph']['og_image'])) {
293 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 294 $previewPictureUrl = $content['open_graph']['og_image'];
294 } 295 }
295 296
296 // if content is an image, define it as a preview too 297 // if content is an image, define it as a preview too
297 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 298 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
298 $this->updatePreviewPicture($entry, $content['url']); 299 $previewPictureUrl = $content['url'];
300 } elseif (empty($previewPictureUrl)) {
301 $this->logger->debug('Extracting images from content to provide a default preview picture');
302 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
303 $this->logger->debug(\count($imagesUrls) . ' pictures found');
304 if (!empty($imagesUrls)) {
305 $previewPictureUrl = $imagesUrls[0];
306 }
307 }
308
309 if (!empty($previewPictureUrl)) {
310 $this->updatePreviewPicture($entry, $previewPictureUrl);
299 } 311 }
300 312
301 if (!empty($content['content_type'])) { 313 if (!empty($content['content_type'])) {
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 9a7e9828..c1645e45 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -31,23 +31,36 @@ class DownloadImages
31 } 31 }
32 32
33 /** 33 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 34 * Process the html and extract images URLs from it.
35 * 35 *
36 * @param int $entryId ID of the entry
37 * @param string $html 36 * @param string $html
38 * @param string $url Used as a base path for relative image and folder
39 * 37 *
40 * @return string 38 * @return string[]
41 */ 39 */
42 public function processHtml($entryId, $html, $url) 40 public static function extractImagesUrlsFromHtml($html)
43 { 41 {
44 $crawler = new Crawler($html); 42 $crawler = new Crawler($html);
45 $imagesCrawler = $crawler 43 $imagesCrawler = $crawler
46 ->filterXpath('//img'); 44 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler 45 $imagesUrls = $imagesCrawler
48 ->extract(['src']); 46 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); 47 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 48
49 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
50 }
51
52 /**
53 * Process the html and extract image from it, save them to local and return the updated html.
54 *
55 * @param int $entryId ID of the entry
56 * @param string $html
57 * @param string $url Used as a base path for relative image and folder
58 *
59 * @return string
60 */
61 public function processHtml($entryId, $html, $url)
62 {
63 $imagesUrls = self::extractImagesUrlsFromHtml($html);
51 64
52 $relativePath = $this->getRelativePath($entryId); 65 $relativePath = $this->getRelativePath($entryId);
53 66
@@ -199,7 +212,7 @@ class DownloadImages
199 * 212 *
200 * @return array An array of urls 213 * @return array An array of urls
201 */ 214 */
202 private function getSrcsetUrls(Crawler $imagesCrawler) 215 private static function getSrcsetUrls(Crawler $imagesCrawler)
203 { 216 {
204 $urls = []; 217 $urls = [];
205 $iterator = $imagesCrawler 218 $iterator = $imagesCrawler