From c15bb5ad72b1a9692682cf88318a70b7e650d34a Mon Sep 17 00:00:00 2001 From: Simounet Date: Thu, 31 May 2018 23:42:06 +0200 Subject: Fix srcset attribute on images downloaded --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 36 ++++++++++++++++++++-- .../CoreBundle/Helper/DownloadImagesTest.php | 21 +++++++++++++ 2 files changed, 54 insertions(+), 3 deletions(-) diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 252ba57c..9c9452dd 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -42,14 +42,17 @@ class DownloadImages public function processHtml($entryId, $html, $url) { $crawler = new Crawler($html); - $result = $crawler - ->filterXpath('//img') + $imagesCrawler = $crawler + ->filterXpath('//img'); + $imagesUrls = $imagesCrawler ->extract(['src']); + $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); + $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); $relativePath = $this->getRelativePath($entryId); // download and save the image to the folder - foreach ($result as $image) { + foreach ($imagesUrls as $image) { $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); if (false === $imagePath) { @@ -171,6 +174,33 @@ class DownloadImages @rmdir($folderPath); } + /** + * Get images urls from the srcset image attribute. + * + * @param Crawler $imagesCrawler + * + * @return array An array of urls + */ + protected function getSrcsetUrls(Crawler $imagesCrawler) + { + $urls = []; + $iterator = $imagesCrawler + ->getIterator(); + while ($iterator->valid()) { + $srcsetAttribute = $iterator->current()->getAttribute('srcset'); + if ('' !== $srcsetAttribute) { + $srcset = array_map('trim', explode(',', $srcsetAttribute)); + $srcsetUrls = array_map(function ($src) { + return explode(' ', $src)[0]; + }, $srcset); + $urls = array_merge($srcsetUrls, $urls); + } + $iterator->next(); + } + + return $urls; + } + /** * Setup base folder where all images are going to be saved. */ diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index 0e1d296b..51ab1bcd 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php @@ -183,4 +183,25 @@ class DownloadImagesTest extends TestCase $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); } + + public function testProcessImageWithSrcset() + { + $client = new Client(); + + $mock = new Mock([ + new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), + new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), + new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), + ]); + + $client->getEmitter()->attach($mock); + + $logHandler = new TestHandler(); + $logger = new Logger('test', [$logHandler]); + + $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); + $res = $download->processHtml(123, '

', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/'); + + $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); + } } -- cgit v1.2.3