aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJérémy Benoist <j0k3r@users.noreply.github.com>2018-06-01 14:53:04 +0200
committerGitHub <noreply@github.com>2018-06-01 14:53:04 +0200
commit8fe73e076c75f185e44ee992e6f2ec6157aecf8e (patch)
tree778558ec77177019596ccd2c356d24b1b6074936
parent9707ac46613eea0c536d87f4c141d567fc2ef7d0 (diff)
parentc15bb5ad72b1a9692682cf88318a70b7e650d34a (diff)
downloadwallabag-8fe73e076c75f185e44ee992e6f2ec6157aecf8e.tar.gz
wallabag-8fe73e076c75f185e44ee992e6f2ec6157aecf8e.tar.zst
wallabag-8fe73e076c75f185e44ee992e6f2ec6157aecf8e.zip
Merge pull request #3661 from Simounet/fix/2981-srcset-image
Fix srcset attribute on images downloaded
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php36
-rw-r--r--tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php21
2 files changed, 54 insertions, 3 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 252ba57c..9c9452dd 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -42,14 +42,17 @@ class DownloadImages
42 public function processHtml($entryId, $html, $url) 42 public function processHtml($entryId, $html, $url)
43 { 43 {
44 $crawler = new Crawler($html); 44 $crawler = new Crawler($html);
45 $result = $crawler 45 $imagesCrawler = $crawler
46 ->filterXpath('//img') 46 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler
47 ->extract(['src']); 48 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
48 51
49 $relativePath = $this->getRelativePath($entryId); 52 $relativePath = $this->getRelativePath($entryId);
50 53
51 // download and save the image to the folder 54 // download and save the image to the folder
52 foreach ($result as $image) { 55 foreach ($imagesUrls as $image) {
53 $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); 56 $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
54 57
55 if (false === $imagePath) { 58 if (false === $imagePath) {
@@ -172,6 +175,33 @@ class DownloadImages
172 } 175 }
173 176
174 /** 177 /**
178 * Get images urls from the srcset image attribute.
179 *
180 * @param Crawler $imagesCrawler
181 *
182 * @return array An array of urls
183 */
184 protected function getSrcsetUrls(Crawler $imagesCrawler)
185 {
186 $urls = [];
187 $iterator = $imagesCrawler
188 ->getIterator();
189 while ($iterator->valid()) {
190 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
191 if ('' !== $srcsetAttribute) {
192 $srcset = array_map('trim', explode(',', $srcsetAttribute));
193 $srcsetUrls = array_map(function ($src) {
194 return explode(' ', $src)[0];
195 }, $srcset);
196 $urls = array_merge($srcsetUrls, $urls);
197 }
198 $iterator->next();
199 }
200
201 return $urls;
202 }
203
204 /**
175 * Setup base folder where all images are going to be saved. 205 * Setup base folder where all images are going to be saved.
176 */ 206 */
177 private function setFolder() 207 private function setFolder()
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
index 0e1d296b..51ab1bcd 100644
--- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
@@ -183,4 +183,25 @@ class DownloadImagesTest extends TestCase
183 $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); 183 $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
184 $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); 184 $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
185 } 185 }
186
187 public function testProcessImageWithSrcset()
188 {
189 $client = new Client();
190
191 $mock = new Mock([
192 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
193 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
194 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
195 ]);
196
197 $client->getEmitter()->attach($mock);
198
199 $logHandler = new TestHandler();
200 $logger = new Logger('test', [$logHandler]);
201
202 $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
203 $res = $download->processHtml(123, '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/');
204
205 $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
206 }
186} 207}