diff options
author | Jérémy Benoist <j0k3r@users.noreply.github.com> | 2018-06-01 14:53:04 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2018-06-01 14:53:04 +0200 |
commit | 8fe73e076c75f185e44ee992e6f2ec6157aecf8e (patch) | |
tree | 778558ec77177019596ccd2c356d24b1b6074936 | |
parent | 9707ac46613eea0c536d87f4c141d567fc2ef7d0 (diff) | |
parent | c15bb5ad72b1a9692682cf88318a70b7e650d34a (diff) | |
download | wallabag-8fe73e076c75f185e44ee992e6f2ec6157aecf8e.tar.gz wallabag-8fe73e076c75f185e44ee992e6f2ec6157aecf8e.tar.zst wallabag-8fe73e076c75f185e44ee992e6f2ec6157aecf8e.zip |
Merge pull request #3661 from Simounet/fix/2981-srcset-image
Fix srcset attribute on images downloaded
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 36 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | 21 |
2 files changed, 54 insertions, 3 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 252ba57c..9c9452dd 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -42,14 +42,17 @@ class DownloadImages | |||
42 | public function processHtml($entryId, $html, $url) | 42 | public function processHtml($entryId, $html, $url) |
43 | { | 43 | { |
44 | $crawler = new Crawler($html); | 44 | $crawler = new Crawler($html); |
45 | $result = $crawler | 45 | $imagesCrawler = $crawler |
46 | ->filterXpath('//img') | 46 | ->filterXpath('//img'); |
47 | $imagesUrls = $imagesCrawler | ||
47 | ->extract(['src']); | 48 | ->extract(['src']); |
49 | $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); | ||
50 | $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | ||
48 | 51 | ||
49 | $relativePath = $this->getRelativePath($entryId); | 52 | $relativePath = $this->getRelativePath($entryId); |
50 | 53 | ||
51 | // download and save the image to the folder | 54 | // download and save the image to the folder |
52 | foreach ($result as $image) { | 55 | foreach ($imagesUrls as $image) { |
53 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); | 56 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); |
54 | 57 | ||
55 | if (false === $imagePath) { | 58 | if (false === $imagePath) { |
@@ -172,6 +175,33 @@ class DownloadImages | |||
172 | } | 175 | } |
173 | 176 | ||
174 | /** | 177 | /** |
178 | * Get images urls from the srcset image attribute. | ||
179 | * | ||
180 | * @param Crawler $imagesCrawler | ||
181 | * | ||
182 | * @return array An array of urls | ||
183 | */ | ||
184 | protected function getSrcsetUrls(Crawler $imagesCrawler) | ||
185 | { | ||
186 | $urls = []; | ||
187 | $iterator = $imagesCrawler | ||
188 | ->getIterator(); | ||
189 | while ($iterator->valid()) { | ||
190 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); | ||
191 | if ('' !== $srcsetAttribute) { | ||
192 | $srcset = array_map('trim', explode(',', $srcsetAttribute)); | ||
193 | $srcsetUrls = array_map(function ($src) { | ||
194 | return explode(' ', $src)[0]; | ||
195 | }, $srcset); | ||
196 | $urls = array_merge($srcsetUrls, $urls); | ||
197 | } | ||
198 | $iterator->next(); | ||
199 | } | ||
200 | |||
201 | return $urls; | ||
202 | } | ||
203 | |||
204 | /** | ||
175 | * Setup base folder where all images are going to be saved. | 205 | * Setup base folder where all images are going to be saved. |
176 | */ | 206 | */ |
177 | private function setFolder() | 207 | private function setFolder() |
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index 0e1d296b..51ab1bcd 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | |||
@@ -183,4 +183,25 @@ class DownloadImagesTest extends TestCase | |||
183 | $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); | 183 | $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); |
184 | $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); | 184 | $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); |
185 | } | 185 | } |
186 | |||
187 | public function testProcessImageWithSrcset() | ||
188 | { | ||
189 | $client = new Client(); | ||
190 | |||
191 | $mock = new Mock([ | ||
192 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
193 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
194 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
195 | ]); | ||
196 | |||
197 | $client->getEmitter()->attach($mock); | ||
198 | |||
199 | $logHandler = new TestHandler(); | ||
200 | $logger = new Logger('test', [$logHandler]); | ||
201 | |||
202 | $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); | ||
203 | $res = $download->processHtml(123, '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/'); | ||
204 | |||
205 | $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); | ||
206 | } | ||
186 | } | 207 | } |