about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
author Simounet <contact@simounet.net> 2018-07-11 19:57:34 +0200
committer Simounet <contact@simounet.net> 2018-07-12 14:29:30 +0200
commit e6f12c073416eba6fc620f0ff38a343bda428280 (patch)
tree e267f9328fd49b7c4707c1c272c772a492165397
parent 0f36a88e16c843827be1074df1c36ea534b31394 (diff)
download wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.tar.gz
wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.tar.zst
wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.zip
More robust srcset image attribute handling
Linked to HTMLawed PR https://github.com/kesar/HTMLawed/pull/17
-rw-r--r-- src/Wallabag/CoreBundle/Helper/DownloadImages.php 11
-rw-r--r-- tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php 25
2 files changed, 33 insertions, 3 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index f91cdf5e..487a3a23 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -185,7 +185,7 @@ class DownloadImages
185 * 185 *
186 * @return array An array of urls 186 * @return array An array of urls
187 */ 187 */
188 protected function getSrcsetUrls(Crawler $imagesCrawler) 188 private function getSrcsetUrls(Crawler $imagesCrawler)
189 { 189 {
190 $urls = []; 190 $urls = [];
191 $iterator = $imagesCrawler 191 $iterator = $imagesCrawler
@@ -193,9 +193,14 @@ class DownloadImages
193 while ($iterator->valid()) { 193 while ($iterator->valid()) {
194 $srcsetAttribute = $iterator->current()->getAttribute('srcset'); 194 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
195 if ('' !== $srcsetAttribute) { 195 if ('' !== $srcsetAttribute) {
196 $srcset = array_map('trim', explode(',', $srcsetAttribute)); 196 // Couldn't start with " OR ' OR a white space
197 // Could be one or more white space
198 // Must be one or more digits followed by w OR x
199 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
200 preg_match_all($pattern, $srcsetAttribute, $matches);
201 $srcset = call_user_func_array('array_merge', $matches);
197 $srcsetUrls = array_map(function ($src) { 202 $srcsetUrls = array_map(function ($src) {
198 return explode(' ', $src)[0]; 203 return trim(explode(' ', $src, 2)[0]);
199 }, $srcset); 204 }, $srcset);
200 $urls = array_merge($srcsetUrls, $urls); 205 $urls = array_merge($srcsetUrls, $urls);
201 } 206 }
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
index faa803fa..cda5f843 100644
--- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
@@ -205,6 +205,31 @@ class DownloadImagesTest extends TestCase
205 $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); 205 $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
206 } 206 }
207 207
208 public function testProcessImageWithTrickySrcset()
209 {
210 $client = new Client();
211
212 $mock = new Mock([
213 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
214 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
215 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
216 ]);
217
218 $client->getEmitter()->attach($mock);
219
220 $logHandler = new TestHandler();
221 $logger = new Logger('test', [$logHandler]);
222
223 $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
224 $res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 )
225 (min-width: 1251px) calc( (100vw - 530px) / 2 )
226 (min-width: 1086px) calc(100vw - 480px)
227 (min-width: 626px) calc(100vw - 335px)
228 calc(100vw - 30px)" alt="" /></figure>', 'https://css-tricks.com/the-critical-request/');
229
230 $this->assertNotContains('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
231 }
232
208 public function testProcessImageWithNullPath() 233 public function testProcessImageWithNullPath()
209 { 234 {
210 $client = new Client(); 235 $client = new Client();