git.immae.eu Git - github/wallabag/wallabag.git/commitdiff
More robust srcset image attribute handling 3690/head
author Simounet <contact@simounet.net>
Wed, 11 Jul 2018 17:57:34 +0000 (19:57 +0200)
committer Simounet <contact@simounet.net>
Thu, 12 Jul 2018 12:29:30 +0000 (14:29 +0200)
Linked to HTMLawed PR https://github.com/kesar/HTMLawed/pull/17

src/Wallabag/CoreBundle/Helper/DownloadImages.php
tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php

index f91cdf5ebe005f63ea523271212bc035afb8a20e..487a3a238eff43ff4316f1897f9fb65deb5310e2 100644 (file)
@@ -185,7 +185,7 @@ class DownloadImages
      *
      * @return array An array of urls
      */
-    protected function getSrcsetUrls(Crawler $imagesCrawler)
+    private function getSrcsetUrls(Crawler $imagesCrawler)
     {
         $urls = [];
         $iterator = $imagesCrawler
@@ -193,9 +193,14 @@ class DownloadImages
         while ($iterator->valid()) {
             $srcsetAttribute = $iterator->current()->getAttribute('srcset');
             if ('' !== $srcsetAttribute) {
-                $srcset = array_map('trim', explode(',', $srcsetAttribute));
+                // The URL part is one or more characters other than ", ' or whitespace
+                // It may be followed by zero or more whitespace characters
+                // Then must come one or more digits followed by w OR x (the descriptor)
+                $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
+                preg_match_all($pattern, $srcsetAttribute, $matches);
+                $srcset = call_user_func_array('array_merge', $matches);
                 $srcsetUrls = array_map(function ($src) {
-                    return explode(' ', $src)[0];
+                    return trim(explode(' ', $src, 2)[0]);
                 }, $srcset);
                 $urls = array_merge($srcsetUrls, $urls);
             }
index faa803fa0b4c4ef5a81235518fb34c8413390322..cda5f84312ca081670f1512b5798b7b7421626b6 100644 (file)
@@ -205,6 +205,31 @@ class DownloadImagesTest extends TestCase
         $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
     }
 
+    public function testProcessImageWithTrickySrcset()
+    {
+        $client = new Client();
+
+        $mock = new Mock([
+            new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
+            new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
+            new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
+        ]);
+
+        $client->getEmitter()->attach($mock);
+
+        $logHandler = new TestHandler();
+        $logger = new Logger('test', [$logHandler]);
+
+        $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
+        $res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 )
+       (min-width: 1251px) calc( (100vw - 530px) / 2 )
+       (min-width: 1086px) calc(100vw - 480px)
+       (min-width: 626px)  calc(100vw - 335px)
+                           calc(100vw - 30px)" alt="" /></figure>', 'https://css-tricks.com/the-critical-request/');
+
+        $this->assertNotContains('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
+    }
+
     public function testProcessImageWithNullPath()
     {
         $client = new Client();