diff options
author | Simounet <contact@simounet.net> | 2018-07-11 19:57:34 +0200 |
---|---|---|
committer | Simounet <contact@simounet.net> | 2018-07-12 14:29:30 +0200 |
commit | e6f12c073416eba6fc620f0ff38a343bda428280 (patch) | |
tree | e267f9328fd49b7c4707c1c272c772a492165397 | |
parent | 0f36a88e16c843827be1074df1c36ea534b31394 (diff) | |
download | wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.tar.gz wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.tar.zst wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.zip |
More robust srcset image attribute handling
Linked to HTMLawed PR https://github.com/kesar/HTMLawed/pull/17
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 11 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | 25 |
2 files changed, 33 insertions, 3 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index f91cdf5e..487a3a23 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -185,7 +185,7 @@ class DownloadImages | |||
185 | * | 185 | * |
186 | * @return array An array of urls | 186 | * @return array An array of urls |
187 | */ | 187 | */ |
188 | protected function getSrcsetUrls(Crawler $imagesCrawler) | 188 | private function getSrcsetUrls(Crawler $imagesCrawler) |
189 | { | 189 | { |
190 | $urls = []; | 190 | $urls = []; |
191 | $iterator = $imagesCrawler | 191 | $iterator = $imagesCrawler |
@@ -193,9 +193,14 @@ class DownloadImages | |||
193 | while ($iterator->valid()) { | 193 | while ($iterator->valid()) { |
194 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); | 194 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); |
195 | if ('' !== $srcsetAttribute) { | 195 | if ('' !== $srcsetAttribute) { |
196 | $srcset = array_map('trim', explode(',', $srcsetAttribute)); | 196 | // Couldn't start with " OR ' OR a white space |
197 | // Could be one or more white space | ||
198 | // Must be one or more digits followed by w OR x | ||
199 | $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; | ||
200 | preg_match_all($pattern, $srcsetAttribute, $matches); | ||
201 | $srcset = call_user_func_array('array_merge', $matches); | ||
197 | $srcsetUrls = array_map(function ($src) { | 202 | $srcsetUrls = array_map(function ($src) { |
198 | return explode(' ', $src)[0]; | 203 | return trim(explode(' ', $src, 2)[0]); |
199 | }, $srcset); | 204 | }, $srcset); |
200 | $urls = array_merge($srcsetUrls, $urls); | 205 | $urls = array_merge($srcsetUrls, $urls); |
201 | } | 206 | } |
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index faa803fa..cda5f843 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | |||
@@ -205,6 +205,31 @@ class DownloadImagesTest extends TestCase | |||
205 | $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); | 205 | $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); |
206 | } | 206 | } |
207 | 207 | ||
208 | public function testProcessImageWithTrickySrcset() | ||
209 | { | ||
210 | $client = new Client(); | ||
211 | |||
212 | $mock = new Mock([ | ||
213 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
214 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
215 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
216 | ]); | ||
217 | |||
218 | $client->getEmitter()->attach($mock); | ||
219 | |||
220 | $logHandler = new TestHandler(); | ||
221 | $logger = new Logger('test', [$logHandler]); | ||
222 | |||
223 | $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); | ||
224 | $res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 ) | ||
225 | (min-width: 1251px) calc( (100vw - 530px) / 2 ) | ||
226 | (min-width: 1086px) calc(100vw - 480px) | ||
227 | (min-width: 626px) calc(100vw - 335px) | ||
228 | calc(100vw - 30px)" alt="" /></figure>', 'https://css-tricks.com/the-critical-request/'); | ||
229 | |||
230 | $this->assertNotContains('f_auto,q_auto', $res, 'Image srcset attribute were not replaced'); | ||
231 | } | ||
232 | |||
208 | public function testProcessImageWithNullPath() | 233 | public function testProcessImageWithNullPath() |
209 | { | 234 | { |
210 | $client = new Client(); | 235 | $client = new Client(); |