diff options
author | Simounet <contact@simounet.net> | 2018-07-11 19:57:34 +0200 |
---|---|---|
committer | Simounet <contact@simounet.net> | 2018-07-12 14:29:30 +0200 |
commit | e6f12c073416eba6fc620f0ff38a343bda428280 (patch) | |
tree | e267f9328fd49b7c4707c1c272c772a492165397 /src | |
parent | 0f36a88e16c843827be1074df1c36ea534b31394 (diff) | |
download | wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.tar.gz wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.tar.zst wallabag-e6f12c073416eba6fc620f0ff38a343bda428280.zip |
More robust srcset image attribute handling
Linked to HTMLawed PR https://github.com/kesar/HTMLawed/pull/17
Diffstat (limited to 'src')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 11 |
1 file changed, 8 insertions, 3 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index f91cdf5e..487a3a23 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -185,7 +185,7 @@ class DownloadImages | |||
185 | * | 185 | * |
186 | * @return array An array of urls | 186 | * @return array An array of urls |
187 | */ | 187 | */ |
188 | protected function getSrcsetUrls(Crawler $imagesCrawler) | 188 | private function getSrcsetUrls(Crawler $imagesCrawler) |
189 | { | 189 | { |
190 | $urls = []; | 190 | $urls = []; |
191 | $iterator = $imagesCrawler | 191 | $iterator = $imagesCrawler |
@@ -193,9 +193,14 @@ class DownloadImages | |||
193 | while ($iterator->valid()) { | 193 | while ($iterator->valid()) { |
194 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); | 194 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); |
195 | if ('' !== $srcsetAttribute) { | 195 | if ('' !== $srcsetAttribute) { |
196 | $srcset = array_map('trim', explode(',', $srcsetAttribute)); | 196 | // Couldn't start with " OR ' OR a white space |
197 | // Could be one or more white space | ||
198 | // Must be one or more digits followed by w OR x | ||
199 | $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; | ||
200 | preg_match_all($pattern, $srcsetAttribute, $matches); | ||
201 | $srcset = call_user_func_array('array_merge', $matches); | ||
197 | $srcsetUrls = array_map(function ($src) { | 202 | $srcsetUrls = array_map(function ($src) { |
198 | return explode(' ', $src)[0]; | 203 | return trim(explode(' ', $src, 2)[0]); |
199 | }, $srcset); | 204 | }, $srcset); |
200 | $urls = array_merge($srcsetUrls, $urls); | 205 | $urls = array_merge($srcsetUrls, $urls); |
201 | } | 206 | } |