diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 31 |
1 files changed, 15 insertions, 16 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 7a39a2e4..1d361d6d 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -2,6 +2,8 @@ | |||
2 | 2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | 3 | namespace Wallabag\CoreBundle\Helper; |
4 | 4 | ||
5 | use GuzzleHttp\Psr7\Uri; | ||
6 | use GuzzleHttp\Psr7\UriResolver; | ||
5 | use Http\Client\Common\HttpMethodsClient; | 7 | use Http\Client\Common\HttpMethodsClient; |
6 | use Http\Client\Common\Plugin\ErrorPlugin; | 8 | use Http\Client\Common\Plugin\ErrorPlugin; |
7 | use Http\Client\Common\PluginClient; | 9 | use Http\Client\Common\PluginClient; |
@@ -45,10 +47,8 @@ class DownloadImages | |||
45 | public static function extractImagesUrlsFromHtml($html) | 47 | public static function extractImagesUrlsFromHtml($html) |
46 | { | 48 | { |
47 | $crawler = new Crawler($html); | 49 | $crawler = new Crawler($html); |
48 | $imagesCrawler = $crawler | 50 | $imagesCrawler = $crawler->filterXpath('//img'); |
49 | ->filterXpath('//img'); | 51 | $imagesUrls = $imagesCrawler->extract(['src']); |
50 | $imagesUrls = $imagesCrawler | ||
51 | ->extract(['src']); | ||
52 | $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); | 52 | $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); |
53 | 53 | ||
54 | return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | 54 | return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); |
@@ -220,22 +220,25 @@ class DownloadImages | |||
220 | private static function getSrcsetUrls(Crawler $imagesCrawler) | 220 | private static function getSrcsetUrls(Crawler $imagesCrawler) |
221 | { | 221 | { |
222 | $urls = []; | 222 | $urls = []; |
223 | $iterator = $imagesCrawler | 223 | $iterator = $imagesCrawler->getIterator(); |
224 | ->getIterator(); | 224 | |
225 | while ($iterator->valid()) { | 225 | while ($iterator->valid()) { |
226 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); | 226 | $srcsetAttribute = $iterator->current()->getAttribute('srcset'); |
227 | |||
227 | if ('' !== $srcsetAttribute) { | 228 | if ('' !== $srcsetAttribute) { |
228 | // Couldn't start with " OR ' OR a white space | 229 | // Couldn't start with " OR ' OR a white space |
229 | // Could be one or more white space | 230 | // Could be one or more white space |
230 | // Must be one or more digits followed by w OR x | 231 | // Must be one or more digits followed by w OR x |
231 | $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; | 232 | $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; |
232 | preg_match_all($pattern, $srcsetAttribute, $matches); | 233 | preg_match_all($pattern, $srcsetAttribute, $matches); |
234 | |||
233 | $srcset = \call_user_func_array('array_merge', $matches); | 235 | $srcset = \call_user_func_array('array_merge', $matches); |
234 | $srcsetUrls = array_map(function ($src) { | 236 | $srcsetUrls = array_map(function ($src) { |
235 | return trim(explode(' ', $src, 2)[0]); | 237 | return trim(explode(' ', $src, 2)[0]); |
236 | }, $srcset); | 238 | }, $srcset); |
237 | $urls = array_merge($srcsetUrls, $urls); | 239 | $urls = array_merge($srcsetUrls, $urls); |
238 | } | 240 | } |
241 | |||
239 | $iterator->next(); | 242 | $iterator->next(); |
240 | } | 243 | } |
241 | 244 | ||
@@ -292,20 +295,16 @@ class DownloadImages | |||
292 | return $url; | 295 | return $url; |
293 | } | 296 | } |
294 | 297 | ||
295 | $base = new \SimplePie_IRI($base); | 298 | $base = new Uri($base); |
296 | 299 | ||
297 | // remove '//' in URL path (causes URLs not to resolve properly) | 300 | // in case the url has no scheme & host |
298 | if (isset($base->ipath)) { | 301 | if ('' === $base->getAuthority() || '' === $base->getScheme()) { |
299 | $base->ipath = preg_replace('!//+!', '/', $base->ipath); | 302 | $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); |
300 | } | ||
301 | 303 | ||
302 | if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { | 304 | return false; |
303 | return $absolute->get_uri(); | ||
304 | } | 305 | } |
305 | 306 | ||
306 | $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); | 307 | return (string) UriResolver::resolve($base, new Uri($url)); |
307 | |||
308 | return false; | ||
309 | } | 308 | } |
310 | 309 | ||
311 | /** | 310 | /** |