about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- composer.json 1
-rw-r--r-- composer.lock 2
-rw-r--r-- src/Wallabag/CoreBundle/Helper/DownloadImages.php 31
3 files changed, 16 insertions, 18 deletions
diff --git a/composer.json b/composer.json
index 55e7f765..b0c17385 100644
--- a/composer.json
+++ b/composer.json
@@ -63,7 +63,6 @@
63 "nelmio/api-doc-bundle": "^2.13.2", 63 "nelmio/api-doc-bundle": "^2.13.2",
64 "mgargano/simplehtmldom": "~1.5", 64 "mgargano/simplehtmldom": "~1.5",
65 "wallabag/tcpdf": "^6.2.26", 65 "wallabag/tcpdf": "^6.2.26",
66 "simplepie/simplepie": "~1.5",
67 "willdurand/hateoas-bundle": "~1.3", 66 "willdurand/hateoas-bundle": "~1.3",
68 "liip/theme-bundle": "^1.4.6", 67 "liip/theme-bundle": "^1.4.6",
69 "lexik/form-filter-bundle": "^5.0.4", 68 "lexik/form-filter-bundle": "^5.0.4",
diff --git a/composer.lock b/composer.lock
index cbb9265d..a2a48c1e 100644
--- a/composer.lock
+++ b/composer.lock
@@ -4,7 +4,7 @@
4 "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", 4 "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies",
5 "This file is @generated automatically" 5 "This file is @generated automatically"
6 ], 6 ],
7 "content-hash": "883f44eda34a48c8ddabc3294498d996", 7 "content-hash": "c42e1b50f4a2b8a59ca06c5ccb24e6a3",
8 "packages": [ 8 "packages": [
9 { 9 {
10 "name": "bdunogier/guzzle-site-authenticator", 10 "name": "bdunogier/guzzle-site-authenticator",
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 7a39a2e4..1d361d6d 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,6 +2,8 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Psr7\Uri;
6use GuzzleHttp\Psr7\UriResolver;
5use Http\Client\Common\HttpMethodsClient; 7use Http\Client\Common\HttpMethodsClient;
6use Http\Client\Common\Plugin\ErrorPlugin; 8use Http\Client\Common\Plugin\ErrorPlugin;
7use Http\Client\Common\PluginClient; 9use Http\Client\Common\PluginClient;
@@ -45,10 +47,8 @@ class DownloadImages
45 public static function extractImagesUrlsFromHtml($html) 47 public static function extractImagesUrlsFromHtml($html)
46 { 48 {
47 $crawler = new Crawler($html); 49 $crawler = new Crawler($html);
48 $imagesCrawler = $crawler 50 $imagesCrawler = $crawler->filterXpath('//img');
49 ->filterXpath('//img'); 51 $imagesUrls = $imagesCrawler->extract(['src']);
50 $imagesUrls = $imagesCrawler
51 ->extract(['src']);
52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); 52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
53 53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
@@ -220,22 +220,25 @@ class DownloadImages
220 private static function getSrcsetUrls(Crawler $imagesCrawler) 220 private static function getSrcsetUrls(Crawler $imagesCrawler)
221 { 221 {
222 $urls = []; 222 $urls = [];
223 $iterator = $imagesCrawler 223 $iterator = $imagesCrawler->getIterator();
224 ->getIterator(); 224
225 while ($iterator->valid()) { 225 while ($iterator->valid()) {
226 $srcsetAttribute = $iterator->current()->getAttribute('srcset'); 226 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
227
227 if ('' !== $srcsetAttribute) { 228 if ('' !== $srcsetAttribute) {
228 // Couldn't start with " OR ' OR a white space 229 // Couldn't start with " OR ' OR a white space
229 // Could be one or more white space 230 // Could be one or more white space
230 // Must be one or more digits followed by w OR x 231 // Must be one or more digits followed by w OR x
231 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; 232 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
232 preg_match_all($pattern, $srcsetAttribute, $matches); 233 preg_match_all($pattern, $srcsetAttribute, $matches);
234
233 $srcset = \call_user_func_array('array_merge', $matches); 235 $srcset = \call_user_func_array('array_merge', $matches);
234 $srcsetUrls = array_map(function ($src) { 236 $srcsetUrls = array_map(function ($src) {
235 return trim(explode(' ', $src, 2)[0]); 237 return trim(explode(' ', $src, 2)[0]);
236 }, $srcset); 238 }, $srcset);
237 $urls = array_merge($srcsetUrls, $urls); 239 $urls = array_merge($srcsetUrls, $urls);
238 } 240 }
241
239 $iterator->next(); 242 $iterator->next();
240 } 243 }
241 244
@@ -292,20 +295,16 @@ class DownloadImages
292 return $url; 295 return $url;
293 } 296 }
294 297
295 $base = new \SimplePie_IRI($base); 298 $base = new Uri($base);
296 299
297 // remove '//' in URL path (causes URLs not to resolve properly) 300 // in case the url has no scheme & host
298 if (isset($base->ipath)) { 301 if ('' === $base->getAuthority() || '' === $base->getScheme()) {
299 $base->ipath = preg_replace('!//+!', '/', $base->ipath); 302 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);
300 }
301 303
302 if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { 304 return false;
303 return $absolute->get_uri();
304 } 305 }
305 306
306 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); 307 return (string) UriResolver::resolve($base, new Uri($url));
307
308 return false;
309 } 308 }
310 309
311 /** 310 /**