aboutsummaryrefslogtreecommitdiffhomepage
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php31
1 files changed, 15 insertions, 16 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 7a39a2e4..1d361d6d 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,6 +2,8 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Psr7\Uri;
6use GuzzleHttp\Psr7\UriResolver;
5use Http\Client\Common\HttpMethodsClient; 7use Http\Client\Common\HttpMethodsClient;
6use Http\Client\Common\Plugin\ErrorPlugin; 8use Http\Client\Common\Plugin\ErrorPlugin;
7use Http\Client\Common\PluginClient; 9use Http\Client\Common\PluginClient;
@@ -45,10 +47,8 @@ class DownloadImages
45 public static function extractImagesUrlsFromHtml($html) 47 public static function extractImagesUrlsFromHtml($html)
46 { 48 {
47 $crawler = new Crawler($html); 49 $crawler = new Crawler($html);
48 $imagesCrawler = $crawler 50 $imagesCrawler = $crawler->filterXpath('//img');
49 ->filterXpath('//img'); 51 $imagesUrls = $imagesCrawler->extract(['src']);
50 $imagesUrls = $imagesCrawler
51 ->extract(['src']);
52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); 52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
53 53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
@@ -220,22 +220,25 @@ class DownloadImages
220 private static function getSrcsetUrls(Crawler $imagesCrawler) 220 private static function getSrcsetUrls(Crawler $imagesCrawler)
221 { 221 {
222 $urls = []; 222 $urls = [];
223 $iterator = $imagesCrawler 223 $iterator = $imagesCrawler->getIterator();
224 ->getIterator(); 224
225 while ($iterator->valid()) { 225 while ($iterator->valid()) {
226 $srcsetAttribute = $iterator->current()->getAttribute('srcset'); 226 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
227
227 if ('' !== $srcsetAttribute) { 228 if ('' !== $srcsetAttribute) {
228 // Couldn't start with " OR ' OR a white space 229 // Couldn't start with " OR ' OR a white space
229 // Could be one or more white space 230 // Could be one or more white space
230 // Must be one or more digits followed by w OR x 231 // Must be one or more digits followed by w OR x
231 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; 232 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
232 preg_match_all($pattern, $srcsetAttribute, $matches); 233 preg_match_all($pattern, $srcsetAttribute, $matches);
234
233 $srcset = \call_user_func_array('array_merge', $matches); 235 $srcset = \call_user_func_array('array_merge', $matches);
234 $srcsetUrls = array_map(function ($src) { 236 $srcsetUrls = array_map(function ($src) {
235 return trim(explode(' ', $src, 2)[0]); 237 return trim(explode(' ', $src, 2)[0]);
236 }, $srcset); 238 }, $srcset);
237 $urls = array_merge($srcsetUrls, $urls); 239 $urls = array_merge($srcsetUrls, $urls);
238 } 240 }
241
239 $iterator->next(); 242 $iterator->next();
240 } 243 }
241 244
@@ -292,20 +295,16 @@ class DownloadImages
292 return $url; 295 return $url;
293 } 296 }
294 297
295 $base = new \SimplePie_IRI($base); 298 $base = new Uri($base);
296 299
297 // remove '//' in URL path (causes URLs not to resolve properly) 300 // in case the url has no scheme & host
298 if (isset($base->ipath)) { 301 if ('' === $base->getAuthority() || '' === $base->getScheme()) {
299 $base->ipath = preg_replace('!//+!', '/', $base->ipath); 302 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);
300 }
301 303
302 if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { 304 return false;
303 return $absolute->get_uri();
304 } 305 }
305 306
306 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); 307 return (string) UriResolver::resolve($base, new Uri($url));
307
308 return false;
309 } 308 }
310 309
311 /** 310 /**