X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FDownloadImages.php;h=1d361d6daeaa3a6da615a6cc52400d63256981ee;hb=830cce45b3a3246c113423c52bcc580087ddebcd;hp=c1645e45afe9eb0d15335a7684de5e7be63ffcd7;hpb=423efadefc2459c7b4a2eabc32edaed918e1075d;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index c1645e45..1d361d6d 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -2,8 +2,15 @@ namespace Wallabag\CoreBundle\Helper; -use GuzzleHttp\Client; -use GuzzleHttp\Message\Response; +use GuzzleHttp\Psr7\Uri; +use GuzzleHttp\Psr7\UriResolver; +use Http\Client\Common\HttpMethodsClient; +use Http\Client\Common\Plugin\ErrorPlugin; +use Http\Client\Common\PluginClient; +use Http\Client\HttpClient; +use Http\Discovery\MessageFactoryDiscovery; +use Http\Message\MessageFactory; +use Psr\Http\Message\ResponseInterface; use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\Finder\Finder; @@ -19,9 +26,9 @@ class DownloadImages private $mimeGuesser; private $wallabagUrl; - public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) + public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null) { - $this->client = $client; + $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find()); $this->baseFolder = $baseFolder; $this->wallabagUrl = rtrim($wallabagUrl, '/'); $this->logger = $logger; @@ -40,10 +47,8 @@ class DownloadImages public static function extractImagesUrlsFromHtml($html) { $crawler = new Crawler($html); - $imagesCrawler = $crawler - ->filterXpath('//img'); - $imagesUrls = $imagesCrawler - ->extract(['src']); + $imagesCrawler = $crawler->filterXpath('//img'); + $imagesUrls = $imagesCrawler->extract(['src']); $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); @@ -135,7 +140,7 @@ class DownloadImages $localPath = $folderPath . '/' . $hashImage . '.' . $ext; try { - $im = imagecreatefromstring($res->getBody()); + $im = imagecreatefromstring((string) $res->getBody()); } catch (\Exception $e) { $im = false; } @@ -215,22 +220,25 @@ class DownloadImages private static function getSrcsetUrls(Crawler $imagesCrawler) { $urls = []; - $iterator = $imagesCrawler - ->getIterator(); + $iterator = $imagesCrawler->getIterator(); + while ($iterator->valid()) { $srcsetAttribute = $iterator->current()->getAttribute('srcset'); + if ('' !== $srcsetAttribute) { // Couldn't start with " OR ' OR a white space // Could be one or more white space // Must be one or more digits followed by w OR x $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; preg_match_all($pattern, $srcsetAttribute, $matches); + $srcset = \call_user_func_array('array_merge', $matches); $srcsetUrls = array_map(function ($src) { return trim(explode(' ', $src, 2)[0]); }, $srcset); $urls = array_merge($srcsetUrls, $urls); } + $iterator->next(); } @@ -287,33 +295,29 @@ class DownloadImages return $url; } - $base = new \SimplePie_IRI($base); + $base = new Uri($base); - // remove '//' in URL path (causes URLs not to resolve properly) - if (isset($base->ipath)) { - $base->ipath = preg_replace('!//+!', '/', $base->ipath); - } + // in case the url has no scheme & host + if ('' === $base->getAuthority() || '' === $base->getScheme()) { + $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); - if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { - return $absolute->get_uri(); + return false; } - $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); - - return false; + return (string) UriResolver::resolve($base, new Uri($url)); } /** * Retrieve and validate the extension from the response of the url of the image. * - * @param Response $res Guzzle Response - * @param string $imagePath Path from the src image from the content (used for log only) + * @param ResponseInterface $res Http Response + * @param string $imagePath Path from the src image from the content (used for log only) * * @return string|false Extension name or false if validation failed */ - private function getExtensionFromResponse(Response $res, $imagePath) + private function getExtensionFromResponse(ResponseInterface $res, $imagePath) { - $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); + $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type'))); $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); // ok header doesn't have the extension, try a different way