X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FDownloadImages.php;h=9a7e9828594492bca5bb8286e82e7f31da8f3763;hb=9f0957b831622ee577fa7d8f92ec0df6f3a8e274;hp=252ba57c517f94dae8a30674829baa73a3074d80;hpb=2490f61dca635026a3eb9b5e9b6978b1981b1172;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 252ba57c..9a7e9828 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -42,14 +42,17 @@ class DownloadImages public function processHtml($entryId, $html, $url) { $crawler = new Crawler($html); - $result = $crawler - ->filterXpath('//img') + $imagesCrawler = $crawler + ->filterXpath('//img'); + $imagesUrls = $imagesCrawler ->extract(['src']); + $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); + $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); $relativePath = $this->getRelativePath($entryId); // download and save the image to the folder - foreach ($result as $image) { + foreach ($imagesUrls as $image) { $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); if (false === $imagePath) { @@ -82,6 +85,10 @@ class DownloadImages */ public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) { + if (null === $imagePath) { + return false; + } + if (null === $relativePath) { $relativePath = $this->getRelativePath($entryId); } @@ -128,7 +135,21 @@ class DownloadImages switch ($ext) { case 'gif': - imagegif($im, $localPath); + // use Imagick if available to keep GIF animation + if (class_exists('\\Imagick')) { + try { + $imagick = new \Imagick(); + $imagick->readImageBlob($res->getBody()); + $imagick->setImageFormat('gif'); + $imagick->writeImages($localPath, true); + } catch (\Exception $e) { + // if Imagick fail, fallback to the default solution + imagegif($im, $localPath); + } + } else { + imagegif($im, $localPath); + } + $this->logger->debug('DownloadImages: Re-creating gif'); break; case 'jpeg': @@ -171,6 +192,38 @@ class DownloadImages @rmdir($folderPath); } + /** + * Get images urls from the srcset image attribute. + * + * @param Crawler $imagesCrawler + * + * @return array An array of urls + */ + private function getSrcsetUrls(Crawler $imagesCrawler) + { + $urls = []; + $iterator = $imagesCrawler + ->getIterator(); + while ($iterator->valid()) { + $srcsetAttribute = $iterator->current()->getAttribute('srcset'); + if ('' !== $srcsetAttribute) { + // Couldn't start with " OR ' OR a white space + // Could be one or more white space + // Must be one or more digits followed by w OR x + $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; + preg_match_all($pattern, $srcsetAttribute, $matches); + $srcset = \call_user_func_array('array_merge', $matches); + $srcsetUrls = array_map(function ($src) { + return trim(explode(' ', $src, 2)[0]); + }, $srcset); + $urls = array_merge($srcsetUrls, $urls); + } + $iterator->next(); + } + + return $urls; + } + /** * Setup base folder where all images are going to be saved. */ @@ -269,7 +322,7 @@ class DownloadImages $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); } - if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { + if (!\in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); return false;