X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FDownloadImages.php;h=c1645e45afe9eb0d15335a7684de5e7be63ffcd7;hb=5c0701ba41fd64ba471addb4a84af062277ab559;hp=f91cdf5ebe005f63ea523271212bc035afb8a20e;hpb=9216bab8c9ea3cea60c0a6fae1ffaea212a8932e;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index f91cdf5e..c1645e45 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -31,23 +31,36 @@ class DownloadImages } /** - * Process the html and extract image from it, save them to local and return the updated html. + * Process the html and extract images URLs from it. * - * @param int $entryId ID of the entry * @param string $html - * @param string $url Used as a base path for relative image and folder * - * @return string + * @return string[] */ - public function processHtml($entryId, $html, $url) + public static function extractImagesUrlsFromHtml($html) { $crawler = new Crawler($html); $imagesCrawler = $crawler ->filterXpath('//img'); $imagesUrls = $imagesCrawler ->extract(['src']); - $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); - $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); + + return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); + } + + /** + * Process the html and extract image from it, save them to local and return the updated html. + * + * @param int $entryId ID of the entry + * @param string $html + * @param string $url Used as a base path for relative image and folder + * + * @return string + */ + public function processHtml($entryId, $html, $url) + { + $imagesUrls = self::extractImagesUrlsFromHtml($html); $relativePath = $this->getRelativePath($entryId); @@ -135,7 +148,21 @@ class DownloadImages switch ($ext) { case 'gif': - imagegif($im, $localPath); + // use Imagick if available to keep GIF animation + if (class_exists('\\Imagick')) { + try { + $imagick = new \Imagick(); + $imagick->readImageBlob($res->getBody()); + $imagick->setImageFormat('gif'); + $imagick->writeImages($localPath, true); + } catch (\Exception $e) { + // if Imagick fail, fallback to the default solution + imagegif($im, $localPath); + } + } else { + imagegif($im, $localPath); + } + $this->logger->debug('DownloadImages: Re-creating gif'); break; case 'jpeg': @@ -185,7 +212,7 @@ class DownloadImages * * @return array An array of urls */ - protected function getSrcsetUrls(Crawler $imagesCrawler) + private static function getSrcsetUrls(Crawler $imagesCrawler) { $urls = []; $iterator = $imagesCrawler @@ -193,9 +220,14 @@ class DownloadImages while ($iterator->valid()) { $srcsetAttribute = $iterator->current()->getAttribute('srcset'); if ('' !== $srcsetAttribute) { - $srcset = array_map('trim', explode(',', $srcsetAttribute)); + // Couldn't start with " OR ' OR a white space + // Could be one or more white space + // Must be one or more digits followed by w OR x + $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; + preg_match_all($pattern, $srcsetAttribute, $matches); + $srcset = \call_user_func_array('array_merge', $matches); $srcsetUrls = array_map(function ($src) { - return explode(' ', $src)[0]; + return trim(explode(' ', $src, 2)[0]); }, $srcset); $urls = array_merge($srcsetUrls, $urls); } @@ -303,7 +335,7 @@ class DownloadImages $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); } - if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { + if (!\in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); return false;