}
/**
- * Process the html and extract image from it, save them to local and return the updated html.
+ * Process the HTML and extract the image URLs from it.
*
- * @param int $entryId ID of the entry
* @param string $html
- * @param string $url Used as a base path for relative image and folder
*
- * @return string
+ * @return string[]
*/
- public function processHtml($entryId, $html, $url)
+ public static function extractImagesUrlsFromHtml($html)
{
$crawler = new Crawler($html);
$imagesCrawler = $crawler
->filterXpath('//img');
$imagesUrls = $imagesCrawler
->extract(['src']);
- $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
- $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
+ $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
+
+ return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
+ }
+
+ /**
+ * Process the HTML, extract the images from it, save them locally and return the updated HTML.
+ *
+ * @param int $entryId ID of the entry
+ * @param string $html
+ * @param string $url Used as a base path for relative image and folder
+ *
+ * @return string
+ */
+ public function processHtml($entryId, $html, $url)
+ {
+ $imagesUrls = self::extractImagesUrlsFromHtml($html);
$relativePath = $this->getRelativePath($entryId);
*/
public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
{
+ if (null === $imagePath) {
+ return false;
+ }
+
if (null === $relativePath) {
$relativePath = $this->getRelativePath($entryId);
}
switch ($ext) {
case 'gif':
- imagegif($im, $localPath);
+ // use Imagick if available to keep GIF animation
+ if (class_exists('\\Imagick')) {
+ try {
+ $imagick = new \Imagick();
+ $imagick->readImageBlob($res->getBody());
+ $imagick->setImageFormat('gif');
+ $imagick->writeImages($localPath, true);
+ } catch (\Exception $e) {
+ // if Imagick fails, fall back to the default solution
+ imagegif($im, $localPath);
+ }
+ } else {
+ imagegif($im, $localPath);
+ }
+
$this->logger->debug('DownloadImages: Re-creating gif');
break;
case 'jpeg':
*
* @return array An array of urls
*/
- protected function getSrcsetUrls(Crawler $imagesCrawler)
+ private static function getSrcsetUrls(Crawler $imagesCrawler)
{
$urls = [];
$iterator = $imagesCrawler
while ($iterator->valid()) {
$srcsetAttribute = $iterator->current()->getAttribute('srcset');
if ('' !== $srcsetAttribute) {
- $srcset = array_map('trim', explode(',', $srcsetAttribute));
+ // URL part: one or more characters, none of which is ", ' or a white space
+ // Then zero or more white spaces
+ // Then the descriptor: one or more groups of digits followed by w OR x
+ $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
+ preg_match_all($pattern, $srcsetAttribute, $matches);
+ $srcset = \call_user_func_array('array_merge', $matches);
$srcsetUrls = array_map(function ($src) {
- return explode(' ', $src)[0];
+ return trim(explode(' ', $src, 2)[0]);
}, $srcset);
$urls = array_merge($srcsetUrls, $urls);
}
$this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
}
- if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
+ if (!\in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
$this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath);
return false;