X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=src%2FWallabag%2FCoreBundle%2FHelper%2FDownloadImages.php;h=ed888cdb031ae52afd2b6624378c7927a0366e4d;hb=873f6b8e03079d11fab541aa5b0bc6f8fe2d645e;hp=004bb277515b1a436a761526a687c886b5f04082;hpb=48656e0eaac006a80f21e9aec8900747fe76283a;p=github%2Fwallabag%2Fwallabag.git diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 004bb277..ed888cdb 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -5,7 +5,9 @@ namespace Wallabag\CoreBundle\Helper; use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; use GuzzleHttp\Client; +use GuzzleHttp\Message\Response; use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; +use Symfony\Component\Finder\Finder; class DownloadImages { @@ -15,11 +17,13 @@ class DownloadImages private $baseFolder; private $logger; private $mimeGuesser; + private $wallabagUrl; - public function __construct(Client $client, $baseFolder, LoggerInterface $logger) + public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) { $this->client = $client; $this->baseFolder = $baseFolder; + $this->wallabagUrl = rtrim($wallabagUrl, '/'); $this->logger = $logger; $this->mimeGuesser = new MimeTypeExtensionGuesser(); @@ -33,35 +37,41 @@ class DownloadImages { // if folder doesn't exist, attempt to create one and store the folder name in property $folder if (!file_exists($this->baseFolder)) { - mkdir($this->baseFolder, 0777, true); + mkdir($this->baseFolder, 0755, true); } } /** * Process the html and extract image from it, save them to local and return the updated html. * + * @param int $entryId ID of the entry * @param string $html - * @param string $url Used as a base path for relative image and folder + * @param string $url Used as a base path for relative image and folder * * @return string */ - public function processHtml($html, $url) + public function processHtml($entryId, $html, $url) { $crawler = new Crawler($html); $result = $crawler ->filterXpath('//img') - ->extract(array('src')); + ->extract(['src']); - $relativePath = $this->getRelativePath($url); + $relativePath = $this->getRelativePath($entryId); // download and save the image to the folder foreach ($result as $image) { - $imagePath = $this->processSingleImage($image, $url, $relativePath); + $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); if (false === $imagePath) { continue; } + // if image contains "&" and we can't find it in the html it might be because it's encoded as & + if (false !== stripos($image, '&') && false === stripos($html, $image)) { + $image = str_replace('&', '&', $image); + } + $html = str_replace($image, $imagePath, $html); } @@ -74,24 +84,27 @@ class DownloadImages * - re-saved it (for security reason) * - return the new local path. * + * @param int $entryId ID of the entry * @param string $imagePath Path to the image to retrieve * @param string $url Url from where the image were found * @param string $relativePath Relative local path to saved the image * * @return string Relative url to access the image from the web */ - public function processSingleImage($imagePath, $url, $relativePath = null) + public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) { - if (null == $relativePath) { - $relativePath = $this->getRelativePath($url); + if (null === $relativePath) { + $relativePath = $this->getRelativePath($entryId); } + $this->logger->debug('DownloadImages: working on image: '.$imagePath); + $folderPath = $this->baseFolder.'/'.$relativePath; // build image path $absolutePath = $this->getAbsoluteLink($url, $imagePath); if (false === $absolutePath) { - $this->logger->log('error', 'Can not determine the absolute path for that image, skipping.'); + $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.'); return false; } @@ -99,18 +112,16 @@ class DownloadImages try { $res = $this->client->get($absolutePath); } catch (\Exception $e) { - $this->logger->log('error', 'Can not retrieve image, skipping.', ['exception' => $e]); + $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]); return false; } - $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); - $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); - if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->logger->log('error', 'Processed image with not allowed extension. Skipping '.$imagePath); - + $ext = $this->getExtensionFromResponse($res, $imagePath); + if (false === $res) { return false; } + $hashImage = hash('crc32', $absolutePath); $localPath = $folderPath.'/'.$hashImage.'.'.$ext; @@ -121,49 +132,74 @@ class DownloadImages } if (false === $im) { - $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]); + $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]); return false; } switch ($ext) { case 'gif': - $result = imagegif($im, $localPath); - $this->logger->log('debug', 'Re-creating gif'); + imagegif($im, $localPath); + $this->logger->debug('DownloadImages: Re-creating gif'); break; case 'jpeg': case 'jpg': - $result = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY); - $this->logger->log('debug', 'Re-creating jpg'); + imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY); + $this->logger->debug('DownloadImages: Re-creating jpg'); break; case 'png': - $result = imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9)); - $this->logger->log('debug', 'Re-creating png'); + imagealphablending($im, false); + imagesavealpha($im, true); + imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9)); + $this->logger->debug('DownloadImages: Re-creating png'); } imagedestroy($im); - return '/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; + return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; + } + + /** + * Remove all images for the given entry id. + * + * @param int $entryId ID of the entry + */ + public function removeImages($entryId) + { + $relativePath = $this->getRelativePath($entryId); + $folderPath = $this->baseFolder.'/'.$relativePath; + + $finder = new Finder(); + $finder + ->files() + ->ignoreDotFiles(true) + ->in($folderPath); + + foreach ($finder as $file) { + @unlink($file->getRealPath()); + } + + @rmdir($folderPath); } /** * Generate the folder where we are going to save images based on the entry url. * - * @param string $url + * @param int $entryId ID of the entry * * @return string */ - private function getRelativePath($url) + private function getRelativePath($entryId) { - $hashUrl = hash('crc32', $url); - $relativePath = $hashUrl[0].'/'.$hashUrl[1].'/'.$hashUrl; + $hashId = hash('crc32', $entryId); + $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId; $folderPath = $this->baseFolder.'/'.$relativePath; if (!file_exists($folderPath)) { mkdir($folderPath, 0777, true); } - $this->logger->log('debug', 'Folder used for that url', ['folder' => $folderPath, 'url' => $url]); + $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]); return $relativePath; } @@ -196,8 +232,49 @@ class DownloadImages return $absolute->get_uri(); } - $this->logger->log('error', 'Can not make an absolute link', ['base' => $base, 'url' => $url]); + $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); return false; } + + /** + * Retrieve and validate the extension from the response of the url of the image. + * + * @param Response $res Guzzle Response + * @param string $imagePath Path from the src image from the content (used for log only) + * + * @return string|false Extension name or false if validation failed + */ + private function getExtensionFromResponse(Response $res, $imagePath) + { + $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); + $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); + + // ok header doesn't have the extension, try a different way + if (empty($ext)) { + $types = [ + 'jpeg' => "\xFF\xD8\xFF", + 'gif' => 'GIF', + 'png' => "\x89\x50\x4e\x47\x0d\x0a", + ]; + $bytes = substr((string) $res->getBody(), 0, 8); + + foreach ($types as $type => $header) { + if (0 === strpos($bytes, $header)) { + $ext = $type; + break; + } + } + + $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); + } + + if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { + $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); + + return false; + } + + return $ext; + } }