From fcad69a427de7ce4f65cbf53bcf778e561959807 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Thu, 1 Jun 2017 22:50:33 +0200 Subject: Replace images with & MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Images with `&` in the path weren’t well replaced because they might be with `&` in the html instead. Replacing `&` with `&` fix the problem. --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 0d330d2a..f7c26a38 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -66,6 +66,12 @@ class DownloadImages continue; } + // if image contains "&"" and we can't find it in the html + // it might be because it's encoded as & + if (false !== stripos($image, '&') && false === stripos($html, $image)) { + $image = str_replace('&', '&', $image); + } + $html = str_replace($image, $imagePath, $html); } @@ -114,7 +120,7 @@ class DownloadImages $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath); + $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); return false; } -- cgit v1.2.3 From 9bf7752f73ebfbfea0adbdb0d562a3cfa85039f3 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Thu, 1 Jun 2017 22:58:38 +0200 Subject: CS --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index f7c26a38..54e23a05 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -54,7 +54,7 @@ class DownloadImages $crawler = new Crawler($html); $result = $crawler ->filterXpath('//img') - ->extract(array('src')); + ->extract(['src']); $relativePath = $this->getRelativePath($entryId); @@ -66,8 +66,7 @@ class DownloadImages continue; } - // if image contains "&"" and we can't find it in the html - // it might be because it's encoded as & + // if image contains "&" and we can't find it in the html it might be because it's encoded as & if (false !== stripos($image, '&') && false === stripos($html, $image)) { $image = str_replace('&', '&', $image); } -- cgit v1.2.3 From 577c0b6dd82c421c377c37295c59dee147068132 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Mon, 5 Jun 2017 22:54:02 +0200 Subject: Use an alternative way to detect image MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When parsing content to retrieve images to save locally, we only check for the content-type of the image response. In some case, that value is empty. Now we’re also checking for the first few bytes of the content as an alternative to detect if it’s an image wallabag can handle. We might get higher image supports using that alternative method. --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 50 ++++++++++++++++++++--- 1 file changed, 45 insertions(+), 5 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 54e23a05..ed888cdb 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper; use Psr\Log\LoggerInterface; use Symfony\Component\DomCrawler\Crawler; use GuzzleHttp\Client; +use GuzzleHttp\Message\Response; use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; use Symfony\Component\Finder\Finder; @@ -116,13 +117,11 @@ class DownloadImages return false; } - $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); - $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); - if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); - + $ext = $this->getExtensionFromResponse($res, $imagePath); + if (false === $res) { return false; } + $hashImage = hash('crc32', $absolutePath); $localPath = $folderPath.'/'.$hashImage.'.'.$ext; @@ -237,4 +236,45 @@ class DownloadImages return false; } + + /** + * Retrieve and validate the extension from the response of the url of the image. + * + * @param Response $res Guzzle Response + * @param string $imagePath Path from the src image from the content (used for log only) + * + * @return string|false Extension name or false if validation failed + */ + private function getExtensionFromResponse(Response $res, $imagePath) + { + $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); + $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); + + // ok header doesn't have the extension, try a different way + if (empty($ext)) { + $types = [ + 'jpeg' => "\xFF\xD8\xFF", + 'gif' => 'GIF', + 'png' => "\x89\x50\x4e\x47\x0d\x0a", + ]; + $bytes = substr((string) $res->getBody(), 0, 8); + + foreach ($types as $type => $header) { + if (0 === strpos($bytes, $header)) { + $ext = $type; + break; + } + } + + $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); + } + + if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { + $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); + + return false; + } + + return $ext; + } } -- cgit v1.2.3 From f808b01692a835673f328d7221ba8c212caa9b61 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Sat, 1 Jul 2017 09:52:38 +0200 Subject: Add a real configuration for CS-Fixer --- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 44 +++++++++++------------ 1 file changed, 22 insertions(+), 22 deletions(-) (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php') diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index ed888cdb..252ba57c 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php @@ -2,12 +2,12 @@ namespace Wallabag\CoreBundle\Helper; -use Psr\Log\LoggerInterface; -use Symfony\Component\DomCrawler\Crawler; use GuzzleHttp\Client; use GuzzleHttp\Message\Response; -use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; +use Psr\Log\LoggerInterface; +use Symfony\Component\DomCrawler\Crawler; use Symfony\Component\Finder\Finder; +use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; class DownloadImages { @@ -30,17 +30,6 @@ class DownloadImages $this->setFolder(); } - /** - * Setup base folder where all images are going to be saved. - */ - private function setFolder() - { - // if folder doesn't exist, attempt to create one and store the folder name in property $folder - if (!file_exists($this->baseFolder)) { - mkdir($this->baseFolder, 0755, true); - } - } - /** * Process the html and extract image from it, save them to local and return the updated html. * @@ -97,9 +86,9 @@ class DownloadImages $relativePath = $this->getRelativePath($entryId); } - $this->logger->debug('DownloadImages: working on image: '.$imagePath); + $this->logger->debug('DownloadImages: working on image: ' . $imagePath); - $folderPath = $this->baseFolder.'/'.$relativePath; + $folderPath = $this->baseFolder . '/' . $relativePath; // build image path $absolutePath = $this->getAbsoluteLink($url, $imagePath); @@ -123,7 +112,7 @@ class DownloadImages } $hashImage = hash('crc32', $absolutePath); - $localPath = $folderPath.'/'.$hashImage.'.'.$ext; + $localPath = $folderPath . '/' . $hashImage . '.' . $ext; try { $im = imagecreatefromstring($res->getBody()); @@ -156,7 +145,7 @@ class DownloadImages imagedestroy($im); - return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; + return $this->wallabagUrl . '/assets/images/' . $relativePath . '/' . $hashImage . '.' . $ext; } /** @@ -167,7 +156,7 @@ class DownloadImages public function removeImages($entryId) { $relativePath = $this->getRelativePath($entryId); - $folderPath = $this->baseFolder.'/'.$relativePath; + $folderPath = $this->baseFolder . '/' . $relativePath; $finder = new Finder(); $finder @@ -182,6 +171,17 @@ class DownloadImages @rmdir($folderPath); } + /** + * Setup base folder where all images are going to be saved. + */ + private function setFolder() + { + // if folder doesn't exist, attempt to create one and store the folder name in property $folder + if (!file_exists($this->baseFolder)) { + mkdir($this->baseFolder, 0755, true); + } + } + /** * Generate the folder where we are going to save images based on the entry url. * @@ -192,8 +192,8 @@ class DownloadImages private function getRelativePath($entryId) { $hashId = hash('crc32', $entryId); - $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId; - $folderPath = $this->baseFolder.'/'.$relativePath; + $relativePath = $hashId[0] . '/' . $hashId[1] . '/' . $hashId; + $folderPath = $this->baseFolder . '/' . $relativePath; if (!file_exists($folderPath)) { mkdir($folderPath, 0777, true); @@ -270,7 +270,7 @@ class DownloadImages } if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { - $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); + $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); return false; } -- cgit v1.2.3