diff options
author | Jeremy Benoist <jeremy.benoist@gmail.com> | 2017-06-05 22:54:02 +0200 |
---|---|---|
committer | Jeremy Benoist <jeremy.benoist@gmail.com> | 2017-06-05 22:54:02 +0200 |
commit | 577c0b6dd82c421c377c37295c59dee147068132 (patch) | |
tree | e63b2d0a64b446934cd85cd89eb3611a6b386a46 /src/Wallabag | |
parent | a687c8d915276eee0c0494156700f7d0c0606735 (diff) | |
download | wallabag-577c0b6dd82c421c377c37295c59dee147068132.tar.gz wallabag-577c0b6dd82c421c377c37295c59dee147068132.tar.zst wallabag-577c0b6dd82c421c377c37295c59dee147068132.zip |
Use an alternative way to detect image
When parsing content to retrieve images to save locally, we only check for the content-type of the image response.
In some case, that value is empty.
Now we’re also checking for the first few bytes of the content as an alternative to detect if it’s an image wallabag can handle.
We might get higher image supports using that alternative method.
Diffstat (limited to 'src/Wallabag')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 50 |
1 files changed, 45 insertions, 5 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 54e23a05..ed888cdb 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper; | |||
5 | use Psr\Log\LoggerInterface; | 5 | use Psr\Log\LoggerInterface; |
6 | use Symfony\Component\DomCrawler\Crawler; | 6 | use Symfony\Component\DomCrawler\Crawler; |
7 | use GuzzleHttp\Client; | 7 | use GuzzleHttp\Client; |
8 | use GuzzleHttp\Message\Response; | ||
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
9 | use Symfony\Component\Finder\Finder; | 10 | use Symfony\Component\Finder\Finder; |
10 | 11 | ||
@@ -116,13 +117,11 @@ class DownloadImages | |||
116 | return false; | 117 | return false; |
117 | } | 118 | } |
118 | 119 | ||
119 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 120 | $ext = $this->getExtensionFromResponse($res, $imagePath); |
120 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 121 | if (false === $res) { |
121 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
122 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); | ||
123 | |||
124 | return false; | 122 | return false; |
125 | } | 123 | } |
124 | |||
126 | $hashImage = hash('crc32', $absolutePath); | 125 | $hashImage = hash('crc32', $absolutePath); |
127 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; | 126 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; |
128 | 127 | ||
@@ -237,4 +236,45 @@ class DownloadImages | |||
237 | 236 | ||
238 | return false; | 237 | return false; |
239 | } | 238 | } |
239 | |||
240 | /** | ||
241 | * Retrieve and validate the extension from the response of the url of the image. | ||
242 | * | ||
243 | * @param Response $res Guzzle Response | ||
244 | * @param string $imagePath Path from the src image from the content (used for log only) | ||
245 | * | ||
246 | * @return string|false Extension name or false if validation failed | ||
247 | */ | ||
248 | private function getExtensionFromResponse(Response $res, $imagePath) | ||
249 | { | ||
250 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | ||
251 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | ||
252 | |||
253 | // ok header doesn't have the extension, try a different way | ||
254 | if (empty($ext)) { | ||
255 | $types = [ | ||
256 | 'jpeg' => "\xFF\xD8\xFF", | ||
257 | 'gif' => 'GIF', | ||
258 | 'png' => "\x89\x50\x4e\x47\x0d\x0a", | ||
259 | ]; | ||
260 | $bytes = substr((string) $res->getBody(), 0, 8); | ||
261 | |||
262 | foreach ($types as $type => $header) { | ||
263 | if (0 === strpos($bytes, $header)) { | ||
264 | $ext = $type; | ||
265 | break; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); | ||
270 | } | ||
271 | |||
272 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
273 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); | ||
274 | |||
275 | return false; | ||
276 | } | ||
277 | |||
278 | return $ext; | ||
279 | } | ||
240 | } | 280 | } |