diff options
author | Jeremy Benoist <jeremy.benoist@gmail.com> | 2017-06-05 22:54:02 +0200 |
---|---|---|
committer | Jeremy Benoist <jeremy.benoist@gmail.com> | 2017-06-05 22:54:02 +0200 |
commit | 577c0b6dd82c421c377c37295c59dee147068132 (patch) | |
tree | e63b2d0a64b446934cd85cd89eb3611a6b386a46 | |
parent | a687c8d915276eee0c0494156700f7d0c0606735 (diff) | |
download | wallabag-577c0b6dd82c421c377c37295c59dee147068132.tar.gz wallabag-577c0b6dd82c421c377c37295c59dee147068132.tar.zst wallabag-577c0b6dd82c421c377c37295c59dee147068132.zip |
Use an alternative way to detect image
When parsing content to retrieve images to save locally, we only check for the content-type of the image response.
In some case, that value is empty.
Now we’re also checking for the first few bytes of the content as an alternative to detect if it’s an image wallabag can handle.
We might get higher image supports using that alternative method.
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 50 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | 25 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg | bin | 0 -> 354067 bytes |
3 files changed, 70 insertions, 5 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 54e23a05..ed888cdb 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper; | |||
5 | use Psr\Log\LoggerInterface; | 5 | use Psr\Log\LoggerInterface; |
6 | use Symfony\Component\DomCrawler\Crawler; | 6 | use Symfony\Component\DomCrawler\Crawler; |
7 | use GuzzleHttp\Client; | 7 | use GuzzleHttp\Client; |
8 | use GuzzleHttp\Message\Response; | ||
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
9 | use Symfony\Component\Finder\Finder; | 10 | use Symfony\Component\Finder\Finder; |
10 | 11 | ||
@@ -116,13 +117,11 @@ class DownloadImages | |||
116 | return false; | 117 | return false; |
117 | } | 118 | } |
118 | 119 | ||
119 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 120 | $ext = $this->getExtensionFromResponse($res, $imagePath); |
120 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 121 | if (false === $res) { |
121 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
122 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); | ||
123 | |||
124 | return false; | 122 | return false; |
125 | } | 123 | } |
124 | |||
126 | $hashImage = hash('crc32', $absolutePath); | 125 | $hashImage = hash('crc32', $absolutePath); |
127 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; | 126 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; |
128 | 127 | ||
@@ -237,4 +236,45 @@ class DownloadImages | |||
237 | 236 | ||
238 | return false; | 237 | return false; |
239 | } | 238 | } |
239 | |||
240 | /** | ||
241 | * Retrieve and validate the extension from the response of the url of the image. | ||
242 | * | ||
243 | * @param Response $res Guzzle Response | ||
244 | * @param string $imagePath Path from the src image from the content (used for log only) | ||
245 | * | ||
246 | * @return string|false Extension name or false if validation failed | ||
247 | */ | ||
248 | private function getExtensionFromResponse(Response $res, $imagePath) | ||
249 | { | ||
250 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | ||
251 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | ||
252 | |||
253 | // ok header doesn't have the extension, try a different way | ||
254 | if (empty($ext)) { | ||
255 | $types = [ | ||
256 | 'jpeg' => "\xFF\xD8\xFF", | ||
257 | 'gif' => 'GIF', | ||
258 | 'png' => "\x89\x50\x4e\x47\x0d\x0a", | ||
259 | ]; | ||
260 | $bytes = substr((string) $res->getBody(), 0, 8); | ||
261 | |||
262 | foreach ($types as $type => $header) { | ||
263 | if (0 === strpos($bytes, $header)) { | ||
264 | $ext = $type; | ||
265 | break; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); | ||
270 | } | ||
271 | |||
272 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
273 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); | ||
274 | |||
275 | return false; | ||
276 | } | ||
277 | |||
278 | return $ext; | ||
279 | } | ||
240 | } | 280 | } |
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index 9125f8dc..c02f9658 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | |||
@@ -157,4 +157,29 @@ class DownloadImagesTest extends \PHPUnit_Framework_TestCase | |||
157 | 157 | ||
158 | $this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced'); | 158 | $this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced'); |
159 | } | 159 | } |
160 | |||
161 | public function testProcessRealImage() | ||
162 | { | ||
163 | $client = new Client(); | ||
164 | |||
165 | $mock = new Mock([ | ||
166 | new Response(200, ['content-type' => null], Stream::factory(file_get_contents(__DIR__.'/../fixtures/image-no-content-type.jpg'))), | ||
167 | ]); | ||
168 | |||
169 | $client->getEmitter()->attach($mock); | ||
170 | |||
171 | $logHandler = new TestHandler(); | ||
172 | $logger = new Logger('test', array($logHandler)); | ||
173 | |||
174 | $download = new DownloadImages($client, sys_get_temp_dir().'/wallabag_test', 'http://wallabag.io/', $logger); | ||
175 | |||
176 | $res = $download->processSingleImage( | ||
177 | 123, | ||
178 | 'https://cdn.theconversation.com/files/157200/article/width926/gsj2rjp2-1487348607.jpg', | ||
179 | 'https://theconversation.com/conversation-avec-gerald-bronner-ce-nest-pas-la-post-verite-qui-nous-menace-mais-lextension-de-notre-credulite-73089' | ||
180 | ); | ||
181 | |||
182 | $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); | ||
183 | $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); | ||
184 | } | ||
160 | } | 185 | } |
diff --git a/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg b/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg new file mode 100644 index 00000000..0c60e952 --- /dev/null +++ b/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg | |||
Binary files differ | |||