aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorJérémy Benoist <j0k3r@users.noreply.github.com>2017-06-07 13:41:25 +0200
committerGitHub <noreply@github.com>2017-06-07 13:41:25 +0200
commit4e4a5b534ff241f25c35fad24c9c79eb12f4adde (patch)
treee17c4eadbb91bfe2f683d3772e2dfc90c950486a
parenta3f16a5685aecaa4f7d513983e866f6793548845 (diff)
parent577c0b6dd82c421c377c37295c59dee147068132 (diff)
downloadwallabag-4e4a5b534ff241f25c35fad24c9c79eb12f4adde.tar.gz
wallabag-4e4a5b534ff241f25c35fad24c9c79eb12f4adde.tar.zst
wallabag-4e4a5b534ff241f25c35fad24c9c79eb12f4adde.zip
Merge pull request #3184 from wallabag/better-way-image-extension
Use an alternative way to detect images
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php50
-rw-r--r--tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php25
-rw-r--r--tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpgbin0 -> 354067 bytes
3 files changed, 70 insertions, 5 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 54e23a05..ed888cdb 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper;
5use Psr\Log\LoggerInterface; 5use Psr\Log\LoggerInterface;
6use Symfony\Component\DomCrawler\Crawler; 6use Symfony\Component\DomCrawler\Crawler;
7use GuzzleHttp\Client; 7use GuzzleHttp\Client;
8use GuzzleHttp\Message\Response;
8use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
9use Symfony\Component\Finder\Finder; 10use Symfony\Component\Finder\Finder;
10 11
@@ -116,13 +117,11 @@ class DownloadImages
116 return false; 117 return false;
117 } 118 }
118 119
119 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 120 $ext = $this->getExtensionFromResponse($res, $imagePath);
120 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 121 if (false === $res) {
121 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
122 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
123
124 return false; 122 return false;
125 } 123 }
124
126 $hashImage = hash('crc32', $absolutePath); 125 $hashImage = hash('crc32', $absolutePath);
127 $localPath = $folderPath.'/'.$hashImage.'.'.$ext; 126 $localPath = $folderPath.'/'.$hashImage.'.'.$ext;
128 127
@@ -237,4 +236,45 @@ class DownloadImages
237 236
238 return false; 237 return false;
239 } 238 }
239
240 /**
241 * Retrieve and validate the extension from the response of the url of the image.
242 *
243 * @param Response $res Guzzle Response
244 * @param string $imagePath Path from the src image from the content (used for log only)
245 *
246 * @return string|false Extension name or false if validation failed
247 */
248 private function getExtensionFromResponse(Response $res, $imagePath)
249 {
250 $ext = $this->mimeGuesser->guess($res->getHeader('content-type'));
251 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
252
253 // ok header doesn't have the extension, try a different way
254 if (empty($ext)) {
255 $types = [
256 'jpeg' => "\xFF\xD8\xFF",
257 'gif' => 'GIF',
258 'png' => "\x89\x50\x4e\x47\x0d\x0a",
259 ];
260 $bytes = substr((string) $res->getBody(), 0, 8);
261
262 foreach ($types as $type => $header) {
263 if (0 === strpos($bytes, $header)) {
264 $ext = $type;
265 break;
266 }
267 }
268
269 $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
270 }
271
272 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
273 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
274
275 return false;
276 }
277
278 return $ext;
279 }
240} 280}
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
index 9125f8dc..c02f9658 100644
--- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
@@ -157,4 +157,29 @@ class DownloadImagesTest extends \PHPUnit_Framework_TestCase
157 157
158 $this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced'); 158 $this->assertFalse($res, 'Absolute image can not be determined, so it will not be replaced');
159 } 159 }
160
161 public function testProcessRealImage()
162 {
163 $client = new Client();
164
165 $mock = new Mock([
166 new Response(200, ['content-type' => null], Stream::factory(file_get_contents(__DIR__.'/../fixtures/image-no-content-type.jpg'))),
167 ]);
168
169 $client->getEmitter()->attach($mock);
170
171 $logHandler = new TestHandler();
172 $logger = new Logger('test', array($logHandler));
173
174 $download = new DownloadImages($client, sys_get_temp_dir().'/wallabag_test', 'http://wallabag.io/', $logger);
175
176 $res = $download->processSingleImage(
177 123,
178 'https://cdn.theconversation.com/files/157200/article/width926/gsj2rjp2-1487348607.jpg',
179 'https://theconversation.com/conversation-avec-gerald-bronner-ce-nest-pas-la-post-verite-qui-nous-menace-mais-lextension-de-notre-credulite-73089'
180 );
181
182 $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
183 $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
184 }
160} 185}
diff --git a/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg b/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg
new file mode 100644
index 00000000..0c60e952
--- /dev/null
+++ b/tests/Wallabag/CoreBundle/fixtures/image-no-content-type.jpg
Binary files differ