about summary refs log tree commit diff homepage
path: root/src/Wallabag/CoreBundle/Helper/DownloadImages.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/DownloadImages.php 57
1 files changed, 51 insertions, 6 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 0d330d2a..ed888cdb 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper;
5use Psr\Log\LoggerInterface; 5use Psr\Log\LoggerInterface;
6use Symfony\Component\DomCrawler\Crawler; 6use Symfony\Component\DomCrawler\Crawler;
7use GuzzleHttp\Client; 7use GuzzleHttp\Client;
8use GuzzleHttp\Message\Response;
8use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
9use Symfony\Component\Finder\Finder; 10use Symfony\Component\Finder\Finder;
10 11
@@ -54,7 +55,7 @@ class DownloadImages
54 $crawler = new Crawler($html); 55 $crawler = new Crawler($html);
55 $result = $crawler 56 $result = $crawler
56 ->filterXpath('//img') 57 ->filterXpath('//img')
57 ->extract(array('src')); 58 ->extract(['src']);
58 59
59 $relativePath = $this->getRelativePath($entryId); 60 $relativePath = $this->getRelativePath($entryId);
60 61
@@ -66,6 +67,11 @@ class DownloadImages
66 continue; 67 continue;
67 } 68 }
68 69
70 // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp;
71 if (false !== stripos($image, '&') && false === stripos($html, $image)) {
72 $image = str_replace('&', '&', $image);
73 }
74
69 $html = str_replace($image, $imagePath, $html); 75 $html = str_replace($image, $imagePath, $html);
70 } 76 }
71 77
@@ -111,13 +117,11 @@ class DownloadImages
111 return false; 117 return false;
112 } 118 }
113 119
114 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 120 $ext = $this->getExtensionFromResponse($res, $imagePath);
115 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 121 if (false === $res) {
116 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
117 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath);
118
119 return false; 122 return false;
120 } 123 }
124
121 $hashImage = hash('crc32', $absolutePath); 125 $hashImage = hash('crc32', $absolutePath);
122 $localPath = $folderPath.'/'.$hashImage.'.'.$ext; 126 $localPath = $folderPath.'/'.$hashImage.'.'.$ext;
123 127
@@ -232,4 +236,45 @@ class DownloadImages
232 236
233 return false; 237 return false;
234 } 238 }
239
/**
 * Determine the file extension of a downloaded image and make sure it is an allowed one.
 *
 * The extension is first guessed from the response "content-type" header.
 * When that guess is empty, the first bytes of the response body are compared
 * against known JPEG, GIF and PNG signatures instead.
 *
 * @param Response $res       Guzzle Response
 * @param string   $imagePath Path of the image as found in the content (only used in the error log)
 *
 * @return string|false The extension (one of jpeg, jpg, gif, png) or false when it is not allowed
 */
private function getExtensionFromResponse(Response $res, $imagePath)
{
    $contentType = $res->getHeader('content-type');
    $extension = $this->mimeGuesser->guess($contentType);
    $this->logger->debug('DownloadImages: Checking extension', ['ext' => $extension, 'header' => $contentType]);

    // the header gave us nothing usable: fall back to sniffing the leading bytes of the body
    if (empty($extension)) {
        $signatures = [
            'jpeg' => "\xFF\xD8\xFF",
            'gif' => 'GIF',
            'png' => "\x89\x50\x4e\x47\x0d\x0a",
        ];
        $leadingBytes = substr((string) $res->getBody(), 0, 8);

        foreach ($signatures as $candidate => $signature) {
            // the signature must sit at the very beginning of the body
            if (0 !== strpos($leadingBytes, $signature)) {
                continue;
            }

            $extension = $candidate;
            break;
        }

        $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $extension]);
    }

    // strict comparison, so a null or false guess never matches the whitelist
    if (in_array($extension, ['jpeg', 'jpg', 'gif', 'png'], true)) {
        return $extension;
    }

    $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);

    return false;
}
235} 280}