diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 57 |
1 files changed, 51 insertions, 6 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 0d330d2a..ed888cdb 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper; | |||
5 | use Psr\Log\LoggerInterface; | 5 | use Psr\Log\LoggerInterface; |
6 | use Symfony\Component\DomCrawler\Crawler; | 6 | use Symfony\Component\DomCrawler\Crawler; |
7 | use GuzzleHttp\Client; | 7 | use GuzzleHttp\Client; |
8 | use GuzzleHttp\Message\Response; | ||
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
9 | use Symfony\Component\Finder\Finder; | 10 | use Symfony\Component\Finder\Finder; |
10 | 11 | ||
@@ -54,7 +55,7 @@ class DownloadImages | |||
54 | $crawler = new Crawler($html); | 55 | $crawler = new Crawler($html); |
55 | $result = $crawler | 56 | $result = $crawler |
56 | ->filterXpath('//img') | 57 | ->filterXpath('//img') |
57 | ->extract(array('src')); | 58 | ->extract(['src']); |
58 | 59 | ||
59 | $relativePath = $this->getRelativePath($entryId); | 60 | $relativePath = $this->getRelativePath($entryId); |
60 | 61 | ||
@@ -66,6 +67,11 @@ class DownloadImages | |||
66 | continue; | 67 | continue; |
67 | } | 68 | } |
68 | 69 | ||
70 | // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp; | ||
71 | if (false !== stripos($image, '&') && false === stripos($html, $image)) { | ||
72 | $image = str_replace('&', '&amp;', $image); | ||
73 | } | ||
74 | |||
69 | $html = str_replace($image, $imagePath, $html); | 75 | $html = str_replace($image, $imagePath, $html); |
70 | } | 76 | } |
71 | 77 | ||
@@ -111,13 +117,11 @@ class DownloadImages | |||
111 | return false; | 117 | return false; |
112 | } | 118 | } |
113 | 119 | ||
114 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 120 | $ext = $this->getExtensionFromResponse($res, $imagePath); |
115 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 121 | if (false === $res) { |
116 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
117 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath); | ||
118 | |||
119 | return false; | 122 | return false; |
120 | } | 123 | } |
124 | |||
121 | $hashImage = hash('crc32', $absolutePath); | 125 | $hashImage = hash('crc32', $absolutePath); |
122 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; | 126 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; |
123 | 127 | ||
@@ -232,4 +236,45 @@ class DownloadImages | |||
232 | 236 | ||
233 | return false; | 237 | return false; |
234 | } | 238 | } |
239 | |||
240 | /** | ||
241 | * Retrieve and validate the extension from the response of the url of the image. | ||
242 | * | ||
243 | * @param Response $res Guzzle Response | ||
244 | * @param string $imagePath Path from the src image from the content (used for log only) | ||
245 | * | ||
246 | * @return string|false Extension name or false if validation failed | ||
247 | */ | ||
248 | private function getExtensionFromResponse(Response $res, $imagePath) | ||
249 | { | ||
250 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | ||
251 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | ||
252 | |||
253 | // ok header doesn't have the extension, try a different way | ||
254 | if (empty($ext)) { | ||
255 | $types = [ | ||
256 | 'jpeg' => "\xFF\xD8\xFF", | ||
257 | 'gif' => 'GIF', | ||
258 | 'png' => "\x89\x50\x4e\x47\x0d\x0a", | ||
259 | ]; | ||
260 | $bytes = substr((string) $res->getBody(), 0, 8); | ||
261 | |||
262 | foreach ($types as $type => $header) { | ||
263 | if (0 === strpos($bytes, $header)) { | ||
264 | $ext = $type; | ||
265 | break; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); | ||
270 | } | ||
271 | |||
272 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
273 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); | ||
274 | |||
275 | return false; | ||
276 | } | ||
277 | |||
278 | return $ext; | ||
279 | } | ||
235 | } | 280 | } |