diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 97 |
1 files changed, 71 insertions, 26 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 0d330d2a..252ba57c 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -2,11 +2,12 @@ | |||
2 | 2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | 3 | namespace Wallabag\CoreBundle\Helper; |
4 | 4 | ||
5 | use GuzzleHttp\Client; | ||
6 | use GuzzleHttp\Message\Response; | ||
5 | use Psr\Log\LoggerInterface; | 7 | use Psr\Log\LoggerInterface; |
6 | use Symfony\Component\DomCrawler\Crawler; | 8 | use Symfony\Component\DomCrawler\Crawler; |
7 | use GuzzleHttp\Client; | ||
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | ||
9 | use Symfony\Component\Finder\Finder; | 9 | use Symfony\Component\Finder\Finder; |
10 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | ||
10 | 11 | ||
11 | class DownloadImages | 12 | class DownloadImages |
12 | { | 13 | { |
@@ -30,17 +31,6 @@ class DownloadImages | |||
30 | } | 31 | } |
31 | 32 | ||
32 | /** | 33 | /** |
33 | * Setup base folder where all images are going to be saved. | ||
34 | */ | ||
35 | private function setFolder() | ||
36 | { | ||
37 | // if folder doesn't exist, attempt to create one and store the folder name in property $folder | ||
38 | if (!file_exists($this->baseFolder)) { | ||
39 | mkdir($this->baseFolder, 0755, true); | ||
40 | } | ||
41 | } | ||
42 | |||
43 | /** | ||
44 | * Process the html and extract image from it, save them to local and return the updated html. | 34 | * Process the html and extract image from it, save them to local and return the updated html. |
45 | * | 35 | * |
46 | * @param int $entryId ID of the entry | 36 | * @param int $entryId ID of the entry |
@@ -54,7 +44,7 @@ class DownloadImages | |||
54 | $crawler = new Crawler($html); | 44 | $crawler = new Crawler($html); |
55 | $result = $crawler | 45 | $result = $crawler |
56 | ->filterXpath('//img') | 46 | ->filterXpath('//img') |
57 | ->extract(array('src')); | 47 | ->extract(['src']); |
58 | 48 | ||
59 | $relativePath = $this->getRelativePath($entryId); | 49 | $relativePath = $this->getRelativePath($entryId); |
60 | 50 | ||
@@ -66,6 +56,11 @@ class DownloadImages | |||
66 | continue; | 56 | continue; |
67 | } | 57 | } |
68 | 58 | ||
59 | // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp; | ||
60 | if (false !== stripos($image, '&') && false === stripos($html, $image)) { | ||
61 | $image = str_replace('&', '&amp;', $image); | ||
62 | } | ||
63 | |||
69 | $html = str_replace($image, $imagePath, $html); | 64 | $html = str_replace($image, $imagePath, $html); |
70 | } | 65 | } |
71 | 66 | ||
@@ -91,9 +86,9 @@ class DownloadImages | |||
91 | $relativePath = $this->getRelativePath($entryId); | 86 | $relativePath = $this->getRelativePath($entryId); |
92 | } | 87 | } |
93 | 88 | ||
94 | $this->logger->debug('DownloadImages: working on image: '.$imagePath); | 89 | $this->logger->debug('DownloadImages: working on image: ' . $imagePath); |
95 | 90 | ||
96 | $folderPath = $this->baseFolder.'/'.$relativePath; | 91 | $folderPath = $this->baseFolder . '/' . $relativePath; |
97 | 92 | ||
98 | // build image path | 93 | // build image path |
99 | $absolutePath = $this->getAbsoluteLink($url, $imagePath); | 94 | $absolutePath = $this->getAbsoluteLink($url, $imagePath); |
@@ -111,15 +106,13 @@ class DownloadImages | |||
111 | return false; | 106 | return false; |
112 | } | 107 | } |
113 | 108 | ||
114 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 109 | $ext = $this->getExtensionFromResponse($res, $imagePath); |
115 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 110 | if (false === $res) { |
116 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
117 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath); | ||
118 | |||
119 | return false; | 111 | return false; |
120 | } | 112 | } |
113 | |||
121 | $hashImage = hash('crc32', $absolutePath); | 114 | $hashImage = hash('crc32', $absolutePath); |
122 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; | 115 | $localPath = $folderPath . '/' . $hashImage . '.' . $ext; |
123 | 116 | ||
124 | try { | 117 | try { |
125 | $im = imagecreatefromstring($res->getBody()); | 118 | $im = imagecreatefromstring($res->getBody()); |
@@ -152,7 +145,7 @@ class DownloadImages | |||
152 | 145 | ||
153 | imagedestroy($im); | 146 | imagedestroy($im); |
154 | 147 | ||
155 | return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; | 148 | return $this->wallabagUrl . '/assets/images/' . $relativePath . '/' . $hashImage . '.' . $ext; |
156 | } | 149 | } |
157 | 150 | ||
158 | /** | 151 | /** |
@@ -163,7 +156,7 @@ class DownloadImages | |||
163 | public function removeImages($entryId) | 156 | public function removeImages($entryId) |
164 | { | 157 | { |
165 | $relativePath = $this->getRelativePath($entryId); | 158 | $relativePath = $this->getRelativePath($entryId); |
166 | $folderPath = $this->baseFolder.'/'.$relativePath; | 159 | $folderPath = $this->baseFolder . '/' . $relativePath; |
167 | 160 | ||
168 | $finder = new Finder(); | 161 | $finder = new Finder(); |
169 | $finder | 162 | $finder |
@@ -179,6 +172,17 @@ class DownloadImages | |||
179 | } | 172 | } |
180 | 173 | ||
181 | /** | 174 | /** |
175 | * Setup base folder where all images are going to be saved. | ||
176 | */ | ||
177 | private function setFolder() | ||
178 | { | ||
179 | // if folder doesn't exist, attempt to create one and store the folder name in property $folder | ||
180 | if (!file_exists($this->baseFolder)) { | ||
181 | mkdir($this->baseFolder, 0755, true); | ||
182 | } | ||
183 | } | ||
184 | |||
185 | /** | ||
182 | * Generate the folder where we are going to save images based on the entry url. | 186 | * Generate the folder where we are going to save images based on the entry url. |
183 | * | 187 | * |
184 | * @param int $entryId ID of the entry | 188 | * @param int $entryId ID of the entry |
@@ -188,8 +192,8 @@ class DownloadImages | |||
188 | private function getRelativePath($entryId) | 192 | private function getRelativePath($entryId) |
189 | { | 193 | { |
190 | $hashId = hash('crc32', $entryId); | 194 | $hashId = hash('crc32', $entryId); |
191 | $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId; | 195 | $relativePath = $hashId[0] . '/' . $hashId[1] . '/' . $hashId; |
192 | $folderPath = $this->baseFolder.'/'.$relativePath; | 196 | $folderPath = $this->baseFolder . '/' . $relativePath; |
193 | 197 | ||
194 | if (!file_exists($folderPath)) { | 198 | if (!file_exists($folderPath)) { |
195 | mkdir($folderPath, 0777, true); | 199 | mkdir($folderPath, 0777, true); |
@@ -232,4 +236,45 @@ class DownloadImages | |||
232 | 236 | ||
233 | return false; | 237 | return false; |
234 | } | 238 | } |
239 | |||
240 | /** | ||
241 | * Retrieve and validate the extension from the response of the url of the image. | ||
242 | * | ||
243 | * @param Response $res Guzzle Response | ||
244 | * @param string $imagePath Path from the src image from the content (used for log only) | ||
245 | * | ||
246 | * @return string|false Extension name or false if validation failed | ||
247 | */ | ||
248 | private function getExtensionFromResponse(Response $res, $imagePath) | ||
249 | { | ||
250 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | ||
251 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | ||
252 | |||
253 | // ok header doesn't have the extension, try a different way | ||
254 | if (empty($ext)) { | ||
255 | $types = [ | ||
256 | 'jpeg' => "\xFF\xD8\xFF", | ||
257 | 'gif' => 'GIF', | ||
258 | 'png' => "\x89\x50\x4e\x47\x0d\x0a", | ||
259 | ]; | ||
260 | $bytes = substr((string) $res->getBody(), 0, 8); | ||
261 | |||
262 | foreach ($types as $type => $header) { | ||
263 | if (0 === strpos($bytes, $header)) { | ||
264 | $ext = $type; | ||
265 | break; | ||
266 | } | ||
267 | } | ||
268 | |||
269 | $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); | ||
270 | } | ||
271 | |||
272 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
273 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); | ||
274 | |||
275 | return false; | ||
276 | } | ||
277 | |||
278 | return $ext; | ||
279 | } | ||
235 | } | 280 | } |