aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper/DownloadImages.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php97
1 files changed, 71 insertions, 26 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 0d330d2a..252ba57c 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,11 +2,12 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Client;
6use GuzzleHttp\Message\Response;
5use Psr\Log\LoggerInterface; 7use Psr\Log\LoggerInterface;
6use Symfony\Component\DomCrawler\Crawler; 8use Symfony\Component\DomCrawler\Crawler;
7use GuzzleHttp\Client;
8use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
9use Symfony\Component\Finder\Finder; 9use Symfony\Component\Finder\Finder;
10use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
10 11
11class DownloadImages 12class DownloadImages
12{ 13{
@@ -30,17 +31,6 @@ class DownloadImages
30 } 31 }
31 32
32 /** 33 /**
33 * Setup base folder where all images are going to be saved.
34 */
private function setFolder()
{
    // Nothing to do when something already exists at the base folder path.
    if (file_exists($this->baseFolder)) {
        return;
    }

    // Otherwise create the base folder (and any missing parents) with mode 0755.
    mkdir($this->baseFolder, 0755, true);
}
42
43 /**
44 * Process the html and extract image from it, save them to local and return the updated html. 34 * Process the html and extract image from it, save them to local and return the updated html.
45 * 35 *
46 * @param int $entryId ID of the entry 36 * @param int $entryId ID of the entry
@@ -54,7 +44,7 @@ class DownloadImages
54 $crawler = new Crawler($html); 44 $crawler = new Crawler($html);
55 $result = $crawler 45 $result = $crawler
56 ->filterXpath('//img') 46 ->filterXpath('//img')
57 ->extract(array('src')); 47 ->extract(['src']);
58 48
59 $relativePath = $this->getRelativePath($entryId); 49 $relativePath = $this->getRelativePath($entryId);
60 50
@@ -66,6 +56,11 @@ class DownloadImages
66 continue; 56 continue;
67 } 57 }
68 58
59 // if image contains "&" and we can't find it in the html it might be because it's encoded as &amp;
60 if (false !== stripos($image, '&') && false === stripos($html, $image)) {
61 $image = str_replace('&', '&amp;', $image);
62 }
63
69 $html = str_replace($image, $imagePath, $html); 64 $html = str_replace($image, $imagePath, $html);
70 } 65 }
71 66
@@ -91,9 +86,9 @@ class DownloadImages
91 $relativePath = $this->getRelativePath($entryId); 86 $relativePath = $this->getRelativePath($entryId);
92 } 87 }
93 88
94 $this->logger->debug('DownloadImages: working on image: '.$imagePath); 89 $this->logger->debug('DownloadImages: working on image: ' . $imagePath);
95 90
96 $folderPath = $this->baseFolder.'/'.$relativePath; 91 $folderPath = $this->baseFolder . '/' . $relativePath;
97 92
98 // build image path 93 // build image path
99 $absolutePath = $this->getAbsoluteLink($url, $imagePath); 94 $absolutePath = $this->getAbsoluteLink($url, $imagePath);
@@ -111,15 +106,13 @@ class DownloadImages
111 return false; 106 return false;
112 } 107 }
113 108
114 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 109 $ext = $this->getExtensionFromResponse($res, $imagePath);
115 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 110 if (false === $res) {
116 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
117 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath);
118
119 return false; 111 return false;
120 } 112 }
113
121 $hashImage = hash('crc32', $absolutePath); 114 $hashImage = hash('crc32', $absolutePath);
122 $localPath = $folderPath.'/'.$hashImage.'.'.$ext; 115 $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
123 116
124 try { 117 try {
125 $im = imagecreatefromstring($res->getBody()); 118 $im = imagecreatefromstring($res->getBody());
@@ -152,7 +145,7 @@ class DownloadImages
152 145
153 imagedestroy($im); 146 imagedestroy($im);
154 147
155 return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext; 148 return $this->wallabagUrl . '/assets/images/' . $relativePath . '/' . $hashImage . '.' . $ext;
156 } 149 }
157 150
158 /** 151 /**
@@ -163,7 +156,7 @@ class DownloadImages
163 public function removeImages($entryId) 156 public function removeImages($entryId)
164 { 157 {
165 $relativePath = $this->getRelativePath($entryId); 158 $relativePath = $this->getRelativePath($entryId);
166 $folderPath = $this->baseFolder.'/'.$relativePath; 159 $folderPath = $this->baseFolder . '/' . $relativePath;
167 160
168 $finder = new Finder(); 161 $finder = new Finder();
169 $finder 162 $finder
@@ -179,6 +172,17 @@ class DownloadImages
179 } 172 }
180 173
181 /** 174 /**
175 * Setup base folder where all images are going to be saved.
176 */
private function setFolder()
{
    // Nothing to do when something already exists at the base folder path.
    if (file_exists($this->baseFolder)) {
        return;
    }

    // Otherwise create the base folder (and any missing parents) with mode 0755.
    mkdir($this->baseFolder, 0755, true);
}
184
185 /**
182 * Generate the folder where we are going to save images based on the entry url. 186 * Generate the folder where we are going to save images based on the entry url.
183 * 187 *
184 * @param int $entryId ID of the entry 188 * @param int $entryId ID of the entry
@@ -188,8 +192,8 @@ class DownloadImages
188 private function getRelativePath($entryId) 192 private function getRelativePath($entryId)
189 { 193 {
190 $hashId = hash('crc32', $entryId); 194 $hashId = hash('crc32', $entryId);
191 $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId; 195 $relativePath = $hashId[0] . '/' . $hashId[1] . '/' . $hashId;
192 $folderPath = $this->baseFolder.'/'.$relativePath; 196 $folderPath = $this->baseFolder . '/' . $relativePath;
193 197
194 if (!file_exists($folderPath)) { 198 if (!file_exists($folderPath)) {
195 mkdir($folderPath, 0777, true); 199 mkdir($folderPath, 0777, true);
@@ -232,4 +236,45 @@ class DownloadImages
232 236
233 return false; 237 return false;
234 } 238 }
239
/**
 * Retrieve and validate the extension from the response of the url of the image.
 *
 * The extension is first guessed from the "content-type" header. When that guess
 * is not one of the allowed image extensions (missing header, unknown type, or a
 * type that does not map to an allowed image extension), the first bytes of the
 * body are compared against known JPEG / GIF / PNG signatures instead.
 *
 * @param Response $res       Guzzle Response
 * @param string   $imagePath Path from the src image from the content (used for log only)
 *
 * @return string|false Extension name or false if validation failed
 */
private function getExtensionFromResponse(Response $res, $imagePath)
{
    $allowedExtensions = ['jpeg', 'jpg', 'gif', 'png'];
    $contentType = $res->getHeader('content-type');

    $ext = $this->mimeGuesser->guess($contentType);
    $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $contentType]);

    // The header did not yield an allowed extension: do not trust it blindly,
    // look at the leading bytes of the payload before giving up.
    if (!in_array($ext, $allowedExtensions, true)) {
        $signatures = [
            'jpeg' => "\xFF\xD8\xFF",
            'gif' => 'GIF',
            'png' => "\x89\x50\x4e\x47\x0d\x0a",
        ];
        // 8 bytes are enough to cover the longest signature above
        $bytes = substr((string) $res->getBody(), 0, 8);

        foreach ($signatures as $type => $signature) {
            if (0 === strpos($bytes, $signature)) {
                $ext = $type;
                break;
            }
        }

        $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
    }

    // Neither the header nor the signature produced an allowed extension.
    if (!in_array($ext, $allowedExtensions, true)) {
        $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath);

        return false;
    }

    return $ext;
}
235} 280}