diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 84 |
1 files changed, 51 insertions, 33 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index e7982c56..c5298236 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -6,6 +6,7 @@ use Psr\Log\LoggerInterface; | |||
6 | use Symfony\Component\DomCrawler\Crawler; | 6 | use Symfony\Component\DomCrawler\Crawler; |
7 | use GuzzleHttp\Client; | 7 | use GuzzleHttp\Client; |
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
9 | use Symfony\Component\Finder\Finder; | ||
9 | 10 | ||
10 | class DownloadImages | 11 | class DownloadImages |
11 | { | 12 | { |
@@ -17,10 +18,11 @@ class DownloadImages | |||
17 | private $mimeGuesser; | 18 | private $mimeGuesser; |
18 | private $wallabagUrl; | 19 | private $wallabagUrl; |
19 | 20 | ||
20 | public function __construct(Client $client, $baseFolder, LoggerInterface $logger) | 21 | public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) |
21 | { | 22 | { |
22 | $this->client = $client; | 23 | $this->client = $client; |
23 | $this->baseFolder = $baseFolder; | 24 | $this->baseFolder = $baseFolder; |
25 | $this->wallabagUrl = rtrim($wallabagUrl, '/'); | ||
24 | $this->logger = $logger; | 26 | $this->logger = $logger; |
25 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); | 27 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); |
26 | 28 | ||
@@ -28,17 +30,6 @@ class DownloadImages | |||
28 | } | 30 | } |
29 | 31 | ||
30 | /** | 32 | /** |
31 | * Since we can't inject CraueConfig service because it'll generate a circular reference when injected in the subscriber | ||
32 | * we use a different way to inject the current wallabag url. | ||
33 | * | ||
34 | * @param string $url Usually from `$config->get('wallabag_url')` | ||
35 | */ | ||
36 | public function setWallabagUrl($url) | ||
37 | { | ||
38 | $this->wallabagUrl = rtrim($url, '/'); | ||
39 | } | ||
40 | |||
41 | /** | ||
42 | * Setup base folder where all images are going to be saved. | 33 | * Setup base folder where all images are going to be saved. |
43 | */ | 34 | */ |
44 | private function setFolder() | 35 | private function setFolder() |
@@ -52,23 +43,24 @@ class DownloadImages | |||
52 | /** | 43 | /** |
53 | * Process the html and extract image from it, save them to local and return the updated html. | 44 | * Process the html and extract image from it, save them to local and return the updated html. |
54 | * | 45 | * |
46 | * @param int $entryId ID of the entry | ||
55 | * @param string $html | 47 | * @param string $html |
56 | * @param string $url Used as a base path for relative image and folder | 48 | * @param string $url Used as a base path for relative image and folder |
57 | * | 49 | * |
58 | * @return string | 50 | * @return string |
59 | */ | 51 | */ |
60 | public function processHtml($html, $url) | 52 | public function processHtml($entryId, $html, $url) |
61 | { | 53 | { |
62 | $crawler = new Crawler($html); | 54 | $crawler = new Crawler($html); |
63 | $result = $crawler | 55 | $result = $crawler |
64 | ->filterXpath('//img') | 56 | ->filterXpath('//img') |
65 | ->extract(array('src')); | 57 | ->extract(array('src')); |
66 | 58 | ||
67 | $relativePath = $this->getRelativePath($url); | 59 | $relativePath = $this->getRelativePath($entryId); |
68 | 60 | ||
69 | // download and save the image to the folder | 61 | // download and save the image to the folder |
70 | foreach ($result as $image) { | 62 | foreach ($result as $image) { |
71 | $imagePath = $this->processSingleImage($image, $url, $relativePath); | 63 | $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); |
72 | 64 | ||
73 | if (false === $imagePath) { | 65 | if (false === $imagePath) { |
74 | continue; | 66 | continue; |
@@ -86,24 +78,27 @@ class DownloadImages | |||
86 | * - re-saved it (for security reason) | 78 | * - re-saved it (for security reason) |
87 | * - return the new local path. | 79 | * - return the new local path. |
88 | * | 80 | * |
81 | * @param int $entryId ID of the entry | ||
89 | * @param string $imagePath Path to the image to retrieve | 82 | * @param string $imagePath Path to the image to retrieve |
90 | * @param string $url Url from where the image were found | 83 | * @param string $url Url from where the image were found |
91 | * @param string $relativePath Relative local path to saved the image | 84 | * @param string $relativePath Relative local path to saved the image |
92 | * | 85 | * |
93 | * @return string Relative url to access the image from the web | 86 | * @return string Relative url to access the image from the web |
94 | */ | 87 | */ |
95 | public function processSingleImage($imagePath, $url, $relativePath = null) | 88 | public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) |
96 | { | 89 | { |
97 | if (null == $relativePath) { | 90 | if (null === $relativePath) { |
98 | $relativePath = $this->getRelativePath($url); | 91 | $relativePath = $this->getRelativePath($entryId); |
99 | } | 92 | } |
100 | 93 | ||
94 | $this->logger->debug('DownloadImages: working on image: '.$imagePath); | ||
95 | |||
101 | $folderPath = $this->baseFolder.'/'.$relativePath; | 96 | $folderPath = $this->baseFolder.'/'.$relativePath; |
102 | 97 | ||
103 | // build image path | 98 | // build image path |
104 | $absolutePath = $this->getAbsoluteLink($url, $imagePath); | 99 | $absolutePath = $this->getAbsoluteLink($url, $imagePath); |
105 | if (false === $absolutePath) { | 100 | if (false === $absolutePath) { |
106 | $this->logger->log('error', 'Can not determine the absolute path for that image, skipping.'); | 101 | $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.'); |
107 | 102 | ||
108 | return false; | 103 | return false; |
109 | } | 104 | } |
@@ -111,15 +106,15 @@ class DownloadImages | |||
111 | try { | 106 | try { |
112 | $res = $this->client->get($absolutePath); | 107 | $res = $this->client->get($absolutePath); |
113 | } catch (\Exception $e) { | 108 | } catch (\Exception $e) { |
114 | $this->logger->log('error', 'Can not retrieve image, skipping.', ['exception' => $e]); | 109 | $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]); |
115 | 110 | ||
116 | return false; | 111 | return false; |
117 | } | 112 | } |
118 | 113 | ||
119 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 114 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); |
120 | $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 115 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); |
121 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | 116 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { |
122 | $this->logger->log('error', 'Processed image with not allowed extension. Skipping '.$imagePath); | 117 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath); |
123 | 118 | ||
124 | return false; | 119 | return false; |
125 | } | 120 | } |
@@ -133,7 +128,7 @@ class DownloadImages | |||
133 | } | 128 | } |
134 | 129 | ||
135 | if (false === $im) { | 130 | if (false === $im) { |
136 | $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]); | 131 | $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]); |
137 | 132 | ||
138 | return false; | 133 | return false; |
139 | } | 134 | } |
@@ -141,16 +136,16 @@ class DownloadImages | |||
141 | switch ($ext) { | 136 | switch ($ext) { |
142 | case 'gif': | 137 | case 'gif': |
143 | $result = imagegif($im, $localPath); | 138 | $result = imagegif($im, $localPath); |
144 | $this->logger->log('debug', 'Re-creating gif'); | 139 | $this->logger->debug('DownloadImages: Re-creating gif'); |
145 | break; | 140 | break; |
146 | case 'jpeg': | 141 | case 'jpeg': |
147 | case 'jpg': | 142 | case 'jpg': |
148 | $result = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY); | 143 | $result = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY); |
149 | $this->logger->log('debug', 'Re-creating jpg'); | 144 | $this->logger->debug('DownloadImages: Re-creating jpg'); |
150 | break; | 145 | break; |
151 | case 'png': | 146 | case 'png': |
152 | $result = imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9)); | 147 | $result = imagepng($im, $localPath, ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9)); |
153 | $this->logger->log('debug', 'Re-creating png'); | 148 | $this->logger->debug('DownloadImages: Re-creating png'); |
154 | } | 149 | } |
155 | 150 | ||
156 | imagedestroy($im); | 151 | imagedestroy($im); |
@@ -159,23 +154,46 @@ class DownloadImages | |||
159 | } | 154 | } |
160 | 155 | ||
161 | /** | 156 | /** |
157 | * Remove all images for the given entry id. | ||
158 | * | ||
159 | * @param int $entryId ID of the entry | ||
160 | */ | ||
161 | public function removeImages($entryId) | ||
162 | { | ||
163 | $relativePath = $this->getRelativePath($entryId); | ||
164 | $folderPath = $this->baseFolder.'/'.$relativePath; | ||
165 | |||
166 | $finder = new Finder(); | ||
167 | $finder | ||
168 | ->files() | ||
169 | ->ignoreDotFiles(true) | ||
170 | ->in($folderPath); | ||
171 | |||
172 | foreach ($finder as $file) { | ||
173 | @unlink($file->getRealPath()); | ||
174 | } | ||
175 | |||
176 | @rmdir($folderPath); | ||
177 | } | ||
178 | |||
179 | /** | ||
162 | * Generate the folder where we are going to save images based on the entry url. | 180 | * Generate the folder where we are going to save images based on the entry url. |
163 | * | 181 | * |
164 | * @param string $url | 182 | * @param int $entryId ID of the entry |
165 | * | 183 | * |
166 | * @return string | 184 | * @return string |
167 | */ | 185 | */ |
168 | private function getRelativePath($url) | 186 | private function getRelativePath($entryId) |
169 | { | 187 | { |
170 | $hashUrl = hash('crc32', $url); | 188 | $hashId = hash('crc32', $entryId); |
171 | $relativePath = $hashUrl[0].'/'.$hashUrl[1].'/'.$hashUrl; | 189 | $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId; |
172 | $folderPath = $this->baseFolder.'/'.$relativePath; | 190 | $folderPath = $this->baseFolder.'/'.$relativePath; |
173 | 191 | ||
174 | if (!file_exists($folderPath)) { | 192 | if (!file_exists($folderPath)) { |
175 | mkdir($folderPath, 0777, true); | 193 | mkdir($folderPath, 0777, true); |
176 | } | 194 | } |
177 | 195 | ||
178 | $this->logger->log('debug', 'Folder used for that url', ['folder' => $folderPath, 'url' => $url]); | 196 | $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]); |
179 | 197 | ||
180 | return $relativePath; | 198 | return $relativePath; |
181 | } | 199 | } |
@@ -208,7 +226,7 @@ class DownloadImages | |||
208 | return $absolute->get_uri(); | 226 | return $absolute->get_uri(); |
209 | } | 227 | } |
210 | 228 | ||
211 | $this->logger->log('error', 'Can not make an absolute link', ['base' => $base, 'url' => $url]); | 229 | $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); |
212 | 230 | ||
213 | return false; | 231 | return false; |
214 | } | 232 | } |