diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 5 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 233 |
2 files changed, 236 insertions, 2 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 8019df42..1986ab33 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -3,7 +3,7 @@ | |||
3 | namespace Wallabag\CoreBundle\Helper; | 3 | namespace Wallabag\CoreBundle\Helper; |
4 | 4 | ||
5 | use Graby\Graby; | 5 | use Graby\Graby; |
6 | use Psr\Log\LoggerInterface as Logger; | 6 | use Psr\Log\LoggerInterface; |
7 | use Wallabag\CoreBundle\Entity\Entry; | 7 | use Wallabag\CoreBundle\Entity\Entry; |
8 | use Wallabag\CoreBundle\Entity\Tag; | 8 | use Wallabag\CoreBundle\Entity\Tag; |
9 | use Wallabag\CoreBundle\Tools\Utils; | 9 | use Wallabag\CoreBundle\Tools\Utils; |
@@ -20,7 +20,7 @@ class ContentProxy | |||
20 | protected $logger; | 20 | protected $logger; |
21 | protected $tagRepository; | 21 | protected $tagRepository; |
22 | 22 | ||
23 | public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, Logger $logger) | 23 | public function __construct(Graby $graby, RuleBasedTagger $tagger, TagRepository $tagRepository, LoggerInterface $logger) |
24 | { | 24 | { |
25 | $this->graby = $graby; | 25 | $this->graby = $graby; |
26 | $this->tagger = $tagger; | 26 | $this->tagger = $tagger; |
@@ -66,6 +66,7 @@ class ContentProxy | |||
66 | $entry->setUrl($content['url'] ?: $url); | 66 | $entry->setUrl($content['url'] ?: $url); |
67 | $entry->setTitle($title); | 67 | $entry->setTitle($title); |
68 | $entry->setContent($html); | 68 | $entry->setContent($html); |
69 | |||
69 | $entry->setLanguage($content['language']); | 70 | $entry->setLanguage($content['language']); |
70 | $entry->setMimetype($content['content_type']); | 71 | $entry->setMimetype($content['content_type']); |
71 | $entry->setReadingTime(Utils::getReadingTime($html)); | 72 | $entry->setReadingTime(Utils::getReadingTime($html)); |
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php new file mode 100644 index 00000000..c5298236 --- /dev/null +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -0,0 +1,233 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Wallabag\CoreBundle\Helper; | ||
4 | |||
5 | use Psr\Log\LoggerInterface; | ||
6 | use Symfony\Component\DomCrawler\Crawler; | ||
7 | use GuzzleHttp\Client; | ||
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | ||
9 | use Symfony\Component\Finder\Finder; | ||
10 | |||
/**
 * Download the images referenced by an entry, re-encode them locally and
 * rewrite the entry HTML so that it points to the local copies.
 *
 * Images are stored under "<baseFolder>/<h0>/<h1>/<hash>/" where <hash> is the
 * crc32 hash of the entry id and <h0>/<h1> are its first two characters.
 */
class DownloadImages
{
    // Quality (0-100) used when re-encoding JPEG; scaled to 0-9 for the PNG compression level.
    const REGENERATE_PICTURES_QUALITY = 80;

    private $client;
    private $baseFolder;
    private $logger;
    private $mimeGuesser;
    private $wallabagUrl;

    /**
     * @param Client          $client      HTTP client used to fetch the remote images
     * @param string          $baseFolder  Local folder under which all images are stored
     * @param string          $wallabagUrl Public base url of the instance (a trailing "/" is stripped)
     * @param LoggerInterface $logger
     */
    public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger)
    {
        $this->client = $client;
        $this->baseFolder = $baseFolder;
        $this->wallabagUrl = rtrim($wallabagUrl, '/');
        $this->logger = $logger;
        $this->mimeGuesser = new MimeTypeExtensionGuesser();

        $this->setFolder();
    }

    /**
     * Setup base folder where all images are going to be saved.
     */
    private function setFolder()
    {
        // create the base folder (recursively) the first time the helper is used
        if (!file_exists($this->baseFolder)) {
            mkdir($this->baseFolder, 0777, true);
        }
    }

    /**
     * Process the html and extract images from it, save them locally and return the updated html.
     *
     * Images that can not be downloaded or re-encoded are left untouched in the html.
     *
     * @param int    $entryId ID of the entry
     * @param string $html
     * @param string $url     Used as a base path for relative image and folder
     *
     * @return string
     */
    public function processHtml($entryId, $html, $url)
    {
        $crawler = new Crawler($html);
        $sources = $crawler
            ->filterXpath('//img')
            ->extract(['src']);

        // the same image may be used several times: download it only once
        $sources = array_unique($sources);

        // handle the longest sources first so that a short src which is a substring
        // of a longer one can not corrupt the longer one before it gets replaced
        usort($sources, function ($a, $b) {
            return strlen($b) - strlen($a);
        });

        $relativePath = $this->getRelativePath($entryId);

        // download and save the image to the folder
        foreach ($sources as $image) {
            // an empty src has nothing to download and nothing to replace
            if ('' === (string) $image) {
                continue;
            }

            $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);

            if (false === $imagePath) {
                continue;
            }

            $html = str_replace($image, $imagePath, $html);

            // the crawler returns the attribute with entities decoded, while the raw html
            // usually contains "&amp;": replace that form too, otherwise the image would
            // have been downloaded but the html would still point to the remote one
            if (false !== strpos($image, '&')) {
                $html = str_replace(str_replace('&', '&amp;', $image), $imagePath, $html);
            }
        }

        return $html;
    }

    /**
     * Process a single image:
     *     - retrieve it
     *     - re-save it (for security reasons)
     *     - return the new local path.
     *
     * @param int    $entryId      ID of the entry
     * @param string $imagePath    Path to the image to retrieve
     * @param string $url          Url from where the image was found
     * @param string $relativePath Relative local path where the image is saved (computed from $entryId when null)
     *
     * @return string|false Url to access the image from the web, or false when the image could not be processed
     */
    public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
    {
        if (null === $relativePath) {
            $relativePath = $this->getRelativePath($entryId);
        }

        $this->logger->debug('DownloadImages: working on image: '.$imagePath);

        $folderPath = $this->baseFolder.'/'.$relativePath;

        // build image path
        $absolutePath = $this->getAbsoluteLink($url, $imagePath);
        if (false === $absolutePath) {
            $this->logger->error('DownloadImages: Can not determine the absolute path for that image, skipping.');

            return false;
        }

        try {
            $res = $this->client->get($absolutePath);
        } catch (\Exception $e) {
            $this->logger->error('DownloadImages: Can not retrieve image, skipping.', ['exception' => $e]);

            return false;
        }

        $header = $res->getHeader('content-type');
        $ext = $this->mimeGuesser->guess($this->normalizeContentType($header));
        $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $header]);
        if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
            $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping '.$imagePath);

            return false;
        }
        $hashImage = hash('crc32', $absolutePath);
        $localPath = $folderPath.'/'.$hashImage.'.'.$ext;

        try {
            // the body may be a stream object: GD needs the raw bytes
            $im = imagecreatefromstring((string) $res->getBody());
        } catch (\Exception $e) {
            $im = false;
        }

        if (false === $im) {
            $this->logger->error('DownloadImages: Error while regenerating image', ['path' => $localPath]);

            return false;
        }

        try {
            $written = $this->writeImage($im, $ext, $localPath);
        } catch (\Exception $e) {
            $written = false;
        }

        imagedestroy($im);

        // never return an url pointing to a file which was not written
        if (true !== $written) {
            $this->logger->error('DownloadImages: Error while saving regenerated image', ['path' => $localPath]);

            return false;
        }

        return $this->wallabagUrl.'/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext;
    }

    /**
     * Remove all images for the given entry id.
     *
     * Removal is best-effort: files or folders which can not be removed are silently ignored.
     *
     * @param int $entryId ID of the entry
     */
    public function removeImages($entryId)
    {
        // getRelativePath() creates the folder when it is missing, so the Finder always gets an existing folder
        $relativePath = $this->getRelativePath($entryId);
        $folderPath = $this->baseFolder.'/'.$relativePath;

        $finder = new Finder();
        $finder
            ->files()
            ->ignoreDotFiles(true)
            ->in($folderPath);

        foreach ($finder as $file) {
            @unlink($file->getRealPath());
        }

        @rmdir($folderPath);
    }

    /**
     * Generate the folder where we are going to save images based on the entry id,
     * and create it when it does not exist yet.
     *
     * @param int $entryId ID of the entry
     *
     * @return string Path relative to the base folder
     */
    private function getRelativePath($entryId)
    {
        $hashId = hash('crc32', (string) $entryId);
        // the two leading characters are used as intermediate folders
        $relativePath = $hashId[0].'/'.$hashId[1].'/'.$hashId;
        $folderPath = $this->baseFolder.'/'.$relativePath;

        if (!file_exists($folderPath)) {
            mkdir($folderPath, 0777, true);
        }

        $this->logger->debug('DownloadImages: Folder used for that Entry id', ['folder' => $folderPath, 'entryId' => $entryId]);

        return $relativePath;
    }

    /**
     * Make an $url absolute based on the $base.
     *
     * @see Graby->makeAbsoluteStr
     *
     * @param string $base Base url
     * @param string $url  Url to make it absolute
     *
     * @return false|string
     */
    private function getAbsoluteLink($base, $url)
    {
        if (preg_match('!^https?://!i', $url)) {
            // already absolute
            return $url;
        }

        // keep $base untouched so the original string can be logged on failure
        $baseIri = new \SimplePie_IRI($base);

        // remove '//' in URL path (causes URLs not to resolve properly)
        if (isset($baseIri->ipath)) {
            $baseIri->ipath = preg_replace('!//+!', '/', $baseIri->ipath);
        }

        if ($absolute = \SimplePie_IRI::absolutize($baseIri, $url)) {
            return $absolute->get_uri();
        }

        $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);

        return false;
    }

    /**
     * Reduce a raw content-type header to a bare lower-case mime type.
     *
     * Accepts a string or an array of values (only the first value is used) and
     * drops any parameter such as "; charset=binary".
     *
     * @param string|array $header Value returned by the response for the content-type header
     *
     * @return string Mime type, or an empty string when the header is missing
     */
    private function normalizeContentType($header)
    {
        if (is_array($header)) {
            // reset() returns false on an empty array, which is cast to ''
            $header = reset($header);
        }

        $parts = explode(';', (string) $header);

        return strtolower(trim($parts[0]));
    }

    /**
     * Write a GD image to disk using the encoder matching the given extension.
     *
     * @param resource $im        GD image
     * @param string   $ext       One of: gif, jpeg, jpg, png
     * @param string   $localPath Destination file
     *
     * @return bool True when the file has been written
     */
    private function writeImage($im, $ext, $localPath)
    {
        switch ($ext) {
            case 'gif':
                // NOTE(review): imagegif() writes a single image, so an animated gif
                // presumably loses its animation here - confirm whether that is acceptable
                $this->logger->debug('DownloadImages: Re-creating gif');

                return imagegif($im, $localPath);
            case 'jpeg':
            case 'jpg':
                $this->logger->debug('DownloadImages: Re-creating jpg');

                return imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
            case 'png':
                $this->logger->debug('DownloadImages: Re-creating png');

                // keep the alpha channel, otherwise transparent png are saved without transparency
                imagealphablending($im, false);
                imagesavealpha($im, true);

                return imagepng($im, $localPath, (int) ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9));
        }

        return false;
    }
}