]>
Commit | Line | Data |
---|---|---|
419214d7 TC |
1 | <?php |
2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | |
4 | ||
7f559418 | 5 | use Psr\Log\LoggerInterface; |
419214d7 | 6 | use Symfony\Component\DomCrawler\Crawler; |
7f559418 JB |
7 | use GuzzleHttp\Client; |
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | |
419214d7 | 9 | |
156bf627 JB |
/**
 * Download the images referenced by an html content, re-encode them locally
 * and rewrite the html so it points to the local copies.
 */
class DownloadImages
{
    const REGENERATE_PICTURES_QUALITY = 80;

    private $client;
    private $baseFolder;
    private $logger;
    private $mimeGuesser;

    /**
     * @param Client          $client     Http client used to retrieve images
     * @param string          $baseFolder Folder where all images are going to be saved
     * @param LoggerInterface $logger
     */
    public function __construct(Client $client, $baseFolder, LoggerInterface $logger)
    {
        $this->client = $client;
        $this->baseFolder = $baseFolder;
        $this->logger = $logger;
        $this->mimeGuesser = new MimeTypeExtensionGuesser();

        $this->setFolder();
    }

    /**
     * Setup base folder where all images are going to be saved.
     */
    private function setFolder()
    {
        $this->ensureFolder($this->baseFolder);
    }

    /**
     * Create a folder (and its parents) when it does not exist yet.
     *
     * @param string $folderPath
     *
     * @return bool True when the folder exists once the method returns
     */
    private function ensureFolder($folderPath)
    {
        if (is_dir($folderPath)) {
            return true;
        }

        // mkdir() warning is silenced because its result is checked right here.
        // The trailing is_dir() covers the case where the folder got created
        // by something else between the first check and the mkdir() call.
        if (@mkdir($folderPath, 0777, true) || is_dir($folderPath)) {
            return true;
        }

        $this->logger->log('error', 'Can not create folder to save images', ['folder' => $folderPath]);

        return false;
    }

    /**
     * Process the html and extract images from it, save them locally and return the updated html.
     *
     * Images which can not be processed keep their original src.
     *
     * @param string $html
     * @param string $url  Used as a base path for relative image and folder
     *
     * @return string
     */
    public function processHtml($html, $url)
    {
        $crawler = new Crawler($html);
        $result = $crawler
            ->filterXpath('//img')
            ->extract(['src']);

        // ignore empty src and do not download the same image more than once
        $images = array_unique(array_filter($result, function ($src) {
            return '' !== (string) $src;
        }));

        if (empty($images)) {
            // nothing to download, so do not create a folder for that url
            return $html;
        }

        $relativePath = $this->getRelativePath($url);

        // download and save the images to the folder
        $replacements = [];
        foreach ($images as $image) {
            $imagePath = $this->processSingleImage($image, $url, $relativePath);

            if (false === $imagePath) {
                continue;
            }

            $replacements[$image] = $imagePath;
        }

        if (empty($replacements)) {
            return $html;
        }

        // strtr tries the longest src first and never re-scans a replaced part,
        // so a src which is the prefix of an other one (a.png / a.png?v=2) can not corrupt it
        return strtr($html, $replacements);
    }

    /**
     * Process a single image:
     *     - retrieve it
     *     - re-save it (for security reasons)
     *     - return the new local path.
     *
     * @param string $imagePath    Path to the image to retrieve
     * @param string $url          Url where the image was found
     * @param string $relativePath Relative local path where the image is saved
     *
     * @return string|false Relative url to access the image from the web, false when the image was skipped
     */
    public function processSingleImage($imagePath, $url, $relativePath = null)
    {
        // loose comparison kept on purpose: an empty string is handled like a missing path
        if (null == $relativePath) {
            $relativePath = $this->getRelativePath($url);
        }

        $folderPath = $this->baseFolder.'/'.$relativePath;

        // build image path
        $absolutePath = $this->getAbsoluteLink($url, $imagePath);
        if (false === $absolutePath) {
            $this->logger->log('error', 'Can not determine the absolute path for that image, skipping.');

            return false;
        }

        try {
            $res = $this->client->get($absolutePath);
        } catch (\Exception $e) {
            $this->logger->log('error', 'Can not retrieve image, skipping.', ['exception' => $e]);

            return false;
        }

        $contentType = $res->getHeader('content-type');
        $ext = $this->mimeGuesser->guess($contentType);
        $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $contentType]);
        if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
            $this->logger->log('error', 'Processed image with not allowed extension. Skipping '.$imagePath);

            return false;
        }

        $hashImage = hash('crc32', $absolutePath);
        $localPath = $folderPath.'/'.$hashImage.'.'.$ext;

        try {
            // imagecreatefromstring expects a string, the response body is cast explicitly
            $im = imagecreatefromstring((string) $res->getBody());
        } catch (\Exception $e) {
            $im = false;
        }

        if (false === $im) {
            $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]);

            return false;
        }

        $saved = false;
        switch ($ext) {
            case 'gif':
                $saved = imagegif($im, $localPath);
                $this->logger->log('debug', 'Re-creating gif');
                break;
            case 'jpeg':
            case 'jpg':
                $saved = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
                $this->logger->log('debug', 'Re-creating jpg');
                break;
            case 'png':
                // png compression level goes from 0 to 9, the 0-100 quality is scaled to that range
                $saved = imagepng($im, $localPath, (int) ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9));
                $this->logger->log('debug', 'Re-creating png');
                break;
        }

        imagedestroy($im);

        // do not return a local url for a file which was not written
        if (false === $saved) {
            $this->logger->log('error', 'Error while saving image', ['path' => $localPath]);

            return false;
        }

        // NOTE(review): presumably "/assets/images" is the public path of the base folder - confirm
        return '/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext;
    }

    /**
     * Generate the folder where we are going to save images based on the entry url.
     *
     * The folder is created under the base folder when it does not exist yet.
     *
     * @param string $url
     *
     * @return string Path relative to the base folder
     */
    private function getRelativePath($url)
    {
        $hashUrl = hash('crc32', $url);
        $relativePath = $hashUrl[0].'/'.$hashUrl[1].'/'.$hashUrl;
        $folderPath = $this->baseFolder.'/'.$relativePath;

        $this->ensureFolder($folderPath);

        $this->logger->log('debug', 'Folder used for that url', ['folder' => $folderPath, 'url' => $url]);

        return $relativePath;
    }

    /**
     * Make an $url absolute based on the $base.
     *
     * @see Graby->makeAbsoluteStr
     *
     * @param string $base Base url
     * @param string $url  Url to make it absolute
     *
     * @return false|string
     */
    private function getAbsoluteLink($base, $url)
    {
        if (preg_match('!^https?://!i', $url)) {
            // already absolute
            return $url;
        }

        // $base is kept untouched so the error log below reports the url we received
        $baseIri = new \SimplePie_IRI($base);

        // remove '//' in URL path (causes URLs not to resolve properly)
        if (isset($baseIri->ipath)) {
            $baseIri->ipath = preg_replace('!//+!', '/', $baseIri->ipath);
        }

        $absolute = \SimplePie_IRI::absolutize($baseIri, $url);
        if ($absolute) {
            return $absolute->get_uri();
        }

        $this->logger->log('error', 'Can not make an absolute link', ['base' => $base, 'url' => $url]);

        return false;
    }
}