]>
Commit | Line | Data |
---|---|---|
419214d7 TC |
1 | <?php |
2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | |
4 | ||
7f559418 | 5 | use Psr\Log\LoggerInterface; |
419214d7 | 6 | use Symfony\Component\DomCrawler\Crawler; |
7f559418 JB |
7 | use GuzzleHttp\Client; |
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | |
419214d7 | 9 | |
/**
 * Download the images referenced by a piece of HTML, re-encode them on the
 * local disk and rewrite the HTML so that it points at the local copies.
 */
class DownloadImages
{
    /**
     * Quality used when re-encoding pictures: passed as-is to imagejpeg()
     * and scaled down to the 0-9 range for the imagepng() compression level.
     */
    const REGENERATE_PICTURES_QUALITY = 80;

    private $client;
    private $baseFolder;
    private $logger;
    private $mimeGuesser;

    /**
     * @param Client          $client     HTTP client used to retrieve the images
     * @param string          $baseFolder Folder under which every image is stored
     * @param LoggerInterface $logger
     */
    public function __construct(Client $client, $baseFolder, LoggerInterface $logger)
    {
        $this->client = $client;
        $this->baseFolder = $baseFolder;
        $this->logger = $logger;
        $this->mimeGuesser = new MimeTypeExtensionGuesser();

        $this->setFolder();
    }

    /**
     * Setup base folder where all images are going to be saved.
     */
    private function setFolder()
    {
        $this->ensureFolder($this->baseFolder);
    }

    /**
     * Process the html and extract image from it, save them to local and return the updated html.
     *
     * Images which can not be retrieved or re-encoded are left untouched in the html.
     *
     * @param string $html
     * @param string $url  Used as a base path for relative image and folder
     *
     * @return string
     */
    public function processHtml($html, $url)
    {
        $crawler = new Crawler($html);
        $sources = $crawler
            ->filterXPath('//img')
            ->extract(['src']);

        $relativePath = $this->getRelativePath($url);

        // a picture used several times only needs to be downloaded once:
        // the replacement below already rewrites every occurrence
        foreach (array_unique($sources) as $image) {
            // an empty src would resolve to the page itself, nothing to download
            if ('' === trim($image)) {
                continue;
            }

            $imagePath = $this->processSingleImage($image, $url, $relativePath);

            if (false === $imagePath) {
                continue;
            }

            // the crawler hands back decoded attribute values, while the raw
            // html may still contain the "&amp;" form of the very same url
            $search = $image;
            if (false !== strpos($search, '&') && false === strpos($html, $search)) {
                $search = str_replace('&', '&amp;', $search);
            }

            $html = str_replace($search, $imagePath, $html);
        }

        return $html;
    }

    /**
     * Process a single image:
     *     - retrieve it
     *     - re-saved it (for security reason)
     *     - return the new local path.
     *
     * @param string      $imagePath    Path to the image to retrieve
     * @param string      $url          Url from where the image were found
     * @param string|null $relativePath Relative local path to saved the image,
     *                                  computed from $url when null or empty
     *
     * @return string|false Relative url to access the image from the web,
     *                      false when the image could not be retrieved, is not
     *                      an allowed type or could not be written
     */
    public function processSingleImage($imagePath, $url, $relativePath = null)
    {
        if (null === $relativePath || '' === $relativePath) {
            $relativePath = $this->getRelativePath($url);
        }

        $folderPath = $this->baseFolder.'/'.$relativePath;

        // build image path
        $absolutePath = $this->getAbsoluteLink($url, $imagePath);
        if (false === $absolutePath) {
            $this->logger->log('debug', 'Can not determine the absolute path for that image, skipping.');

            return false;
        }

        // "exceptions => false" only disables exceptions for HTTP error codes;
        // transport failures still throw and must not break the whole entry
        try {
            $res = $this->client->get(
                $absolutePath,
                ['exceptions' => false]
            );
        } catch (\Exception $e) {
            $this->logger->log('error', 'Can not retrieve image, skipping.', ['path' => $absolutePath, 'exception' => $e->getMessage()]);

            return false;
        }

        // strip optional parameters ("image/png; charset=binary") and normalise
        // the case so the header matches the keys known by the guesser
        $contentType = $res->getHeader('content-type');
        $mimeType = strtolower(trim(explode(';', $contentType)[0]));

        $ext = $this->mimeGuesser->guess($mimeType);
        $this->logger->log('debug', 'Checking extension', ['ext' => $ext, 'header' => $contentType]);
        if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
            $this->logger->log('debug', 'Processed image with not allowed extension. Skipping '.$imagePath);

            return false;
        }

        $hashImage = hash('crc32', $absolutePath);
        $localPath = $folderPath.'/'.$hashImage.'.'.$ext;

        // the catch is kept for setups where an error handler converts the
        // warning raised on invalid image data into an exception
        try {
            $im = imagecreatefromstring((string) $res->getBody());
        } catch (\Exception $e) {
            $im = false;
        }

        if (false === $im) {
            $this->logger->log('error', 'Error while regenerating image', ['path' => $localPath]);

            return false;
        }

        // the folder is only guaranteed to exist when the relative path was computed here
        $this->ensureFolder($folderPath);

        $saved = $this->saveImage($im, $ext, $localPath);

        imagedestroy($im);

        if (false === $saved) {
            $this->logger->log('error', 'Error while saving regenerated image', ['path' => $localPath]);

            return false;
        }

        return '/assets/images/'.$relativePath.'/'.$hashImage.'.'.$ext;
    }

    /**
     * Write the given GD image to disk using the encoder matching the extension.
     *
     * @param resource $im        GD image as returned by imagecreatefromstring()
     * @param string   $ext       One of: gif, jpeg, jpg, png
     * @param string   $localPath Absolute path of the file to write
     *
     * @return bool Whether the file has been written
     */
    private function saveImage($im, $ext, $localPath)
    {
        $result = false;

        switch ($ext) {
            case 'gif':
                $result = imagegif($im, $localPath);
                $this->logger->log('debug', 'Re-creating gif');
                break;
            case 'jpeg':
            case 'jpg':
                $result = imagejpeg($im, $localPath, self::REGENERATE_PICTURES_QUALITY);
                $this->logger->log('debug', 'Re-creating jpg');
                break;
            case 'png':
                // without these two calls the alpha channel is dropped on save
                imagealphablending($im, false);
                imagesavealpha($im, true);
                $result = imagepng($im, $localPath, (int) ceil(self::REGENERATE_PICTURES_QUALITY / 100 * 9));
                $this->logger->log('debug', 'Re-creating png');
                break;
        }

        return $result;
    }

    /**
     * Create the given folder (recursively) when nothing exists at that path yet.
     *
     * @param string $path
     */
    private function ensureFolder($path)
    {
        if (file_exists($path)) {
            return;
        }

        // the trailing is_dir() covers a concurrent process creating the folder first
        if (!mkdir($path, 0777, true) && !is_dir($path)) {
            $this->logger->log('error', 'Unable to create folder to store images', ['folder' => $path]);
        }
    }

    /**
     * Generate the folder where we are going to save images based on the entry url.
     *
     * The path is built from the crc32 hash of the url: the first two characters
     * of the hash are used as nested parent folders, followed by the full hash.
     *
     * @param string $url
     *
     * @return string Path relative to the base folder
     */
    private function getRelativePath($url)
    {
        $hashUrl = hash('crc32', $url);
        $relativePath = $hashUrl[0].'/'.$hashUrl[1].'/'.$hashUrl;
        $folderPath = $this->baseFolder.'/'.$relativePath;

        $this->ensureFolder($folderPath);

        $this->logger->log('debug', 'Folder used for that url', ['folder' => $folderPath, 'url' => $url]);

        return $relativePath;
    }

    /**
     * Make an $url absolute based on the $base.
     *
     * @see Graby->makeAbsoluteStr
     *
     * @param string $base Base url
     * @param string $url  Url to make it absolute
     *
     * @return false|string
     */
    private function getAbsoluteLink($base, $url)
    {
        if (preg_match('!^https?://!i', $url)) {
            // already absolute
            return $url;
        }

        $base = new \SimplePie_IRI($base);

        // remove '//' in URL path (causes URLs not to resolve properly)
        if (isset($base->ipath)) {
            $base->ipath = preg_replace('!//+!', '/', $base->ipath);
        }

        $absolute = \SimplePie_IRI::absolutize($base, $url);
        if (!$absolute) {
            return false;
        }

        return $absolute->get_uri();
    }
}