aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper/DownloadImages.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php93
1 files changed, 61 insertions, 32 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index cc3dcfce..1d98fd1a 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,8 +2,15 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Client; 5use GuzzleHttp\Psr7\Uri;
6use GuzzleHttp\Message\Response; 6use GuzzleHttp\Psr7\UriResolver;
7use Http\Client\Common\HttpMethodsClient;
8use Http\Client\Common\Plugin\ErrorPlugin;
9use Http\Client\Common\PluginClient;
10use Http\Client\HttpClient;
11use Http\Discovery\MessageFactoryDiscovery;
12use Http\Message\MessageFactory;
13use Psr\Http\Message\ResponseInterface;
7use Psr\Log\LoggerInterface; 14use Psr\Log\LoggerInterface;
8use Symfony\Component\DomCrawler\Crawler; 15use Symfony\Component\DomCrawler\Crawler;
9use Symfony\Component\Finder\Finder; 16use Symfony\Component\Finder\Finder;
@@ -19,9 +26,9 @@ class DownloadImages
19 private $mimeGuesser; 26 private $mimeGuesser;
20 private $wallabagUrl; 27 private $wallabagUrl;
21 28
22 public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) 29 public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null)
23 { 30 {
24 $this->client = $client; 31 $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find());
25 $this->baseFolder = $baseFolder; 32 $this->baseFolder = $baseFolder;
26 $this->wallabagUrl = rtrim($wallabagUrl, '/'); 33 $this->wallabagUrl = rtrim($wallabagUrl, '/');
27 $this->logger = $logger; 34 $this->logger = $logger;
@@ -31,6 +38,23 @@ class DownloadImages
31 } 38 }
32 39
33 /** 40 /**
41 * Process the html and extract images URLs from it.
42 *
43 * @param string $html
44 *
45 * @return string[]
46 */
47 public static function extractImagesUrlsFromHtml($html)
48 {
49 $crawler = new Crawler($html);
50 $imagesCrawler = $crawler->filterXpath('//img');
51 $imagesUrls = $imagesCrawler->extract(['src']);
52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
55 }
56
57 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 58 * Process the html and extract image from it, save them to local and return the updated html.
35 * 59 *
36 * @param int $entryId ID of the entry 60 * @param int $entryId ID of the entry
@@ -41,13 +65,7 @@ class DownloadImages
41 */ 65 */
42 public function processHtml($entryId, $html, $url) 66 public function processHtml($entryId, $html, $url)
43 { 67 {
44 $crawler = new Crawler($html); 68 $imagesUrls = self::extractImagesUrlsFromHtml($html);
45 $imagesCrawler = $crawler
46 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler
48 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
51 69
52 $relativePath = $this->getRelativePath($entryId); 70 $relativePath = $this->getRelativePath($entryId);
53 71
@@ -122,7 +140,7 @@ class DownloadImages
122 $localPath = $folderPath . '/' . $hashImage . '.' . $ext; 140 $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
123 141
124 try { 142 try {
125 $im = imagecreatefromstring($res->getBody()); 143 $im = imagecreatefromstring((string) $res->getBody());
126 } catch (\Exception $e) { 144 } catch (\Exception $e) {
127 $im = false; 145 $im = false;
128 } 146 }
@@ -135,7 +153,21 @@ class DownloadImages
135 153
136 switch ($ext) { 154 switch ($ext) {
137 case 'gif': 155 case 'gif':
138 imagegif($im, $localPath); 156 // use Imagick if available to keep GIF animation
157 if (class_exists('\\Imagick')) {
158 try {
159 $imagick = new \Imagick();
160 $imagick->readImageBlob($res->getBody());
161 $imagick->setImageFormat('gif');
162 $imagick->writeImages($localPath, true);
163 } catch (\Exception $e) {
164 // if Imagick fail, fallback to the default solution
165 imagegif($im, $localPath);
166 }
167 } else {
168 imagegif($im, $localPath);
169 }
170
139 $this->logger->debug('DownloadImages: Re-creating gif'); 171 $this->logger->debug('DownloadImages: Re-creating gif');
140 break; 172 break;
141 case 'jpeg': 173 case 'jpeg':
@@ -181,29 +213,30 @@ class DownloadImages
181 /** 213 /**
182 * Get images urls from the srcset image attribute. 214 * Get images urls from the srcset image attribute.
183 * 215 *
184 * @param Crawler $imagesCrawler
185 *
186 * @return array An array of urls 216 * @return array An array of urls
187 */ 217 */
188 private function getSrcsetUrls(Crawler $imagesCrawler) 218 private static function getSrcsetUrls(Crawler $imagesCrawler)
189 { 219 {
190 $urls = []; 220 $urls = [];
191 $iterator = $imagesCrawler 221 $iterator = $imagesCrawler->getIterator();
192 ->getIterator(); 222
193 while ($iterator->valid()) { 223 while ($iterator->valid()) {
194 $srcsetAttribute = $iterator->current()->getAttribute('srcset'); 224 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
225
195 if ('' !== $srcsetAttribute) { 226 if ('' !== $srcsetAttribute) {
196 // Couldn't start with " OR ' OR a white space 227 // Couldn't start with " OR ' OR a white space
197 // Could be one or more white space 228 // Could be one or more white space
198 // Must be one or more digits followed by w OR x 229 // Must be one or more digits followed by w OR x
199 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/"; 230 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
200 preg_match_all($pattern, $srcsetAttribute, $matches); 231 preg_match_all($pattern, $srcsetAttribute, $matches);
232
201 $srcset = \call_user_func_array('array_merge', $matches); 233 $srcset = \call_user_func_array('array_merge', $matches);
202 $srcsetUrls = array_map(function ($src) { 234 $srcsetUrls = array_map(function ($src) {
203 return trim(explode(' ', $src, 2)[0]); 235 return trim(explode(' ', $src, 2)[0]);
204 }, $srcset); 236 }, $srcset);
205 $urls = array_merge($srcsetUrls, $urls); 237 $urls = array_merge($srcsetUrls, $urls);
206 } 238 }
239
207 $iterator->next(); 240 $iterator->next();
208 } 241 }
209 242
@@ -260,33 +293,29 @@ class DownloadImages
260 return $url; 293 return $url;
261 } 294 }
262 295
263 $base = new \SimplePie_IRI($base); 296 $base = new Uri($base);
264 297
265 // remove '//' in URL path (causes URLs not to resolve properly) 298 // in case the url has no scheme & host
266 if (isset($base->ipath)) { 299 if ('' === $base->getAuthority() || '' === $base->getScheme()) {
267 $base->ipath = preg_replace('!//+!', '/', $base->ipath); 300 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]);
268 }
269 301
270 if ($absolute = \SimplePie_IRI::absolutize($base, $url)) { 302 return false;
271 return $absolute->get_uri();
272 } 303 }
273 304
274 $this->logger->error('DownloadImages: Can not make an absolute link', ['base' => $base, 'url' => $url]); 305 return (string) UriResolver::resolve($base, new Uri($url));
275
276 return false;
277 } 306 }
278 307
279 /** 308 /**
280 * Retrieve and validate the extension from the response of the url of the image. 309 * Retrieve and validate the extension from the response of the url of the image.
281 * 310 *
282 * @param Response $res Guzzle Response 311 * @param ResponseInterface $res Http Response
283 * @param string $imagePath Path from the src image from the content (used for log only) 312 * @param string $imagePath Path from the src image from the content (used for log only)
284 * 313 *
285 * @return string|false Extension name or false if validation failed 314 * @return string|false Extension name or false if validation failed
286 */ 315 */
287 private function getExtensionFromResponse(Response $res, $imagePath) 316 private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
288 { 317 {
289 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 318 $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type')));
290 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 319 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
291 320
292 // ok header doesn't have the extension, try a different way 321 // ok header doesn't have the extension, try a different way