diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 68 |
1 files changed, 50 insertions, 18 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index cc3dcfce..7a39a2e4 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,8 +2,13 @@ | |||
2 | 2 | ||
3 | namespace Wallabag\CoreBundle\Helper; | 3 | namespace Wallabag\CoreBundle\Helper; |
4 | 4 | ||
5 | use GuzzleHttp\Client; | 5 | use Http\Client\Common\HttpMethodsClient; |
6 | use GuzzleHttp\Message\Response; | 6 | use Http\Client\Common\Plugin\ErrorPlugin; |
7 | use Http\Client\Common\PluginClient; | ||
8 | use Http\Client\HttpClient; | ||
9 | use Http\Discovery\MessageFactoryDiscovery; | ||
10 | use Http\Message\MessageFactory; | ||
11 | use Psr\Http\Message\ResponseInterface; | ||
7 | use Psr\Log\LoggerInterface; | 12 | use Psr\Log\LoggerInterface; |
8 | use Symfony\Component\DomCrawler\Crawler; | 13 | use Symfony\Component\DomCrawler\Crawler; |
9 | use Symfony\Component\Finder\Finder; | 14 | use Symfony\Component\Finder\Finder; |
@@ -19,9 +24,9 @@ class DownloadImages | |||
19 | private $mimeGuesser; | 24 | private $mimeGuesser; |
20 | private $wallabagUrl; | 25 | private $wallabagUrl; |
21 | 26 | ||
22 | public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) | 27 | public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null) |
23 | { | 28 | { |
24 | $this->client = $client; | 29 | $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find()); |
25 | $this->baseFolder = $baseFolder; | 30 | $this->baseFolder = $baseFolder; |
26 | $this->wallabagUrl = rtrim($wallabagUrl, '/'); | 31 | $this->wallabagUrl = rtrim($wallabagUrl, '/'); |
27 | $this->logger = $logger; | 32 | $this->logger = $logger; |
@@ -31,23 +36,36 @@ class DownloadImages | |||
31 | } | 36 | } |
32 | 37 | ||
33 | /** | 38 | /** |
34 | * Process the html and extract image from it, save them to local and return the updated html. | 39 | * Process the html and extract images URLs from it. |
35 | * | 40 | * |
36 | * @param int $entryId ID of the entry | ||
37 | * @param string $html | 41 | * @param string $html |
38 | * @param string $url Used as a base path for relative image and folder | ||
39 | * | 42 | * |
40 | * @return string | 43 | * @return string[] |
41 | */ | 44 | */ |
42 | public function processHtml($entryId, $html, $url) | 45 | public static function extractImagesUrlsFromHtml($html) |
43 | { | 46 | { |
44 | $crawler = new Crawler($html); | 47 | $crawler = new Crawler($html); |
45 | $imagesCrawler = $crawler | 48 | $imagesCrawler = $crawler |
46 | ->filterXpath('//img'); | 49 | ->filterXpath('//img'); |
47 | $imagesUrls = $imagesCrawler | 50 | $imagesUrls = $imagesCrawler |
48 | ->extract(['src']); | 51 | ->extract(['src']); |
49 | $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); | 52 | $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler); |
50 | $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | 53 | |
54 | return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); | ||
55 | } | ||
56 | |||
57 | /** | ||
58 | * Process the html and extract image from it, save them to local and return the updated html. | ||
59 | * | ||
60 | * @param int $entryId ID of the entry | ||
61 | * @param string $html | ||
62 | * @param string $url Used as a base path for relative image and folder | ||
63 | * | ||
64 | * @return string | ||
65 | */ | ||
66 | public function processHtml($entryId, $html, $url) | ||
67 | { | ||
68 | $imagesUrls = self::extractImagesUrlsFromHtml($html); | ||
51 | 69 | ||
52 | $relativePath = $this->getRelativePath($entryId); | 70 | $relativePath = $this->getRelativePath($entryId); |
53 | 71 | ||
@@ -122,7 +140,7 @@ class DownloadImages | |||
122 | $localPath = $folderPath . '/' . $hashImage . '.' . $ext; | 140 | $localPath = $folderPath . '/' . $hashImage . '.' . $ext; |
123 | 141 | ||
124 | try { | 142 | try { |
125 | $im = imagecreatefromstring($res->getBody()); | 143 | $im = imagecreatefromstring((string) $res->getBody()); |
126 | } catch (\Exception $e) { | 144 | } catch (\Exception $e) { |
127 | $im = false; | 145 | $im = false; |
128 | } | 146 | } |
@@ -135,7 +153,21 @@ class DownloadImages | |||
135 | 153 | ||
136 | switch ($ext) { | 154 | switch ($ext) { |
137 | case 'gif': | 155 | case 'gif': |
138 | imagegif($im, $localPath); | 156 | // use Imagick if available to keep GIF animation |
157 | if (class_exists('\\Imagick')) { | ||
158 | try { | ||
159 | $imagick = new \Imagick(); | ||
160 | $imagick->readImageBlob($res->getBody()); | ||
161 | $imagick->setImageFormat('gif'); | ||
162 | $imagick->writeImages($localPath, true); | ||
163 | } catch (\Exception $e) { | ||
164 | // if Imagick fail, fallback to the default solution | ||
165 | imagegif($im, $localPath); | ||
166 | } | ||
167 | } else { | ||
168 | imagegif($im, $localPath); | ||
169 | } | ||
170 | |||
139 | $this->logger->debug('DownloadImages: Re-creating gif'); | 171 | $this->logger->debug('DownloadImages: Re-creating gif'); |
140 | break; | 172 | break; |
141 | case 'jpeg': | 173 | case 'jpeg': |
@@ -185,7 +217,7 @@ class DownloadImages | |||
185 | * | 217 | * |
186 | * @return array An array of urls | 218 | * @return array An array of urls |
187 | */ | 219 | */ |
188 | private function getSrcsetUrls(Crawler $imagesCrawler) | 220 | private static function getSrcsetUrls(Crawler $imagesCrawler) |
189 | { | 221 | { |
190 | $urls = []; | 222 | $urls = []; |
191 | $iterator = $imagesCrawler | 223 | $iterator = $imagesCrawler |
@@ -279,14 +311,14 @@ class DownloadImages | |||
279 | /** | 311 | /** |
280 | * Retrieve and validate the extension from the response of the url of the image. | 312 | * Retrieve and validate the extension from the response of the url of the image. |
281 | * | 313 | * |
282 | * @param Response $res Guzzle Response | 314 | * @param ResponseInterface $res Http Response |
283 | * @param string $imagePath Path from the src image from the content (used for log only) | 315 | * @param string $imagePath Path from the src image from the content (used for log only) |
284 | * | 316 | * |
285 | * @return string|false Extension name or false if validation failed | 317 | * @return string|false Extension name or false if validation failed |
286 | */ | 318 | */ |
287 | private function getExtensionFromResponse(Response $res, $imagePath) | 319 | private function getExtensionFromResponse(ResponseInterface $res, $imagePath) |
288 | { | 320 | { |
289 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 321 | $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type'))); |
290 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 322 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); |
291 | 323 | ||
292 | // ok header doesn't have the extension, try a different way | 324 | // ok header doesn't have the extension, try a different way |