aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper/DownloadImages.php
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper/DownloadImages.php')
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php52
1 files changed, 35 insertions, 17 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 9a7e9828..7a39a2e4 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,8 +2,13 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Client; 5use Http\Client\Common\HttpMethodsClient;
6use GuzzleHttp\Message\Response; 6use Http\Client\Common\Plugin\ErrorPlugin;
7use Http\Client\Common\PluginClient;
8use Http\Client\HttpClient;
9use Http\Discovery\MessageFactoryDiscovery;
10use Http\Message\MessageFactory;
11use Psr\Http\Message\ResponseInterface;
7use Psr\Log\LoggerInterface; 12use Psr\Log\LoggerInterface;
8use Symfony\Component\DomCrawler\Crawler; 13use Symfony\Component\DomCrawler\Crawler;
9use Symfony\Component\Finder\Finder; 14use Symfony\Component\Finder\Finder;
@@ -19,9 +24,9 @@ class DownloadImages
19 private $mimeGuesser; 24 private $mimeGuesser;
20 private $wallabagUrl; 25 private $wallabagUrl;
21 26
22 public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) 27 public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null)
23 { 28 {
24 $this->client = $client; 29 $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find());
25 $this->baseFolder = $baseFolder; 30 $this->baseFolder = $baseFolder;
26 $this->wallabagUrl = rtrim($wallabagUrl, '/'); 31 $this->wallabagUrl = rtrim($wallabagUrl, '/');
27 $this->logger = $logger; 32 $this->logger = $logger;
@@ -31,23 +36,36 @@ class DownloadImages
31 } 36 }
32 37
33 /** 38 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 39 * Process the html and extract images URLs from it.
35 * 40 *
36 * @param int $entryId ID of the entry
37 * @param string $html 41 * @param string $html
38 * @param string $url Used as a base path for relative image and folder
39 * 42 *
40 * @return string 43 * @return string[]
41 */ 44 */
42 public function processHtml($entryId, $html, $url) 45 public static function extractImagesUrlsFromHtml($html)
43 { 46 {
44 $crawler = new Crawler($html); 47 $crawler = new Crawler($html);
45 $imagesCrawler = $crawler 48 $imagesCrawler = $crawler
46 ->filterXpath('//img'); 49 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler 50 $imagesUrls = $imagesCrawler
48 ->extract(['src']); 51 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); 52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
55 }
56
57 /**
58 * Process the html and extract image from it, save them to local and return the updated html.
59 *
60 * @param int $entryId ID of the entry
61 * @param string $html
62 * @param string $url Used as a base path for relative image and folder
63 *
64 * @return string
65 */
66 public function processHtml($entryId, $html, $url)
67 {
68 $imagesUrls = self::extractImagesUrlsFromHtml($html);
51 69
52 $relativePath = $this->getRelativePath($entryId); 70 $relativePath = $this->getRelativePath($entryId);
53 71
@@ -122,7 +140,7 @@ class DownloadImages
122 $localPath = $folderPath . '/' . $hashImage . '.' . $ext; 140 $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
123 141
124 try { 142 try {
125 $im = imagecreatefromstring($res->getBody()); 143 $im = imagecreatefromstring((string) $res->getBody());
126 } catch (\Exception $e) { 144 } catch (\Exception $e) {
127 $im = false; 145 $im = false;
128 } 146 }
@@ -199,7 +217,7 @@ class DownloadImages
199 * 217 *
200 * @return array An array of urls 218 * @return array An array of urls
201 */ 219 */
202 private function getSrcsetUrls(Crawler $imagesCrawler) 220 private static function getSrcsetUrls(Crawler $imagesCrawler)
203 { 221 {
204 $urls = []; 222 $urls = [];
205 $iterator = $imagesCrawler 223 $iterator = $imagesCrawler
@@ -293,14 +311,14 @@ class DownloadImages
293 /** 311 /**
294 * Retrieve and validate the extension from the response of the url of the image. 312 * Retrieve and validate the extension from the response of the url of the image.
295 * 313 *
296 * @param Response $res Guzzle Response 314 * @param ResponseInterface $res Http Response
297 * @param string $imagePath Path from the src image from the content (used for log only) 315 * @param string $imagePath Path from the src image from the content (used for log only)
298 * 316 *
299 * @return string|false Extension name or false if validation failed 317 * @return string|false Extension name or false if validation failed
300 */ 318 */
301 private function getExtensionFromResponse(Response $res, $imagePath) 319 private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
302 { 320 {
303 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 321 $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type')));
304 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 322 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
305 323
306 // ok header doesn't have the extension, try a different way 324 // ok header doesn't have the extension, try a different way