aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php49
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php52
-rw-r--r--src/Wallabag/CoreBundle/Helper/HttpClientFactory.php51
3 files changed, 97 insertions, 55 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index bc257ffb..c6fa0d98 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -54,7 +54,11 @@ class ContentProxy
54 54
55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { 55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
56 $fetchedContent = $this->graby->fetchContent($url); 56 $fetchedContent = $this->graby->fetchContent($url);
57 $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); 57
58 $fetchedContent['title'] = $this->sanitizeContentTitle(
59 $fetchedContent['title'],
60 isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : ''
61 );
58 62
59 // when content is imported, we have information in $content 63 // when content is imported, we have information in $content
60 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 64 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
@@ -188,8 +192,8 @@ class ContentProxy
188 /** 192 /**
189 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. 193 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character.
190 * 194 *
191 * @param $title 195 * @param string $title
192 * @param $contentType 196 * @param string $contentType
193 * 197 *
194 * @return string 198 * @return string
195 */ 199 */
@@ -253,16 +257,14 @@ class ContentProxy
253 257
254 if (!empty($content['title'])) { 258 if (!empty($content['title'])) {
255 $entry->setTitle($content['title']); 259 $entry->setTitle($content['title']);
256 } elseif (!empty($content['open_graph']['og_title'])) {
257 $entry->setTitle($content['open_graph']['og_title']);
258 } 260 }
259 261
260 if (empty($content['html'])) { 262 if (empty($content['html'])) {
261 $content['html'] = $this->fetchingErrorMessage; 263 $content['html'] = $this->fetchingErrorMessage;
262 264
263 if (!empty($content['open_graph']['og_description'])) { 265 if (!empty($content['description'])) {
264 $content['html'] .= '<p><i>But we found a short description: </i></p>'; 266 $content['html'] .= '<p><i>But we found a short description: </i></p>';
265 $content['html'] .= $content['open_graph']['og_description']; 267 $content['html'] .= $content['description'];
266 } 268 }
267 } 269 }
268 270
@@ -277,8 +279,8 @@ class ContentProxy
277 $entry->setPublishedBy($content['authors']); 279 $entry->setPublishedBy($content['authors']);
278 } 280 }
279 281
280 if (!empty($content['all_headers']) && $this->storeArticleHeaders) { 282 if (!empty($content['headers'])) {
281 $entry->setHeaders($content['all_headers']); 283 $entry->setHeaders($content['headers']);
282 } 284 }
283 285
284 if (!empty($content['date'])) { 286 if (!empty($content['date'])) {
@@ -289,17 +291,30 @@ class ContentProxy
289 $this->updateLanguage($entry, $content['language']); 291 $this->updateLanguage($entry, $content['language']);
290 } 292 }
291 293
292 if (!empty($content['open_graph']['og_image'])) { 294 $previewPictureUrl = '';
293 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 295 if (!empty($content['image'])) {
296 $previewPictureUrl = $content['image'];
294 } 297 }
295 298
296 // if content is an image, define it as a preview too 299 // if content is an image, define it as a preview too
297 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 300 if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
298 $this->updatePreviewPicture($entry, $content['url']); 301 $previewPictureUrl = $content['url'];
302 } elseif (empty($previewPictureUrl)) {
303 $this->logger->debug('Extracting images from content to provide a default preview picture');
304 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
305 $this->logger->debug(\count($imagesUrls) . ' pictures found');
306
307 if (!empty($imagesUrls)) {
308 $previewPictureUrl = $imagesUrls[0];
309 }
310 }
311
312 if (!empty($content['headers']['content-type'])) {
313 $entry->setMimetype($content['headers']['content-type']);
299 } 314 }
300 315
301 if (!empty($content['content_type'])) { 316 if (!empty($previewPictureUrl)) {
302 $entry->setMimetype($content['content_type']); 317 $this->updatePreviewPicture($entry, $previewPictureUrl);
303 } 318 }
304 319
305 try { 320 try {
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 9a7e9828..7a39a2e4 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,8 +2,13 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Client; 5use Http\Client\Common\HttpMethodsClient;
6use GuzzleHttp\Message\Response; 6use Http\Client\Common\Plugin\ErrorPlugin;
7use Http\Client\Common\PluginClient;
8use Http\Client\HttpClient;
9use Http\Discovery\MessageFactoryDiscovery;
10use Http\Message\MessageFactory;
11use Psr\Http\Message\ResponseInterface;
7use Psr\Log\LoggerInterface; 12use Psr\Log\LoggerInterface;
8use Symfony\Component\DomCrawler\Crawler; 13use Symfony\Component\DomCrawler\Crawler;
9use Symfony\Component\Finder\Finder; 14use Symfony\Component\Finder\Finder;
@@ -19,9 +24,9 @@ class DownloadImages
19 private $mimeGuesser; 24 private $mimeGuesser;
20 private $wallabagUrl; 25 private $wallabagUrl;
21 26
22 public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) 27 public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null)
23 { 28 {
24 $this->client = $client; 29 $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find());
25 $this->baseFolder = $baseFolder; 30 $this->baseFolder = $baseFolder;
26 $this->wallabagUrl = rtrim($wallabagUrl, '/'); 31 $this->wallabagUrl = rtrim($wallabagUrl, '/');
27 $this->logger = $logger; 32 $this->logger = $logger;
@@ -31,23 +36,36 @@ class DownloadImages
31 } 36 }
32 37
33 /** 38 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 39 * Process the html and extract images URLs from it.
35 * 40 *
36 * @param int $entryId ID of the entry
37 * @param string $html 41 * @param string $html
38 * @param string $url Used as a base path for relative image and folder
39 * 42 *
40 * @return string 43 * @return string[]
41 */ 44 */
42 public function processHtml($entryId, $html, $url) 45 public static function extractImagesUrlsFromHtml($html)
43 { 46 {
44 $crawler = new Crawler($html); 47 $crawler = new Crawler($html);
45 $imagesCrawler = $crawler 48 $imagesCrawler = $crawler
46 ->filterXpath('//img'); 49 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler 50 $imagesUrls = $imagesCrawler
48 ->extract(['src']); 51 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); 52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
55 }
56
57 /**
58 * Process the html and extract image from it, save them to local and return the updated html.
59 *
60 * @param int $entryId ID of the entry
61 * @param string $html
62 * @param string $url Used as a base path for relative image and folder
63 *
64 * @return string
65 */
66 public function processHtml($entryId, $html, $url)
67 {
68 $imagesUrls = self::extractImagesUrlsFromHtml($html);
51 69
52 $relativePath = $this->getRelativePath($entryId); 70 $relativePath = $this->getRelativePath($entryId);
53 71
@@ -122,7 +140,7 @@ class DownloadImages
122 $localPath = $folderPath . '/' . $hashImage . '.' . $ext; 140 $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
123 141
124 try { 142 try {
125 $im = imagecreatefromstring($res->getBody()); 143 $im = imagecreatefromstring((string) $res->getBody());
126 } catch (\Exception $e) { 144 } catch (\Exception $e) {
127 $im = false; 145 $im = false;
128 } 146 }
@@ -199,7 +217,7 @@ class DownloadImages
199 * 217 *
200 * @return array An array of urls 218 * @return array An array of urls
201 */ 219 */
202 private function getSrcsetUrls(Crawler $imagesCrawler) 220 private static function getSrcsetUrls(Crawler $imagesCrawler)
203 { 221 {
204 $urls = []; 222 $urls = [];
205 $iterator = $imagesCrawler 223 $iterator = $imagesCrawler
@@ -293,14 +311,14 @@ class DownloadImages
293 /** 311 /**
294 * Retrieve and validate the extension from the response of the url of the image. 312 * Retrieve and validate the extension from the response of the url of the image.
295 * 313 *
296 * @param Response $res Guzzle Response 314 * @param ResponseInterface $res Http Response
297 * @param string $imagePath Path from the src image from the content (used for log only) 315 * @param string $imagePath Path from the src image from the content (used for log only)
298 * 316 *
299 * @return string|false Extension name or false if validation failed 317 * @return string|false Extension name or false if validation failed
300 */ 318 */
301 private function getExtensionFromResponse(Response $res, $imagePath) 319 private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
302 { 320 {
303 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 321 $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type')));
304 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 322 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
305 323
306 // ok header doesn't have the extension, try a different way 324 // ok header doesn't have the extension, try a different way
diff --git a/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php b/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
index 4602a684..b8e95381 100644
--- a/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
+++ b/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
@@ -2,16 +2,18 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use Graby\Ring\Client\SafeCurlHandler; 5use GuzzleHttp\Client as GuzzleClient;
6use GuzzleHttp\Client;
7use GuzzleHttp\Cookie\CookieJar; 6use GuzzleHttp\Cookie\CookieJar;
8use GuzzleHttp\Event\SubscriberInterface; 7use GuzzleHttp\Event\SubscriberInterface;
8use Http\Adapter\Guzzle5\Client as GuzzleAdapter;
9use Http\Client\HttpClient;
10use Http\HttplugBundle\ClientFactory\ClientFactory;
9use Psr\Log\LoggerInterface; 11use Psr\Log\LoggerInterface;
10 12
11/** 13/**
12 * Builds and configures the Guzzle HTTP client. 14 * Builds and configures the HTTP client.
13 */ 15 */
14class HttpClientFactory 16class HttpClientFactory implements ClientFactory
15{ 17{
16 /** @var [\GuzzleHttp\Event\SubscriberInterface] */ 18 /** @var [\GuzzleHttp\Event\SubscriberInterface] */
17 private $subscribers = []; 19 private $subscribers = [];
@@ -37,35 +39,42 @@ class HttpClientFactory
37 } 39 }
38 40
39 /** 41 /**
40 * @return \GuzzleHttp\Client|null 42 * Adds a subscriber to the HTTP client.
43 *
44 * @param SubscriberInterface $subscriber
45 */
46 public function addSubscriber(SubscriberInterface $subscriber)
47 {
48 $this->subscribers[] = $subscriber;
49 }
50
51 /**
52 * Input an array of configuration to be able to create a HttpClient.
53 *
54 * @param array $config
55 *
56 * @return HttpClient
41 */ 57 */
42 public function buildHttpClient() 58 public function createClient(array $config = [])
43 { 59 {
44 $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => (int) $this->restrictedAccess]); 60 $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => (int) $this->restrictedAccess]);
45 61
46 if (0 === (int) $this->restrictedAccess) { 62 if (0 === (int) $this->restrictedAccess) {
47 return; 63 return new GuzzleAdapter(new GuzzleClient($config));
48 } 64 }
49 65
50 // we clear the cookie to avoid websites who use cookies for analytics 66 // we clear the cookie to avoid websites who use cookies for analytics
51 $this->cookieJar->clear(); 67 $this->cookieJar->clear();
52 // need to set the (shared) cookie jar 68 if (!isset($config['defaults']['cookies'])) {
53 $client = new Client(['handler' => new SafeCurlHandler(), 'defaults' => ['cookies' => $this->cookieJar]]); 69 // need to set the (shared) cookie jar
70 $config['defaults']['cookies'] = $this->cookieJar;
71 }
54 72
73 $guzzle = new GuzzleClient($config);
55 foreach ($this->subscribers as $subscriber) { 74 foreach ($this->subscribers as $subscriber) {
56 $client->getEmitter()->attach($subscriber); 75 $guzzle->getEmitter()->attach($subscriber);
57 } 76 }
58 77
59 return $client; 78 return new GuzzleAdapter($guzzle);
60 }
61
62 /**
63 * Adds a subscriber to the HTTP client.
64 *
65 * @param SubscriberInterface $subscriber
66 */
67 public function addSubscriber(SubscriberInterface $subscriber)
68 {
69 $this->subscribers[] = $subscriber;
70 } 79 }
71} 80}