about | summary | refs | log | tree | commit | diff | homepage
path: root/src/Wallabag/CoreBundle/Helper
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 49
-rw-r--r-- src/Wallabag/CoreBundle/Helper/DownloadImages.php | 68
-rw-r--r-- src/Wallabag/CoreBundle/Helper/HttpClientFactory.php | 51
-rw-r--r-- src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php | 2
-rw-r--r-- src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php | 1
-rw-r--r-- src/Wallabag/CoreBundle/Helper/UrlHasher.php | 23
6 files changed, 137 insertions, 57 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index bc257ffb..c6fa0d98 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -12,8 +12,8 @@ use Wallabag\CoreBundle\Entity\Entry;
12use Wallabag\CoreBundle\Tools\Utils; 12use Wallabag\CoreBundle\Tools\Utils;
13 13
14/** 14/**
15 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class takes care of getting the content from an url
16 * and update the entry with what it found. 16 * and updates the entry with what it found.
17 */ 17 */
18class ContentProxy 18class ContentProxy
19{ 19{
@@ -54,7 +54,11 @@ class ContentProxy
54 54
55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { 55 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
56 $fetchedContent = $this->graby->fetchContent($url); 56 $fetchedContent = $this->graby->fetchContent($url);
57 $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']); 57
58 $fetchedContent['title'] = $this->sanitizeContentTitle(
59 $fetchedContent['title'],
60 isset($fetchedContent['headers']['content-type']) ? $fetchedContent['headers']['content-type'] : ''
61 );
58 62
59 // when content is imported, we have information in $content 63 // when content is imported, we have information in $content
60 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 64 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
@@ -188,8 +192,8 @@ class ContentProxy
188 /** 192 /**
189 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character. 193 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 character.
190 * 194 *
191 * @param $title 195 * @param string $title
192 * @param $contentType 196 * @param string $contentType
193 * 197 *
194 * @return string 198 * @return string
195 */ 199 */
@@ -253,16 +257,14 @@ class ContentProxy
253 257
254 if (!empty($content['title'])) { 258 if (!empty($content['title'])) {
255 $entry->setTitle($content['title']); 259 $entry->setTitle($content['title']);
256 } elseif (!empty($content['open_graph']['og_title'])) {
257 $entry->setTitle($content['open_graph']['og_title']);
258 } 260 }
259 261
260 if (empty($content['html'])) { 262 if (empty($content['html'])) {
261 $content['html'] = $this->fetchingErrorMessage; 263 $content['html'] = $this->fetchingErrorMessage;
262 264
263 if (!empty($content['open_graph']['og_description'])) { 265 if (!empty($content['description'])) {
264 $content['html'] .= '<p><i>But we found a short description: </i></p>'; 266 $content['html'] .= '<p><i>But we found a short description: </i></p>';
265 $content['html'] .= $content['open_graph']['og_description']; 267 $content['html'] .= $content['description'];
266 } 268 }
267 } 269 }
268 270
@@ -277,8 +279,8 @@ class ContentProxy
277 $entry->setPublishedBy($content['authors']); 279 $entry->setPublishedBy($content['authors']);
278 } 280 }
279 281
280 if (!empty($content['all_headers']) && $this->storeArticleHeaders) { 282 if (!empty($content['headers'])) {
281 $entry->setHeaders($content['all_headers']); 283 $entry->setHeaders($content['headers']);
282 } 284 }
283 285
284 if (!empty($content['date'])) { 286 if (!empty($content['date'])) {
@@ -289,17 +291,30 @@ class ContentProxy
289 $this->updateLanguage($entry, $content['language']); 291 $this->updateLanguage($entry, $content['language']);
290 } 292 }
291 293
292 if (!empty($content['open_graph']['og_image'])) { 294 $previewPictureUrl = '';
293 $this->updatePreviewPicture($entry, $content['open_graph']['og_image']); 295 if (!empty($content['image'])) {
296 $previewPictureUrl = $content['image'];
294 } 297 }
295 298
296 // if content is an image, define it as a preview too 299 // if content is an image, define it as a preview too
297 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 300 if (!empty($content['headers']['content-type']) && \in_array($this->mimeGuesser->guess($content['headers']['content-type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
298 $this->updatePreviewPicture($entry, $content['url']); 301 $previewPictureUrl = $content['url'];
302 } elseif (empty($previewPictureUrl)) {
303 $this->logger->debug('Extracting images from content to provide a default preview picture');
304 $imagesUrls = DownloadImages::extractImagesUrlsFromHtml($content['html']);
305 $this->logger->debug(\count($imagesUrls) . ' pictures found');
306
307 if (!empty($imagesUrls)) {
308 $previewPictureUrl = $imagesUrls[0];
309 }
310 }
311
312 if (!empty($content['headers']['content-type'])) {
313 $entry->setMimetype($content['headers']['content-type']);
299 } 314 }
300 315
301 if (!empty($content['content_type'])) { 316 if (!empty($previewPictureUrl)) {
302 $entry->setMimetype($content['content_type']); 317 $this->updatePreviewPicture($entry, $previewPictureUrl);
303 } 318 }
304 319
305 try { 320 try {
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index cc3dcfce..7a39a2e4 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -2,8 +2,13 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use GuzzleHttp\Client; 5use Http\Client\Common\HttpMethodsClient;
6use GuzzleHttp\Message\Response; 6use Http\Client\Common\Plugin\ErrorPlugin;
7use Http\Client\Common\PluginClient;
8use Http\Client\HttpClient;
9use Http\Discovery\MessageFactoryDiscovery;
10use Http\Message\MessageFactory;
11use Psr\Http\Message\ResponseInterface;
7use Psr\Log\LoggerInterface; 12use Psr\Log\LoggerInterface;
8use Symfony\Component\DomCrawler\Crawler; 13use Symfony\Component\DomCrawler\Crawler;
9use Symfony\Component\Finder\Finder; 14use Symfony\Component\Finder\Finder;
@@ -19,9 +24,9 @@ class DownloadImages
19 private $mimeGuesser; 24 private $mimeGuesser;
20 private $wallabagUrl; 25 private $wallabagUrl;
21 26
22 public function __construct(Client $client, $baseFolder, $wallabagUrl, LoggerInterface $logger) 27 public function __construct(HttpClient $client, $baseFolder, $wallabagUrl, LoggerInterface $logger, MessageFactory $messageFactory = null)
23 { 28 {
24 $this->client = $client; 29 $this->client = new HttpMethodsClient(new PluginClient($client, [new ErrorPlugin()]), $messageFactory ?: MessageFactoryDiscovery::find());
25 $this->baseFolder = $baseFolder; 30 $this->baseFolder = $baseFolder;
26 $this->wallabagUrl = rtrim($wallabagUrl, '/'); 31 $this->wallabagUrl = rtrim($wallabagUrl, '/');
27 $this->logger = $logger; 32 $this->logger = $logger;
@@ -31,23 +36,36 @@ class DownloadImages
31 } 36 }
32 37
33 /** 38 /**
34 * Process the html and extract image from it, save them to local and return the updated html. 39 * Process the html and extract images URLs from it.
35 * 40 *
36 * @param int $entryId ID of the entry
37 * @param string $html 41 * @param string $html
38 * @param string $url Used as a base path for relative image and folder
39 * 42 *
40 * @return string 43 * @return string[]
41 */ 44 */
42 public function processHtml($entryId, $html, $url) 45 public static function extractImagesUrlsFromHtml($html)
43 { 46 {
44 $crawler = new Crawler($html); 47 $crawler = new Crawler($html);
45 $imagesCrawler = $crawler 48 $imagesCrawler = $crawler
46 ->filterXpath('//img'); 49 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler 50 $imagesUrls = $imagesCrawler
48 ->extract(['src']); 51 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler); 52 $imagesSrcsetUrls = self::getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls)); 53
54 return array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
55 }
56
57 /**
58 * Process the html and extract image from it, save them to local and return the updated html.
59 *
60 * @param int $entryId ID of the entry
61 * @param string $html
62 * @param string $url Used as a base path for relative image and folder
63 *
64 * @return string
65 */
66 public function processHtml($entryId, $html, $url)
67 {
68 $imagesUrls = self::extractImagesUrlsFromHtml($html);
51 69
52 $relativePath = $this->getRelativePath($entryId); 70 $relativePath = $this->getRelativePath($entryId);
53 71
@@ -122,7 +140,7 @@ class DownloadImages
122 $localPath = $folderPath . '/' . $hashImage . '.' . $ext; 140 $localPath = $folderPath . '/' . $hashImage . '.' . $ext;
123 141
124 try { 142 try {
125 $im = imagecreatefromstring($res->getBody()); 143 $im = imagecreatefromstring((string) $res->getBody());
126 } catch (\Exception $e) { 144 } catch (\Exception $e) {
127 $im = false; 145 $im = false;
128 } 146 }
@@ -135,7 +153,21 @@ class DownloadImages
135 153
136 switch ($ext) { 154 switch ($ext) {
137 case 'gif': 155 case 'gif':
138 imagegif($im, $localPath); 156 // use Imagick if available to keep GIF animation
157 if (class_exists('\\Imagick')) {
158 try {
159 $imagick = new \Imagick();
160 $imagick->readImageBlob($res->getBody());
161 $imagick->setImageFormat('gif');
162 $imagick->writeImages($localPath, true);
163 } catch (\Exception $e) {
164 // if Imagick fail, fallback to the default solution
165 imagegif($im, $localPath);
166 }
167 } else {
168 imagegif($im, $localPath);
169 }
170
139 $this->logger->debug('DownloadImages: Re-creating gif'); 171 $this->logger->debug('DownloadImages: Re-creating gif');
140 break; 172 break;
141 case 'jpeg': 173 case 'jpeg':
@@ -185,7 +217,7 @@ class DownloadImages
185 * 217 *
186 * @return array An array of urls 218 * @return array An array of urls
187 */ 219 */
188 private function getSrcsetUrls(Crawler $imagesCrawler) 220 private static function getSrcsetUrls(Crawler $imagesCrawler)
189 { 221 {
190 $urls = []; 222 $urls = [];
191 $iterator = $imagesCrawler 223 $iterator = $imagesCrawler
@@ -279,14 +311,14 @@ class DownloadImages
279 /** 311 /**
280 * Retrieve and validate the extension from the response of the url of the image. 312 * Retrieve and validate the extension from the response of the url of the image.
281 * 313 *
282 * @param Response $res Guzzle Response 314 * @param ResponseInterface $res Http Response
283 * @param string $imagePath Path from the src image from the content (used for log only) 315 * @param string $imagePath Path from the src image from the content (used for log only)
284 * 316 *
285 * @return string|false Extension name or false if validation failed 317 * @return string|false Extension name or false if validation failed
286 */ 318 */
287 private function getExtensionFromResponse(Response $res, $imagePath) 319 private function getExtensionFromResponse(ResponseInterface $res, $imagePath)
288 { 320 {
289 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 321 $ext = $this->mimeGuesser->guess(current($res->getHeader('content-type')));
290 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 322 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
291 323
292 // ok header doesn't have the extension, try a different way 324 // ok header doesn't have the extension, try a different way
diff --git a/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php b/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
index 4602a684..b8e95381 100644
--- a/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
+++ b/src/Wallabag/CoreBundle/Helper/HttpClientFactory.php
@@ -2,16 +2,18 @@
2 2
3namespace Wallabag\CoreBundle\Helper; 3namespace Wallabag\CoreBundle\Helper;
4 4
5use Graby\Ring\Client\SafeCurlHandler; 5use GuzzleHttp\Client as GuzzleClient;
6use GuzzleHttp\Client;
7use GuzzleHttp\Cookie\CookieJar; 6use GuzzleHttp\Cookie\CookieJar;
8use GuzzleHttp\Event\SubscriberInterface; 7use GuzzleHttp\Event\SubscriberInterface;
8use Http\Adapter\Guzzle5\Client as GuzzleAdapter;
9use Http\Client\HttpClient;
10use Http\HttplugBundle\ClientFactory\ClientFactory;
9use Psr\Log\LoggerInterface; 11use Psr\Log\LoggerInterface;
10 12
11/** 13/**
12 * Builds and configures the Guzzle HTTP client. 14 * Builds and configures the HTTP client.
13 */ 15 */
14class HttpClientFactory 16class HttpClientFactory implements ClientFactory
15{ 17{
16 /** @var [\GuzzleHttp\Event\SubscriberInterface] */ 18 /** @var [\GuzzleHttp\Event\SubscriberInterface] */
17 private $subscribers = []; 19 private $subscribers = [];
@@ -37,35 +39,42 @@ class HttpClientFactory
37 } 39 }
38 40
39 /** 41 /**
40 * @return \GuzzleHttp\Client|null 42 * Adds a subscriber to the HTTP client.
43 *
44 * @param SubscriberInterface $subscriber
45 */
46 public function addSubscriber(SubscriberInterface $subscriber)
47 {
48 $this->subscribers[] = $subscriber;
49 }
50
51 /**
52 * Input an array of configuration to be able to create a HttpClient.
53 *
54 * @param array $config
55 *
56 * @return HttpClient
41 */ 57 */
42 public function buildHttpClient() 58 public function createClient(array $config = [])
43 { 59 {
44 $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => (int) $this->restrictedAccess]); 60 $this->logger->log('debug', 'Restricted access config enabled?', ['enabled' => (int) $this->restrictedAccess]);
45 61
46 if (0 === (int) $this->restrictedAccess) { 62 if (0 === (int) $this->restrictedAccess) {
47 return; 63 return new GuzzleAdapter(new GuzzleClient($config));
48 } 64 }
49 65
50 // we clear the cookie to avoid websites who use cookies for analytics 66 // we clear the cookie to avoid websites who use cookies for analytics
51 $this->cookieJar->clear(); 67 $this->cookieJar->clear();
52 // need to set the (shared) cookie jar 68 if (!isset($config['defaults']['cookies'])) {
53 $client = new Client(['handler' => new SafeCurlHandler(), 'defaults' => ['cookies' => $this->cookieJar]]); 69 // need to set the (shared) cookie jar
70 $config['defaults']['cookies'] = $this->cookieJar;
71 }
54 72
73 $guzzle = new GuzzleClient($config);
55 foreach ($this->subscribers as $subscriber) { 74 foreach ($this->subscribers as $subscriber) {
56 $client->getEmitter()->attach($subscriber); 75 $guzzle->getEmitter()->attach($subscriber);
57 } 76 }
58 77
59 return $client; 78 return new GuzzleAdapter($guzzle);
60 }
61
62 /**
63 * Adds a subscriber to the HTTP client.
64 *
65 * @param SubscriberInterface $subscriber
66 */
67 public function addSubscriber(SubscriberInterface $subscriber)
68 {
69 $this->subscribers[] = $subscriber;
70 } 79 }
71} 80}
diff --git a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
index 183d394a..04abc6d0 100644
--- a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
+++ b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
@@ -21,7 +21,7 @@ class PreparePagerForEntries
21 21
22 /** 22 /**
23 * @param AdapterInterface $adapter 23 * @param AdapterInterface $adapter
24 * @param User $user If user isn't logged in, we can force it (like for rss) 24 * @param User $user If user isn't logged in, we can force it (like for feed)
25 * 25 *
26 * @return Pagerfanta|null 26 * @return Pagerfanta|null
27 */ 27 */
diff --git a/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php b/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php
index 63f65067..fbdf2ac7 100644
--- a/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php
+++ b/src/Wallabag/CoreBundle/Helper/RuleBasedTagger.php
@@ -6,6 +6,7 @@ use Psr\Log\LoggerInterface;
6use RulerZ\RulerZ; 6use RulerZ\RulerZ;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Entity\Tag; 8use Wallabag\CoreBundle\Entity\Tag;
9use Wallabag\CoreBundle\Entity\TaggingRule;
9use Wallabag\CoreBundle\Repository\EntryRepository; 10use Wallabag\CoreBundle\Repository\EntryRepository;
10use Wallabag\CoreBundle\Repository\TagRepository; 11use Wallabag\CoreBundle\Repository\TagRepository;
11use Wallabag\UserBundle\Entity\User; 12use Wallabag\UserBundle\Entity\User;
diff --git a/src/Wallabag/CoreBundle/Helper/UrlHasher.php b/src/Wallabag/CoreBundle/Helper/UrlHasher.php
new file mode 100644
index 00000000..d123eaba
--- /dev/null
+++ b/src/Wallabag/CoreBundle/Helper/UrlHasher.php
@@ -0,0 +1,23 @@
1<?php
2
3namespace Wallabag\CoreBundle\Helper;
4
5/**
6 * Hash URLs for privacy and performance.
7 */
class UrlHasher
{
    /**
     * Hash the given url using the given algorithm.
     * Hashed urls are faster to retrieve from the database than the real url.
     *
     * The url is passed through urldecode() before hashing, so a
     * percent-encoded url and its already-decoded form produce the same hash.
     *
     * @param string $url       Url to hash (may be percent-encoded)
     * @param string $algorithm Name of a hashing algorithm accepted by hash(), "sha1" by default
     *
     * @return string Hexadecimal digest of the decoded url
     */
    public static function hashUrl(string $url, $algorithm = 'sha1')
    {
        // Normalise the url first so equivalent encodings share one hash.
        $decodedUrl = urldecode($url);

        return hash($algorithm, $decodedUrl);
    }
}