aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php85
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php50
2 files changed, 118 insertions, 17 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index bfaa1976..0c971863 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -7,6 +7,9 @@ use Psr\Log\LoggerInterface;
7use Wallabag\CoreBundle\Entity\Entry; 7use Wallabag\CoreBundle\Entity\Entry;
8use Wallabag\CoreBundle\Tools\Utils; 8use Wallabag\CoreBundle\Tools\Utils;
9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
10use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint;
11use Symfony\Component\Validator\Constraints\Url as UrlConstraint;
12use Symfony\Component\Validator\Validator\ValidatorInterface;
10 13
11/** 14/**
12 * This kind of proxy class take care of getting the content from an url 15 * This kind of proxy class take care of getting the content from an url
@@ -16,15 +19,17 @@ class ContentProxy
16{ 19{
17 protected $graby; 20 protected $graby;
18 protected $tagger; 21 protected $tagger;
22 protected $validator;
19 protected $logger; 23 protected $logger;
20 protected $mimeGuesser; 24 protected $mimeGuesser;
21 protected $fetchingErrorMessage; 25 protected $fetchingErrorMessage;
22 protected $eventDispatcher; 26 protected $eventDispatcher;
23 27
24 public function __construct(Graby $graby, RuleBasedTagger $tagger, LoggerInterface $logger, $fetchingErrorMessage) 28 public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage)
25 { 29 {
26 $this->graby = $graby; 30 $this->graby = $graby;
27 $this->tagger = $tagger; 31 $this->tagger = $tagger;
32 $this->validator = $validator;
28 $this->logger = $logger; 33 $this->logger = $logger;
29 $this->mimeGuesser = new MimeTypeExtensionGuesser(); 34 $this->mimeGuesser = new MimeTypeExtensionGuesser();
30 $this->fetchingErrorMessage = $fetchingErrorMessage; 35 $this->fetchingErrorMessage = $fetchingErrorMessage;
@@ -105,7 +110,7 @@ class ContentProxy
105 } 110 }
106 } 111 }
107 112
108 if (!empty($content['authors'])) { 113 if (!empty($content['authors']) && is_array($content['authors'])) {
109 $entry->setPublishedBy($content['authors']); 114 $entry->setPublishedBy($content['authors']);
110 } 115 }
111 116
@@ -113,7 +118,24 @@ class ContentProxy
113 $entry->setHeaders($content['all_headers']); 118 $entry->setHeaders($content['all_headers']);
114 } 119 }
115 120
116 $entry->setLanguage(isset($content['language']) ? $content['language'] : ''); 121 $this->validateAndSetLanguage(
122 $entry,
123 isset($content['language']) ? $content['language'] : ''
124 );
125
126 $this->validateAndSetPreviewPicture(
127 $entry,
128 isset($content['open_graph']['og_image']) ? $content['open_graph']['og_image'] : ''
129 );
130
131 // if content is an image, define it as a preview too
132 if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
133 $this->validateAndSetPreviewPicture(
134 $entry,
135 $content['url']
136 );
137 }
138
117 $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); 139 $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : '');
118 $entry->setReadingTime(Utils::getReadingTime($html)); 140 $entry->setReadingTime(Utils::getReadingTime($html));
119 141
@@ -122,15 +144,6 @@ class ContentProxy
122 $entry->setDomainName($domainName); 144 $entry->setDomainName($domainName);
123 } 145 }
124 146
125 if (!empty($content['open_graph']['og_image'])) {
126 $entry->setPreviewPicture($content['open_graph']['og_image']);
127 }
128
129 // if content is an image define as a preview too
130 if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
131 $entry->setPreviewPicture($content['url']);
132 }
133
134 try { 147 try {
135 $this->tagger->tag($entry); 148 $this->tagger->tag($entry);
136 } catch (\Exception $e) { 149 } catch (\Exception $e) {
@@ -152,4 +165,52 @@ class ContentProxy
152 { 165 {
153 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); 166 return !empty($content['title']) && !empty($content['html']) && !empty($content['url']);
154 } 167 }
168
169 /**
170 * Use a Symfony validator to ensure the language is well formatted.
171 *
172 * @param Entry $entry
173 * @param string $value Language to validate
174 */
175 private function validateAndSetLanguage($entry, $value)
176 {
177 // some lang are defined as fr-FR, es-ES.
178 // replacing - by _ might increase language support
179 $value = str_replace('-', '_', $value);
180
181 $errors = $this->validator->validate(
182 $value,
183 (new LocaleConstraint())
184 );
185
186 if (0 === count($errors)) {
187 $entry->setLanguage($value);
188
189 return;
190 }
191
192 $this->logger->warning('Language validation failed. '.(string) $errors);
193 }
194
195 /**
196 * Use a Symfony validator to ensure the preview picture is a real url.
197 *
198 * @param Entry $entry
199 * @param string $value URL to validate
200 */
201 private function validateAndSetPreviewPicture($entry, $value)
202 {
203 $errors = $this->validator->validate(
204 $value,
205 (new UrlConstraint())
206 );
207
208 if (0 === count($errors)) {
209 $entry->setPreviewPicture($value);
210
211 return;
212 }
213
214 $this->logger->warning('PreviewPicture validation failed. '.(string) $errors);
215 }
155} 216}
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 54e23a05..ed888cdb 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper;
5use Psr\Log\LoggerInterface; 5use Psr\Log\LoggerInterface;
6use Symfony\Component\DomCrawler\Crawler; 6use Symfony\Component\DomCrawler\Crawler;
7use GuzzleHttp\Client; 7use GuzzleHttp\Client;
8use GuzzleHttp\Message\Response;
8use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; 9use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser;
9use Symfony\Component\Finder\Finder; 10use Symfony\Component\Finder\Finder;
10 11
@@ -116,13 +117,11 @@ class DownloadImages
116 return false; 117 return false;
117 } 118 }
118 119
119 $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); 120 $ext = $this->getExtensionFromResponse($res, $imagePath);
120 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); 121 if (false === $res) {
121 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
122 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
123
124 return false; 122 return false;
125 } 123 }
124
126 $hashImage = hash('crc32', $absolutePath); 125 $hashImage = hash('crc32', $absolutePath);
127 $localPath = $folderPath.'/'.$hashImage.'.'.$ext; 126 $localPath = $folderPath.'/'.$hashImage.'.'.$ext;
128 127
@@ -237,4 +236,45 @@ class DownloadImages
237 236
238 return false; 237 return false;
239 } 238 }
239
240 /**
241 * Retrieve and validate the extension from the response of the url of the image.
242 *
243 * @param Response $res Guzzle Response
244 * @param string $imagePath Path from the src image from the content (used for log only)
245 *
246 * @return string|false Extension name or false if validation failed
247 */
248 private function getExtensionFromResponse(Response $res, $imagePath)
249 {
250 $ext = $this->mimeGuesser->guess($res->getHeader('content-type'));
251 $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);
252
253 // ok header doesn't have the extension, try a different way
254 if (empty($ext)) {
255 $types = [
256 'jpeg' => "\xFF\xD8\xFF",
257 'gif' => 'GIF',
258 'png' => "\x89\x50\x4e\x47\x0d\x0a",
259 ];
260 $bytes = substr((string) $res->getBody(), 0, 8);
261
262 foreach ($types as $type => $header) {
263 if (0 === strpos($bytes, $header)) {
264 $ext = $type;
265 break;
266 }
267 }
268
269 $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
270 }
271
272 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
273 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);
274
275 return false;
276 }
277
278 return $ext;
279 }
240} 280}