diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 85 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/DownloadImages.php | 50 |
2 files changed, 118 insertions, 17 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index bfaa1976..0c971863 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -7,6 +7,9 @@ use Psr\Log\LoggerInterface; | |||
7 | use Wallabag\CoreBundle\Entity\Entry; | 7 | use Wallabag\CoreBundle\Entity\Entry; |
8 | use Wallabag\CoreBundle\Tools\Utils; | 8 | use Wallabag\CoreBundle\Tools\Utils; |
9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
10 | use Symfony\Component\Validator\Constraints\Locale as LocaleConstraint; | ||
11 | use Symfony\Component\Validator\Constraints\Url as UrlConstraint; | ||
12 | use Symfony\Component\Validator\Validator\ValidatorInterface; | ||
10 | 13 | ||
11 | /** | 14 | /** |
12 | * This kind of proxy class takes care of getting the content from a URL | 15 | * This kind of proxy class takes care of getting the content from a URL |
@@ -16,15 +19,17 @@ class ContentProxy | |||
16 | { | 19 | { |
17 | protected $graby; | 20 | protected $graby; |
18 | protected $tagger; | 21 | protected $tagger; |
22 | protected $validator; | ||
19 | protected $logger; | 23 | protected $logger; |
20 | protected $mimeGuesser; | 24 | protected $mimeGuesser; |
21 | protected $fetchingErrorMessage; | 25 | protected $fetchingErrorMessage; |
22 | protected $eventDispatcher; | 26 | protected $eventDispatcher; |
23 | 27 | ||
24 | public function __construct(Graby $graby, RuleBasedTagger $tagger, LoggerInterface $logger, $fetchingErrorMessage) | 28 | public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage) |
25 | { | 29 | { |
26 | $this->graby = $graby; | 30 | $this->graby = $graby; |
27 | $this->tagger = $tagger; | 31 | $this->tagger = $tagger; |
32 | $this->validator = $validator; | ||
28 | $this->logger = $logger; | 33 | $this->logger = $logger; |
29 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); | 34 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); |
30 | $this->fetchingErrorMessage = $fetchingErrorMessage; | 35 | $this->fetchingErrorMessage = $fetchingErrorMessage; |
@@ -105,7 +110,7 @@ class ContentProxy | |||
105 | } | 110 | } |
106 | } | 111 | } |
107 | 112 | ||
108 | if (!empty($content['authors'])) { | 113 | if (!empty($content['authors']) && is_array($content['authors'])) { |
109 | $entry->setPublishedBy($content['authors']); | 114 | $entry->setPublishedBy($content['authors']); |
110 | } | 115 | } |
111 | 116 | ||
@@ -113,7 +118,24 @@ class ContentProxy | |||
113 | $entry->setHeaders($content['all_headers']); | 118 | $entry->setHeaders($content['all_headers']); |
114 | } | 119 | } |
115 | 120 | ||
116 | $entry->setLanguage(isset($content['language']) ? $content['language'] : ''); | 121 | $this->validateAndSetLanguage( |
122 | $entry, | ||
123 | isset($content['language']) ? $content['language'] : '' | ||
124 | ); | ||
125 | |||
126 | $this->validateAndSetPreviewPicture( | ||
127 | $entry, | ||
128 | isset($content['open_graph']['og_image']) ? $content['open_graph']['og_image'] : '' | ||
129 | ); | ||
130 | |||
131 | // if content is an image, define it as a preview too | ||
132 | if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
133 | $this->validateAndSetPreviewPicture( | ||
134 | $entry, | ||
135 | $content['url'] | ||
136 | ); | ||
137 | } | ||
138 | |||
117 | $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); | 139 | $entry->setMimetype(isset($content['content_type']) ? $content['content_type'] : ''); |
118 | $entry->setReadingTime(Utils::getReadingTime($html)); | 140 | $entry->setReadingTime(Utils::getReadingTime($html)); |
119 | 141 | ||
@@ -122,15 +144,6 @@ class ContentProxy | |||
122 | $entry->setDomainName($domainName); | 144 | $entry->setDomainName($domainName); |
123 | } | 145 | } |
124 | 146 | ||
125 | if (!empty($content['open_graph']['og_image'])) { | ||
126 | $entry->setPreviewPicture($content['open_graph']['og_image']); | ||
127 | } | ||
128 | |||
129 | // if content is an image define as a preview too | ||
130 | if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
131 | $entry->setPreviewPicture($content['url']); | ||
132 | } | ||
133 | |||
134 | try { | 147 | try { |
135 | $this->tagger->tag($entry); | 148 | $this->tagger->tag($entry); |
136 | } catch (\Exception $e) { | 149 | } catch (\Exception $e) { |
@@ -152,4 +165,52 @@ class ContentProxy | |||
152 | { | 165 | { |
153 | return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); | 166 | return !empty($content['title']) && !empty($content['html']) && !empty($content['url']); |
154 | } | 167 | } |
168 | |||
/**
 * Normalise the detected language and store it on the entry when the
 * Symfony Locale constraint accepts it.
 *
 * When validation fails the entry is left untouched and a warning is logged.
 *
 * @param Entry  $entry Entry receiving the language
 * @param string $value Language to validate (e.g. "fr", "fr-FR", "fr_FR")
 */
private function validateAndSetLanguage($entry, $value)
{
    // Pages often declare their language as "fr-FR" or "es-ES";
    // switching the dash to an underscore lets more of them validate as locales.
    $locale = str_replace('-', '_', $value);

    $violations = $this->validator->validate($locale, new LocaleConstraint());

    if (count($violations) > 0) {
        $this->logger->warning('Language validation failed. '.(string) $violations);

        return;
    }

    $entry->setLanguage($locale);
}
194 | |||
/**
 * Use a Symfony validator to ensure the preview picture is a real url
 * before storing it on the entry.
 *
 * Nothing is stored when the value is empty or when validation fails;
 * a validation failure is additionally logged as a warning.
 *
 * @param Entry  $entry Entry receiving the preview picture
 * @param string $value URL to validate
 */
private function validateAndSetPreviewPicture($entry, $value)
{
    // Symfony's Url constraint treats null and empty strings as valid, so an
    // absent picture would otherwise pass validation and be stored as ''.
    // Skip it explicitly: "no picture found" must leave the entry untouched.
    if (null === $value || '' === $value) {
        return;
    }

    $errors = $this->validator->validate(
        $value,
        (new UrlConstraint())
    );

    if (0 === count($errors)) {
        $entry->setPreviewPicture($value);

        return;
    }

    $this->logger->warning('PreviewPicture validation failed. '.(string) $errors);
}
155 | } | 216 | } |
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php index 54e23a05..ed888cdb 100644 --- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php +++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php | |||
@@ -5,6 +5,7 @@ namespace Wallabag\CoreBundle\Helper; | |||
5 | use Psr\Log\LoggerInterface; | 5 | use Psr\Log\LoggerInterface; |
6 | use Symfony\Component\DomCrawler\Crawler; | 6 | use Symfony\Component\DomCrawler\Crawler; |
7 | use GuzzleHttp\Client; | 7 | use GuzzleHttp\Client; |
8 | use GuzzleHttp\Message\Response; | ||
8 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; | 9 | use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeExtensionGuesser; |
9 | use Symfony\Component\Finder\Finder; | 10 | use Symfony\Component\Finder\Finder; |
10 | 11 | ||
@@ -116,13 +117,11 @@ class DownloadImages | |||
116 | return false; | 117 | return false; |
117 | } | 118 | } |
118 | 119 | ||
119 | $ext = $this->mimeGuesser->guess($res->getHeader('content-type')); | 120 | $ext = $this->getExtensionFromResponse($res, $imagePath); |
120 | $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]); | 121 | if (false === $res) { |
121 | if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { | ||
122 | $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath); | ||
123 | |||
124 | return false; | 122 | return false; |
125 | } | 123 | } |
124 | |||
126 | $hashImage = hash('crc32', $absolutePath); | 125 | $hashImage = hash('crc32', $absolutePath); |
127 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; | 126 | $localPath = $folderPath.'/'.$hashImage.'.'.$ext; |
128 | 127 | ||
@@ -237,4 +236,45 @@ class DownloadImages | |||
237 | 236 | ||
238 | return false; | 237 | return false; |
239 | } | 238 | } |
239 | |||
/**
 * Work out the image extension for a downloaded response and check that it
 * is one of the allowed image types (jpeg, jpg, gif, png).
 *
 * The extension is first guessed from the content-type header. Only when that
 * guess is empty are the leading bytes of the body compared against known
 * image signatures.
 *
 * @param Response $res       Guzzle Response
 * @param string   $imagePath Path from the src image from the content (used for log only)
 *
 * @return string|false Extension name, or false when it is not an allowed one
 *                      (callers must compare the result against false)
 */
private function getExtensionFromResponse(Response $res, $imagePath)
{
    $allowedExtensions = ['jpeg', 'jpg', 'gif', 'png'];

    $ext = $this->mimeGuesser->guess($res->getHeader('content-type'));
    $this->logger->debug('DownloadImages: Checking extension', ['ext' => $ext, 'header' => $res->getHeader('content-type')]);

    // The header did not yield an extension: fall back to sniffing the
    // signature bytes at the very start of the body.
    if (empty($ext)) {
        $signatures = [
            'jpeg' => "\xFF\xD8\xFF",
            'gif' => 'GIF',
            'png' => "\x89\x50\x4e\x47\x0d\x0a",
        ];
        $leadingBytes = substr((string) $res->getBody(), 0, 8);

        foreach ($signatures as $candidate => $signature) {
            // prefix comparison: the body has to start with the signature
            if (0 === strncmp($leadingBytes, $signature, strlen($signature))) {
                $ext = $candidate;
                break;
            }
        }

        $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
    }

    if (in_array($ext, $allowedExtensions, true)) {
        return $ext;
    }

    $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: '.$imagePath);

    return false;
}
240 | } | 280 | } |