aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/Wallabag/CoreBundle/Helper
diff options
context:
space:
mode:
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r--src/Wallabag/CoreBundle/Helper/ContentProxy.php62
-rw-r--r--src/Wallabag/CoreBundle/Helper/CryptoProxy.php2
-rw-r--r--src/Wallabag/CoreBundle/Helper/DownloadImages.php47
-rw-r--r--src/Wallabag/CoreBundle/Helper/EntriesExport.php4
-rw-r--r--src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php2
-rw-r--r--src/Wallabag/CoreBundle/Helper/Redirect.php2
-rw-r--r--src/Wallabag/CoreBundle/Helper/TagsAssigner.php4
7 files changed, 108 insertions, 15 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index fe795d42..d4ea608f 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -53,6 +53,7 @@ class ContentProxy
53 53
54 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) { 54 if ((empty($content) || false === $this->validateContent($content)) && false === $disableContentUpdate) {
55 $fetchedContent = $this->graby->fetchContent($url); 55 $fetchedContent = $this->graby->fetchContent($url);
56 $fetchedContent['title'] = $this->sanitizeContentTitle($fetchedContent['title'], $fetchedContent['content_type']);
56 57
57 // when content is imported, we have information in $content 58 // when content is imported, we have information in $content
58 // in case fetching content goes bad, we'll keep the imported information instead of overriding them 59 // in case fetching content goes bad, we'll keep the imported information instead of overriding them
@@ -85,7 +86,7 @@ class ContentProxy
85 (new LocaleConstraint()) 86 (new LocaleConstraint())
86 ); 87 );
87 88
88 if (0 === count($errors)) { 89 if (0 === \count($errors)) {
89 $entry->setLanguage($value); 90 $entry->setLanguage($value);
90 91
91 return; 92 return;
@@ -107,7 +108,7 @@ class ContentProxy
107 (new UrlConstraint()) 108 (new UrlConstraint())
108 ); 109 );
109 110
110 if (0 === count($errors)) { 111 if (0 === \count($errors)) {
111 $entry->setPreviewPicture($value); 112 $entry->setPreviewPicture($value);
112 113
113 return; 114 return;
@@ -177,6 +178,59 @@ class ContentProxy
177 } 178 }
178 179
179 /** 180 /**
181 * Try to sanitize the title of the fetched content from wrong character encodings and invalid UTF-8 characters.
182 *
183 * @param $title
184 * @param $contentType
185 *
186 * @return string
187 */
188 private function sanitizeContentTitle($title, $contentType)
189 {
190 if ('application/pdf' === $contentType) {
191 $title = $this->convertPdfEncodingToUTF8($title);
192 }
193
194 return $this->sanitizeUTF8Text($title);
195 }
196
197 /**
198 * If the title from the fetched content comes from a PDF, then it's very possible that the character encoding is not
199 * UTF-8. This method tries to identify the character encoding and translate the title to UTF-8.
200 *
201 * @param $title
202 *
203 * @return string (may contain invalid UTF-8 characters)
204 */
205 private function convertPdfEncodingToUTF8($title)
206 {
207 // first try UTF-8 because it's easier to detect its presence/absence
208 foreach (['UTF-8', 'UTF-16BE', 'WINDOWS-1252'] as $encoding) {
209 if (mb_check_encoding($title, $encoding)) {
210 return mb_convert_encoding($title, 'UTF-8', $encoding);
211 }
212 }
213
214 return $title;
215 }
216
217 /**
218 * Remove invalid UTF-8 characters from the given string.
219 *
220 * @param string $rawText
221 *
222 * @return string
223 */
224 private function sanitizeUTF8Text($rawText)
225 {
226 if (mb_check_encoding($rawText, 'UTF-8')) {
227 return $rawText;
228 }
229
230 return iconv('UTF-8', 'UTF-8//IGNORE', $rawText);
231 }
232
233 /**
180 * Stock entry with fetched or imported content. 234 * Stock entry with fetched or imported content.
181 * Will fall back to OpenGraph data if available. 235 * Will fall back to OpenGraph data if available.
182 * 236 *
@@ -212,7 +266,7 @@ class ContentProxy
212 $entry->setHttpStatus($content['status']); 266 $entry->setHttpStatus($content['status']);
213 } 267 }
214 268
215 if (!empty($content['authors']) && is_array($content['authors'])) { 269 if (!empty($content['authors']) && \is_array($content['authors'])) {
216 $entry->setPublishedBy($content['authors']); 270 $entry->setPublishedBy($content['authors']);
217 } 271 }
218 272
@@ -233,7 +287,7 @@ class ContentProxy
233 } 287 }
234 288
235 // if content is an image, define it as a preview too 289 // if content is an image, define it as a preview too
236 if (!empty($content['content_type']) && in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) { 290 if (!empty($content['content_type']) && \in_array($this->mimeGuesser->guess($content['content_type']), ['jpeg', 'jpg', 'gif', 'png'], true)) {
237 $this->updatePreviewPicture($entry, $content['url']); 291 $this->updatePreviewPicture($entry, $content['url']);
238 } 292 }
239 293
diff --git a/src/Wallabag/CoreBundle/Helper/CryptoProxy.php b/src/Wallabag/CoreBundle/Helper/CryptoProxy.php
index 7d8c9888..67d73915 100644
--- a/src/Wallabag/CoreBundle/Helper/CryptoProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/CryptoProxy.php
@@ -81,6 +81,6 @@ class CryptoProxy
81 */ 81 */
82 private function mask($value) 82 private function mask($value)
83 { 83 {
84 return strlen($value) > 0 ? $value[0] . '*****' . $value[strlen($value) - 1] : 'Empty value'; 84 return \strlen($value) > 0 ? $value[0] . '*****' . $value[\strlen($value) - 1] : 'Empty value';
85 } 85 }
86} 86}
diff --git a/src/Wallabag/CoreBundle/Helper/DownloadImages.php b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
index 252ba57c..cc3dcfce 100644
--- a/src/Wallabag/CoreBundle/Helper/DownloadImages.php
+++ b/src/Wallabag/CoreBundle/Helper/DownloadImages.php
@@ -42,14 +42,17 @@ class DownloadImages
42 public function processHtml($entryId, $html, $url) 42 public function processHtml($entryId, $html, $url)
43 { 43 {
44 $crawler = new Crawler($html); 44 $crawler = new Crawler($html);
45 $result = $crawler 45 $imagesCrawler = $crawler
46 ->filterXpath('//img') 46 ->filterXpath('//img');
47 $imagesUrls = $imagesCrawler
47 ->extract(['src']); 48 ->extract(['src']);
49 $imagesSrcsetUrls = $this->getSrcsetUrls($imagesCrawler);
50 $imagesUrls = array_unique(array_merge($imagesUrls, $imagesSrcsetUrls));
48 51
49 $relativePath = $this->getRelativePath($entryId); 52 $relativePath = $this->getRelativePath($entryId);
50 53
51 // download and save the image to the folder 54 // download and save the image to the folder
52 foreach ($result as $image) { 55 foreach ($imagesUrls as $image) {
53 $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath); 56 $imagePath = $this->processSingleImage($entryId, $image, $url, $relativePath);
54 57
55 if (false === $imagePath) { 58 if (false === $imagePath) {
@@ -82,6 +85,10 @@ class DownloadImages
82 */ 85 */
83 public function processSingleImage($entryId, $imagePath, $url, $relativePath = null) 86 public function processSingleImage($entryId, $imagePath, $url, $relativePath = null)
84 { 87 {
88 if (null === $imagePath) {
89 return false;
90 }
91
85 if (null === $relativePath) { 92 if (null === $relativePath) {
86 $relativePath = $this->getRelativePath($entryId); 93 $relativePath = $this->getRelativePath($entryId);
87 } 94 }
@@ -172,6 +179,38 @@ class DownloadImages
172 } 179 }
173 180
174 /** 181 /**
182 * Get images urls from the srcset image attribute.
183 *
184 * @param Crawler $imagesCrawler
185 *
186 * @return array An array of urls
187 */
188 private function getSrcsetUrls(Crawler $imagesCrawler)
189 {
190 $urls = [];
191 $iterator = $imagesCrawler
192 ->getIterator();
193 while ($iterator->valid()) {
194 $srcsetAttribute = $iterator->current()->getAttribute('srcset');
195 if ('' !== $srcsetAttribute) {
196 // URL part: one or more characters that are not " OR ' OR a white space
197 // Then zero or more white spaces
198 // Then one or more digits followed by w OR x (the descriptor)
199 $pattern = "/(?:[^\"'\s]+\s*(?:\d+[wx])+)/";
200 preg_match_all($pattern, $srcsetAttribute, $matches);
201 $srcset = \call_user_func_array('array_merge', $matches);
202 $srcsetUrls = array_map(function ($src) {
203 return trim(explode(' ', $src, 2)[0]);
204 }, $srcset);
205 $urls = array_merge($srcsetUrls, $urls);
206 }
207 $iterator->next();
208 }
209
210 return $urls;
211 }
212
213 /**
175 * Setup base folder where all images are going to be saved. 214 * Setup base folder where all images are going to be saved.
176 */ 215 */
177 private function setFolder() 216 private function setFolder()
@@ -269,7 +308,7 @@ class DownloadImages
269 $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]); 308 $this->logger->debug('DownloadImages: Checking extension (alternative)', ['ext' => $ext]);
270 } 309 }
271 310
272 if (!in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) { 311 if (!\in_array($ext, ['jpeg', 'jpg', 'gif', 'png'], true)) {
273 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath); 312 $this->logger->error('DownloadImages: Processed image with not allowed extension. Skipping: ' . $imagePath);
274 313
275 return false; 314 return false;
diff --git a/src/Wallabag/CoreBundle/Helper/EntriesExport.php b/src/Wallabag/CoreBundle/Helper/EntriesExport.php
index 136f66f5..cbf1037b 100644
--- a/src/Wallabag/CoreBundle/Helper/EntriesExport.php
+++ b/src/Wallabag/CoreBundle/Helper/EntriesExport.php
@@ -45,7 +45,7 @@ class EntriesExport
45 */ 45 */
46 public function setEntries($entries) 46 public function setEntries($entries)
47 { 47 {
48 if (!is_array($entries)) { 48 if (!\is_array($entries)) {
49 $this->language = $entries->getLanguage(); 49 $this->language = $entries->getLanguage();
50 $entries = [$entries]; 50 $entries = [$entries];
51 } 51 }
@@ -325,7 +325,7 @@ class EntriesExport
325 { 325 {
326 $delimiter = ';'; 326 $delimiter = ';';
327 $enclosure = '"'; 327 $enclosure = '"';
328 $handle = fopen('php://memory', 'rb+'); 328 $handle = fopen('php://memory', 'b+r');
329 329
330 fputcsv($handle, ['Title', 'URL', 'Content', 'Tags', 'MIME Type', 'Language', 'Creation date'], $delimiter, $enclosure); 330 fputcsv($handle, ['Title', 'URL', 'Content', 'Tags', 'MIME Type', 'Language', 'Creation date'], $delimiter, $enclosure);
331 331
diff --git a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
index 49c1ea41..1c2c5093 100644
--- a/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
+++ b/src/Wallabag/CoreBundle/Helper/PreparePagerForEntries.php
@@ -31,7 +31,7 @@ class PreparePagerForEntries
31 $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null; 31 $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null;
32 } 32 }
33 33
34 if (null === $user || !is_object($user)) { 34 if (null === $user || !\is_object($user)) {
35 return; 35 return;
36 } 36 }
37 37
diff --git a/src/Wallabag/CoreBundle/Helper/Redirect.php b/src/Wallabag/CoreBundle/Helper/Redirect.php
index abc84d08..9d1a6345 100644
--- a/src/Wallabag/CoreBundle/Helper/Redirect.php
+++ b/src/Wallabag/CoreBundle/Helper/Redirect.php
@@ -31,7 +31,7 @@ class Redirect
31 { 31 {
32 $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null; 32 $user = $this->tokenStorage->getToken() ? $this->tokenStorage->getToken()->getUser() : null;
33 33
34 if (null === $user || !is_object($user)) { 34 if (null === $user || !\is_object($user)) {
35 return $url; 35 return $url;
36 } 36 }
37 37
diff --git a/src/Wallabag/CoreBundle/Helper/TagsAssigner.php b/src/Wallabag/CoreBundle/Helper/TagsAssigner.php
index 0bfe5c57..e6b4989f 100644
--- a/src/Wallabag/CoreBundle/Helper/TagsAssigner.php
+++ b/src/Wallabag/CoreBundle/Helper/TagsAssigner.php
@@ -32,7 +32,7 @@ class TagsAssigner
32 { 32 {
33 $tagsEntities = []; 33 $tagsEntities = [];
34 34
35 if (!is_array($tags)) { 35 if (!\is_array($tags)) {
36 $tags = explode(',', $tags); 36 $tags = explode(',', $tags);
37 } 37 }
38 38
@@ -48,7 +48,7 @@ class TagsAssigner
48 $label = trim(mb_convert_case($label, MB_CASE_LOWER)); 48 $label = trim(mb_convert_case($label, MB_CASE_LOWER));
49 49
50 // avoid empty tag 50 // avoid empty tag
51 if (0 === strlen($label)) { 51 if (0 === \strlen($label)) {
52 continue; 52 continue;
53 } 53 }
54 54