about | summary | refs | log | tree | commit | diff | homepage
path: root/src
diff options
context:
space:
mode:
author: Kevin Decherf <kevin@kdecherf.com> 2019-08-11 23:55:52 +0200
committer: Kevin Decherf <kevin@kdecherf.com> 2020-04-25 15:59:23 +0200
commit: b22eb276232b5c15a6fbadc9dd10144e709faec3 (patch)
tree: 7c5b96a42f5b7d926c90b74b1bd949889dce5f70 /src
parent: 2495b197614d82b99eed6bbec4562078f4429ad7 (diff)
download: wallabag-b22eb276232b5c15a6fbadc9dd10144e709faec3.tar.gz
wallabag-b22eb276232b5c15a6fbadc9dd10144e709faec3.tar.zst
wallabag-b22eb276232b5c15a6fbadc9dd10144e709faec3.zip
ContentProxy: replace ignoreUrl with new RuleBasedIgnoreOriginProcessor
Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
Diffstat (limited to 'src')
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 41
1 files changed, 4 insertions, 37 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index 9c6fa8db..7e93249d 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -19,6 +19,7 @@ class ContentProxy
19{ 19{
20 protected $graby; 20 protected $graby;
21 protected $tagger; 21 protected $tagger;
22 protected $ignoreOriginProcessor;
22 protected $validator; 23 protected $validator;
23 protected $logger; 24 protected $logger;
24 protected $mimeGuesser; 25 protected $mimeGuesser;
@@ -26,10 +27,11 @@ class ContentProxy
26 protected $eventDispatcher; 27 protected $eventDispatcher;
27 protected $storeArticleHeaders; 28 protected $storeArticleHeaders;
28 29
29 public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) 30 public function __construct(Graby $graby, RuleBasedTagger $tagger, RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false)
30 { 31 {
31 $this->graby = $graby; 32 $this->graby = $graby;
32 $this->tagger = $tagger; 33 $this->tagger = $tagger;
34 $this->ignoreOriginProcessor = $ignoreOriginProcessor;
33 $this->validator = $validator; 35 $this->validator = $validator;
34 $this->logger = $logger; 36 $this->logger = $logger;
35 $this->mimeGuesser = new MimeTypeExtensionGuesser(); 37 $this->mimeGuesser = new MimeTypeExtensionGuesser();
@@ -356,7 +358,7 @@ class ContentProxy
356 $diff_keys = array_keys($diff); 358 $diff_keys = array_keys($diff);
357 sort($diff_keys); 359 sort($diff_keys);
358 360
359 if ($this->ignoreUrl($entry->getUrl())) { 361 if ($this->ignoreOriginProcessor->process($entry)) {
360 $entry->setUrl($url); 362 $entry->setUrl($url);
361 363
362 return false; 364 return false;
@@ -396,41 +398,6 @@ class ContentProxy
396 } 398 }
397 399
398 /** 400 /**
399 * Check entry url against an ignore list to replace with content url.
400 *
401 * XXX: move the ignore list in the database to let users handle it
402 *
403 * @param string $url url to test
404 *
405 * @return bool true if url matches ignore list otherwise false
406 */
407 private function ignoreUrl($url)
408 {
409 $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com'];
410 $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*'];
411
412 $parsed_url = parse_url($url);
413
414 $filtered = array_filter($ignored_hosts, function ($var) use ($parsed_url) {
415 return $var === $parsed_url['host'];
416 });
417
418 if ([] !== $filtered) {
419 return true;
420 }
421
422 $filtered = array_filter($ignored_patterns, function ($var) use ($url) {
423 return preg_match("`$var`i", $url);
424 });
425
426 if ([] !== $filtered) {
427 return true;
428 }
429
430 return false;
431 }
432
433 /**
434 * Validate that the given content has at least a title, an html and a url. 401 * Validate that the given content has at least a title, an html and a url.
435 * 402 *
436 * @return bool true if valid otherwise false 403 * @return bool true if valid otherwise false