From b22eb276232b5c15a6fbadc9dd10144e709faec3 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Sun, 11 Aug 2019 23:55:52 +0200 Subject: ContentProxy: replace ignoreUrl with new RuleBasedIgnoreOriginProcessor Signed-off-by: Kevin Decherf --- src/Wallabag/CoreBundle/Helper/ContentProxy.php | 41 +++---------------------- 1 file changed, 4 insertions(+), 37 deletions(-) (limited to 'src') diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 9c6fa8db..7e93249d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php @@ -19,6 +19,7 @@ class ContentProxy { protected $graby; protected $tagger; + protected $ignoreOriginProcessor; protected $validator; protected $logger; protected $mimeGuesser; @@ -26,10 +27,11 @@ class ContentProxy protected $eventDispatcher; protected $storeArticleHeaders; - public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) + public function __construct(Graby $graby, RuleBasedTagger $tagger, RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) { $this->graby = $graby; $this->tagger = $tagger; + $this->ignoreOriginProcessor = $ignoreOriginProcessor; $this->validator = $validator; $this->logger = $logger; $this->mimeGuesser = new MimeTypeExtensionGuesser(); @@ -356,7 +358,7 @@ class ContentProxy $diff_keys = array_keys($diff); sort($diff_keys); - if ($this->ignoreUrl($entry->getUrl())) { + if ($this->ignoreOriginProcessor->process($entry)) { $entry->setUrl($url); return false; @@ -395,41 +397,6 @@ class ContentProxy } } - /** - * Check entry url against an ignore list to replace with content url. - * - * XXX: move the ignore list in the database to let users handle it - * - * @param string $url url to test - * - * @return bool true if url matches ignore list otherwise false - */ - private function ignoreUrl($url) - { - $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com']; - $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*']; - - $parsed_url = parse_url($url); - - $filtered = array_filter($ignored_hosts, function ($var) use ($parsed_url) { - return $var === $parsed_url['host']; - }); - - if ([] !== $filtered) { - return true; - } - - $filtered = array_filter($ignored_patterns, function ($var) use ($url) { - return preg_match("`$var`i", $url); - }); - - if ([] !== $filtered) { - return true; - } - - return false; - } - /** * Validate that the given content has at least a title, an html and a url. * -- cgit v1.2.3