diff options
Diffstat (limited to 'src/Wallabag/CoreBundle/Helper')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 41 | ||||
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/RuleBasedIgnoreOriginProcessor.php | 50 |
2 files changed, 54 insertions, 37 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 9c6fa8db..7e93249d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -19,6 +19,7 @@ class ContentProxy | |||
19 | { | 19 | { |
20 | protected $graby; | 20 | protected $graby; |
21 | protected $tagger; | 21 | protected $tagger; |
22 | protected $ignoreOriginProcessor; | ||
22 | protected $validator; | 23 | protected $validator; |
23 | protected $logger; | 24 | protected $logger; |
24 | protected $mimeGuesser; | 25 | protected $mimeGuesser; |
@@ -26,10 +27,11 @@ class ContentProxy | |||
26 | protected $eventDispatcher; | 27 | protected $eventDispatcher; |
27 | protected $storeArticleHeaders; | 28 | protected $storeArticleHeaders; |
28 | 29 | ||
29 | public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) | 30 | public function __construct(Graby $graby, RuleBasedTagger $tagger, RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) |
30 | { | 31 | { |
31 | $this->graby = $graby; | 32 | $this->graby = $graby; |
32 | $this->tagger = $tagger; | 33 | $this->tagger = $tagger; |
34 | $this->ignoreOriginProcessor = $ignoreOriginProcessor; | ||
33 | $this->validator = $validator; | 35 | $this->validator = $validator; |
34 | $this->logger = $logger; | 36 | $this->logger = $logger; |
35 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); | 37 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); |
@@ -356,7 +358,7 @@ class ContentProxy | |||
356 | $diff_keys = array_keys($diff); | 358 | $diff_keys = array_keys($diff); |
357 | sort($diff_keys); | 359 | sort($diff_keys); |
358 | 360 | ||
359 | if ($this->ignoreUrl($entry->getUrl())) { | 361 | if ($this->ignoreOriginProcessor->process($entry)) { |
360 | $entry->setUrl($url); | 362 | $entry->setUrl($url); |
361 | 363 | ||
362 | return false; | 364 | return false; |
@@ -396,41 +398,6 @@ class ContentProxy | |||
396 | } | 398 | } |
397 | 399 | ||
398 | /** | 400 | /** |
399 | * Check entry url against an ignore list to replace with content url. | ||
400 | * | ||
401 | * XXX: move the ignore list in the database to let users handle it | ||
402 | * | ||
403 | * @param string $url url to test | ||
404 | * | ||
405 | * @return bool true if url matches ignore list otherwise false | ||
406 | */ | ||
407 | private function ignoreUrl($url) | ||
408 | { | ||
409 | $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com']; | ||
410 | $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*']; | ||
411 | |||
412 | $parsed_url = parse_url($url); | ||
413 | |||
414 | $filtered = array_filter($ignored_hosts, function ($var) use ($parsed_url) { | ||
415 | return $var === $parsed_url['host']; | ||
416 | }); | ||
417 | |||
418 | if ([] !== $filtered) { | ||
419 | return true; | ||
420 | } | ||
421 | |||
422 | $filtered = array_filter($ignored_patterns, function ($var) use ($url) { | ||
423 | return preg_match("`$var`i", $url); | ||
424 | }); | ||
425 | |||
426 | if ([] !== $filtered) { | ||
427 | return true; | ||
428 | } | ||
429 | |||
430 | return false; | ||
431 | } | ||
432 | |||
433 | /** | ||
434 | * Validate that the given content has at least a title, an html and a url. | 401 | * Validate that the given content has at least a title, an html and a url. |
435 | * | 402 | * |
436 | * @return bool true if valid otherwise false | 403 | * @return bool true if valid otherwise false |
diff --git a/src/Wallabag/CoreBundle/Helper/RuleBasedIgnoreOriginProcessor.php b/src/Wallabag/CoreBundle/Helper/RuleBasedIgnoreOriginProcessor.php new file mode 100644 index 00000000..333e5b0a --- /dev/null +++ b/src/Wallabag/CoreBundle/Helper/RuleBasedIgnoreOriginProcessor.php | |||
@@ -0,0 +1,50 @@ | |||
1 | <?php | ||
2 | |||
3 | namespace Wallabag\CoreBundle\Helper; | ||
4 | |||
5 | use Psr\Log\LoggerInterface; | ||
6 | use RulerZ\RulerZ; | ||
7 | use Wallabag\CoreBundle\Entity\Entry; | ||
8 | use Wallabag\CoreBundle\Repository\IgnoreOriginInstanceRuleRepository; | ||
9 | |||
10 | class RuleBasedIgnoreOriginProcessor | ||
11 | { | ||
12 | protected $rulerz; | ||
13 | protected $logger; | ||
14 | protected $ignoreOriginInstanceRuleRepository; | ||
15 | |||
16 | public function __construct(RulerZ $rulerz, LoggerInterface $logger, IgnoreOriginInstanceRuleRepository $ignoreOriginInstanceRuleRepository) | ||
17 | { | ||
18 | $this->rulerz = $rulerz; | ||
19 | $this->logger = $logger; | ||
20 | $this->ignoreOriginInstanceRuleRepository = $ignoreOriginInstanceRuleRepository; | ||
21 | } | ||
22 | |||
23 | /** | ||
24 | * @param Entry $entry Entry to process | ||
25 | * | ||
26 | * @return bool | ||
27 | */ | ||
28 | public function process(Entry $entry) | ||
29 | { | ||
30 | $url = $entry->getUrl(); | ||
31 | $userRules = $entry->getUser()->getConfig()->getIgnoreOriginRules()->toArray(); | ||
32 | $rules = array_merge($this->ignoreOriginInstanceRuleRepository->findAll(), $userRules); | ||
33 | |||
34 | $parsed_url = parse_url($url); | ||
35 | // We add the former url as a new key _all for pattern matching | ||
36 | $parsed_url['_all'] = $url; | ||
37 | |||
38 | foreach ($rules as $rule) { | ||
39 | if ($this->rulerz->satisfies($parsed_url, $rule->getRule())) { | ||
40 | $this->logger->info('Origin url matching ignore rule.', [ | ||
41 | 'rule' => $rule->getRule(), | ||
42 | ]); | ||
43 | |||
44 | return true; | ||
45 | } | ||
46 | } | ||
47 | |||
48 | return false; | ||
49 | } | ||
50 | } | ||