diff options
author | Kevin Decherf <kevin@kdecherf.com> | 2019-08-11 23:55:52 +0200 |
---|---|---|
committer | Kevin Decherf <kevin@kdecherf.com> | 2020-04-25 15:59:23 +0200 |
commit | b22eb276232b5c15a6fbadc9dd10144e709faec3 (patch) | |
tree | 7c5b96a42f5b7d926c90b74b1bd949889dce5f70 /src | |
parent | 2495b197614d82b99eed6bbec4562078f4429ad7 (diff) | |
download | wallabag-b22eb276232b5c15a6fbadc9dd10144e709faec3.tar.gz wallabag-b22eb276232b5c15a6fbadc9dd10144e709faec3.tar.zst wallabag-b22eb276232b5c15a6fbadc9dd10144e709faec3.zip |
ContentProxy: replace ignoreUrl with new RuleBasedIgnoreOriginProcessor
Signed-off-by: Kevin Decherf <kevin@kdecherf.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 41 |
1 file changed, 4 insertions, 37 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 9c6fa8db..7e93249d 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -19,6 +19,7 @@ class ContentProxy | |||
19 | { | 19 | { |
20 | protected $graby; | 20 | protected $graby; |
21 | protected $tagger; | 21 | protected $tagger; |
22 | protected $ignoreOriginProcessor; | ||
22 | protected $validator; | 23 | protected $validator; |
23 | protected $logger; | 24 | protected $logger; |
24 | protected $mimeGuesser; | 25 | protected $mimeGuesser; |
@@ -26,10 +27,11 @@ class ContentProxy | |||
26 | protected $eventDispatcher; | 27 | protected $eventDispatcher; |
27 | protected $storeArticleHeaders; | 28 | protected $storeArticleHeaders; |
28 | 29 | ||
29 | public function __construct(Graby $graby, RuleBasedTagger $tagger, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) | 30 | public function __construct(Graby $graby, RuleBasedTagger $tagger, RuleBasedIgnoreOriginProcessor $ignoreOriginProcessor, ValidatorInterface $validator, LoggerInterface $logger, $fetchingErrorMessage, $storeArticleHeaders = false) |
30 | { | 31 | { |
31 | $this->graby = $graby; | 32 | $this->graby = $graby; |
32 | $this->tagger = $tagger; | 33 | $this->tagger = $tagger; |
34 | $this->ignoreOriginProcessor = $ignoreOriginProcessor; | ||
33 | $this->validator = $validator; | 35 | $this->validator = $validator; |
34 | $this->logger = $logger; | 36 | $this->logger = $logger; |
35 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); | 37 | $this->mimeGuesser = new MimeTypeExtensionGuesser(); |
@@ -356,7 +358,7 @@ class ContentProxy | |||
356 | $diff_keys = array_keys($diff); | 358 | $diff_keys = array_keys($diff); |
357 | sort($diff_keys); | 359 | sort($diff_keys); |
358 | 360 | ||
359 | if ($this->ignoreUrl($entry->getUrl())) { | 361 | if ($this->ignoreOriginProcessor->process($entry)) { |
360 | $entry->setUrl($url); | 362 | $entry->setUrl($url); |
361 | 363 | ||
362 | return false; | 364 | return false; |
@@ -396,41 +398,6 @@ class ContentProxy | |||
396 | } | 398 | } |
397 | 399 | ||
398 | /** | 400 | /** |
399 | * Check entry url against an ignore list to replace with content url. | ||
400 | * | ||
401 | * XXX: move the ignore list in the database to let users handle it | ||
402 | * | ||
403 | * @param string $url url to test | ||
404 | * | ||
405 | * @return bool true if url matches ignore list otherwise false | ||
406 | */ | ||
407 | private function ignoreUrl($url) | ||
408 | { | ||
409 | $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com']; | ||
410 | $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*']; | ||
411 | |||
412 | $parsed_url = parse_url($url); | ||
413 | |||
414 | $filtered = array_filter($ignored_hosts, function ($var) use ($parsed_url) { | ||
415 | return $var === $parsed_url['host']; | ||
416 | }); | ||
417 | |||
418 | if ([] !== $filtered) { | ||
419 | return true; | ||
420 | } | ||
421 | |||
422 | $filtered = array_filter($ignored_patterns, function ($var) use ($url) { | ||
423 | return preg_match("`$var`i", $url); | ||
424 | }); | ||
425 | |||
426 | if ([] !== $filtered) { | ||
427 | return true; | ||
428 | } | ||
429 | |||
430 | return false; | ||
431 | } | ||
432 | |||
433 | /** | ||
434 | * Validate that the given content has at least a title, an html and a url. | 401 | * Validate that the given content has at least a title, an html and a url. |
435 | * | 402 | * |
436 | * @return bool true if valid otherwise false | 403 | * @return bool true if valid otherwise false |