diff options
-rw-r--r-- | src/Wallabag/CoreBundle/Helper/ContentProxy.php | 79 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 34 |
2 files changed, 92 insertions, 21 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php index 1a2a330f..2dc436f8 100644 --- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php +++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php | |||
@@ -332,32 +332,71 @@ class ContentProxy | |||
332 | $diff_keys = array_keys($diff); | 332 | $diff_keys = array_keys($diff); |
333 | sort($diff_keys); | 333 | sort($diff_keys); |
334 | 334 | ||
335 | switch ($diff_keys) { | 335 | if ($this->ignoreUrl($entry->getUrl())) { |
336 | case ['path']: | 336 | $entry->setUrl($url); |
337 | if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry | 337 | } else { |
338 | || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId | 338 | switch ($diff_keys) { |
339 | case ['path']: | ||
340 | if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry | ||
341 | || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId | ||
342 | $entry->setUrl($url); | ||
343 | } | ||
344 | break; | ||
345 | case ['scheme']: | ||
339 | $entry->setUrl($url); | 346 | $entry->setUrl($url); |
340 | } | 347 | break; |
341 | break; | 348 | case ['fragment']: |
342 | case ['scheme']: | 349 | case ['query']: |
343 | $entry->setUrl($url); | 350 | case ['fragment', 'query']: |
344 | break; | 351 | // noop |
345 | case ['fragment']: | 352 | break; |
346 | case ['query']: | 353 | default: |
347 | case ['fragment', 'query']: | 354 | if (empty($entry->getOriginUrl())) { |
348 | // noop | 355 | $entry->setOriginUrl($entry->getUrl()); |
349 | break; | 356 | } |
350 | default: | 357 | $entry->setUrl($url); |
351 | if (empty($entry->getOriginUrl())) { | 358 | break; |
352 | $entry->setOriginUrl($entry->getUrl()); | 359 | } |
353 | } | ||
354 | $entry->setUrl($url); | ||
355 | break; | ||
356 | } | 360 | } |
357 | } | 361 | } |
358 | } | 362 | } |
359 | 363 | ||
360 | /** | 364 | /** |
365 | * Check entry url against an ignore list to replace with content url. | ||
366 | * | ||
367 | * XXX: move the ignore list in the database to let users handle it | ||
368 | * | ||
369 | * @param string $url url to test | ||
370 | * | ||
371 | * @return bool true if url matches ignore list otherwise false | ||
372 | */ | ||
373 | private function ignoreUrl($url) | ||
374 | { | ||
375 | $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com']; | ||
376 | $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*']; | ||
377 | |||
378 | $parsed_url = parse_url($url); | ||
379 | |||
380 | $filtered = array_filter($ignored_hosts, function ($var) use ($parsed_url) { | ||
381 | return $var === $parsed_url['host']; | ||
382 | }); | ||
383 | |||
384 | if ([] !== $filtered) { | ||
385 | return true; | ||
386 | } | ||
387 | |||
388 | $filtered = array_filter($ignored_patterns, function ($var) use ($url) { | ||
389 | return preg_match("`$var`i", $url); | ||
390 | }); | ||
391 | |||
392 | if ([] !== $filtered) { | ||
393 | return true; | ||
394 | } | ||
395 | |||
396 | return false; | ||
397 | } | ||
398 | |||
399 | /** | ||
361 | * Validate that the given content has at least a title, an html and a url. | 400 | * Validate that the given content has at least a title, an html and a url. |
362 | * | 401 | * |
363 | * @param array $content | 402 | * @param array $content |
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 3debc457..a60aec5b 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | |||
@@ -808,7 +808,39 @@ class ContentProxyTest extends TestCase | |||
808 | 'https://example.org/hello', | 808 | 'https://example.org/hello', |
809 | null, | 809 | null, |
810 | 'example.org', | 810 | 'example.org', |
811 | ] | 811 | ], |
812 | 'different path and query string in fetch content' => [ | ||
813 | 'https://example.org/hello', | ||
814 | null, | ||
815 | 'https://example.org/world?foo', | ||
816 | 'https://example.org/world?foo', | ||
817 | 'https://example.org/hello', | ||
818 | 'example.org', | ||
819 | ], | ||
820 | 'feedproxy ignore list test' => [ | ||
821 | 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', | ||
822 | null, | ||
823 | 'https://example.org/hello-wallabag', | ||
824 | 'https://example.org/hello-wallabag', | ||
825 | null, | ||
826 | 'example.org', | ||
827 | ], | ||
828 | 'feedproxy ignore list test with origin url already set' => [ | ||
829 | 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', | ||
830 | 'https://example.org/this-is-source', | ||
831 | 'https://example.org/hello-wallabag', | ||
832 | 'https://example.org/hello-wallabag', | ||
833 | 'https://example.org/this-is-source', | ||
834 | 'example.org', | ||
835 | ], | ||
836 | 'lemonde ignore pattern test' => [ | ||
837 | 'http://www.lemonde.fr/tiny/url', | ||
838 | null, | ||
839 | 'http://example.com/hello-world', | ||
840 | 'http://example.com/hello-world', | ||
841 | null, | ||
842 | 'example.com', | ||
843 | ], | ||
812 | ]; | 844 | ]; |
813 | } | 845 | } |
814 | 846 | ||