about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
-rw-r--r-- src/Wallabag/CoreBundle/Helper/ContentProxy.php 79
-rw-r--r-- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php 34
2 files changed, 92 insertions, 21 deletions
diff --git a/src/Wallabag/CoreBundle/Helper/ContentProxy.php b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
index 1a2a330f..2dc436f8 100644
--- a/src/Wallabag/CoreBundle/Helper/ContentProxy.php
+++ b/src/Wallabag/CoreBundle/Helper/ContentProxy.php
@@ -332,32 +332,71 @@ class ContentProxy
332 $diff_keys = array_keys($diff); 332 $diff_keys = array_keys($diff);
333 sort($diff_keys); 333 sort($diff_keys);
334 334
335 switch ($diff_keys) { 335 if ($this->ignoreUrl($entry->getUrl())) {
336 case ['path']: 336 $entry->setUrl($url);
337 if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry 337 } else {
338 || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId 338 switch ($diff_keys) {
339 case ['path']:
340 if (($parsed_entry_url['path'] . '/' === $parsed_content_url['path']) // diff is trailing slash, we only replace the url of the entry
341 || ($url === urldecode($entry->getUrl()))) { // we update entry url if new url is a decoded version of it, see EntryRepository#findByUrlAndUserId
342 $entry->setUrl($url);
343 }
344 break;
345 case ['scheme']:
339 $entry->setUrl($url); 346 $entry->setUrl($url);
340 } 347 break;
341 break; 348 case ['fragment']:
342 case ['scheme']: 349 case ['query']:
343 $entry->setUrl($url); 350 case ['fragment', 'query']:
344 break; 351 // noop
345 case ['fragment']: 352 break;
346 case ['query']: 353 default:
347 case ['fragment', 'query']: 354 if (empty($entry->getOriginUrl())) {
348 // noop 355 $entry->setOriginUrl($entry->getUrl());
349 break; 356 }
350 default: 357 $entry->setUrl($url);
351 if (empty($entry->getOriginUrl())) { 358 break;
352 $entry->setOriginUrl($entry->getUrl()); 359 }
353 }
354 $entry->setUrl($url);
355 break;
356 } 360 }
357 } 361 }
358 } 362 }
359 363
360 /** 364 /**
365 * Check entry url against an ignore list to replace with content url.
366 *
367 * XXX: move the ignore list in the database to let users handle it
368 *
369 * @param string $url url to test
370 *
371 * @return bool true if url matches ignore list otherwise false
372 */
private function ignoreUrl($url)
{
    // Host names compared for exact equality; keep these entries lowercase
    // because the parsed host is lowercased before the lookup.
    $ignored_hosts = ['feedproxy.google.com', 'feeds.reuters.com'];
    // Regular expressions tested case-insensitively against the full url.
    $ignored_patterns = ['https?://www\.lemonde\.fr/tiny.*'];

    // Request only the host component: parse_url() then yields null when the
    // url has no host and false when the url cannot be parsed, so no array
    // offset is ever read on a missing key.
    $host = parse_url($url, PHP_URL_HOST);

    // Host names are case-insensitive, so normalise before the strict lookup.
    if (is_string($host) && in_array(strtolower($host), $ignored_hosts, true)) {
        return true;
    }

    foreach ($ignored_patterns as $pattern) {
        // Backtick is the delimiter because the patterns contain slashes.
        if (1 === preg_match("`$pattern`i", $url)) {
            return true;
        }
    }

    return false;
}
398
399 /**
361 * Validate that the given content has at least a title, an html and a url. 400 * Validate that the given content has at least a title, an html and a url.
362 * 401 *
363 * @param array $content 402 * @param array $content
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 3debc457..a60aec5b 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -808,7 +808,39 @@ class ContentProxyTest extends TestCase
808 'https://example.org/hello', 808 'https://example.org/hello',
809 null, 809 null,
810 'example.org', 810 'example.org',
811 ] 811 ],
812 'different path and query string in fetch content' => [
813 'https://example.org/hello',
814 null,
815 'https://example.org/world?foo',
816 'https://example.org/world?foo',
817 'https://example.org/hello',
818 'example.org',
819 ],
820 'feedproxy ignore list test' => [
821 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
822 null,
823 'https://example.org/hello-wallabag',
824 'https://example.org/hello-wallabag',
825 null,
826 'example.org',
827 ],
828 'feedproxy ignore list test with origin url already set' => [
829 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
830 'https://example.org/this-is-source',
831 'https://example.org/hello-wallabag',
832 'https://example.org/hello-wallabag',
833 'https://example.org/this-is-source',
834 'example.org',
835 ],
836 'lemonde ignore pattern test' => [
837 'http://www.lemonde.fr/tiny/url',
838 null,
839 'http://example.com/hello-world',
840 'http://example.com/hello-world',
841 null,
842 'example.com',
843 ],
812 ]; 844 ];
813 } 845 }
814 846