From 781864b9546b0ff2d6fe42ce72f78b8f40b785e9 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Sun, 7 Jan 2018 17:28:04 +0100 Subject: ContentProxy: swap entry url to origin_url and set new url according to graby content Closes #3529 Signed-off-by: Kevin Decherf --- .../CoreBundle/Helper/ContentProxyTest.php | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tests/Wallabag') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 3f3c60d0..84b38f02 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -775,6 +775,32 @@ class ContentProxyTest extends TestCase return $string; } + public function testWithChangedUrl() + { + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); + $entry = new Entry(new User()); + $proxy->updateEntry( + $entry, + 'http://0.0.0.0', + [ + 'html' => false, + 'title' => '', + 'url' => 'http://1.1.1.1', + 'content_type' => '', + 'language' => '', + ], + true + ); + + $this->assertSame('http://1.1.1.1', $entry->getUrl()); + $this->assertSame('1.1.1.1', $entry->getDomainName()); + $this->assertSame('http://0.0.0.0', $entry->getOriginUrl()); + } + private function getTaggerMock() { return $this->getMockBuilder(RuleBasedTagger::class) -- cgit v1.2.3 From e07fadea76aa7329c4b955a59e74cb867c733706 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Thu, 6 Sep 2018 22:26:20 +0200 Subject: Refactor updateOriginUrl to include new behaviors - Leave origin_url unchanged if difference is an ending slash - Leave origin_url unchanged if difference is scheme - Ignore (noop) if difference is query string or fragment Signed-off-by: Kevin Decherf --- .../CoreBundle/Helper/ContentProxyTest.php | 121 
++++++++++++++++----- 1 file changed, 95 insertions(+), 26 deletions(-) (limited to 'tests/Wallabag') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 84b38f02..c20732cc 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -739,6 +739,101 @@ class ContentProxyTest extends TestCase $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); } + /** + * Data provider for testWithChangedUrl. + * + * Arrays contain the following values: + * $entry_url + * $origin_url + * $content_url + * $expected_entry_url + * $expected_origin_url + * $expected_domain + */ + public function dataForChangedUrl() + { + return [ + 'normal' => [ + 'http://0.0.0.0', + null, + 'http://1.1.1.1', + 'http://1.1.1.1', + 'http://0.0.0.0', + '1.1.1.1', + ], + 'origin already set' => [ + 'http://0.0.0.0', + 'http://hello', + 'http://1.1.1.1', + 'http://1.1.1.1', + 'http://hello', + '1.1.1.1', + ], + 'trailing slash' => [ + 'https://example.com/hello-world', + null, + 'https://example.com/hello-world/', + 'https://example.com/hello-world/', + null, + 'example.com', + ], + 'no query string in fetched content' => [ + 'https://example.org/hello?world=1', + null, + 'https://example.org/hello', + 'https://example.org/hello?world=1', + null, + 'example.org', + ], + 'query string in fetched content' => [ + 'https://example.org/hello', + null, + 'https://example.org/hello?world=1', + 'https://example.org/hello', + null, + 'example.org', + ], + 'fragment in fetched content' => [ + 'https://example.org/hello', + null, + 'https://example.org/hello#world', + 'https://example.org/hello', + null, + 'example.org', + ], + ]; + } + + /** + * @dataProvider dataForChangedUrl + */ + public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain) + { + $tagger = 
$this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); + $entry = new Entry(new User()); + $entry->setOriginUrl($origin_url); + $proxy->updateEntry( + $entry, + $entry_url, + [ + 'html' => false, + 'title' => '', + 'url' => $content_url, + 'content_type' => '', + 'language' => '', + ], + true + ); + + $this->assertSame($expected_entry_url, $entry->getUrl()); + $this->assertSame($expected_domain, $entry->getDomainName()); + $this->assertSame($expected_origin_url, $entry->getOriginUrl()); + } + /** * https://stackoverflow.com/a/18506801. * @@ -775,32 +870,6 @@ class ContentProxyTest extends TestCase return $string; } - public function testWithChangedUrl() - { - $tagger = $this->getTaggerMock(); - $tagger->expects($this->once()) - ->method('tag'); - - $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); - $entry = new Entry(new User()); - $proxy->updateEntry( - $entry, - 'http://0.0.0.0', - [ - 'html' => false, - 'title' => '', - 'url' => 'http://1.1.1.1', - 'content_type' => '', - 'language' => '', - ], - true - ); - - $this->assertSame('http://1.1.1.1', $entry->getUrl()); - $this->assertSame('1.1.1.1', $entry->getDomainName()); - $this->assertSame('http://0.0.0.0', $entry->getOriginUrl()); - } - private function getTaggerMock() { return $this->getMockBuilder(RuleBasedTagger::class) -- cgit v1.2.3 From fc040c749dec0275e562182562c1c1cb89e6cfa1 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Mon, 22 Oct 2018 23:08:58 +0200 Subject: updateOriginUrl: add behavior when diff is fragment and query Signed-off-by: Kevin Decherf --- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests/Wallabag') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php 
b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index c20732cc..3debc457 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -801,6 +801,14 @@ class ContentProxyTest extends TestCase null, 'example.org', ], + 'fragment and query string in fetched content' => [ + 'https://example.org/hello', + null, + 'https://example.org/hello?foo#world', + 'https://example.org/hello', + null, + 'example.org', + ] ]; } -- cgit v1.2.3 From b49c87acf12f22e38db751fb35be5da2436abc45 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Mon, 22 Oct 2018 23:39:31 +0200 Subject: ignoreOriginUrl: add initial support of ignore lists Add the ability to specify hosts and patterns lists to ignore the given entry url and replace it with the fetched content url without touching origin_url. This initial support should be reworked in the following months to move the hardcoded ignore lists into the database. Signed-off-by: Kevin Decherf --- .../CoreBundle/Helper/ContentProxyTest.php | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'tests/Wallabag') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 3debc457..a60aec5b 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -808,7 +808,39 @@ class ContentProxyTest extends TestCase 'https://example.org/hello', null, 'example.org', - ] + ], + 'different path and query string in fetch content' => [ + 'https://example.org/hello', + null, + 'https://example.org/world?foo', + 'https://example.org/world?foo', + 'https://example.org/hello', + 'example.org', + ], + 'feedproxy ignore list test' => [ + 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', + null, + 'https://example.org/hello-wallabag', + 'https://example.org/hello-wallabag', + null, + 'example.org', + ], + 'feedproxy 
ignore list test with origin url already set' => [ + 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', + 'https://example.org/this-is-source', + 'https://example.org/hello-wallabag', + 'https://example.org/hello-wallabag', + 'https://example.org/this-is-source', + 'example.org', + ], + 'lemonde ignore pattern test' => [ + 'http://www.lemonde.fr/tiny/url', + null, + 'http://example.com/hello-world', + 'http://example.com/hello-world', + null, + 'example.com', + ], ]; } -- cgit v1.2.3 From 60599679519e819301ce36185c3dd5ca7aa7f4ec Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Wed, 24 Oct 2018 22:27:27 +0200 Subject: updateOriginUrl: remove 'query string' case from ignore list Two urls with a different query string may refer to two different pages so keep them both. Signed-off-by: Kevin Decherf --- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'tests/Wallabag') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index a60aec5b..3dd9273c 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -777,20 +777,12 @@ class ContentProxyTest extends TestCase null, 'example.com', ], - 'no query string in fetched content' => [ - 'https://example.org/hello?world=1', - null, - 'https://example.org/hello', - 'https://example.org/hello?world=1', - null, - 'example.org', - ], 'query string in fetched content' => [ 'https://example.org/hello', null, 'https://example.org/hello?world=1', + 'https://example.org/hello?world=1', 'https://example.org/hello', - null, 'example.org', ], 'fragment in fetched content' => [ @@ -805,8 +797,8 @@ class ContentProxyTest extends TestCase 'https://example.org/hello', null, 'https://example.org/hello?foo#world', + 'https://example.org/hello?foo#world', 'https://example.org/hello', - null, 'example.org', ], 
'different path and query string in fetch content' => [ -- cgit v1.2.3