From c01d9532920ec5a298bb347dbb83a078d36d4841 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Wed, 19 Sep 2018 13:59:07 +0200 Subject: Add tests for logic Try to translate the title of a PDF from UTF-8 (then UTF-16BE, then WINDOWS-1252) to UTF-8 --- .../CoreBundle/Helper/ContentProxyTest.php | 236 +++++++++++++++++++++ 1 file changed, 236 insertions(+) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 51df8de1..9d8098ef 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -531,6 +531,242 @@ class ContentProxyTest extends TestCase $this->assertSame('1.1.1.1', $entry->getDomainName()); } + public function testWebsiteWithValidUTF8Title_doNothing() + { + // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex + // See http://graphemica.com for more info about the characters + // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 + $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A'); + + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => false, + 'title' => $actualTitle, + 'url' => '', + 'content_type' => 'text/html', + 'language' => '', + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 + $expectedTitle = 'F09F98BB' . 'E284A4' . 
'7A'; + $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); + } + + public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter() + { + // See http://graphemica.com for more info about the characters + // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character. + // The correct UTF-8 € character (U+20AC) is E282AC + $actualTitle = $this->hexToStr('61' . '80' . '62'); + + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => false, + 'title' => $actualTitle, + 'url' => '', + 'content_type' => 'text/html', + 'language' => '', + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed + $expectedTitle = '61' . 
'62'; + $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); + } + + public function testPdfWithUTF16BETitle_convertToUTF8() + { + // See http://graphemica.com for more info about the characters + // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE + $actualTitle = $this->hexToStr('D83DDE3B'); + + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => false, + 'title' => $actualTitle, + 'url' => '', + 'content_type' => 'application/pdf', + 'language' => '', + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 + $expectedTitle = 'F09F98BB'; + $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); + } + + public function testPdfWithUTF8Title_doNothing() + { + // See http://graphemica.com for more info about the characters + // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8 + $actualTitle = $this->hexToStr('F09F98BB'); + + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => false, + 'title' => $actualTitle, + 'url' => '', + 'content_type' => 'application/pdf', + 'language' => '', + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + // '😻' (U+1F63B or F09F98BB) 
in hexadecimal and UTF-8 + $expectedTitle = 'F09F98BB'; + $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); + } + + public function testPdfWithWINDOWS1252Title_convertToUTF8() + { + // See http://graphemica.com for more info about the characters + // '€' (80) in hexadecimal and WINDOWS-1252 + $actualTitle = $this->hexToStr('80'); + + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => false, + 'title' => $actualTitle, + 'url' => '', + 'content_type' => 'application/pdf', + 'language' => '', + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + // '€' (U+20AC or E282AC) in hexadecimal and UTF-8 + $expectedTitle = 'E282AC'; + $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); + } + + public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter() + { + // See http://graphemica.com for more info about the characters + // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8 + // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252 + $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . 
'7A'); + + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $graby = $this->getMockBuilder('Graby\Graby') + ->setMethods(['fetchContent']) + ->disableOriginalConstructor() + ->getMock(); + + $graby->expects($this->any()) + ->method('fetchContent') + ->willReturn([ + 'html' => false, + 'title' => $actualTitle, + 'url' => '', + 'content_type' => 'application/pdf', + 'language' => '', + ]); + + $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); + $entry = new Entry(new User()); + $proxy->updateEntry($entry, 'http://0.0.0.0'); + + // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 + // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed + $expectedTitle = 'F09F98BB' . 'E284A4' . '7A'; + $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); + } + + /** + * https://stackoverflow.com/a/18506801 + * @param $string + * @return string + */ + function strToHex($string){ + $hex = ''; + for ($i=0; $igetMockBuilder(RuleBasedTagger::class) -- cgit v1.2.3 From d64139d8123ac88c8ba1b427c3ee3637b6ea1c96 Mon Sep 17 00:00:00 2001 From: Tobi823 Date: Fri, 21 Sep 2018 13:31:28 +0200 Subject: Make helper methods strToHex and hexToStr in ContentProxyTest.php private to prevent misusage (from outside this class) --- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 9d8098ef..5f10f482 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -744,7 +744,7 @@ class ContentProxyTest extends TestCase * @param $string * @return string */ - function strToHex($string){ + private function strToHex($string){ 
$hex = ''; for ($i=0; $i Date: Sun, 23 Sep 2018 23:42:05 +0200 Subject: Run php-cs-fixer for fixing coding standard issues (on ContentProxyTest) --- .../CoreBundle/Helper/ContentProxyTest.php | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 5f10f482..3f3c60d0 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -740,30 +740,38 @@ class ContentProxyTest extends TestCase } /** - * https://stackoverflow.com/a/18506801 + * https://stackoverflow.com/a/18506801. + * * @param $string + * * @return string */ - private function strToHex($string){ + private function strToHex($string) + { $hex = ''; - for ($i=0; $i Date: Sun, 7 Jan 2018 17:28:04 +0100 Subject: ContentProxy: swap entry url to origin_url and set new url according to graby content Closes #3529 Signed-off-by: Kevin Decherf --- .../CoreBundle/Helper/ContentProxyTest.php | 26 ++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 3f3c60d0..84b38f02 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -775,6 +775,32 @@ class ContentProxyTest extends TestCase return $string; } + public function testWithChangedUrl() + { + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); + $entry = new Entry(new User()); + $proxy->updateEntry( + $entry, + 'http://0.0.0.0', + [ + 'html' => false, + 'title' => '', + 'url' => 'http://1.1.1.1', + 
'content_type' => '', + 'language' => '', + ], + true + ); + + $this->assertSame('http://1.1.1.1', $entry->getUrl()); + $this->assertSame('1.1.1.1', $entry->getDomainName()); + $this->assertSame('http://0.0.0.0', $entry->getOriginUrl()); + } + private function getTaggerMock() { return $this->getMockBuilder(RuleBasedTagger::class) -- cgit v1.2.3 From e07fadea76aa7329c4b955a59e74cb867c733706 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Thu, 6 Sep 2018 22:26:20 +0200 Subject: Refactor updateOriginUrl to include new behaviors behaviors - Leave origin_url unchanged if difference is an ending slash - Leave origin_url unchanged if difference is scheme - Ignore (noop) if difference is query string or fragment Signed-off-by: Kevin Decherf --- .../CoreBundle/Helper/ContentProxyTest.php | 121 ++++++++++++++++----- 1 file changed, 95 insertions(+), 26 deletions(-) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 84b38f02..c20732cc 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -739,6 +739,101 @@ class ContentProxyTest extends TestCase $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); } + /** + * Data provider for testWithChangedUrl. 
+ * + * Arrays contain the following values: + * $entry_url + * $origin_url + * $content_url + * $expected_entry_url + * $expected_origin_url + * $expected_domain + */ + public function dataForChangedUrl() + { + return [ + 'normal' => [ + 'http://0.0.0.0', + null, + 'http://1.1.1.1', + 'http://1.1.1.1', + 'http://0.0.0.0', + '1.1.1.1', + ], + 'origin already set' => [ + 'http://0.0.0.0', + 'http://hello', + 'http://1.1.1.1', + 'http://1.1.1.1', + 'http://hello', + '1.1.1.1', + ], + 'trailing slash' => [ + 'https://example.com/hello-world', + null, + 'https://example.com/hello-world/', + 'https://example.com/hello-world/', + null, + 'example.com', + ], + 'no query string in fetched content' => [ + 'https://example.org/hello?world=1', + null, + 'https://example.org/hello', + 'https://example.org/hello?world=1', + null, + 'example.org', + ], + 'query string in fetched content' => [ + 'https://example.org/hello', + null, + 'https://example.org/hello?world=1', + 'https://example.org/hello', + null, + 'example.org', + ], + 'fragment in fetched content' => [ + 'https://example.org/hello', + null, + 'https://example.org/hello#world', + 'https://example.org/hello', + null, + 'example.org', + ], + ]; + } + + /** + * @dataProvider dataForChangedUrl + */ + public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain) + { + $tagger = $this->getTaggerMock(); + $tagger->expects($this->once()) + ->method('tag'); + + $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); + $entry = new Entry(new User()); + $entry->setOriginUrl($origin_url); + $proxy->updateEntry( + $entry, + $entry_url, + [ + 'html' => false, + 'title' => '', + 'url' => $content_url, + 'content_type' => '', + 'language' => '', + ], + true + ); + + $this->assertSame($expected_entry_url, $entry->getUrl()); + $this->assertSame($expected_domain, $entry->getDomainName()); + 
$this->assertSame($expected_origin_url, $entry->getOriginUrl()); + } + /** * https://stackoverflow.com/a/18506801. * @@ -775,32 +870,6 @@ class ContentProxyTest extends TestCase return $string; } - public function testWithChangedUrl() - { - $tagger = $this->getTaggerMock(); - $tagger->expects($this->once()) - ->method('tag'); - - $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); - $entry = new Entry(new User()); - $proxy->updateEntry( - $entry, - 'http://0.0.0.0', - [ - 'html' => false, - 'title' => '', - 'url' => 'http://1.1.1.1', - 'content_type' => '', - 'language' => '', - ], - true - ); - - $this->assertSame('http://1.1.1.1', $entry->getUrl()); - $this->assertSame('1.1.1.1', $entry->getDomainName()); - $this->assertSame('http://0.0.0.0', $entry->getOriginUrl()); - } - private function getTaggerMock() { return $this->getMockBuilder(RuleBasedTagger::class) -- cgit v1.2.3 From fc040c749dec0275e562182562c1c1cb89e6cfa1 Mon Sep 17 00:00:00 2001 From: Kevin Decherf Date: Mon, 22 Oct 2018 23:08:58 +0200 Subject: updateOriginUrl: add behavior when diff is fragment and query Signed-off-by: Kevin Decherf --- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index c20732cc..3debc457 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -801,6 +801,14 @@ class ContentProxyTest extends TestCase null, 'example.org', ], + 'fragment and query string in fetched content' => [ + 'https://example.org/hello', + null, + 'https://example.org/hello?foo#world', + 'https://example.org/hello', + null, + 'example.org', + ] ]; } -- cgit v1.2.3 From b49c87acf12f22e38db751fb35be5da2436abc45 Mon Sep 17 00:00:00 2001 From: Kevin 
Decherf Date: Mon, 22 Oct 2018 23:39:31 +0200 Subject: ignoreOriginUrl: add initial support of ignore lists Add the ability to specify hosts and patterns lists to ignore the given entry url and replace it with the fetched content url without touching to origin_url. This initial support should be reworked in the following months to move the hardcoded ignore lists in the database. Signed-off-by: Kevin Decherf --- .../CoreBundle/Helper/ContentProxyTest.php | 34 +++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 3debc457..a60aec5b 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -808,7 +808,39 @@ class ContentProxyTest extends TestCase 'https://example.org/hello', null, 'example.org', - ] + ], + 'different path and query string in fetch content' => [ + 'https://example.org/hello', + null, + 'https://example.org/world?foo', + 'https://example.org/world?foo', + 'https://example.org/hello', + 'example.org', + ], + 'feedproxy ignore list test' => [ + 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', + null, + 'https://example.org/hello-wallabag', + 'https://example.org/hello-wallabag', + null, + 'example.org', + ], + 'feedproxy ignore list test with origin url already set' => [ + 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', + 'https://example.org/this-is-source', + 'https://example.org/hello-wallabag', + 'https://example.org/hello-wallabag', + 'https://example.org/this-is-source', + 'example.org', + ], + 'lemonde ignore pattern test' => [ + 'http://www.lemonde.fr/tiny/url', + null, + 'http://example.com/hello-world', + 'http://example.com/hello-world', + null, + 'example.com', + ], ]; } -- cgit v1.2.3 From 60599679519e819301ce36185c3dd5ca7aa7f4ec Mon Sep 17 00:00:00 2001 From: Kevin 
Decherf Date: Wed, 24 Oct 2018 22:27:27 +0200 Subject: updateOriginUrl: remove 'query string' case from ignore list Two urls with a different query string may refer to two different pages so keep them both. Signed-off-by: Kevin Decherf --- tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index a60aec5b..3dd9273c 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php @@ -777,20 +777,12 @@ class ContentProxyTest extends TestCase null, 'example.com', ], - 'no query string in fetched content' => [ - 'https://example.org/hello?world=1', - null, - 'https://example.org/hello', - 'https://example.org/hello?world=1', - null, - 'example.org', - ], 'query string in fetched content' => [ 'https://example.org/hello', null, 'https://example.org/hello?world=1', + 'https://example.org/hello?world=1', 'https://example.org/hello', - null, 'example.org', ], 'fragment in fetched content' => [ @@ -805,8 +797,8 @@ class ContentProxyTest extends TestCase 'https://example.org/hello', null, 'https://example.org/hello?foo#world', + 'https://example.org/hello?foo#world', 'https://example.org/hello', - null, 'example.org', ], 'different path and query string in fetch content' => [ -- cgit v1.2.3 From dc90eab32bd1912a1541ba238dcd2c34e6171e94 Mon Sep 17 00:00:00 2001 From: Jeremy Benoist Date: Wed, 21 Nov 2018 21:11:55 +0100 Subject: Prepare 2.3.4 release --- tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'tests/Wallabag/CoreBundle') diff --git a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php index bf0068b4..479e0700 100644 --- 
a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php +++ b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php @@ -166,9 +166,8 @@ class EntryControllerTest extends WallabagCoreTestCase $this->assertSame($this->url, $content->getUrl()); $this->assertContains('Google', $content->getTitle()); $this->assertSame('fr', $content->getLanguage()); - $this->assertSame('2015-03-28 11:43:19', $content->getPublishedAt()->format('Y-m-d H:i:s')); - $this->assertSame('Morgane Tual', $author[0]); - $this->assertArrayHasKey('x-varnish1', $content->getHeaders()); + $this->assertSame('2016-04-07 19:01:35', $content->getPublishedAt()->format('Y-m-d H:i:s')); + $this->assertArrayHasKey('x-frame-options', $content->getHeaders()); $client->getContainer()->get('craue_config')->set('store_article_headers', 0); } -- cgit v1.2.3