]> git.immae.eu Git - github/wallabag/wallabag.git/blobdiff - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Run php-cs-fixer for fixing coding standard issues (on ContentProxyTest)
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
index 33b3ff2a53d4e6d58f64a242caf9831a12fe40cc..3f3c60d0fd943436f88759853b9ee5c950b554e2 100644 (file)
@@ -2,14 +2,23 @@
 
 namespace Tests\Wallabag\CoreBundle\Helper;
 
+use Graby\Graby;
+use Monolog\Handler\TestHandler;
+use Monolog\Logger;
+use PHPUnit\Framework\TestCase;
 use Psr\Log\NullLogger;
-use Wallabag\CoreBundle\Helper\ContentProxy;
+use Symfony\Component\Validator\ConstraintViolation;
+use Symfony\Component\Validator\ConstraintViolationList;
+use Symfony\Component\Validator\Validator\RecursiveValidator;
 use Wallabag\CoreBundle\Entity\Entry;
-use Wallabag\CoreBundle\Entity\Tag;
+use Wallabag\CoreBundle\Helper\ContentProxy;
+use Wallabag\CoreBundle\Helper\RuleBasedTagger;
 use Wallabag\UserBundle\Entity\User;
 
-class ContentProxyTest extends \PHPUnit_Framework_TestCase
+class ContentProxyTest extends TestCase
 {
+    private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
+
     public function testWithBadUrl()
     {
         $tagger = $this->getTaggerMock();
@@ -31,17 +40,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 'language' => '',
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80');
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://user@:80');
 
-        $this->assertEquals('http://user@:80', $entry->getUrl());
+        $this->assertSame('http://user@:80', $entry->getUrl());
         $this->assertEmpty($entry->getTitle());
-        $this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent());
+        $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
         $this->assertEmpty($entry->getPreviewPicture());
         $this->assertEmpty($entry->getMimetype());
         $this->assertEmpty($entry->getLanguage());
-        $this->assertEquals(0.0, $entry->getReadingTime());
-        $this->assertEquals(false, $entry->getDomainName());
+        $this->assertSame(0.0, $entry->getReadingTime());
+        $this->assertNull($entry->getDomainName());
     }
 
     public function testWithEmptyContent()
@@ -65,17 +75,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 'language' => '',
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $this->assertEquals('http://0.0.0.0', $entry->getUrl());
+        $this->assertSame('http://0.0.0.0', $entry->getUrl());
         $this->assertEmpty($entry->getTitle());
-        $this->assertEquals('<p>Unable to retrieve readable content.</p>', $entry->getContent());
+        $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
         $this->assertEmpty($entry->getPreviewPicture());
         $this->assertEmpty($entry->getMimetype());
         $this->assertEmpty($entry->getLanguage());
-        $this->assertEquals(0.0, $entry->getReadingTime());
-        $this->assertEquals('0.0.0.0', $entry->getDomainName());
+        $this->assertSame(0.0, $entry->getReadingTime());
+        $this->assertSame('0.0.0.0', $entry->getDomainName());
     }
 
     public function testWithEmptyContentButOG()
@@ -104,18 +115,19 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 ],
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io');
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://domain.io');
 
-        $this->assertEquals('http://domain.io', $entry->getUrl());
-        $this->assertEquals('my title', $entry->getTitle());
-        $this->assertEquals('<p>Unable to retrieve readable content.</p><p><i>But we found a short description: </i></p>desc', $entry->getContent());
+        $this->assertSame('http://domain.io', $entry->getUrl());
+        $this->assertSame('my title', $entry->getTitle());
+        $this->assertSame($this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
         $this->assertEmpty($entry->getPreviewPicture());
         $this->assertEmpty($entry->getLanguage());
         $this->assertEmpty($entry->getHttpStatus());
         $this->assertEmpty($entry->getMimetype());
-        $this->assertEquals(0.0, $entry->getReadingTime());
-        $this->assertEquals('domain.io', $entry->getDomainName());
+        $this->assertSame(0.0, $entry->getReadingTime());
+        $this->assertSame('domain.io', $entry->getDomainName());
     }
 
     public function testWithContent()
@@ -145,201 +157,650 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
                 ],
             ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
-        $this->assertEquals('this is my title', $entry->getTitle());
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
         $this->assertContains('this is my content', $entry->getContent());
-        $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
-        $this->assertEquals('text/html', $entry->getMimetype());
-        $this->assertEquals('fr', $entry->getLanguage());
-        $this->assertEquals('200', $entry->getHttpStatus());
-        $this->assertEquals(4.0, $entry->getReadingTime());
-        $this->assertEquals('1.1.1.1', $entry->getDomainName());
+        $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame('200', $entry->getHttpStatus());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
     }
 
-    public function testWithForcedContent()
+    public function testWithContentAndNoOgImage()
     {
         $tagger = $this->getTaggerMock();
         $tagger->expects($this->once())
             ->method('tag');
 
-        $graby = $this->getMockBuilder('Graby\Graby')->getMock();
+        $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
+            ->disableOriginalConstructor()
+            ->getMock();
+
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'status' => '200',
+                'open_graph' => [
+                    'og_title' => 'my OG title',
+                    'og_description' => 'OG desc',
+                    'og_image' => null,
+                ],
+            ]);
 
-        $proxy = new ContentProxy($graby, $tagger, $this->getTagRepositoryMock(), $this->getLogger());
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
-            'html' => str_repeat('this is my content', 325),
-            'title' => 'this is my title',
-            'url' => 'http://1.1.1.1',
-            'content_type' => 'text/html',
-            'language' => 'fr',
-        ]);
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $this->assertEquals('http://1.1.1.1', $entry->getUrl());
-        $this->assertEquals('this is my title', $entry->getTitle());
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
         $this->assertContains('this is my content', $entry->getContent());
-        $this->assertEquals('text/html', $entry->getMimetype());
-        $this->assertEquals('fr', $entry->getLanguage());
-        $this->assertEquals(4.0, $entry->getReadingTime());
-        $this->assertEquals('1.1.1.1', $entry->getDomainName());
+        $this->assertNull($entry->getPreviewPicture());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame('200', $entry->getHttpStatus());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
     }
 
-    public function testTaggerThrowException()
+    public function testWithContentAndBadLanguage()
     {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $validator = $this->getValidator(false);
+        $validator->expects($this->once())
+            ->method('validate')
+            ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'dontexist',
+                'status' => '200',
+            ]);
+
+        $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertNull($entry->getLanguage());
+        $this->assertSame('200', $entry->getHttpStatus());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+    }
+
+    public function testWithContentAndBadOgImage()
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $validator = $this->getValidator(false);
+        $validator->expects($this->exactly(2))
+            ->method('validate')
+            ->will($this->onConsecutiveCalls(
+                new ConstraintViolationList(),
+                new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
+            ));
+
+        $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
+            ->disableOriginalConstructor()
+            ->getMock();
+
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'status' => '200',
+                'open_graph' => [
+                    'og_title' => 'my OG title',
+                    'og_description' => 'OG desc',
+                    'og_image' => 'https://',
+                ],
+            ]);
+
+        $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertNull($entry->getPreviewPicture());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame('200', $entry->getHttpStatus());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+    }
+
+    public function testWithForcedContent()
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
+        $entry = new Entry(new User());
+        $proxy->updateEntry(
+            $entry,
+            'http://0.0.0.0',
+            [
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'date' => '1395635872',
+                'authors' => ['Jeremy', 'Nico', 'Thomas'],
+                'all_headers' => [
+                    'Cache-Control' => 'no-cache',
+                ],
+            ]
+        );
+
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+        $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
+        $this->assertContains('Jeremy', $entry->getPublishedBy());
+        $this->assertContains('Nico', $entry->getPublishedBy());
+        $this->assertContains('Thomas', $entry->getPublishedBy());
+        $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
+        $this->assertContains('no-cache', $entry->getHeaders());
+    }
+
+    public function testWithForcedContentAndDatetime()
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $logHandler = new TestHandler();
+        $logger = new Logger('test', [$logHandler]);
+
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry(
+            $entry,
+            'http://1.1.1.1',
+            [
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'date' => '2016-09-08T11:55:58+0200',
+            ]
+        );
+
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+        $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
+    }
+
+    public function testWithForcedContentAndBadDate()
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $logger = new Logger('foo');
+        $handler = new TestHandler();
+        $logger->pushHandler($handler);
+
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry(
+            $entry,
+            'http://1.1.1.1',
+            [
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'date' => '01 02 2012',
+            ]
+        );
+
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertContains('this is my content', $entry->getContent());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame(4.0, $entry->getReadingTime());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+        $this->assertNull($entry->getPublishedAt());
+
+        $records = $handler->getRecords();
+
+        $this->assertCount(1, $records);
+        $this->assertContains('Error while defining date', $records[0]['message']);
+    }
+
+    public function testTaggerThrowException()
+    {
         $tagger = $this->getTaggerMock();
         $tagger->expects($this->once())
             ->method('tag')
             ->will($this->throwException(new \Exception()));
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $proxy = new ContentProxy($graby, $tagger, $tagRepo, $this->getLogger());
-
-        $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
-            'html' => str_repeat('this is my content', 325),
-            'title' => 'this is my title',
-            'url' => 'http://1.1.1.1',
-            'content_type' => 'text/html',
-            'language' => 'fr',
-        ]);
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry(
+            $entry,
+            'http://1.1.1.1',
+            [
+                'html' => str_repeat('this is my content', 325),
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+            ]
+        );
 
         $this->assertCount(0, $entry->getTags());
     }
 
-    public function testAssignTagsWithArrayAndExtraSpaces()
+    public function dataForCrazyHtml()
+    {
+        return [
+            'script and comment' => [
+                '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
+                'lol',
+            ],
+            'script' => [
+                '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
+                'script',
+            ],
+        ];
+    }
+
+    /**
+     * @dataProvider dataForCrazyHtml
+     */
+    public function testWithCrazyHtmlContent($html, $escapedString)
+    {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry(
+            $entry,
+            'http://1.1.1.1',
+            [
+                'html' => $html,
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1',
+                'content_type' => 'text/html',
+                'language' => 'fr',
+                'status' => '200',
+                'open_graph' => [
+                    'og_title' => 'my OG title',
+                    'og_description' => 'OG desc',
+                    'og_image' => 'http://3.3.3.3/cover.jpg',
+                ],
+            ]
+        );
+
+        $this->assertSame('http://1.1.1.1', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertNotContains($escapedString, $entry->getContent());
+        $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
+        $this->assertSame('text/html', $entry->getMimetype());
+        $this->assertSame('fr', $entry->getLanguage());
+        $this->assertSame('200', $entry->getHttpStatus());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+    }
+
+    public function testWithImageAsContent()
     {
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
+                'title' => 'this is my title',
+                'url' => 'http://1.1.1.1/image.jpg',
+                'content_type' => 'image/jpeg',
+                'status' => '200',
+                'open_graph' => [],
+            ]);
 
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+        $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
+        $this->assertSame('this is my title', $entry->getTitle());
+        $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
+        $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
+        $this->assertSame('image/jpeg', $entry->getMimetype());
+        $this->assertSame('200', $entry->getHttpStatus());
+        $this->assertSame('1.1.1.1', $entry->getDomainName());
+    }
 
-        $proxy->assignTagsToEntry($entry, ['   tag1', 'tag2   ']);
+    public function testWebsiteWithValidUTF8Title_doNothing()
+    {
+        // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
+        // See http://graphemica.com for more info about the characters
+        // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+        $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
 
-        $this->assertCount(2, $entry->getTags());
-        $this->assertEquals('tag1', $entry->getTags()[0]->getLabel());
-        $this->assertEquals('tag2', $entry->getTags()[1]->getLabel());
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
+        $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
+            ->disableOriginalConstructor()
+            ->getMock();
+
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => false,
+                'title' => $actualTitle,
+                'url' => '',
+                'content_type' => 'text/html',
+                'language' => '',
+            ]);
+
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+        // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+        $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
+        $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
     }
 
-    public function testAssignTagsWithString()
+    public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
     {
+        // See http://graphemica.com for more info about the characters
+        // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
+        // The correct UTF-8 € character (U+20AC) is E282AC
+        $actualTitle = $this->hexToStr('61' . '80' . '62');
+
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => false,
+                'title' => $actualTitle,
+                'url' => '',
+                'content_type' => 'text/html',
+                'language' => '',
+            ]);
 
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $proxy->assignTagsToEntry($entry, 'tag1, tag2');
-
-        $this->assertCount(2, $entry->getTags());
-        $this->assertEquals('tag1', $entry->getTags()[0]->getLabel());
-        $this->assertEquals('tag2', $entry->getTags()[1]->getLabel());
+        // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
+        $expectedTitle = '61' . '62';
+        $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
     }
 
-    public function testAssignTagsWithEmptyArray()
+    public function testPdfWithUTF16BETitle_convertToUTF8()
     {
+        // See http://graphemica.com for more info about the characters
+        // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
+        $actualTitle = $this->hexToStr('D83DDE3B');
+
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => false,
+                'title' => $actualTitle,
+                'url' => '',
+                'content_type' => 'application/pdf',
+                'language' => '',
+            ]);
 
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $proxy->assignTagsToEntry($entry, []);
-
-        $this->assertCount(0, $entry->getTags());
+        // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
+        $expectedTitle = 'F09F98BB';
+        $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
     }
 
-    public function testAssignTagsWithEmptyString()
+    public function testPdfWithUTF8Title_doNothing()
     {
+        // See http://graphemica.com for more info about the characters
+        // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
+        $actualTitle = $this->hexToStr('F09F98BB');
+
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => false,
+                'title' => $actualTitle,
+                'url' => '',
+                'content_type' => 'application/pdf',
+                'language' => '',
+            ]);
 
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $proxy->assignTagsToEntry($entry, '');
-
-        $this->assertCount(0, $entry->getTags());
+        // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
+        $expectedTitle = 'F09F98BB';
+        $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
     }
 
-    public function testAssignTagsAlreadyAssigned()
+    public function testPdfWithWINDOWS1252Title_convertToUTF8()
     {
+        // See http://graphemica.com for more info about the characters
+        // '€' (80) in hexadecimal and WINDOWS-1252
+        $actualTitle = $this->hexToStr('80');
+
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
-
-        $tagEntity = new Tag();
-        $tagEntity->setLabel('tag1');
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => false,
+                'title' => $actualTitle,
+                'url' => '',
+                'content_type' => 'application/pdf',
+                'language' => '',
+            ]);
 
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
         $entry = new Entry(new User());
-        $entry->addTag($tagEntity);
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $proxy->assignTagsToEntry($entry, 'tag1, tag2');
-
-        $this->assertCount(2, $entry->getTags());
-        $this->assertEquals('tag1', $entry->getTags()[0]->getLabel());
-        $this->assertEquals('tag2', $entry->getTags()[1]->getLabel());
+        // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
+        $expectedTitle = 'E282AC';
+        $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
     }
 
-    public function testAssignTagsNotFlushed()
+    public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
     {
+        // See http://graphemica.com for more info about the characters
+        // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
+        // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
+        $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
+
+        $tagger = $this->getTaggerMock();
+        $tagger->expects($this->once())
+            ->method('tag');
+
         $graby = $this->getMockBuilder('Graby\Graby')
+            ->setMethods(['fetchContent'])
             ->disableOriginalConstructor()
             ->getMock();
 
-        $tagRepo = $this->getTagRepositoryMock();
-        $tagRepo->expects($this->never())
-            ->method('__call');
+        $graby->expects($this->any())
+            ->method('fetchContent')
+            ->willReturn([
+                'html' => false,
+                'title' => $actualTitle,
+                'url' => '',
+                'content_type' => 'application/pdf',
+                'language' => '',
+            ]);
 
-        $proxy = new ContentProxy($graby, $this->getTaggerMock(), $tagRepo, $this->getLogger());
+        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+        $entry = new Entry(new User());
+        $proxy->updateEntry($entry, 'http://0.0.0.0');
 
-        $tagEntity = new Tag();
-        $tagEntity->setLabel('tag1');
+        // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+        // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
+        $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
+        $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+    }
 
-        $entry = new Entry(new User());
+    /**
+     * https://stackoverflow.com/a/18506801.
+     *
+     * @param $string
+     *
+     * @return string
+     */
+    private function strToHex($string)
+    {
+        $hex = '';
+        for ($i = 0; $i < \strlen($string); ++$i) {
+            $ord = \ord($string[$i]);
+            $hexCode = dechex($ord);
+            $hex .= substr('0' . $hexCode, -2);
+        }
+
+        return strtoupper($hex);
+    }
 
-        $proxy->assignTagsToEntry($entry, 'tag1', [$tagEntity]);
+    /**
+     * https://stackoverflow.com/a/18506801.
+     *
+     * @param $hex
+     *
+     * @return string
+     */
+    private function hexToStr($hex)
+    {
+        $string = '';
+        for ($i = 0; $i < \strlen($hex) - 1; $i += 2) {
+            $string .= \chr(hexdec($hex[$i] . $hex[$i + 1]));
+        }
 
-        $this->assertCount(1, $entry->getTags());
-        $this->assertEquals('tag1', $entry->getTags()[0]->getLabel());
+        return $string;
     }
 
     private function getTaggerMock()
     {
-        return $this->getMockBuilder('Wallabag\CoreBundle\Helper\RuleBasedTagger')
+        return $this->getMockBuilder(RuleBasedTagger::class)
             ->setMethods(['tag'])
             ->disableOriginalConstructor()
             ->getMock();
     }
 
-    private function getTagRepositoryMock()
+    private function getLogger()
     {
-        return $this->getMockBuilder('Wallabag\CoreBundle\Repository\TagRepository')
-            ->disableOriginalConstructor()
-            ->getMock();
+        return new NullLogger();
     }
 
-    private function getLogger()
+    private function getValidator($withDefaultMock = true)
     {
-        return new NullLogger();
+        $mock = $this->getMockBuilder(RecursiveValidator::class)
+            ->setMethods(['validate'])
+            ->disableOriginalConstructor()
+            ->getMock();
+
+        if ($withDefaultMock) {
+            $mock->expects($this->any())
+                ->method('validate')
+                ->willReturn(new ConstraintViolationList());
+        }
+
+        return $mock;
     }
 }