X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=tests%2FWallabag%2FCoreBundle%2FHelper%2FContentProxyTest.php;h=c7caac1d326a3e60c1ebcbd2532f8bbd599c94dc;hb=92a66835624acf6fd14f5adc5f8aab399658592e;hp=44fca0737280f8c3507e0cbec4270753e936cc63;hpb=4423b88c5b2c2d530b0a83a822f521a61ca4d4b8;p=github%2Fwallabag%2Fwallabag.git
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 44fca073..c7caac1d 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -2,14 +2,20 @@
namespace Tests\Wallabag\CoreBundle\Helper;
+use Graby\Graby;
+use Monolog\Handler\TestHandler;
+use Monolog\Logger;
+use PHPUnit\Framework\TestCase;
use Psr\Log\NullLogger;
-use Wallabag\CoreBundle\Helper\ContentProxy;
+use Symfony\Component\Validator\ConstraintViolation;
+use Symfony\Component\Validator\ConstraintViolationList;
+use Symfony\Component\Validator\Validator\RecursiveValidator;
use Wallabag\CoreBundle\Entity\Entry;
-use Wallabag\CoreBundle\Entity\Tag;
-use Wallabag\UserBundle\Entity\User;
+use Wallabag\CoreBundle\Helper\ContentProxy;
use Wallabag\CoreBundle\Helper\RuleBasedTagger;
+use Wallabag\UserBundle\Entity\User;
-class ContentProxyTest extends \PHPUnit_Framework_TestCase
+class ContentProxyTest extends TestCase
{
private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please troubleshoot this issue.';
@@ -34,17 +40,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'language' => '',
]);
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://user@:80');
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://user@:80');
- $this->assertEquals('http://user@:80', $entry->getUrl());
+ $this->assertSame('http://user@:80', $entry->getUrl());
$this->assertEmpty($entry->getTitle());
- $this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
+ $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getMimetype());
$this->assertEmpty($entry->getLanguage());
- $this->assertEquals(0.0, $entry->getReadingTime());
- $this->assertEquals(false, $entry->getDomainName());
+ $this->assertSame(0.0, $entry->getReadingTime());
+ $this->assertNull($entry->getDomainName());
}
public function testWithEmptyContent()
@@ -68,17 +75,18 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'language' => '',
]);
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
- $this->assertEquals('http://0.0.0.0', $entry->getUrl());
+ $this->assertSame('http://0.0.0.0', $entry->getUrl());
$this->assertEmpty($entry->getTitle());
- $this->assertEquals($this->fetchingErrorMessage, $entry->getContent());
+ $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getMimetype());
$this->assertEmpty($entry->getLanguage());
- $this->assertEquals(0.0, $entry->getReadingTime());
- $this->assertEquals('0.0.0.0', $entry->getDomainName());
+ $this->assertSame(0.0, $entry->getReadingTime());
+ $this->assertSame('0.0.0.0', $entry->getDomainName());
}
public function testWithEmptyContentButOG()
@@ -107,18 +115,19 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
],
]);
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://domain.io');
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://domain.io');
- $this->assertEquals('http://domain.io', $entry->getUrl());
- $this->assertEquals('my title', $entry->getTitle());
- $this->assertEquals($this->fetchingErrorMessage.'
But we found a short description:
desc', $entry->getContent());
+ $this->assertSame('http://domain.io', $entry->getUrl());
+ $this->assertSame('my title', $entry->getTitle());
+ $this->assertSame($this->fetchingErrorMessage . 'But we found a short description:
desc', $entry->getContent());
$this->assertEmpty($entry->getPreviewPicture());
$this->assertEmpty($entry->getLanguage());
$this->assertEmpty($entry->getHttpStatus());
$this->assertEmpty($entry->getMimetype());
- $this->assertEquals(0.0, $entry->getReadingTime());
- $this->assertEquals('domain.io', $entry->getDomainName());
+ $this->assertSame(0.0, $entry->getReadingTime());
+ $this->assertSame('domain.io', $entry->getDomainName());
}
public function testWithContent()
@@ -148,18 +157,19 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
],
]);
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
-
- $this->assertEquals('http://1.1.1.1', $entry->getUrl());
- $this->assertEquals('this is my title', $entry->getTitle());
- $this->assertContains('this is my content', $entry->getContent());
- $this->assertEquals('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
- $this->assertEquals('text/html', $entry->getMimetype());
- $this->assertEquals('fr', $entry->getLanguage());
- $this->assertEquals('200', $entry->getHttpStatus());
- $this->assertEquals(4.0, $entry->getReadingTime());
- $this->assertEquals('1.1.1.1', $entry->getDomainName());
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
+ $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
}
public function testWithContentAndNoOgImage()
@@ -185,74 +195,797 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
'open_graph' => [
'og_title' => 'my OG title',
'og_description' => 'OG desc',
- 'og_image' => false,
+ 'og_image' => null,
],
]);
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0');
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
- $this->assertEquals('http://1.1.1.1', $entry->getUrl());
- $this->assertEquals('this is my title', $entry->getTitle());
- $this->assertContains('this is my content', $entry->getContent());
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
$this->assertNull($entry->getPreviewPicture());
- $this->assertEquals('text/html', $entry->getMimetype());
- $this->assertEquals('fr', $entry->getLanguage());
- $this->assertEquals('200', $entry->getHttpStatus());
- $this->assertEquals(4.0, $entry->getReadingTime());
- $this->assertEquals('1.1.1.1', $entry->getDomainName());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
}
- public function testWithForcedContent()
+ public function testWithContentAndContentImage()
{
$tagger = $this->getTaggerMock();
$tagger->expects($this->once())
->method('tag');
- $graby = $this->getMockBuilder('Graby\Graby')->getMock();
-
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
- 'html' => str_repeat('this is my content', 325),
- 'title' => 'this is my title',
- 'url' => 'http://1.1.1.1',
- 'content_type' => 'text/html',
- 'language' => 'fr',
- ]);
-
- $this->assertEquals('http://1.1.1.1', $entry->getUrl());
- $this->assertEquals('this is my title', $entry->getTitle());
- $this->assertContains('this is my content', $entry->getContent());
- $this->assertEquals('text/html', $entry->getMimetype());
- $this->assertEquals('fr', $entry->getLanguage());
- $this->assertEquals(4.0, $entry->getReadingTime());
- $this->assertEquals('1.1.1.1', $entry->getDomainName());
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => "Test
",
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'status' => '200',
+ 'open_graph' => [
+ 'og_title' => 'my OG title',
+ 'og_description' => 'OG desc',
+ 'og_image' => null,
+ ],
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertSame("Test
", $entry->getContent());
+ $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame(0.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
}
- public function testTaggerThrowException()
+ public function testWithContentImageAndOgImage()
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => "Test
",
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'status' => '200',
+ 'open_graph' => [
+ 'og_title' => 'my OG title',
+ 'og_description' => 'OG desc',
+ 'og_image' => 'http://3.3.3.3/cover.jpg',
+ ],
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertSame("Test
", $entry->getContent());
+ $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame(0.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ }
+
+ public function testWithContentAndBadLanguage()
{
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $validator = $this->getValidator(false);
+ $validator->expects($this->once())
+ ->method('validate')
+ ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
+
$graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
->disableOriginalConstructor()
->getMock();
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => str_repeat('this is my content', 325),
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'dontexist',
+ 'status' => '200',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertNull($entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ }
+
+ public function testWithContentAndBadOgImage()
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $validator = $this->getValidator(false);
+ $validator->expects($this->exactly(2))
+ ->method('validate')
+ ->will($this->onConsecutiveCalls(
+ new ConstraintViolationList(),
+ new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
+ ));
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => str_repeat('this is my content', 325),
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'status' => '200',
+ 'open_graph' => [
+ 'og_title' => 'my OG title',
+ 'og_description' => 'OG desc',
+ 'og_image' => 'https://',
+ ],
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
+ $this->assertNull($entry->getPreviewPicture());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ }
+
+ public function testWithForcedContent()
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
+ $entry = new Entry(new User());
+ $proxy->updateEntry(
+ $entry,
+ 'http://0.0.0.0',
+ [
+ 'html' => str_repeat('this is my content', 325),
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'date' => '1395635872',
+ 'authors' => ['Jeremy', 'Nico', 'Thomas'],
+ 'all_headers' => [
+ 'Cache-Control' => 'no-cache',
+ ],
+ ]
+ );
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
+ $this->assertContains('Jeremy', $entry->getPublishedBy());
+ $this->assertContains('Nico', $entry->getPublishedBy());
+ $this->assertContains('Thomas', $entry->getPublishedBy());
+ $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
+ $this->assertContains('no-cache', $entry->getHeaders());
+ }
+
+ public function testWithForcedContentAndDatetime()
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $logHandler = new TestHandler();
+ $logger = new Logger('test', [$logHandler]);
+
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry(
+ $entry,
+ 'http://1.1.1.1',
+ [
+ 'html' => str_repeat('this is my content', 325),
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'date' => '2016-09-08T11:55:58+0200',
+ ]
+ );
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
+ }
+
+ public function testWithForcedContentAndBadDate()
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $logger = new Logger('foo');
+ $handler = new TestHandler();
+ $logger->pushHandler($handler);
+
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry(
+ $entry,
+ 'http://1.1.1.1',
+ [
+ 'html' => str_repeat('this is my content', 325),
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'date' => '01 02 2012',
+ ]
+ );
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('content', $entry->getContent());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame(4.0, $entry->getReadingTime());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ $this->assertNull($entry->getPublishedAt());
+
+ $records = $handler->getRecords();
+
+ $this->assertCount(3, $records);
+ $this->assertContains('Error while defining date', $records[0]['message']);
+ }
+
+ public function testTaggerThrowException()
+ {
$tagger = $this->getTaggerMock();
$tagger->expects($this->once())
->method('tag')
->will($this->throwException(new \Exception()));
- $proxy = new ContentProxy($graby, $tagger, $this->getLogger(), $this->fetchingErrorMessage);
-
- $entry = $proxy->updateEntry(new Entry(new User()), 'http://0.0.0.0', [
- 'html' => str_repeat('this is my content', 325),
- 'title' => 'this is my title',
- 'url' => 'http://1.1.1.1',
- 'content_type' => 'text/html',
- 'language' => 'fr',
- ]);
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry(
+ $entry,
+ 'http://1.1.1.1',
+ [
+ 'html' => str_repeat('this is my content', 325),
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ ]
+ );
$this->assertCount(0, $entry->getTags());
}
+ public function dataForCrazyHtml()
+ {
+ return [
+ 'script and comment' => [
+ 'Script inside:
',
+ 'lol',
+ ],
+ 'script' => [
+ 'Script inside:',
+ 'script',
+ ],
+ ];
+ }
+
+ /**
+ * @dataProvider dataForCrazyHtml
+ */
+ public function testWithCrazyHtmlContent($html, $escapedString)
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry(
+ $entry,
+ 'http://1.1.1.1',
+ [
+ 'html' => $html,
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1',
+ 'content_type' => 'text/html',
+ 'language' => 'fr',
+ 'status' => '200',
+ 'open_graph' => [
+ 'og_title' => 'my OG title',
+ 'og_description' => 'OG desc',
+ 'og_image' => 'http://3.3.3.3/cover.jpg',
+ ],
+ ]
+ );
+
+ $this->assertSame('http://1.1.1.1', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertNotContains($escapedString, $entry->getContent());
+ $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
+ $this->assertSame('text/html', $entry->getMimetype());
+ $this->assertSame('fr', $entry->getLanguage());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ }
+
+ public function testWithImageAsContent()
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => '',
+ 'title' => 'this is my title',
+ 'url' => 'http://1.1.1.1/image.jpg',
+ 'content_type' => 'image/jpeg',
+ 'status' => '200',
+ 'open_graph' => [],
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
+ $this->assertSame('this is my title', $entry->getTitle());
+ $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
+ $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
+ $this->assertSame('image/jpeg', $entry->getMimetype());
+ $this->assertSame('200', $entry->getHttpStatus());
+ $this->assertSame('1.1.1.1', $entry->getDomainName());
+ }
+
+ public function testWebsiteWithValidUTF8Title_doNothing()
+ {
+ // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
+ // See http://graphemica.com for more info about the characters
+ // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+ $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'text/html',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+ $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
+ {
+ // See http://graphemica.com for more info about the characters
+ // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
+ // The correct UTF-8 encoding of the € character (U+20AC) is E282AC
+ $actualTitle = $this->hexToStr('61' . '80' . '62');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'text/html',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
+ $expectedTitle = '61' . '62';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithUTF16BETitle_convertToUTF8()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF-16BE
+ $actualTitle = $this->hexToStr('D83DDE3B');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
+ $expectedTitle = 'F09F98BB';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithUTF8Title_doNothing()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
+ $actualTitle = $this->hexToStr('F09F98BB');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
+ $expectedTitle = 'F09F98BB';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithWINDOWS1252Title_convertToUTF8()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '€' (80) in hexadecimal and WINDOWS-1252
+ $actualTitle = $this->hexToStr('80');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
+ $expectedTitle = 'E282AC';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
+ // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
+ $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+ // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
+ $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ /**
+ * Data provider for testWithChangedUrl.
+ *
+ * Arrays contain the following values:
+ * $entry_url
+ * $origin_url
+ * $content_url
+ * $expected_entry_url
+ * $expected_origin_url
+ * $expected_domain
+ */
+ public function dataForChangedUrl()
+ {
+ return [
+ 'normal' => [
+ 'http://0.0.0.0',
+ null,
+ 'http://1.1.1.1',
+ 'http://1.1.1.1',
+ 'http://0.0.0.0',
+ '1.1.1.1',
+ ],
+ 'origin already set' => [
+ 'http://0.0.0.0',
+ 'http://hello',
+ 'http://1.1.1.1',
+ 'http://1.1.1.1',
+ 'http://hello',
+ '1.1.1.1',
+ ],
+ 'trailing slash' => [
+ 'https://example.com/hello-world',
+ null,
+ 'https://example.com/hello-world/',
+ 'https://example.com/hello-world/',
+ null,
+ 'example.com',
+ ],
+ 'query string in fetched content' => [
+ 'https://example.org/hello',
+ null,
+ 'https://example.org/hello?world=1',
+ 'https://example.org/hello?world=1',
+ 'https://example.org/hello',
+ 'example.org',
+ ],
+ 'fragment in fetched content' => [
+ 'https://example.org/hello',
+ null,
+ 'https://example.org/hello#world',
+ 'https://example.org/hello',
+ null,
+ 'example.org',
+ ],
+ 'fragment and query string in fetched content' => [
+ 'https://example.org/hello',
+ null,
+ 'https://example.org/hello?foo#world',
+ 'https://example.org/hello?foo#world',
+ 'https://example.org/hello',
+ 'example.org',
+ ],
+ 'different path and query string in fetch content' => [
+ 'https://example.org/hello',
+ null,
+ 'https://example.org/world?foo',
+ 'https://example.org/world?foo',
+ 'https://example.org/hello',
+ 'example.org',
+ ],
+ 'feedproxy ignore list test' => [
+ 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
+ null,
+ 'https://example.org/hello-wallabag',
+ 'https://example.org/hello-wallabag',
+ null,
+ 'example.org',
+ ],
+ 'feedproxy ignore list test with origin url already set' => [
+ 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
+ 'https://example.org/this-is-source',
+ 'https://example.org/hello-wallabag',
+ 'https://example.org/hello-wallabag',
+ 'https://example.org/this-is-source',
+ 'example.org',
+ ],
+ 'lemonde ignore pattern test' => [
+ 'http://www.lemonde.fr/tiny/url',
+ null,
+ 'http://example.com/hello-world',
+ 'http://example.com/hello-world',
+ null,
+ 'example.com',
+ ],
+ ];
+ }
+
+ /**
+ * @dataProvider dataForChangedUrl
+ */
+ public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
+ {
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
+ $entry = new Entry(new User());
+ $entry->setOriginUrl($origin_url);
+ $proxy->updateEntry(
+ $entry,
+ $entry_url,
+ [
+ 'html' => false,
+ 'title' => '',
+ 'url' => $content_url,
+ 'content_type' => '',
+ 'language' => '',
+ ],
+ true
+ );
+
+ $this->assertSame($expected_entry_url, $entry->getUrl());
+ $this->assertSame($expected_domain, $entry->getDomainName());
+ $this->assertSame($expected_origin_url, $entry->getOriginUrl());
+ }
+
+ /**
+ * https://stackoverflow.com/a/18506801.
+ *
+ * @param $string
+ *
+ * @return string
+ */
+ private function strToHex($string)
+ {
+ $hex = '';
+ for ($i = 0; $i < \strlen($string); ++$i) {
+ $ord = \ord($string[$i]);
+ $hexCode = dechex($ord);
+ $hex .= substr('0' . $hexCode, -2);
+ }
+
+ return strtoupper($hex);
+ }
+
+ /**
+ * https://stackoverflow.com/a/18506801.
+ *
+ * @param $hex
+ *
+ * @return string
+ */
+ private function hexToStr($hex)
+ {
+ $string = '';
+ for ($i = 0; $i < \strlen($hex) - 1; $i += 2) {
+ $string .= \chr(hexdec($hex[$i] . $hex[$i + 1]));
+ }
+
+ return $string;
+ }
+
private function getTaggerMock()
{
return $this->getMockBuilder(RuleBasedTagger::class)
@@ -265,4 +998,20 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
{
return new NullLogger();
}
+
+ private function getValidator($withDefaultMock = true)
+ {
+ $mock = $this->getMockBuilder(RecursiveValidator::class)
+ ->setMethods(['validate'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ if ($withDefaultMock) {
+ $mock->expects($this->any())
+ ->method('validate')
+ ->willReturn(new ConstraintViolationList());
+ }
+
+ return $mock;
+ }
}