X-Git-Url: https://git.immae.eu/?a=blobdiff_plain;f=tests%2FWallabag%2FCoreBundle%2FHelper%2FContentProxyTest.php;h=9d8098efdff15fdd44397d9915b19c3784aea68d;hb=c01d9532920ec5a298bb347dbb83a078d36d4841;hp=0731a0c0b9f1e28f92f03b8b00165b00edf0e43a;hpb=fb258aeef0a28ce9aaebac8f337b9970bd99e70d;p=github%2Fwallabag%2Fwallabag.git
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 0731a0c0..9d8098ef 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -5,17 +5,17 @@ namespace Tests\Wallabag\CoreBundle\Helper;
use Graby\Graby;
use Monolog\Handler\TestHandler;
use Monolog\Logger;
+use PHPUnit\Framework\TestCase;
use Psr\Log\NullLogger;
use Symfony\Component\Validator\ConstraintViolation;
use Symfony\Component\Validator\ConstraintViolationList;
use Symfony\Component\Validator\Validator\RecursiveValidator;
use Wallabag\CoreBundle\Entity\Entry;
-use Wallabag\CoreBundle\Entity\Tag;
use Wallabag\CoreBundle\Helper\ContentProxy;
use Wallabag\CoreBundle\Helper\RuleBasedTagger;
use Wallabag\UserBundle\Entity\User;
-class ContentProxyTest extends \PHPUnit_Framework_TestCase
+class ContentProxyTest extends TestCase
{
private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please troubleshoot this issue.';
@@ -220,7 +220,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagger->expects($this->once())
->method('tag');
- $validator = $this->getValidator();
+ $validator = $this->getValidator(false);
$validator->expects($this->once())
->method('validate')
->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
@@ -261,7 +261,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagger->expects($this->once())
->method('tag');
- $validator = $this->getValidator();
+ $validator = $this->getValidator(false);
$validator->expects($this->exactly(2))
->method('validate')
->will($this->onConsecutiveCalls(
@@ -311,7 +311,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$tagger->expects($this->once())
->method('tag');
- $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
$entry = new Entry(new User());
$proxy->updateEntry(
$entry,
@@ -341,6 +341,7 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$this->assertContains('Jeremy', $entry->getPublishedBy());
$this->assertContains('Nico', $entry->getPublishedBy());
$this->assertContains('Thomas', $entry->getPublishedBy());
+ $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
$this->assertContains('no-cache', $entry->getHeaders());
}
@@ -530,6 +531,242 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
$this->assertSame('1.1.1.1', $entry->getDomainName());
}
+ public function testWebsiteWithValidUTF8Title_doNothing()
+ {
+ // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
+ // See http://graphemica.com for more info about the characters
+ // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+ $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'text/html',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+ $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
+ {
+ // See http://graphemica.com for more info about the characters
+ // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
+ // The correct UTF-8 € character (U+20AC) is E282AC
+ $actualTitle = $this->hexToStr('61' . '80' . '62');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'text/html',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
+ $expectedTitle = '61' . '62';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithUTF16BETitle_convertToUTF8()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
+ $actualTitle = $this->hexToStr('D83DDE3B');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
+ $expectedTitle = 'F09F98BB';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithUTF8Title_doNothing()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '😻' (U+1F63B;F09F98BB) in hexadecimal and as UTF8
+ $actualTitle = $this->hexToStr('F09F98BB');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
+ $expectedTitle = 'F09F98BB';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithWINDOWS1252Title_convertToUTF8()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '€' (80) in hexadecimal and WINDOWS-1252
+ $actualTitle = $this->hexToStr('80');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
+ $expectedTitle = 'E282AC';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
+ {
+ // See http://graphemica.com for more info about the characters
+ // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
+ // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
+ $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
+
+ $tagger = $this->getTaggerMock();
+ $tagger->expects($this->once())
+ ->method('tag');
+
+ $graby = $this->getMockBuilder('Graby\Graby')
+ ->setMethods(['fetchContent'])
+ ->disableOriginalConstructor()
+ ->getMock();
+
+ $graby->expects($this->any())
+ ->method('fetchContent')
+ ->willReturn([
+ 'html' => false,
+ 'title' => $actualTitle,
+ 'url' => '',
+ 'content_type' => 'application/pdf',
+ 'language' => '',
+ ]);
+
+ $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
+ $entry = new Entry(new User());
+ $proxy->updateEntry($entry, 'http://0.0.0.0');
+
+ // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
+ // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
+ $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
+ $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
+ }
+
+ /**
+ * https://stackoverflow.com/a/18506801
+ * @param $string
+ * @return string
+ */
+ function strToHex($string){
+ $hex = '';
+ for ($i=0; $igetMockBuilder(RuleBasedTagger::class)
@@ -543,11 +780,19 @@ class ContentProxyTest extends \PHPUnit_Framework_TestCase
return new NullLogger();
}
- private function getValidator()
+ private function getValidator($withDefaultMock = true)
{
- return $this->getMockBuilder(RecursiveValidator::class)
+ $mock = $this->getMockBuilder(RecursiveValidator::class)
->setMethods(['validate'])
->disableOriginalConstructor()
->getMock();
+
+ if ($withDefaultMock) {
+ $mock->expects($this->any())
+ ->method('validate')
+ ->willReturn(new ConstraintViolationList());
+ }
+
+ return $mock;
}
}