diff options
Diffstat (limited to 'tests/Wallabag/CoreBundle/Helper')
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 245 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | 69 |
2 files changed, 313 insertions, 1 deletions
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 398592e1..3f3c60d0 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | |||
@@ -11,7 +11,6 @@ use Symfony\Component\Validator\ConstraintViolation; | |||
11 | use Symfony\Component\Validator\ConstraintViolationList; | 11 | use Symfony\Component\Validator\ConstraintViolationList; |
12 | use Symfony\Component\Validator\Validator\RecursiveValidator; | 12 | use Symfony\Component\Validator\Validator\RecursiveValidator; |
13 | use Wallabag\CoreBundle\Entity\Entry; | 13 | use Wallabag\CoreBundle\Entity\Entry; |
14 | use Wallabag\CoreBundle\Entity\Tag; | ||
15 | use Wallabag\CoreBundle\Helper\ContentProxy; | 14 | use Wallabag\CoreBundle\Helper\ContentProxy; |
16 | use Wallabag\CoreBundle\Helper\RuleBasedTagger; | 15 | use Wallabag\CoreBundle\Helper\RuleBasedTagger; |
17 | use Wallabag\UserBundle\Entity\User; | 16 | use Wallabag\UserBundle\Entity\User; |
@@ -532,6 +531,250 @@ class ContentProxyTest extends TestCase | |||
532 | $this->assertSame('1.1.1.1', $entry->getDomainName()); | 531 | $this->assertSame('1.1.1.1', $entry->getDomainName()); |
533 | } | 532 | } |
534 | 533 | ||
534 | public function testWebsiteWithValidUTF8Title_doNothing() | ||
535 | { | ||
536 | // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex | ||
537 | // See http://graphemica.com for more info about the characters | ||
538 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | ||
539 | $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A'); | ||
540 | |||
541 | $tagger = $this->getTaggerMock(); | ||
542 | $tagger->expects($this->once()) | ||
543 | ->method('tag'); | ||
544 | |||
545 | $graby = $this->getMockBuilder('Graby\Graby') | ||
546 | ->setMethods(['fetchContent']) | ||
547 | ->disableOriginalConstructor() | ||
548 | ->getMock(); | ||
549 | |||
550 | $graby->expects($this->any()) | ||
551 | ->method('fetchContent') | ||
552 | ->willReturn([ | ||
553 | 'html' => false, | ||
554 | 'title' => $actualTitle, | ||
555 | 'url' => '', | ||
556 | 'content_type' => 'text/html', | ||
557 | 'language' => '', | ||
558 | ]); | ||
559 | |||
560 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
561 | $entry = new Entry(new User()); | ||
562 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
563 | |||
564 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | ||
565 | $expectedTitle = 'F09F98BB' . 'E284A4' . '7A'; | ||
566 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
567 | } | ||
568 | |||
569 | public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter() | ||
570 | { | ||
571 | // See http://graphemica.com for more info about the characters | ||
572 | // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is a invalid UTF-8 character. | ||
573 | // The correct UTF-8 € character (U+20AC) is E282AC | ||
574 | $actualTitle = $this->hexToStr('61' . '80' . '62'); | ||
575 | |||
576 | $tagger = $this->getTaggerMock(); | ||
577 | $tagger->expects($this->once()) | ||
578 | ->method('tag'); | ||
579 | |||
580 | $graby = $this->getMockBuilder('Graby\Graby') | ||
581 | ->setMethods(['fetchContent']) | ||
582 | ->disableOriginalConstructor() | ||
583 | ->getMock(); | ||
584 | |||
585 | $graby->expects($this->any()) | ||
586 | ->method('fetchContent') | ||
587 | ->willReturn([ | ||
588 | 'html' => false, | ||
589 | 'title' => $actualTitle, | ||
590 | 'url' => '', | ||
591 | 'content_type' => 'text/html', | ||
592 | 'language' => '', | ||
593 | ]); | ||
594 | |||
595 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
596 | $entry = new Entry(new User()); | ||
597 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
598 | |||
599 | // 'ab' (61;62) because all invalid UTF-8 character (like 80) are removed | ||
600 | $expectedTitle = '61' . '62'; | ||
601 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
602 | } | ||
603 | |||
604 | public function testPdfWithUTF16BETitle_convertToUTF8() | ||
605 | { | ||
606 | // See http://graphemica.com for more info about the characters | ||
607 | // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE | ||
608 | $actualTitle = $this->hexToStr('D83DDE3B'); | ||
609 | |||
610 | $tagger = $this->getTaggerMock(); | ||
611 | $tagger->expects($this->once()) | ||
612 | ->method('tag'); | ||
613 | |||
614 | $graby = $this->getMockBuilder('Graby\Graby') | ||
615 | ->setMethods(['fetchContent']) | ||
616 | ->disableOriginalConstructor() | ||
617 | ->getMock(); | ||
618 | |||
619 | $graby->expects($this->any()) | ||
620 | ->method('fetchContent') | ||
621 | ->willReturn([ | ||
622 | 'html' => false, | ||
623 | 'title' => $actualTitle, | ||
624 | 'url' => '', | ||
625 | 'content_type' => 'application/pdf', | ||
626 | 'language' => '', | ||
627 | ]); | ||
628 | |||
629 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
630 | $entry = new Entry(new User()); | ||
631 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
632 | |||
633 | // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 | ||
634 | $expectedTitle = 'F09F98BB'; | ||
635 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
636 | } | ||
637 | |||
638 | public function testPdfWithUTF8Title_doNothing() | ||
639 | { | ||
640 | // See http://graphemica.com for more info about the characters | ||
641 | // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF8 | ||
642 | $actualTitle = $this->hexToStr('F09F98BB'); | ||
643 | |||
644 | $tagger = $this->getTaggerMock(); | ||
645 | $tagger->expects($this->once()) | ||
646 | ->method('tag'); | ||
647 | |||
648 | $graby = $this->getMockBuilder('Graby\Graby') | ||
649 | ->setMethods(['fetchContent']) | ||
650 | ->disableOriginalConstructor() | ||
651 | ->getMock(); | ||
652 | |||
653 | $graby->expects($this->any()) | ||
654 | ->method('fetchContent') | ||
655 | ->willReturn([ | ||
656 | 'html' => false, | ||
657 | 'title' => $actualTitle, | ||
658 | 'url' => '', | ||
659 | 'content_type' => 'application/pdf', | ||
660 | 'language' => '', | ||
661 | ]); | ||
662 | |||
663 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
664 | $entry = new Entry(new User()); | ||
665 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
666 | |||
667 | // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 | ||
668 | $expectedTitle = 'F09F98BB'; | ||
669 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
670 | } | ||
671 | |||
672 | public function testPdfWithWINDOWS1252Title_convertToUTF8() | ||
673 | { | ||
674 | // See http://graphemica.com for more info about the characters | ||
675 | // '€' (80) in hexadecimal and WINDOWS-1252 | ||
676 | $actualTitle = $this->hexToStr('80'); | ||
677 | |||
678 | $tagger = $this->getTaggerMock(); | ||
679 | $tagger->expects($this->once()) | ||
680 | ->method('tag'); | ||
681 | |||
682 | $graby = $this->getMockBuilder('Graby\Graby') | ||
683 | ->setMethods(['fetchContent']) | ||
684 | ->disableOriginalConstructor() | ||
685 | ->getMock(); | ||
686 | |||
687 | $graby->expects($this->any()) | ||
688 | ->method('fetchContent') | ||
689 | ->willReturn([ | ||
690 | 'html' => false, | ||
691 | 'title' => $actualTitle, | ||
692 | 'url' => '', | ||
693 | 'content_type' => 'application/pdf', | ||
694 | 'language' => '', | ||
695 | ]); | ||
696 | |||
697 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
698 | $entry = new Entry(new User()); | ||
699 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
700 | |||
701 | // '€' (U+20AC or E282AC) in hexadecimal and UTF-8 | ||
702 | $expectedTitle = 'E282AC'; | ||
703 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
704 | } | ||
705 | |||
706 | public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter() | ||
707 | { | ||
708 | // See http://graphemica.com for more info about the characters | ||
709 | // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8 | ||
710 | // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252 | ||
711 | $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A'); | ||
712 | |||
713 | $tagger = $this->getTaggerMock(); | ||
714 | $tagger->expects($this->once()) | ||
715 | ->method('tag'); | ||
716 | |||
717 | $graby = $this->getMockBuilder('Graby\Graby') | ||
718 | ->setMethods(['fetchContent']) | ||
719 | ->disableOriginalConstructor() | ||
720 | ->getMock(); | ||
721 | |||
722 | $graby->expects($this->any()) | ||
723 | ->method('fetchContent') | ||
724 | ->willReturn([ | ||
725 | 'html' => false, | ||
726 | 'title' => $actualTitle, | ||
727 | 'url' => '', | ||
728 | 'content_type' => 'application/pdf', | ||
729 | 'language' => '', | ||
730 | ]); | ||
731 | |||
732 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
733 | $entry = new Entry(new User()); | ||
734 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
735 | |||
736 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | ||
737 | // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed | ||
738 | $expectedTitle = 'F09F98BB' . 'E284A4' . '7A'; | ||
739 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
740 | } | ||
741 | |||
742 | /** | ||
743 | * https://stackoverflow.com/a/18506801. | ||
744 | * | ||
745 | * @param $string | ||
746 | * | ||
747 | * @return string | ||
748 | */ | ||
749 | private function strToHex($string) | ||
750 | { | ||
751 | $hex = ''; | ||
752 | for ($i = 0; $i < \strlen($string); ++$i) { | ||
753 | $ord = \ord($string[$i]); | ||
754 | $hexCode = dechex($ord); | ||
755 | $hex .= substr('0' . $hexCode, -2); | ||
756 | } | ||
757 | |||
758 | return strtoupper($hex); | ||
759 | } | ||
760 | |||
761 | /** | ||
762 | * https://stackoverflow.com/a/18506801. | ||
763 | * | ||
764 | * @param $hex | ||
765 | * | ||
766 | * @return string | ||
767 | */ | ||
768 | private function hexToStr($hex) | ||
769 | { | ||
770 | $string = ''; | ||
771 | for ($i = 0; $i < \strlen($hex) - 1; $i += 2) { | ||
772 | $string .= \chr(hexdec($hex[$i] . $hex[$i + 1])); | ||
773 | } | ||
774 | |||
775 | return $string; | ||
776 | } | ||
777 | |||
535 | private function getTaggerMock() | 778 | private function getTaggerMock() |
536 | { | 779 | { |
537 | return $this->getMockBuilder(RuleBasedTagger::class) | 780 | return $this->getMockBuilder(RuleBasedTagger::class) |
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php index 0e1d296b..cda5f843 100644 --- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php +++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php | |||
@@ -183,4 +183,73 @@ class DownloadImagesTest extends TestCase | |||
183 | $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); | 183 | $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); |
184 | $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); | 184 | $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); |
185 | } | 185 | } |
186 | |||
187 | public function testProcessImageWithSrcset() | ||
188 | { | ||
189 | $client = new Client(); | ||
190 | |||
191 | $mock = new Mock([ | ||
192 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
193 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
194 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
195 | ]); | ||
196 | |||
197 | $client->getEmitter()->attach($mock); | ||
198 | |||
199 | $logHandler = new TestHandler(); | ||
200 | $logger = new Logger('test', [$logHandler]); | ||
201 | |||
202 | $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); | ||
203 | $res = $download->processHtml(123, '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/'); | ||
204 | |||
205 | $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced'); | ||
206 | } | ||
207 | |||
208 | public function testProcessImageWithTrickySrcset() | ||
209 | { | ||
210 | $client = new Client(); | ||
211 | |||
212 | $mock = new Mock([ | ||
213 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
214 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
215 | new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
216 | ]); | ||
217 | |||
218 | $client->getEmitter()->attach($mock); | ||
219 | |||
220 | $logHandler = new TestHandler(); | ||
221 | $logger = new Logger('test', [$logHandler]); | ||
222 | |||
223 | $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); | ||
224 | $res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 ) | ||
225 | (min-width: 1251px) calc( (100vw - 530px) / 2 ) | ||
226 | (min-width: 1086px) calc(100vw - 480px) | ||
227 | (min-width: 626px) calc(100vw - 335px) | ||
228 | calc(100vw - 30px)" alt="" /></figure>', 'https://css-tricks.com/the-critical-request/'); | ||
229 | |||
230 | $this->assertNotContains('f_auto,q_auto', $res, 'Image srcset attribute were not replaced'); | ||
231 | } | ||
232 | |||
233 | public function testProcessImageWithNullPath() | ||
234 | { | ||
235 | $client = new Client(); | ||
236 | |||
237 | $mock = new Mock([ | ||
238 | new Response(200, ['content-type' => null], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))), | ||
239 | ]); | ||
240 | |||
241 | $client->getEmitter()->attach($mock); | ||
242 | |||
243 | $logHandler = new TestHandler(); | ||
244 | $logger = new Logger('test', [$logHandler]); | ||
245 | |||
246 | $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger); | ||
247 | |||
248 | $res = $download->processSingleImage( | ||
249 | 123, | ||
250 | null, | ||
251 | 'https://framablog.org/2018/06/30/engagement-atypique/' | ||
252 | ); | ||
253 | $this->assertFalse($res); | ||
254 | } | ||
186 | } | 255 | } |