diff options
Diffstat (limited to 'tests')
-rw-r--r-- | tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php | 5 | ||||
-rw-r--r-- | tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | 371 |
2 files changed, 373 insertions, 3 deletions
diff --git a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php index 6effe43e..006ca330 100644 --- a/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php +++ b/tests/Wallabag/CoreBundle/Controller/EntryControllerTest.php | |||
@@ -166,9 +166,8 @@ class EntryControllerTest extends WallabagCoreTestCase | |||
166 | $this->assertSame($this->url, $content->getUrl()); | 166 | $this->assertSame($this->url, $content->getUrl()); |
167 | $this->assertContains('Google', $content->getTitle()); | 167 | $this->assertContains('Google', $content->getTitle()); |
168 | $this->assertSame('fr', $content->getLanguage()); | 168 | $this->assertSame('fr', $content->getLanguage()); |
169 | $this->assertSame('2015-03-28 11:43:19', $content->getPublishedAt()->format('Y-m-d H:i:s')); | 169 | $this->assertSame('2016-04-07 19:01:35', $content->getPublishedAt()->format('Y-m-d H:i:s')); |
170 | $this->assertSame('Morgane Tual', $author[0]); | 170 | $this->assertArrayHasKey('x-frame-options', $content->getHeaders()); |
171 | $this->assertArrayHasKey('x-varnish1', $content->getHeaders()); | ||
172 | $client->getContainer()->get('craue_config')->set('store_article_headers', 0); | 171 | $client->getContainer()->get('craue_config')->set('store_article_headers', 0); |
173 | } | 172 | } |
174 | 173 | ||
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php index 51df8de1..3dd9273c 100644 --- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php +++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php | |||
@@ -531,6 +531,377 @@ class ContentProxyTest extends TestCase | |||
531 | $this->assertSame('1.1.1.1', $entry->getDomainName()); | 531 | $this->assertSame('1.1.1.1', $entry->getDomainName()); |
532 | } | 532 | } |
533 | 533 | ||
534 | public function testWebsiteWithValidUTF8Title_doNothing() | ||
535 | { | ||
536 | // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex | ||
537 | // See http://graphemica.com for more info about the characters | ||
538 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | ||
539 | $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A'); | ||
540 | |||
541 | $tagger = $this->getTaggerMock(); | ||
542 | $tagger->expects($this->once()) | ||
543 | ->method('tag'); | ||
544 | |||
545 | $graby = $this->getMockBuilder('Graby\Graby') | ||
546 | ->setMethods(['fetchContent']) | ||
547 | ->disableOriginalConstructor() | ||
548 | ->getMock(); | ||
549 | |||
550 | $graby->expects($this->any()) | ||
551 | ->method('fetchContent') | ||
552 | ->willReturn([ | ||
553 | 'html' => false, | ||
554 | 'title' => $actualTitle, | ||
555 | 'url' => '', | ||
556 | 'content_type' => 'text/html', | ||
557 | 'language' => '', | ||
558 | ]); | ||
559 | |||
560 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
561 | $entry = new Entry(new User()); | ||
562 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
563 | |||
564 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | ||
565 | $expectedTitle = 'F09F98BB' . 'E284A4' . '7A'; | ||
566 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
567 | } | ||
568 | |||
569 | public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter() | ||
570 | { | ||
571 | // See http://graphemica.com for more info about the characters | ||
572 | // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is a invalid UTF-8 character. | ||
573 | // The correct UTF-8 € character (U+20AC) is E282AC | ||
574 | $actualTitle = $this->hexToStr('61' . '80' . '62'); | ||
575 | |||
576 | $tagger = $this->getTaggerMock(); | ||
577 | $tagger->expects($this->once()) | ||
578 | ->method('tag'); | ||
579 | |||
580 | $graby = $this->getMockBuilder('Graby\Graby') | ||
581 | ->setMethods(['fetchContent']) | ||
582 | ->disableOriginalConstructor() | ||
583 | ->getMock(); | ||
584 | |||
585 | $graby->expects($this->any()) | ||
586 | ->method('fetchContent') | ||
587 | ->willReturn([ | ||
588 | 'html' => false, | ||
589 | 'title' => $actualTitle, | ||
590 | 'url' => '', | ||
591 | 'content_type' => 'text/html', | ||
592 | 'language' => '', | ||
593 | ]); | ||
594 | |||
595 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
596 | $entry = new Entry(new User()); | ||
597 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
598 | |||
599 | // 'ab' (61;62) because all invalid UTF-8 character (like 80) are removed | ||
600 | $expectedTitle = '61' . '62'; | ||
601 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
602 | } | ||
603 | |||
604 | public function testPdfWithUTF16BETitle_convertToUTF8() | ||
605 | { | ||
606 | // See http://graphemica.com for more info about the characters | ||
607 | // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE | ||
608 | $actualTitle = $this->hexToStr('D83DDE3B'); | ||
609 | |||
610 | $tagger = $this->getTaggerMock(); | ||
611 | $tagger->expects($this->once()) | ||
612 | ->method('tag'); | ||
613 | |||
614 | $graby = $this->getMockBuilder('Graby\Graby') | ||
615 | ->setMethods(['fetchContent']) | ||
616 | ->disableOriginalConstructor() | ||
617 | ->getMock(); | ||
618 | |||
619 | $graby->expects($this->any()) | ||
620 | ->method('fetchContent') | ||
621 | ->willReturn([ | ||
622 | 'html' => false, | ||
623 | 'title' => $actualTitle, | ||
624 | 'url' => '', | ||
625 | 'content_type' => 'application/pdf', | ||
626 | 'language' => '', | ||
627 | ]); | ||
628 | |||
629 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
630 | $entry = new Entry(new User()); | ||
631 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
632 | |||
633 | // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 | ||
634 | $expectedTitle = 'F09F98BB'; | ||
635 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
636 | } | ||
637 | |||
638 | public function testPdfWithUTF8Title_doNothing() | ||
639 | { | ||
640 | // See http://graphemica.com for more info about the characters | ||
641 | // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF8 | ||
642 | $actualTitle = $this->hexToStr('F09F98BB'); | ||
643 | |||
644 | $tagger = $this->getTaggerMock(); | ||
645 | $tagger->expects($this->once()) | ||
646 | ->method('tag'); | ||
647 | |||
648 | $graby = $this->getMockBuilder('Graby\Graby') | ||
649 | ->setMethods(['fetchContent']) | ||
650 | ->disableOriginalConstructor() | ||
651 | ->getMock(); | ||
652 | |||
653 | $graby->expects($this->any()) | ||
654 | ->method('fetchContent') | ||
655 | ->willReturn([ | ||
656 | 'html' => false, | ||
657 | 'title' => $actualTitle, | ||
658 | 'url' => '', | ||
659 | 'content_type' => 'application/pdf', | ||
660 | 'language' => '', | ||
661 | ]); | ||
662 | |||
663 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
664 | $entry = new Entry(new User()); | ||
665 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
666 | |||
667 | // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8 | ||
668 | $expectedTitle = 'F09F98BB'; | ||
669 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
670 | } | ||
671 | |||
672 | public function testPdfWithWINDOWS1252Title_convertToUTF8() | ||
673 | { | ||
674 | // See http://graphemica.com for more info about the characters | ||
675 | // '€' (80) in hexadecimal and WINDOWS-1252 | ||
676 | $actualTitle = $this->hexToStr('80'); | ||
677 | |||
678 | $tagger = $this->getTaggerMock(); | ||
679 | $tagger->expects($this->once()) | ||
680 | ->method('tag'); | ||
681 | |||
682 | $graby = $this->getMockBuilder('Graby\Graby') | ||
683 | ->setMethods(['fetchContent']) | ||
684 | ->disableOriginalConstructor() | ||
685 | ->getMock(); | ||
686 | |||
687 | $graby->expects($this->any()) | ||
688 | ->method('fetchContent') | ||
689 | ->willReturn([ | ||
690 | 'html' => false, | ||
691 | 'title' => $actualTitle, | ||
692 | 'url' => '', | ||
693 | 'content_type' => 'application/pdf', | ||
694 | 'language' => '', | ||
695 | ]); | ||
696 | |||
697 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
698 | $entry = new Entry(new User()); | ||
699 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
700 | |||
701 | // '€' (U+20AC or E282AC) in hexadecimal and UTF-8 | ||
702 | $expectedTitle = 'E282AC'; | ||
703 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
704 | } | ||
705 | |||
706 | public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter() | ||
707 | { | ||
708 | // See http://graphemica.com for more info about the characters | ||
709 | // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8 | ||
710 | // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252 | ||
711 | $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A'); | ||
712 | |||
713 | $tagger = $this->getTaggerMock(); | ||
714 | $tagger->expects($this->once()) | ||
715 | ->method('tag'); | ||
716 | |||
717 | $graby = $this->getMockBuilder('Graby\Graby') | ||
718 | ->setMethods(['fetchContent']) | ||
719 | ->disableOriginalConstructor() | ||
720 | ->getMock(); | ||
721 | |||
722 | $graby->expects($this->any()) | ||
723 | ->method('fetchContent') | ||
724 | ->willReturn([ | ||
725 | 'html' => false, | ||
726 | 'title' => $actualTitle, | ||
727 | 'url' => '', | ||
728 | 'content_type' => 'application/pdf', | ||
729 | 'language' => '', | ||
730 | ]); | ||
731 | |||
732 | $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage); | ||
733 | $entry = new Entry(new User()); | ||
734 | $proxy->updateEntry($entry, 'http://0.0.0.0'); | ||
735 | |||
736 | // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8 | ||
737 | // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed | ||
738 | $expectedTitle = 'F09F98BB' . 'E284A4' . '7A'; | ||
739 | $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle())); | ||
740 | } | ||
741 | |||
742 | /** | ||
743 | * Data provider for testWithChangedUrl. | ||
744 | * | ||
745 | * Arrays contain the following values: | ||
746 | * $entry_url | ||
747 | * $origin_url | ||
748 | * $content_url | ||
749 | * $expected_entry_url | ||
750 | * $expected_origin_url | ||
751 | * $expected_domain | ||
752 | */ | ||
753 | public function dataForChangedUrl() | ||
754 | { | ||
755 | return [ | ||
756 | 'normal' => [ | ||
757 | 'http://0.0.0.0', | ||
758 | null, | ||
759 | 'http://1.1.1.1', | ||
760 | 'http://1.1.1.1', | ||
761 | 'http://0.0.0.0', | ||
762 | '1.1.1.1', | ||
763 | ], | ||
764 | 'origin already set' => [ | ||
765 | 'http://0.0.0.0', | ||
766 | 'http://hello', | ||
767 | 'http://1.1.1.1', | ||
768 | 'http://1.1.1.1', | ||
769 | 'http://hello', | ||
770 | '1.1.1.1', | ||
771 | ], | ||
772 | 'trailing slash' => [ | ||
773 | 'https://example.com/hello-world', | ||
774 | null, | ||
775 | 'https://example.com/hello-world/', | ||
776 | 'https://example.com/hello-world/', | ||
777 | null, | ||
778 | 'example.com', | ||
779 | ], | ||
780 | 'query string in fetched content' => [ | ||
781 | 'https://example.org/hello', | ||
782 | null, | ||
783 | 'https://example.org/hello?world=1', | ||
784 | 'https://example.org/hello?world=1', | ||
785 | 'https://example.org/hello', | ||
786 | 'example.org', | ||
787 | ], | ||
788 | 'fragment in fetched content' => [ | ||
789 | 'https://example.org/hello', | ||
790 | null, | ||
791 | 'https://example.org/hello#world', | ||
792 | 'https://example.org/hello', | ||
793 | null, | ||
794 | 'example.org', | ||
795 | ], | ||
796 | 'fragment and query string in fetched content' => [ | ||
797 | 'https://example.org/hello', | ||
798 | null, | ||
799 | 'https://example.org/hello?foo#world', | ||
800 | 'https://example.org/hello?foo#world', | ||
801 | 'https://example.org/hello', | ||
802 | 'example.org', | ||
803 | ], | ||
804 | 'different path and query string in fetch content' => [ | ||
805 | 'https://example.org/hello', | ||
806 | null, | ||
807 | 'https://example.org/world?foo', | ||
808 | 'https://example.org/world?foo', | ||
809 | 'https://example.org/hello', | ||
810 | 'example.org', | ||
811 | ], | ||
812 | 'feedproxy ignore list test' => [ | ||
813 | 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', | ||
814 | null, | ||
815 | 'https://example.org/hello-wallabag', | ||
816 | 'https://example.org/hello-wallabag', | ||
817 | null, | ||
818 | 'example.org', | ||
819 | ], | ||
820 | 'feedproxy ignore list test with origin url already set' => [ | ||
821 | 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', | ||
822 | 'https://example.org/this-is-source', | ||
823 | 'https://example.org/hello-wallabag', | ||
824 | 'https://example.org/hello-wallabag', | ||
825 | 'https://example.org/this-is-source', | ||
826 | 'example.org', | ||
827 | ], | ||
828 | 'lemonde ignore pattern test' => [ | ||
829 | 'http://www.lemonde.fr/tiny/url', | ||
830 | null, | ||
831 | 'http://example.com/hello-world', | ||
832 | 'http://example.com/hello-world', | ||
833 | null, | ||
834 | 'example.com', | ||
835 | ], | ||
836 | ]; | ||
837 | } | ||
838 | |||
839 | /** | ||
840 | * @dataProvider dataForChangedUrl | ||
841 | */ | ||
842 | public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain) | ||
843 | { | ||
844 | $tagger = $this->getTaggerMock(); | ||
845 | $tagger->expects($this->once()) | ||
846 | ->method('tag'); | ||
847 | |||
848 | $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true); | ||
849 | $entry = new Entry(new User()); | ||
850 | $entry->setOriginUrl($origin_url); | ||
851 | $proxy->updateEntry( | ||
852 | $entry, | ||
853 | $entry_url, | ||
854 | [ | ||
855 | 'html' => false, | ||
856 | 'title' => '', | ||
857 | 'url' => $content_url, | ||
858 | 'content_type' => '', | ||
859 | 'language' => '', | ||
860 | ], | ||
861 | true | ||
862 | ); | ||
863 | |||
864 | $this->assertSame($expected_entry_url, $entry->getUrl()); | ||
865 | $this->assertSame($expected_domain, $entry->getDomainName()); | ||
866 | $this->assertSame($expected_origin_url, $entry->getOriginUrl()); | ||
867 | } | ||
868 | |||
869 | /** | ||
870 | * https://stackoverflow.com/a/18506801. | ||
871 | * | ||
872 | * @param $string | ||
873 | * | ||
874 | * @return string | ||
875 | */ | ||
876 | private function strToHex($string) | ||
877 | { | ||
878 | $hex = ''; | ||
879 | for ($i = 0; $i < \strlen($string); ++$i) { | ||
880 | $ord = \ord($string[$i]); | ||
881 | $hexCode = dechex($ord); | ||
882 | $hex .= substr('0' . $hexCode, -2); | ||
883 | } | ||
884 | |||
885 | return strtoupper($hex); | ||
886 | } | ||
887 | |||
888 | /** | ||
889 | * https://stackoverflow.com/a/18506801. | ||
890 | * | ||
891 | * @param $hex | ||
892 | * | ||
893 | * @return string | ||
894 | */ | ||
895 | private function hexToStr($hex) | ||
896 | { | ||
897 | $string = ''; | ||
898 | for ($i = 0; $i < \strlen($hex) - 1; $i += 2) { | ||
899 | $string .= \chr(hexdec($hex[$i] . $hex[$i + 1])); | ||
900 | } | ||
901 | |||
902 | return $string; | ||
903 | } | ||
904 | |||
534 | private function getTaggerMock() | 905 | private function getTaggerMock() |
535 | { | 906 | { |
536 | return $this->getMockBuilder(RuleBasedTagger::class) | 907 | return $this->getMockBuilder(RuleBasedTagger::class) |