aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
diff options
context:
space:
mode:
authorJeremy Benoist <jeremy.benoist@gmail.com>2018-11-25 09:58:18 +0100
committerJeremy Benoist <jeremy.benoist@gmail.com>2018-11-25 09:58:18 +0100
commitb878be4cc99fd4927c70b59386cf7a57b33bb381 (patch)
tree9a5186ead93b9f6114b5341a7713f614eb58f6ec /tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
parente673b54f702f274a087e4feff409663d9636e57b (diff)
parentbffe65478de71113a16f6e7a7ef75845c9d61180 (diff)
downloadwallabag-b878be4cc99fd4927c70b59386cf7a57b33bb381.tar.gz
wallabag-b878be4cc99fd4927c70b59386cf7a57b33bb381.tar.zst
wallabag-b878be4cc99fd4927c70b59386cf7a57b33bb381.zip
Merge remote-tracking branch 'origin/master' into 2.4
# Conflicts: # web/wallassets/baggy.js # web/wallassets/manifest.json # web/wallassets/material.css # web/wallassets/material.js
Diffstat (limited to 'tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php')
-rw-r--r--tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php371
1 files changed, 371 insertions, 0 deletions
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 51df8de1..3dd9273c 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -531,6 +531,377 @@ class ContentProxyTest extends TestCase
531 $this->assertSame('1.1.1.1', $entry->getDomainName()); 531 $this->assertSame('1.1.1.1', $entry->getDomainName());
532 } 532 }
533 533
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // Title fixture: '😻ℤz' as UTF-8 bytes, written in hexadecimal
    // (U+1F63B => F09F98BB, U+2124 => E284A4, U+007A => 7A).
    // https://www.online-toolz.com/tools/text-hex-convertor.php converts UTF-8 text <=> hex,
    // and http://graphemica.com gives more info about the characters.
    $titleHex = 'F09F98BB' . 'E284A4' . '7A';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Graby double whose fetchContent() hands back the fixture title as an HTML page.
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The stored title is expected to be byte-for-byte the same as the fixture.
    $this->assertSame($titleHex, $this->strToHex($savedEntry->getTitle()));
}
568
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // Title fixture: 'a€b' encoded as WINDOWS-1252, i.e. the bytes 61 80 62.
    // A lone 0x80 byte is not valid UTF-8 (the UTF-8 encoding of € / U+20AC is E282AC).
    // See http://graphemica.com for more info about the characters.
    $brokenTitle = $this->hexToStr('61' . '80' . '62');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Graby double whose fetchContent() hands back the fixture title as an HTML page.
    $fetched = [
        'html' => false,
        'title' => $brokenTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Expected: 'ab' (61 62) - the invalid 0x80 byte must no longer be present.
    $this->assertSame('61' . '62', $this->strToHex($savedEntry->getTitle()));
}
603
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // Title fixture: '😻' (U+1F63B) encoded as UTF-16BE, i.e. the surrogate pair D83D DE3B.
    // See http://graphemica.com for more info about the characters.
    $utf16Title = $this->hexToStr('D83DDE3B');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Graby double whose fetchContent() hands back the fixture title as a PDF document.
    $fetched = [
        'html' => false,
        'title' => $utf16Title,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Expected: the same character as UTF-8, '😻' (U+1F63B) => F09F98BB.
    $this->assertSame('F09F98BB', $this->strToHex($savedEntry->getTitle()));
}
637
public function testPdfWithUTF8Title_doNothing()
{
    // Title fixture: '😻' (U+1F63B) already encoded as UTF-8, i.e. the bytes F09F98BB.
    // See http://graphemica.com for more info about the characters.
    $titleHex = 'F09F98BB';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Graby double whose fetchContent() hands back the fixture title as a PDF document.
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The stored title is expected to be byte-for-byte the same as the fixture.
    $this->assertSame($titleHex, $this->strToHex($savedEntry->getTitle()));
}
671
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // Title fixture: '€' encoded as WINDOWS-1252, i.e. the single byte 80.
    // See http://graphemica.com for more info about the characters.
    $cp1252Title = $this->hexToStr('80');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Graby double whose fetchContent() hands back the fixture title as a PDF document.
    $fetched = [
        'html' => false,
        'title' => $cp1252Title,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Expected: the same character as UTF-8, '€' (U+20AC) => E282AC.
    $this->assertSame('E282AC', $this->strToHex($savedEntry->getTitle()));
}
705
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // Title fixture: UTF-8 '😻ℤ' (F09F98BB, E284A4), then a stray 0x81 byte, then 'z' (7A).
    // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252.
    // See http://graphemica.com for more info about the characters.
    $brokenTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Graby double whose fetchContent() hands back the fixture title as a PDF document.
    $fetched = [
        'html' => false,
        'title' => $brokenTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Expected: '😻ℤz' in UTF-8 (F09F98BB, E284A4, 7A) - the stray 0x81 byte must no longer be present.
    $this->assertSame('F09F98BB' . 'E284A4' . '7A', $this->strToHex($savedEntry->getTitle()));
}
741
/**
 * Data provider for testWithChangedUrl.
 *
 * Every named data set is a list holding, in this order:
 *   $entry_url, $origin_url, $content_url,
 *   $expected_entry_url, $expected_origin_url, $expected_domain
 *
 * @return array
 */
public function dataForChangedUrl()
{
    $feedproxyUrl = 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld';

    $cases = [];

    // Fetched URL differs and no origin is set: the entry URL is expected as origin.
    $cases['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    // An origin that is already set is expected to be kept.
    $cases['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    // Only a trailing slash differs: no origin is expected.
    $cases['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $cases['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    // Only a fragment differs: the entry URL is expected to stay as it was, with no origin.
    $cases['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $cases['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $cases['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    // feedproxy entry URL: it is not expected to be recorded as origin.
    $cases['feedproxy ignore list test'] = [
        $feedproxyUrl,
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $cases['feedproxy ignore list test with origin url already set'] = [
        $feedproxyUrl,
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    // lemonde.fr tiny URL: it is not expected to be recorded as origin.
    $cases['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $cases;
}
838
/**
 * Checks the entry URL, domain name and origin URL after updateEntry() is
 * given already-fetched content whose 'url' may differ from the entry URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Content passed straight to updateEntry(); only 'url' varies between data sets.
    $prefetched = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'content_type' => '',
        'language' => '',
    ];

    // NOTE(review): the trailing `true` arguments below are flags defined by ContentProxy - confirm their meaning there.
    $contentProxy = new ContentProxy(new Graby(), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);

    $subject = new Entry(new User());
    $subject->setOriginUrl($origin_url);
    $contentProxy->updateEntry($subject, $entry_url, $prefetched, true);

    $this->assertSame($expected_entry_url, $subject->getUrl());
    $this->assertSame($expected_domain, $subject->getDomainName());
    $this->assertSame($expected_origin_url, $subject->getOriginUrl());
}
868
/**
 * Renders every byte of a string as two uppercase hexadecimal digits.
 *
 * Example: "az" becomes "617A".
 *
 * @param string $string Raw bytes to encode
 *
 * @return string Uppercase hexadecimal representation, twice as long as the input
 */
private function strToHex($string)
{
    // bin2hex() emits lowercase digits, two per byte; the tests compare against uppercase.
    return strtoupper(bin2hex($string));
}
887
/**
 * Decodes a hexadecimal string (two digits per byte, any letter case) into raw bytes.
 *
 * Example: "617A" becomes "az".
 *
 * @param string $hex Even-length string made only of hexadecimal digits
 *
 * @throws \InvalidArgumentException when $hex has an odd length or contains a non-hexadecimal character
 *
 * @return string The decoded bytes
 */
private function hexToStr($hex)
{
    // Reject malformed fixtures up front: a dropped nibble or a stray character
    // would otherwise produce a different byte string without any hint.
    if (0 !== \strlen($hex) % 2 || 1 !== preg_match('/\A[0-9a-fA-F]*\z/', $hex)) {
        throw new \InvalidArgumentException(sprintf('"%s" is not a valid hexadecimal byte string.', $hex));
    }

    return hex2bin($hex);
}
904
534 private function getTaggerMock() 905 private function getTaggerMock()
535 { 906 {
536 return $this->getMockBuilder(RuleBasedTagger::class) 907 return $this->getMockBuilder(RuleBasedTagger::class)