aboutsummaryrefslogtreecommitdiffhomepage
path: root/tests/Wallabag/CoreBundle/Helper
diff options
context:
space:
mode:
Diffstat (limited to 'tests/Wallabag/CoreBundle/Helper')
-rw-r--r--tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php245
-rw-r--r--tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php69
2 files changed, 313 insertions, 1 deletions
diff --git a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
index 398592e1..3f3c60d0 100644
--- a/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
@@ -11,7 +11,6 @@ use Symfony\Component\Validator\ConstraintViolation;
11use Symfony\Component\Validator\ConstraintViolationList; 11use Symfony\Component\Validator\ConstraintViolationList;
12use Symfony\Component\Validator\Validator\RecursiveValidator; 12use Symfony\Component\Validator\Validator\RecursiveValidator;
13use Wallabag\CoreBundle\Entity\Entry; 13use Wallabag\CoreBundle\Entity\Entry;
14use Wallabag\CoreBundle\Entity\Tag;
15use Wallabag\CoreBundle\Helper\ContentProxy; 14use Wallabag\CoreBundle\Helper\ContentProxy;
16use Wallabag\CoreBundle\Helper\RuleBasedTagger; 15use Wallabag\CoreBundle\Helper\RuleBasedTagger;
17use Wallabag\UserBundle\Entity\User; 16use Wallabag\UserBundle\Entity\User;
@@ -532,6 +531,250 @@ class ContentProxyTest extends TestCase
532 $this->assertSame('1.1.1.1', $entry->getDomainName()); 531 $this->assertSame('1.1.1.1', $entry->getDomainName());
533 } 532 }
534 533
534 public function testWebsiteWithValidUTF8Title_doNothing()
535 {
536 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
537 // See http://graphemica.com for more info about the characters
538 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
539 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
540
541 $tagger = $this->getTaggerMock();
542 $tagger->expects($this->once())
543 ->method('tag');
544
545 $graby = $this->getMockBuilder('Graby\Graby')
546 ->setMethods(['fetchContent'])
547 ->disableOriginalConstructor()
548 ->getMock();
549
550 $graby->expects($this->any())
551 ->method('fetchContent')
552 ->willReturn([
553 'html' => false,
554 'title' => $actualTitle,
555 'url' => '',
556 'content_type' => 'text/html',
557 'language' => '',
558 ]);
559
560 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
561 $entry = new Entry(new User());
562 $proxy->updateEntry($entry, 'http://0.0.0.0');
563
564 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
565 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
566 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
567 }
568
569 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
570 {
571 // See http://graphemica.com for more info about the characters
572 // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is a invalid UTF-8 character.
573 // The correct UTF-8 € character (U+20AC) is E282AC
574 $actualTitle = $this->hexToStr('61' . '80' . '62');
575
576 $tagger = $this->getTaggerMock();
577 $tagger->expects($this->once())
578 ->method('tag');
579
580 $graby = $this->getMockBuilder('Graby\Graby')
581 ->setMethods(['fetchContent'])
582 ->disableOriginalConstructor()
583 ->getMock();
584
585 $graby->expects($this->any())
586 ->method('fetchContent')
587 ->willReturn([
588 'html' => false,
589 'title' => $actualTitle,
590 'url' => '',
591 'content_type' => 'text/html',
592 'language' => '',
593 ]);
594
595 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
596 $entry = new Entry(new User());
597 $proxy->updateEntry($entry, 'http://0.0.0.0');
598
599 // 'ab' (61;62) because all invalid UTF-8 character (like 80) are removed
600 $expectedTitle = '61' . '62';
601 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
602 }
603
604 public function testPdfWithUTF16BETitle_convertToUTF8()
605 {
606 // See http://graphemica.com for more info about the characters
607 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
608 $actualTitle = $this->hexToStr('D83DDE3B');
609
610 $tagger = $this->getTaggerMock();
611 $tagger->expects($this->once())
612 ->method('tag');
613
614 $graby = $this->getMockBuilder('Graby\Graby')
615 ->setMethods(['fetchContent'])
616 ->disableOriginalConstructor()
617 ->getMock();
618
619 $graby->expects($this->any())
620 ->method('fetchContent')
621 ->willReturn([
622 'html' => false,
623 'title' => $actualTitle,
624 'url' => '',
625 'content_type' => 'application/pdf',
626 'language' => '',
627 ]);
628
629 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
630 $entry = new Entry(new User());
631 $proxy->updateEntry($entry, 'http://0.0.0.0');
632
633 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
634 $expectedTitle = 'F09F98BB';
635 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
636 }
637
638 public function testPdfWithUTF8Title_doNothing()
639 {
640 // See http://graphemica.com for more info about the characters
641 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF8
642 $actualTitle = $this->hexToStr('F09F98BB');
643
644 $tagger = $this->getTaggerMock();
645 $tagger->expects($this->once())
646 ->method('tag');
647
648 $graby = $this->getMockBuilder('Graby\Graby')
649 ->setMethods(['fetchContent'])
650 ->disableOriginalConstructor()
651 ->getMock();
652
653 $graby->expects($this->any())
654 ->method('fetchContent')
655 ->willReturn([
656 'html' => false,
657 'title' => $actualTitle,
658 'url' => '',
659 'content_type' => 'application/pdf',
660 'language' => '',
661 ]);
662
663 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
664 $entry = new Entry(new User());
665 $proxy->updateEntry($entry, 'http://0.0.0.0');
666
667 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
668 $expectedTitle = 'F09F98BB';
669 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
670 }
671
672 public function testPdfWithWINDOWS1252Title_convertToUTF8()
673 {
674 // See http://graphemica.com for more info about the characters
675 // '€' (80) in hexadecimal and WINDOWS-1252
676 $actualTitle = $this->hexToStr('80');
677
678 $tagger = $this->getTaggerMock();
679 $tagger->expects($this->once())
680 ->method('tag');
681
682 $graby = $this->getMockBuilder('Graby\Graby')
683 ->setMethods(['fetchContent'])
684 ->disableOriginalConstructor()
685 ->getMock();
686
687 $graby->expects($this->any())
688 ->method('fetchContent')
689 ->willReturn([
690 'html' => false,
691 'title' => $actualTitle,
692 'url' => '',
693 'content_type' => 'application/pdf',
694 'language' => '',
695 ]);
696
697 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
698 $entry = new Entry(new User());
699 $proxy->updateEntry($entry, 'http://0.0.0.0');
700
701 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
702 $expectedTitle = 'E282AC';
703 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
704 }
705
706 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
707 {
708 // See http://graphemica.com for more info about the characters
709 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
710 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
711 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
712
713 $tagger = $this->getTaggerMock();
714 $tagger->expects($this->once())
715 ->method('tag');
716
717 $graby = $this->getMockBuilder('Graby\Graby')
718 ->setMethods(['fetchContent'])
719 ->disableOriginalConstructor()
720 ->getMock();
721
722 $graby->expects($this->any())
723 ->method('fetchContent')
724 ->willReturn([
725 'html' => false,
726 'title' => $actualTitle,
727 'url' => '',
728 'content_type' => 'application/pdf',
729 'language' => '',
730 ]);
731
732 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
733 $entry = new Entry(new User());
734 $proxy->updateEntry($entry, 'http://0.0.0.0');
735
736 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
737 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
738 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
739 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
740 }
741
742 /**
743 * https://stackoverflow.com/a/18506801.
744 *
745 * @param $string
746 *
747 * @return string
748 */
749 private function strToHex($string)
750 {
751 $hex = '';
752 for ($i = 0; $i < \strlen($string); ++$i) {
753 $ord = \ord($string[$i]);
754 $hexCode = dechex($ord);
755 $hex .= substr('0' . $hexCode, -2);
756 }
757
758 return strtoupper($hex);
759 }
760
761 /**
762 * https://stackoverflow.com/a/18506801.
763 *
764 * @param $hex
765 *
766 * @return string
767 */
768 private function hexToStr($hex)
769 {
770 $string = '';
771 for ($i = 0; $i < \strlen($hex) - 1; $i += 2) {
772 $string .= \chr(hexdec($hex[$i] . $hex[$i + 1]));
773 }
774
775 return $string;
776 }
777
535 private function getTaggerMock() 778 private function getTaggerMock()
536 { 779 {
537 return $this->getMockBuilder(RuleBasedTagger::class) 780 return $this->getMockBuilder(RuleBasedTagger::class)
diff --git a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
index 0e1d296b..cda5f843 100644
--- a/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
+++ b/tests/Wallabag/CoreBundle/Helper/DownloadImagesTest.php
@@ -183,4 +183,73 @@ class DownloadImagesTest extends TestCase
183 $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image'); 183 $this->assertContains('http://wallabag.io/assets/images/9/b/9b0ead26/', $res, 'Content-Type was empty but data is ok for an image');
184 $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']); 184 $this->assertContains('DownloadImages: Checking extension (alternative)', $logHandler->getRecords()[3]['message']);
185 } 185 }
186
187 public function testProcessImageWithSrcset()
188 {
189 $client = new Client();
190
191 $mock = new Mock([
192 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
193 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
194 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
195 ]);
196
197 $client->getEmitter()->attach($mock);
198
199 $logHandler = new TestHandler();
200 $logger = new Logger('test', [$logHandler]);
201
202 $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
203 $res = $download->processHtml(123, '<p><img class="alignnone wp-image-1153" src="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg" alt="" width="628" height="444" srcset="http://piketty.blog.lemonde.fr/files/2017/10/F1FR-530x375.jpg 530w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-768x543.jpg 768w, http://piketty.blog.lemonde.fr/files/2017/10/F1FR-900x636.jpg 900w" sizes="(max-width: 628px) 100vw, 628px" /></p>', 'http://piketty.blog.lemonde.fr/2017/10/12/budget-2018-la-jeunesse-sacrifiee/');
204
205 $this->assertNotContains('http://piketty.blog.lemonde.fr/', $res, 'Image srcset attribute were not replaced');
206 }
207
208 public function testProcessImageWithTrickySrcset()
209 {
210 $client = new Client();
211
212 $mock = new Mock([
213 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
214 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
215 new Response(200, ['content-type' => 'image/jpeg'], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
216 ]);
217
218 $client->getEmitter()->attach($mock);
219
220 $logHandler = new TestHandler();
221 $logger = new Logger('test', [$logHandler]);
222
223 $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
224 $res = $download->processHtml(123, '<figure id="post-257260" class="align-none media-257260"><img src="https://cdn.css-tricks.com/wp-content/uploads/2017/08/the-critical-request.png" srcset="https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_1000,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 1000w, https://res.cloudinary.com/css-tricks/image/upload/c_scale,w_200,f_auto,q_auto/v1501594717/the-critical-request_bqdfaa.png 200w" sizes="(min-width: 1850px) calc( (100vw - 555px) / 3 )
225 (min-width: 1251px) calc( (100vw - 530px) / 2 )
226 (min-width: 1086px) calc(100vw - 480px)
227 (min-width: 626px) calc(100vw - 335px)
228 calc(100vw - 30px)" alt="" /></figure>', 'https://css-tricks.com/the-critical-request/');
229
230 $this->assertNotContains('f_auto,q_auto', $res, 'Image srcset attribute were not replaced');
231 }
232
233 public function testProcessImageWithNullPath()
234 {
235 $client = new Client();
236
237 $mock = new Mock([
238 new Response(200, ['content-type' => null], Stream::factory(file_get_contents(__DIR__ . '/../fixtures/image-no-content-type.jpg'))),
239 ]);
240
241 $client->getEmitter()->attach($mock);
242
243 $logHandler = new TestHandler();
244 $logger = new Logger('test', [$logHandler]);
245
246 $download = new DownloadImages($client, sys_get_temp_dir() . '/wallabag_test', 'http://wallabag.io/', $logger);
247
248 $res = $download->processSingleImage(
249 123,
250 null,
251 'https://framablog.org/2018/06/30/engagement-atypique/'
252 );
253 $this->assertFalse($res);
254 }
186} 255}