3 namespace Tests\Wallabag\CoreBundle\Helper
;
6 use Monolog\Handler\TestHandler
;
8 use PHPUnit\Framework\TestCase
;
9 use Psr\Log\NullLogger
;
10 use Symfony\Component\Validator\ConstraintViolation
;
11 use Symfony\Component\Validator\ConstraintViolationList
;
12 use Symfony\Component\Validator\Validator\RecursiveValidator
;
13 use Wallabag\CoreBundle\Entity\Entry
;
14 use Wallabag\CoreBundle\Helper\ContentProxy
;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger
;
16 use Wallabag\UserBundle\Entity\User
;
18 class ContentProxyTest
extends TestCase
// Error message handed to every ContentProxy built in these tests; several tests expect it
// (alone or as a prefix) as the resulting entry content.
20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
// Runs updateEntry() on the malformed URL 'http://user@:80' with a mocked Graby.
// Expected: URL kept as given, content equal to the fetching error message,
// empty title/preview picture/mimetype/language, 0.0 reading time and a null domain name.
// NOTE(review): the value returned by the mocked fetchContent is not visible here - confirm.
22 public function testWithBadUrl()
24 $tagger = $this->getTaggerMock();
25 $tagger->expects($this->once())
28 $graby = $this->getMockBuilder('Graby\Graby')
29 ->setMethods(['fetchContent'])
30 ->disableOriginalConstructor()
33 $graby->expects($this->any())
34 ->method('fetchContent')
45 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
46 $entry = new Entry(new User());
47 $proxy->updateEntry($entry, 'http://user@:80');
49 $this->assertSame('http://user@:80', $entry->getUrl());
50 $this->assertEmpty($entry->getTitle());
51 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
52 $this->assertEmpty($entry->getPreviewPicture());
53 $this->assertEmpty($entry->getMimetype());
54 $this->assertEmpty($entry->getLanguage());
55 $this->assertSame(0.0, $entry->getReadingTime());
56 $this->assertNull($entry->getDomainName());
// Runs updateEntry() on 'http://0.0.0.0' with a mocked Graby.
// Expected: URL kept, content equal to the fetching error message, empty
// title/preview picture/mimetype/language, 0.0 reading time and domain name '0.0.0.0'.
// NOTE(review): the value returned by the mocked fetchContent is not visible here - confirm.
59 public function testWithEmptyContent()
61 $tagger = $this->getTaggerMock();
62 $tagger->expects($this->once())
65 $graby = $this->getMockBuilder('Graby\Graby')
66 ->setMethods(['fetchContent'])
67 ->disableOriginalConstructor()
70 $graby->expects($this->any())
71 ->method('fetchContent')
82 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
83 $entry = new Entry(new User());
84 $proxy->updateEntry($entry, 'http://0.0.0.0');
86 $this->assertSame('http://0.0.0.0', $entry->getUrl());
87 $this->assertEmpty($entry->getTitle());
88 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
89 $this->assertEmpty($entry->getPreviewPicture());
90 $this->assertEmpty($entry->getMimetype());
91 $this->assertEmpty($entry->getLanguage());
92 $this->assertSame(0.0, $entry->getReadingTime());
93 $this->assertSame('0.0.0.0', $entry->getDomainName());
// Fetched data carries a title ('my title') and a description ('desc') but, judging by the
// assertions, no usable content: the entry content must be the fetching error message
// followed by the "But we found a short description" paragraph and the description.
// NOTE(review): only part of the mocked fetchContent payload is visible here.
96 public function testWithEmptyContentButOG()
98 $tagger = $this->getTaggerMock();
99 $tagger->expects($this->once())
102 $graby = $this->getMockBuilder('Graby\Graby')
103 ->setMethods(['fetchContent'])
104 ->disableOriginalConstructor()
107 $graby->expects($this->any())
108 ->method('fetchContent')
111 'title' => 'my title',
114 'content-type' => '',
118 'description' => 'desc',
121 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
122 $entry = new Entry(new User());
123 $proxy->updateEntry($entry, 'http://domain.io');
125 $this->assertSame('http://domain.io', $entry->getUrl());
126 $this->assertSame('my title', $entry->getTitle());
127 $this->assertSame($this->fetchingErrorMessage
. '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
128 $this->assertEmpty($entry->getPreviewPicture());
129 $this->assertEmpty($entry->getLanguage());
130 $this->assertEmpty($entry->getHttpStatus());
131 $this->assertEmpty($entry->getMimetype());
132 $this->assertSame(0.0, $entry->getReadingTime());
133 $this->assertSame('domain.io', $entry->getDomainName());
// Nominal case: the mocked fetchContent payload has html, a title, the final URL
// 'http://1.1.1.1', an image and a 'text/html' content type. The entry must take its URL,
// title, preview picture, mimetype and domain name from that payload rather than from the
// 'http://0.0.0.0' URL passed to updateEntry().
// NOTE(review): the payload entries behind the asserted language 'fr' and status '200' are not visible here.
136 public function testWithContent()
138 $tagger = $this->getTaggerMock();
139 $tagger->expects($this->once())
142 $graby = $this->getMockBuilder('Graby\Graby')
143 ->setMethods(['fetchContent'])
144 ->disableOriginalConstructor()
147 $graby->expects($this->any())
148 ->method('fetchContent')
150 'html' => str_repeat('this is my content', 325),
151 'title' => 'this is my title',
152 'url' => 'http://1.1.1.1',
155 'description' => 'OG desc',
156 'image' => 'http://3.3.3.3/cover.jpg',
158 'content-type' => 'text/html',
162 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
163 $entry = new Entry(new User());
164 $proxy->updateEntry($entry, 'http://0.0.0.0');
166 $this->assertSame('http://1.1.1.1', $entry->getUrl());
167 $this->assertSame('this is my title', $entry->getTitle());
168 $this->assertContains('content', $entry->getContent());
169 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
170 $this->assertSame('text/html', $entry->getMimetype());
171 $this->assertSame('fr', $entry->getLanguage());
172 $this->assertSame('200', $entry->getHttpStatus());
173 $this->assertSame(4.0, $entry->getReadingTime());
174 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Same shape as the nominal content test, but no 'image' entry is visible in the payload and
// the html contains no image: the preview picture is expected to be null.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
177 public function testWithContentAndNoOgImage()
179 $tagger = $this->getTaggerMock();
180 $tagger->expects($this->once())
183 $graby = $this->getMockBuilder('Graby\Graby')
184 ->setMethods(['fetchContent'])
185 ->disableOriginalConstructor()
188 $graby->expects($this->any())
189 ->method('fetchContent')
191 'html' => str_repeat('this is my content', 325),
192 'title' => 'this is my title',
193 'url' => 'http://1.1.1.1',
196 'description' => 'OG desc',
199 'content-type' => 'text/html',
203 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
204 $entry = new Entry(new User());
205 $proxy->updateEntry($entry, 'http://0.0.0.0');
207 $this->assertSame('http://1.1.1.1', $entry->getUrl());
208 $this->assertSame('this is my title', $entry->getTitle());
209 $this->assertContains('content', $entry->getContent());
210 $this->assertNull($entry->getPreviewPicture());
211 $this->assertSame('text/html', $entry->getMimetype());
212 $this->assertSame('fr', $entry->getLanguage());
213 $this->assertSame('200', $entry->getHttpStatus());
214 $this->assertSame(4.0, $entry->getReadingTime());
215 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The html payload embeds an <img> pointing at 'http://3.3.3.3/cover.jpg' and no separate
// 'image' entry is visible: the preview picture is expected to be that embedded image URL,
// and the html is stored unchanged as the entry content.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
218 public function testWithContentAndContentImage()
220 $tagger = $this->getTaggerMock();
221 $tagger->expects($this->once())
224 $graby = $this->getMockBuilder('Graby\Graby')
225 ->setMethods(['fetchContent'])
226 ->disableOriginalConstructor()
229 $graby->expects($this->any())
230 ->method('fetchContent')
232 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
233 'title' => 'this is my title',
234 'url' => 'http://1.1.1.1',
236 'content-type' => 'text/html',
243 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
244 $entry = new Entry(new User());
245 $proxy->updateEntry($entry, 'http://0.0.0.0');
247 $this->assertSame('http://1.1.1.1', $entry->getUrl());
248 $this->assertSame('this is my title', $entry->getTitle());
249 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
250 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
251 $this->assertSame('text/html', $entry->getMimetype());
252 $this->assertSame('fr', $entry->getLanguage());
253 $this->assertSame('200', $entry->getHttpStatus());
254 $this->assertSame(0.0, $entry->getReadingTime());
255 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The html payload embeds 'nevermind.jpg' while the payload also provides an 'image' entry
// ('cover.jpg'): the preview picture is expected to be the 'image' entry, not the embedded one.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
258 public function testWithContentImageAndOgImage()
260 $tagger = $this->getTaggerMock();
261 $tagger->expects($this->once())
264 $graby = $this->getMockBuilder('Graby\Graby')
265 ->setMethods(['fetchContent'])
266 ->disableOriginalConstructor()
269 $graby->expects($this->any())
270 ->method('fetchContent')
272 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
273 'title' => 'this is my title',
274 'url' => 'http://1.1.1.1',
276 'content-type' => 'text/html',
280 'image' => 'http://3.3.3.3/cover.jpg',
283 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
284 $entry = new Entry(new User());
285 $proxy->updateEntry($entry, 'http://0.0.0.0');
287 $this->assertSame('http://1.1.1.1', $entry->getUrl());
288 $this->assertSame('this is my title', $entry->getTitle());
289 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
290 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
291 $this->assertSame('text/html', $entry->getMimetype());
292 $this->assertSame('fr', $entry->getLanguage());
293 $this->assertSame('200', $entry->getHttpStatus());
294 $this->assertSame(0.0, $entry->getReadingTime());
295 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The payload declares the language 'dontexist' and the validator mock (built without the
// default stub) is expected to be called once, returning one violation for that value.
// Expected: the entry language stays null while the other fields are filled in.
// NOTE(review): the method configured on the validator mock is not visible here.
298 public function testWithContentAndBadLanguage()
300 $tagger = $this->getTaggerMock();
301 $tagger->expects($this->once())
304 $validator = $this->getValidator(false);
305 $validator->expects($this->once())
307 ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
309 $graby = $this->getMockBuilder('Graby\Graby')
310 ->setMethods(['fetchContent'])
311 ->disableOriginalConstructor()
314 $graby->expects($this->any())
315 ->method('fetchContent')
317 'html' => str_repeat('this is my content', 325),
318 'title' => 'this is my title',
319 'url' => 'http://1.1.1.1',
320 'language' => 'dontexist',
323 'content-type' => 'text/html',
327 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
328 $entry = new Entry(new User());
329 $proxy->updateEntry($entry, 'http://0.0.0.0');
331 $this->assertSame('http://1.1.1.1', $entry->getUrl());
332 $this->assertSame('this is my title', $entry->getTitle());
333 $this->assertContains('content', $entry->getContent());
334 $this->assertSame('text/html', $entry->getMimetype());
335 $this->assertNull($entry->getLanguage());
336 $this->assertSame('200', $entry->getHttpStatus());
337 $this->assertSame(4.0, $entry->getReadingTime());
338 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The payload provides the image 'https://'. The validator mock is expected to be called
// exactly twice: the first call returns no violation, the second returns one violation for
// the value 'https://'. Expected: the preview picture stays null and the language is 'fr'.
// NOTE(review): the method configured on the validator mock is not visible here.
341 public function testWithContentAndBadOgImage()
343 $tagger = $this->getTaggerMock();
344 $tagger->expects($this->once())
347 $validator = $this->getValidator(false);
348 $validator->expects($this->exactly(2))
350 ->will($this->onConsecutiveCalls(
351 new ConstraintViolationList(),
352 new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
355 $graby = $this->getMockBuilder('Graby\Graby')
356 ->setMethods(['fetchContent'])
357 ->disableOriginalConstructor()
360 $graby->expects($this->any())
361 ->method('fetchContent')
363 'html' => str_repeat('this is my content', 325),
364 'title' => 'this is my title',
365 'url' => 'http://1.1.1.1',
367 'content-type' => 'text/html',
371 'description' => 'OG desc',
372 'image' => 'https://',
375 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
376 $entry = new Entry(new User());
377 $proxy->updateEntry($entry, 'http://0.0.0.0');
379 $this->assertSame('http://1.1.1.1', $entry->getUrl());
380 $this->assertSame('this is my title', $entry->getTitle());
381 $this->assertContains('content', $entry->getContent());
382 $this->assertNull($entry->getPreviewPicture());
383 $this->assertSame('text/html', $entry->getMimetype());
384 $this->assertSame('fr', $entry->getLanguage());
385 $this->assertSame('200', $entry->getHttpStatus());
386 $this->assertSame(4.0, $entry->getReadingTime());
387 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Uses a real Graby instance and a content array (timestamp date '1395635872', three authors,
// a 'cache-control' entry, ...) instead of a mocked fetch. Expected: the date is stored as
// 24/03/2014, all three authors appear in getPublishedBy() and the headers contain 'no-cache'.
// NOTE(review): the call that hands the content array to the proxy is not visible here;
// the trailing `true` constructor argument presumably enables header storage - confirm.
390 public function testWithForcedContent()
392 $tagger = $this->getTaggerMock();
393 $tagger->expects($this->once())
396 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
397 $entry = new Entry(new User());
402 'html' => str_repeat('this is my content', 325),
403 'title' => 'this is my title',
404 'url' => 'http://1.1.1.1',
406 'date' => '1395635872',
407 'authors' => ['Jeremy', 'Nico', 'Thomas'],
409 'cache-control' => 'no-cache',
410 'content-type' => 'text/html',
415 $this->assertSame('http://1.1.1.1', $entry->getUrl());
416 $this->assertSame('this is my title', $entry->getTitle());
417 $this->assertContains('content', $entry->getContent());
418 $this->assertSame('text/html', $entry->getMimetype());
419 $this->assertSame('fr', $entry->getLanguage());
420 $this->assertSame(4.0, $entry->getReadingTime());
421 $this->assertSame('1.1.1.1', $entry->getDomainName());
422 $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
423 $this->assertContains('Jeremy', $entry->getPublishedBy());
424 $this->assertContains('Nico', $entry->getPublishedBy());
425 $this->assertContains('Thomas', $entry->getPublishedBy());
426 $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
427 $this->assertContains('no-cache', $entry->getHeaders());
// Same idea as the forced-content test, with the date given as the datetime string
// '2016-09-08T11:55:58+0200'. Expected: the published date is stored as 08/09/2016.
// A Logger backed by a TestHandler is injected, but no log assertion is visible here.
// NOTE(review): the call that hands the content array to the proxy is not visible here.
430 public function testWithForcedContentAndDatetime()
432 $tagger = $this->getTaggerMock();
433 $tagger->expects($this->once())
436 $logHandler = new TestHandler();
437 $logger = new Logger('test', [$logHandler]);
439 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
440 $entry = new Entry(new User());
445 'html' => str_repeat('this is my content', 325),
446 'title' => 'this is my title',
447 'url' => 'http://1.1.1.1',
449 'date' => '2016-09-08T11:55:58+0200',
451 'content-type' => 'text/html',
456 $this->assertSame('http://1.1.1.1', $entry->getUrl());
457 $this->assertSame('this is my title', $entry->getTitle());
458 $this->assertContains('content', $entry->getContent());
459 $this->assertSame('text/html', $entry->getMimetype());
460 $this->assertSame('fr', $entry->getLanguage());
461 $this->assertSame(4.0, $entry->getReadingTime());
462 $this->assertSame('1.1.1.1', $entry->getDomainName());
463 $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
// The content array carries the unparsable date '01 02 2012'. Expected: the published date
// stays null, the TestHandler captured exactly 3 log records, and the first record's message
// contains 'Error while defining date'.
// NOTE(review): the call that hands the content array to the proxy is not visible here.
466 public function testWithForcedContentAndBadDate()
468 $tagger = $this->getTaggerMock();
469 $tagger->expects($this->once())
472 $logger = new Logger('foo');
473 $handler = new TestHandler();
474 $logger->pushHandler($handler);
476 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
477 $entry = new Entry(new User());
482 'html' => str_repeat('this is my content', 325),
483 'title' => 'this is my title',
484 'url' => 'http://1.1.1.1',
486 'date' => '01 02 2012',
488 'content-type' => 'text/html',
493 $this->assertSame('http://1.1.1.1', $entry->getUrl());
494 $this->assertSame('this is my title', $entry->getTitle());
495 $this->assertContains('content', $entry->getContent());
496 $this->assertSame('text/html', $entry->getMimetype());
497 $this->assertSame('fr', $entry->getLanguage());
498 $this->assertSame(4.0, $entry->getReadingTime());
499 $this->assertSame('1.1.1.1', $entry->getDomainName());
500 $this->assertNull($entry->getPublishedAt());
502 $records = $handler->getRecords();
504 $this->assertCount(3, $records);
505 $this->assertContains('Error while defining date', $records[0]['message']);
// The tagger mock is configured to throw a generic \Exception on its single expected call.
// Expected: the entry ends up with no tags (the test itself does not expect an exception).
// NOTE(review): the call that hands the content array to the proxy is not visible here.
508 public function testTaggerThrowException()
510 $tagger = $this->getTaggerMock();
511 $tagger->expects($this->once())
513 ->will($this->throwException(new \
Exception()));
515 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
516 $entry = new Entry(new User());
521 'html' => str_repeat('this is my content', 325),
522 'title' => 'this is my title',
523 'url' => 'http://1.1.1.1',
526 'content-type' => 'text/html',
531 $this->assertCount(0, $entry->getTags());
// Data provider for testWithCrazyHtmlContent($html, $escapedString).
// The visible 'script and comment' dataset holds html with a <script> hidden in an IE
// conditional comment, and the script-bearing string that must not survive in the content.
// NOTE(review): further datasets may exist outside this excerpt.
534 public function dataForCrazyHtml()
537 'script and comment' => [
538 '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
542 '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
// For each dataset of dataForCrazyHtml: after the update, the entry content must NOT contain
// $escapedString, while URL, title, preview picture, mimetype, language, status and domain
// name are filled in as in the nominal case.
// NOTE(review): how $html is handed to the proxy is not visible in this excerpt.
549 * @dataProvider dataForCrazyHtml
551 public function testWithCrazyHtmlContent($html, $escapedString)
553 $tagger = $this->getTaggerMock();
554 $tagger->expects($this->once())
557 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
558 $entry = new Entry(new User());
564 'title' => 'this is my title',
565 'url' => 'http://1.1.1.1',
568 //'og_title' => 'my OG title',
569 'description' => 'OG desc',
570 'image' => 'http://3.3.3.3/cover.jpg',
572 'content-type' => 'text/html',
577 $this->assertSame('http://1.1.1.1', $entry->getUrl());
578 $this->assertSame('this is my title', $entry->getTitle());
579 $this->assertNotContains($escapedString, $entry->getContent());
580 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
581 $this->assertSame('text/html', $entry->getMimetype());
582 $this->assertSame('fr', $entry->getLanguage());
583 $this->assertSame('200', $entry->getHttpStatus());
584 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched resource is itself an image: content type 'image/jpeg', URL ending in
// 'image.jpg', html wrapping that image. Expected: the image URL is used as entry URL and as
// preview picture, appears in the content, and the mimetype is 'image/jpeg'.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
587 public function testWithImageAsContent()
589 $tagger = $this->getTaggerMock();
590 $tagger->expects($this->once())
593 $graby = $this->getMockBuilder('Graby\Graby')
594 ->setMethods(['fetchContent'])
595 ->disableOriginalConstructor()
598 $graby->expects($this->any())
599 ->method('fetchContent')
601 'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
602 'title' => 'this is my title',
603 'url' => 'http://1.1.1.1/image.jpg',
606 'content-type' => 'image/jpeg',
610 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
611 $entry = new Entry(new User());
612 $proxy->updateEntry($entry, 'http://0.0.0.0');
614 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
615 $this->assertSame('this is my title', $entry->getTitle());
616 $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
617 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
618 $this->assertSame('image/jpeg', $entry->getMimetype());
619 $this->assertSame('200', $entry->getHttpStatus());
620 $this->assertSame('1.1.1.1', $entry->getDomainName());
// A 'text/html' payload whose title is already valid UTF-8 must keep its title
// byte-for-byte; the comparison is done on the hexadecimal form of the title.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
623 public function testWebsiteWithValidUTF8Title_doNothing()
625 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
626 // See http://graphemica.com for more info about the characters
627 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
628 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
630 $tagger = $this->getTaggerMock();
631 $tagger->expects($this->once())
634 $graby = $this->getMockBuilder('Graby\Graby')
635 ->setMethods(['fetchContent'])
636 ->disableOriginalConstructor()
639 $graby->expects($this->any())
640 ->method('fetchContent')
643 'title' => $actualTitle,
646 'content-type' => 'text/html',
651 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
652 $entry = new Entry(new User());
653 $proxy->updateEntry($entry, 'http://0.0.0.0');
655 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
656 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
657 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// A 'text/html' payload whose title contains a byte that is invalid in UTF-8 (0x80) must end
// up with that byte removed; the comparison is done on the hexadecimal form of the title.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
660 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
662 // See http://graphemica.com for more info about the characters
663 // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
664 // The correct UTF-8 € character (U+20AC) is E282AC
665 $actualTitle = $this->hexToStr('61' . '80' . '62');
667 $tagger = $this->getTaggerMock();
668 $tagger->expects($this->once())
671 $graby = $this->getMockBuilder('Graby\Graby')
672 ->setMethods(['fetchContent'])
673 ->disableOriginalConstructor()
676 $graby->expects($this->any())
677 ->method('fetchContent')
680 'title' => $actualTitle,
683 'content-type' => 'text/html',
688 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
689 $entry = new Entry(new User());
690 $proxy->updateEntry($entry, 'http://0.0.0.0');
692 // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
693 $expectedTitle = '61' . '62';
694 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' payload whose title is encoded as UTF-16BE must end up with the same
// character encoded as UTF-8; the comparison is done on the hexadecimal form of the title.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
697 public function testPdfWithUTF16BETitle_convertToUTF8()
699 // See http://graphemica.com for more info about the characters
700 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
701 $actualTitle = $this->hexToStr('D83DDE3B');
703 $tagger = $this->getTaggerMock();
704 $tagger->expects($this->once())
707 $graby = $this->getMockBuilder('Graby\Graby')
708 ->setMethods(['fetchContent'])
709 ->disableOriginalConstructor()
712 $graby->expects($this->any())
713 ->method('fetchContent')
716 'title' => $actualTitle,
719 'content-type' => 'application/pdf',
724 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
725 $entry = new Entry(new User());
726 $proxy->updateEntry($entry, 'http://0.0.0.0');
728 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
729 $expectedTitle = 'F09F98BB';
730 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' payload whose title is already valid UTF-8 must keep its title
// byte-for-byte; the comparison is done on the hexadecimal form of the title.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
733 public function testPdfWithUTF8Title_doNothing()
735 // See http://graphemica.com for more info about the characters
736 // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
737 $actualTitle = $this->hexToStr('F09F98BB');
739 $tagger = $this->getTaggerMock();
740 $tagger->expects($this->once())
743 $graby = $this->getMockBuilder('Graby\Graby')
744 ->setMethods(['fetchContent'])
745 ->disableOriginalConstructor()
748 $graby->expects($this->any())
749 ->method('fetchContent')
752 'title' => $actualTitle,
755 'content-type' => 'application/pdf',
760 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
761 $entry = new Entry(new User());
762 $proxy->updateEntry($entry, 'http://0.0.0.0');
764 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
765 $expectedTitle = 'F09F98BB';
766 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' payload whose title is the single WINDOWS-1252 byte 0x80 ('€') must end
// up with the UTF-8 encoding of that character (E282AC); compared in hexadecimal form.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
769 public function testPdfWithWINDOWS1252Title_convertToUTF8()
771 // See http://graphemica.com for more info about the characters
772 // '€' (80) in hexadecimal and WINDOWS-1252
773 $actualTitle = $this->hexToStr('80');
775 $tagger = $this->getTaggerMock();
776 $tagger->expects($this->once())
779 $graby = $this->getMockBuilder('Graby\Graby')
780 ->setMethods(['fetchContent'])
781 ->disableOriginalConstructor()
784 $graby->expects($this->any())
785 ->method('fetchContent')
788 'title' => $actualTitle,
791 'content-type' => 'application/pdf',
796 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
797 $entry = new Entry(new User());
798 $proxy->updateEntry($entry, 'http://0.0.0.0');
800 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
801 $expectedTitle = 'E282AC';
802 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' payload whose otherwise valid UTF-8 title contains the stray byte 0x81
// must end up with that byte removed and the remaining characters untouched; compared in
// hexadecimal form.
// NOTE(review): part of the mocked fetchContent payload is not visible here.
805 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
807 // See http://graphemica.com for more info about the characters
808 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
809 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
810 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
812 $tagger = $this->getTaggerMock();
813 $tagger->expects($this->once())
816 $graby = $this->getMockBuilder('Graby\Graby')
817 ->setMethods(['fetchContent'])
818 ->disableOriginalConstructor()
821 $graby->expects($this->any())
822 ->method('fetchContent')
825 'title' => $actualTitle,
828 'content-type' => 'application/pdf',
833 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
834 $entry = new Entry(new User());
835 $proxy->updateEntry($entry, 'http://0.0.0.0');
837 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
838 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
839 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
840 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// Named datasets for testWithChangedUrl. Each dataset feeds that test's parameters in order:
// $entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain.
// NOTE(review): several values of each dataset (and possibly whole datasets) are not visible
// in this excerpt, so positions cannot be read off the visible lines alone.
844 * Data provider for testWithChangedUrl.
846 * Arrays contain the following values:
850 * $expected_entry_url
851 * $expected_origin_url
854 public function dataForChangedUrl()
865 'origin already set' => [
873 'trailing slash' => [
874 'https://example.com/hello-world',
876 'https://example.com/hello-world/',
877 'https://example.com/hello-world/',
881 'query string in fetched content' => [
882 'https://example.org/hello',
884 'https://example.org/hello?world=1',
885 'https://example.org/hello?world=1',
886 'https://example.org/hello',
889 'fragment in fetched content' => [
890 'https://example.org/hello',
892 'https://example.org/hello#world',
893 'https://example.org/hello',
897 'fragment and query string in fetched content' => [
898 'https://example.org/hello',
900 'https://example.org/hello?foo#world',
901 'https://example.org/hello?foo#world',
902 'https://example.org/hello',
905 'different path and query string in fetch content' => [
906 'https://example.org/hello',
908 'https://example.org/world?foo',
909 'https://example.org/world?foo',
910 'https://example.org/hello',
913 'feedproxy ignore list test' => [
914 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
916 'https://example.org/hello-wallabag',
917 'https://example.org/hello-wallabag',
921 'feedproxy ignore list test with origin url already set' => [
922 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
923 'https://example.org/this-is-source',
924 'https://example.org/hello-wallabag',
925 'https://example.org/hello-wallabag',
926 'https://example.org/this-is-source',
929 'lemonde ignore pattern test' => [
930 'http://www.lemonde.fr/tiny/url',
932 'http://example.com/hello-world',
933 'http://example.com/hello-world',
// For each dataset of dataForChangedUrl: the entry starts with $origin_url as its origin URL,
// content whose 'url' is $content_url is applied, and the resulting entry URL, domain name and
// origin URL must equal the three expected values.
// NOTE(review): where $entry_url is used and how the content is handed to the proxy are not
// visible in this excerpt.
941 * @dataProvider dataForChangedUrl
943 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
945 $tagger = $this->getTaggerMock();
946 $tagger->expects($this->once())
949 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
950 $entry = new Entry(new User());
951 $entry->setOriginUrl($origin_url);
958 'url' => $content_url,
960 'content-type' => '',
967 $this->assertSame($expected_entry_url, $entry->getUrl());
968 $this->assertSame($expected_domain, $entry->getDomainName());
969 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
/**
 * Convert a binary string to its upper-case hexadecimal representation.
 *
 * Every byte of the input becomes exactly two hexadecimal digits, so the
 * result is always twice as long as the input (an empty string stays empty).
 * The tests use it to compare titles byte-for-byte regardless of encoding.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $string Raw bytes to encode
 *
 * @return string Upper-case hexadecimal digits, two per input byte
 */
private function strToHex($string)
{
    // bin2hex() already emits two zero-padded lower-case digits per byte,
    // which is what the former ord()/dechex()/substr() loop built by hand.
    return strtoupper(bin2hex($string));
}
/**
 * Convert hex to string.
 *
 * The input is read two hexadecimal digits at a time and each pair becomes
 * one byte of the result. A dangling single digit at the end is ignored.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $hex Hexadecimal digits, two per output byte
 *
 * @return string The decoded raw bytes
 */
private function hexToStr($hex)
{
    // Keep only complete two-digit groups; this also drops the empty chunk
    // that str_split() may produce for an empty input.
    $pairs = array_filter(str_split($hex, 2), static function ($pair) {
        return 2 === \strlen($pair);
    });

    $bytes = array_map(static function ($pair) {
        return \chr(hexdec($pair));
    }, $pairs);

    return implode('', $bytes);
}
// Starts a PHPUnit mock builder for RuleBasedTagger with only 'tag' declared as a mockable
// method and the original constructor disabled; each test then sets its own expectation on it.
// NOTE(review): the end of the builder chain (presumably ->getMock()) is not visible here.
1010 private function getTaggerMock()
1012 return $this->getMockBuilder(RuleBasedTagger
::class)
1013 ->setMethods(['tag'])
1014 ->disableOriginalConstructor()
/**
 * Provide the logger used by tests that do not inspect log output.
 *
 * @return NullLogger A fresh PSR-3 null logger
 */
private function getLogger()
{
    $silentLogger = new NullLogger();

    return $silentLogger;
}
1023 private function getValidator($withDefaultMock = true)
1025 $mock = $this->getMockBuilder(RecursiveValidator
::class)
1026 ->setMethods(['validate'])
1027 ->disableOriginalConstructor()
1030 if ($withDefaultMock) {
1031 $mock->expects($this->any())
1032 ->method('validate')
1033 ->willReturn(new ConstraintViolationList());