3 namespace Tests\Wallabag\CoreBundle\Helper
;
6 use Monolog\Handler\TestHandler
;
8 use PHPUnit\Framework\TestCase
;
9 use Psr\Log\NullLogger
;
10 use Symfony\Component\Validator\ConstraintViolation
;
11 use Symfony\Component\Validator\ConstraintViolationList
;
12 use Symfony\Component\Validator\Validator\RecursiveValidator
;
13 use Wallabag\CoreBundle\Entity\Entry
;
14 use Wallabag\CoreBundle\Helper\ContentProxy
;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger
;
16 use Wallabag\UserBundle\Entity\User
;
18 class ContentProxyTest
extends TestCase
20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
22 public function testWithBadUrl()
24 $tagger = $this->getTaggerMock();
25 $tagger->expects($this->once())
28 $graby = $this->getMockBuilder('Graby\Graby')
29 ->setMethods(['fetchContent'])
30 ->disableOriginalConstructor()
33 $graby->expects($this->any())
34 ->method('fetchContent')
43 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
44 $entry = new Entry(new User());
45 $proxy->updateEntry($entry, 'http://user@:80');
47 $this->assertSame('http://user@:80', $entry->getUrl());
48 $this->assertEmpty($entry->getTitle());
49 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
50 $this->assertEmpty($entry->getPreviewPicture());
51 $this->assertEmpty($entry->getMimetype());
52 $this->assertEmpty($entry->getLanguage());
53 $this->assertSame(0.0, $entry->getReadingTime());
54 $this->assertNull($entry->getDomainName());
57 public function testWithEmptyContent()
59 $tagger = $this->getTaggerMock();
60 $tagger->expects($this->once())
63 $graby = $this->getMockBuilder('Graby\Graby')
64 ->setMethods(['fetchContent'])
65 ->disableOriginalConstructor()
68 $graby->expects($this->any())
69 ->method('fetchContent')
78 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
79 $entry = new Entry(new User());
80 $proxy->updateEntry($entry, 'http://0.0.0.0');
82 $this->assertSame('http://0.0.0.0', $entry->getUrl());
83 $this->assertEmpty($entry->getTitle());
84 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
85 $this->assertEmpty($entry->getPreviewPicture());
86 $this->assertEmpty($entry->getMimetype());
87 $this->assertEmpty($entry->getLanguage());
88 $this->assertSame(0.0, $entry->getReadingTime());
89 $this->assertSame('0.0.0.0', $entry->getDomainName());
92 public function testWithEmptyContentButOG()
94 $tagger = $this->getTaggerMock();
95 $tagger->expects($this->once())
98 $graby = $this->getMockBuilder('Graby\Graby')
99 ->setMethods(['fetchContent'])
100 ->disableOriginalConstructor()
103 $graby->expects($this->any())
104 ->method('fetchContent')
107 'title' => 'my title',
109 'content_type' => '',
112 'description' => 'desc',
115 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
116 $entry = new Entry(new User());
117 $proxy->updateEntry($entry, 'http://domain.io');
119 $this->assertSame('http://domain.io', $entry->getUrl());
120 $this->assertSame('my title', $entry->getTitle());
121 $this->assertSame($this->fetchingErrorMessage
. '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
122 $this->assertEmpty($entry->getPreviewPicture());
123 $this->assertEmpty($entry->getLanguage());
124 $this->assertEmpty($entry->getHttpStatus());
125 $this->assertEmpty($entry->getMimetype());
126 $this->assertSame(0.0, $entry->getReadingTime());
127 $this->assertSame('domain.io', $entry->getDomainName());
130 public function testWithContent()
132 $tagger = $this->getTaggerMock();
133 $tagger->expects($this->once())
136 $graby = $this->getMockBuilder('Graby\Graby')
137 ->setMethods(['fetchContent'])
138 ->disableOriginalConstructor()
141 $graby->expects($this->any())
142 ->method('fetchContent')
144 'html' => str_repeat('this is my content', 325),
145 'title' => 'this is my title',
146 'url' => 'http://1.1.1.1',
149 'description' => 'OG desc',
150 'image' => 'http://3.3.3.3/cover.jpg',
152 'content-type' => 'text/html',
156 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
157 $entry = new Entry(new User());
158 $proxy->updateEntry($entry, 'http://0.0.0.0');
160 $this->assertSame('http://1.1.1.1', $entry->getUrl());
161 $this->assertSame('this is my title', $entry->getTitle());
162 $this->assertContains('content', $entry->getContent());
163 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
164 $this->assertSame('text/html', $entry->getMimetype());
165 $this->assertSame('fr', $entry->getLanguage());
166 $this->assertSame('200', $entry->getHttpStatus());
167 $this->assertSame(4.0, $entry->getReadingTime());
168 $this->assertSame('1.1.1.1', $entry->getDomainName());
171 public function testWithContentAndNoOgImage()
173 $tagger = $this->getTaggerMock();
174 $tagger->expects($this->once())
177 $graby = $this->getMockBuilder('Graby\Graby')
178 ->setMethods(['fetchContent'])
179 ->disableOriginalConstructor()
182 $graby->expects($this->any())
183 ->method('fetchContent')
185 'html' => str_repeat('this is my content', 325),
186 'title' => 'this is my title',
187 'url' => 'http://1.1.1.1',
190 'description' => 'OG desc',
193 'content-type' => 'text/html',
197 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
198 $entry = new Entry(new User());
199 $proxy->updateEntry($entry, 'http://0.0.0.0');
201 $this->assertSame('http://1.1.1.1', $entry->getUrl());
202 $this->assertSame('this is my title', $entry->getTitle());
203 $this->assertContains('content', $entry->getContent());
204 $this->assertNull($entry->getPreviewPicture());
205 $this->assertSame('text/html', $entry->getMimetype());
206 $this->assertSame('fr', $entry->getLanguage());
207 $this->assertSame('200', $entry->getHttpStatus());
208 $this->assertSame(4.0, $entry->getReadingTime());
209 $this->assertSame('1.1.1.1', $entry->getDomainName());
212 public function testWithContentAndContentImage()
214 $tagger = $this->getTaggerMock();
215 $tagger->expects($this->once())
218 $graby = $this->getMockBuilder('Graby\Graby')
219 ->setMethods(['fetchContent'])
220 ->disableOriginalConstructor()
223 $graby->expects($this->any())
224 ->method('fetchContent')
226 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
227 'title' => 'this is my title',
228 'url' => 'http://1.1.1.1',
229 'content_type' => 'text/html',
233 'og_title' => 'my OG title',
234 'og_description' => 'OG desc',
239 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
240 $entry = new Entry(new User());
241 $proxy->updateEntry($entry, 'http://0.0.0.0');
243 $this->assertSame('http://1.1.1.1', $entry->getUrl());
244 $this->assertSame('this is my title', $entry->getTitle());
245 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
246 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
247 $this->assertSame('text/html', $entry->getMimetype());
248 $this->assertSame('fr', $entry->getLanguage());
249 $this->assertSame('200', $entry->getHttpStatus());
250 $this->assertSame(0.0, $entry->getReadingTime());
251 $this->assertSame('1.1.1.1', $entry->getDomainName());
254 public function testWithContentImageAndOgImage()
256 $tagger = $this->getTaggerMock();
257 $tagger->expects($this->once())
260 $graby = $this->getMockBuilder('Graby\Graby')
261 ->setMethods(['fetchContent'])
262 ->disableOriginalConstructor()
265 $graby->expects($this->any())
266 ->method('fetchContent')
268 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
269 'title' => 'this is my title',
270 'url' => 'http://1.1.1.1',
271 'content_type' => 'text/html',
275 'og_title' => 'my OG title',
276 'og_description' => 'OG desc',
277 'og_image' => 'http://3.3.3.3/cover.jpg',
281 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
282 $entry = new Entry(new User());
283 $proxy->updateEntry($entry, 'http://0.0.0.0');
285 $this->assertSame('http://1.1.1.1', $entry->getUrl());
286 $this->assertSame('this is my title', $entry->getTitle());
287 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
288 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
289 $this->assertSame('text/html', $entry->getMimetype());
290 $this->assertSame('fr', $entry->getLanguage());
291 $this->assertSame('200', $entry->getHttpStatus());
292 $this->assertSame(0.0, $entry->getReadingTime());
293 $this->assertSame('1.1.1.1', $entry->getDomainName());
296 public function testWithContentAndBadLanguage()
298 $tagger = $this->getTaggerMock();
299 $tagger->expects($this->once())
302 $validator = $this->getValidator(false);
303 $validator->expects($this->once())
305 ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
307 $graby = $this->getMockBuilder('Graby\Graby')
308 ->setMethods(['fetchContent'])
309 ->disableOriginalConstructor()
312 $graby->expects($this->any())
313 ->method('fetchContent')
315 'html' => str_repeat('this is my content', 325),
316 'title' => 'this is my title',
317 'url' => 'http://1.1.1.1',
318 'language' => 'dontexist',
321 'content-type' => 'text/html',
325 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
326 $entry = new Entry(new User());
327 $proxy->updateEntry($entry, 'http://0.0.0.0');
329 $this->assertSame('http://1.1.1.1', $entry->getUrl());
330 $this->assertSame('this is my title', $entry->getTitle());
331 $this->assertContains('content', $entry->getContent());
332 $this->assertSame('text/html', $entry->getMimetype());
333 $this->assertNull($entry->getLanguage());
334 $this->assertSame('200', $entry->getHttpStatus());
335 $this->assertSame(4.0, $entry->getReadingTime());
336 $this->assertSame('1.1.1.1', $entry->getDomainName());
339 public function testWithContentAndBadOgImage()
341 $tagger = $this->getTaggerMock();
342 $tagger->expects($this->once())
345 $validator = $this->getValidator(false);
346 $validator->expects($this->exactly(2))
348 ->will($this->onConsecutiveCalls(
349 new ConstraintViolationList(),
350 new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
353 $graby = $this->getMockBuilder('Graby\Graby')
354 ->setMethods(['fetchContent'])
355 ->disableOriginalConstructor()
358 $graby->expects($this->any())
359 ->method('fetchContent')
361 'html' => str_repeat('this is my content', 325),
362 'title' => 'this is my title',
363 'url' => 'http://1.1.1.1',
364 'content_type' => 'text/html',
367 'description' => 'OG desc',
368 'image' => 'https://',
370 'content-type' => 'text/html',
374 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
375 $entry = new Entry(new User());
376 $proxy->updateEntry($entry, 'http://0.0.0.0');
378 $this->assertSame('http://1.1.1.1', $entry->getUrl());
379 $this->assertSame('this is my title', $entry->getTitle());
380 $this->assertContains('content', $entry->getContent());
381 $this->assertNull($entry->getPreviewPicture());
382 $this->assertSame('text/html', $entry->getMimetype());
383 $this->assertSame('fr', $entry->getLanguage());
384 $this->assertSame('200', $entry->getHttpStatus());
385 $this->assertSame(4.0, $entry->getReadingTime());
386 $this->assertSame('1.1.1.1', $entry->getDomainName());
389 public function testWithForcedContent()
391 $tagger = $this->getTaggerMock();
392 $tagger->expects($this->once())
395 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
396 $entry = new Entry(new User());
401 'html' => str_repeat('this is my content', 325),
402 'title' => 'this is my title',
403 'url' => 'http://1.1.1.1',
405 'date' => '1395635872',
406 'authors' => ['Jeremy', 'Nico', 'Thomas'],
408 'cache-control' => 'no-cache',
409 'content-type' => 'text/html',
414 $this->assertSame('http://1.1.1.1', $entry->getUrl());
415 $this->assertSame('this is my title', $entry->getTitle());
416 $this->assertContains('content', $entry->getContent());
417 $this->assertSame('text/html', $entry->getMimetype());
418 $this->assertSame('fr', $entry->getLanguage());
419 $this->assertSame(4.0, $entry->getReadingTime());
420 $this->assertSame('1.1.1.1', $entry->getDomainName());
421 $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
422 $this->assertContains('Jeremy', $entry->getPublishedBy());
423 $this->assertContains('Nico', $entry->getPublishedBy());
424 $this->assertContains('Thomas', $entry->getPublishedBy());
425 $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
426 $this->assertContains('no-cache', $entry->getHeaders());
429 public function testWithForcedContentAndDatetime()
431 $tagger = $this->getTaggerMock();
432 $tagger->expects($this->once())
435 $logHandler = new TestHandler();
436 $logger = new Logger('test', [$logHandler]);
438 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
439 $entry = new Entry(new User());
444 'html' => str_repeat('this is my content', 325),
445 'title' => 'this is my title',
446 'url' => 'http://1.1.1.1',
448 'date' => '2016-09-08T11:55:58+0200',
450 'content-type' => 'text/html',
455 $this->assertSame('http://1.1.1.1', $entry->getUrl());
456 $this->assertSame('this is my title', $entry->getTitle());
457 $this->assertContains('content', $entry->getContent());
458 $this->assertSame('text/html', $entry->getMimetype());
459 $this->assertSame('fr', $entry->getLanguage());
460 $this->assertSame(4.0, $entry->getReadingTime());
461 $this->assertSame('1.1.1.1', $entry->getDomainName());
462 $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
465 public function testWithForcedContentAndBadDate()
467 $tagger = $this->getTaggerMock();
468 $tagger->expects($this->once())
471 $logger = new Logger('foo');
472 $handler = new TestHandler();
473 $logger->pushHandler($handler);
475 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
476 $entry = new Entry(new User());
481 'html' => str_repeat('this is my content', 325),
482 'title' => 'this is my title',
483 'url' => 'http://1.1.1.1',
485 'date' => '01 02 2012',
487 'content-type' => 'text/html',
492 $this->assertSame('http://1.1.1.1', $entry->getUrl());
493 $this->assertSame('this is my title', $entry->getTitle());
494 $this->assertContains('content', $entry->getContent());
495 $this->assertSame('text/html', $entry->getMimetype());
496 $this->assertSame('fr', $entry->getLanguage());
497 $this->assertSame(4.0, $entry->getReadingTime());
498 $this->assertSame('1.1.1.1', $entry->getDomainName());
499 $this->assertNull($entry->getPublishedAt());
501 $records = $handler->getRecords();
503 $this->assertCount(3, $records);
504 $this->assertContains('Error while defining date', $records[0]['message']);
507 public function testTaggerThrowException()
509 $tagger = $this->getTaggerMock();
510 $tagger->expects($this->once())
512 ->will($this->throwException(new \
Exception()));
514 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
515 $entry = new Entry(new User());
520 'html' => str_repeat('this is my content', 325),
521 'title' => 'this is my title',
522 'url' => 'http://1.1.1.1',
525 'content-type' => 'text/html',
530 $this->assertCount(0, $entry->getTags());
533 public function dataForCrazyHtml()
536 'script and comment' => [
537 '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
541 '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
548 * @dataProvider dataForCrazyHtml
550 public function testWithCrazyHtmlContent($html, $escapedString)
552 $tagger = $this->getTaggerMock();
553 $tagger->expects($this->once())
556 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
557 $entry = new Entry(new User());
563 'title' => 'this is my title',
564 'url' => 'http://1.1.1.1',
567 //'og_title' => 'my OG title',
568 'description' => 'OG desc',
569 'image' => 'http://3.3.3.3/cover.jpg',
571 'content-type' => 'text/html',
576 $this->assertSame('http://1.1.1.1', $entry->getUrl());
577 $this->assertSame('this is my title', $entry->getTitle());
578 $this->assertNotContains($escapedString, $entry->getContent());
579 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
580 $this->assertSame('text/html', $entry->getMimetype());
581 $this->assertSame('fr', $entry->getLanguage());
582 $this->assertSame('200', $entry->getHttpStatus());
583 $this->assertSame('1.1.1.1', $entry->getDomainName());
586 public function testWithImageAsContent()
588 $tagger = $this->getTaggerMock();
589 $tagger->expects($this->once())
592 $graby = $this->getMockBuilder('Graby\Graby')
593 ->setMethods(['fetchContent'])
594 ->disableOriginalConstructor()
597 $graby->expects($this->any())
598 ->method('fetchContent')
600 'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
601 'title' => 'this is my title',
602 'url' => 'http://1.1.1.1/image.jpg',
605 'content-type' => 'image/jpeg',
609 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
610 $entry = new Entry(new User());
611 $proxy->updateEntry($entry, 'http://0.0.0.0');
613 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
614 $this->assertSame('this is my title', $entry->getTitle());
615 $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
616 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
617 $this->assertSame('image/jpeg', $entry->getMimetype());
618 $this->assertSame('200', $entry->getHttpStatus());
619 $this->assertSame('1.1.1.1', $entry->getDomainName());
622 public function testWebsiteWithValidUTF8Title_doNothing()
624 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
625 // See http://graphemica.com for more info about the characters
626 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
627 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
629 $tagger = $this->getTaggerMock();
630 $tagger->expects($this->once())
633 $graby = $this->getMockBuilder('Graby\Graby')
634 ->setMethods(['fetchContent'])
635 ->disableOriginalConstructor()
638 $graby->expects($this->any())
639 ->method('fetchContent')
642 'title' => $actualTitle,
644 'content_type' => 'text/html',
648 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
649 $entry = new Entry(new User());
650 $proxy->updateEntry($entry, 'http://0.0.0.0');
652 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
653 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
654 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
657 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
659 // See http://graphemica.com for more info about the characters
// 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
661 // The correct UTF-8 € character (U+20AC) is E282AC
662 $actualTitle = $this->hexToStr('61' . '80' . '62');
664 $tagger = $this->getTaggerMock();
665 $tagger->expects($this->once())
668 $graby = $this->getMockBuilder('Graby\Graby')
669 ->setMethods(['fetchContent'])
670 ->disableOriginalConstructor()
673 $graby->expects($this->any())
674 ->method('fetchContent')
677 'title' => $actualTitle,
679 'content_type' => 'text/html',
683 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
684 $entry = new Entry(new User());
685 $proxy->updateEntry($entry, 'http://0.0.0.0');
// 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
688 $expectedTitle = '61' . '62';
689 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
692 public function testPdfWithUTF16BETitle_convertToUTF8()
694 // See http://graphemica.com for more info about the characters
695 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
696 $actualTitle = $this->hexToStr('D83DDE3B');
698 $tagger = $this->getTaggerMock();
699 $tagger->expects($this->once())
702 $graby = $this->getMockBuilder('Graby\Graby')
703 ->setMethods(['fetchContent'])
704 ->disableOriginalConstructor()
707 $graby->expects($this->any())
708 ->method('fetchContent')
711 'title' => $actualTitle,
713 'content_type' => 'application/pdf',
717 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
718 $entry = new Entry(new User());
719 $proxy->updateEntry($entry, 'http://0.0.0.0');
721 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
722 $expectedTitle = 'F09F98BB';
723 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
726 public function testPdfWithUTF8Title_doNothing()
728 // See http://graphemica.com for more info about the characters
// '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
730 $actualTitle = $this->hexToStr('F09F98BB');
732 $tagger = $this->getTaggerMock();
733 $tagger->expects($this->once())
736 $graby = $this->getMockBuilder('Graby\Graby')
737 ->setMethods(['fetchContent'])
738 ->disableOriginalConstructor()
741 $graby->expects($this->any())
742 ->method('fetchContent')
745 'title' => $actualTitle,
747 'content_type' => 'application/pdf',
751 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
752 $entry = new Entry(new User());
753 $proxy->updateEntry($entry, 'http://0.0.0.0');
755 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
756 $expectedTitle = 'F09F98BB';
757 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
760 public function testPdfWithWINDOWS1252Title_convertToUTF8()
762 // See http://graphemica.com for more info about the characters
763 // '€' (80) in hexadecimal and WINDOWS-1252
764 $actualTitle = $this->hexToStr('80');
766 $tagger = $this->getTaggerMock();
767 $tagger->expects($this->once())
770 $graby = $this->getMockBuilder('Graby\Graby')
771 ->setMethods(['fetchContent'])
772 ->disableOriginalConstructor()
775 $graby->expects($this->any())
776 ->method('fetchContent')
779 'title' => $actualTitle,
781 'content_type' => 'application/pdf',
785 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
786 $entry = new Entry(new User());
787 $proxy->updateEntry($entry, 'http://0.0.0.0');
789 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
790 $expectedTitle = 'E282AC';
791 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
794 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
796 // See http://graphemica.com for more info about the characters
797 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
798 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
799 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
801 $tagger = $this->getTaggerMock();
802 $tagger->expects($this->once())
805 $graby = $this->getMockBuilder('Graby\Graby')
806 ->setMethods(['fetchContent'])
807 ->disableOriginalConstructor()
810 $graby->expects($this->any())
811 ->method('fetchContent')
814 'title' => $actualTitle,
816 'content_type' => 'application/pdf',
820 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
821 $entry = new Entry(new User());
822 $proxy->updateEntry($entry, 'http://0.0.0.0');
824 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
825 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
826 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
827 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
831 * Data provider for testWithChangedUrl.
833 * Arrays contain the following values:
837 * $expected_entry_url
838 * $expected_origin_url
841 public function dataForChangedUrl()
852 'origin already set' => [
860 'trailing slash' => [
861 'https://example.com/hello-world',
863 'https://example.com/hello-world/',
864 'https://example.com/hello-world/',
868 'query string in fetched content' => [
869 'https://example.org/hello',
871 'https://example.org/hello?world=1',
872 'https://example.org/hello?world=1',
873 'https://example.org/hello',
876 'fragment in fetched content' => [
877 'https://example.org/hello',
879 'https://example.org/hello#world',
880 'https://example.org/hello',
884 'fragment and query string in fetched content' => [
885 'https://example.org/hello',
887 'https://example.org/hello?foo#world',
888 'https://example.org/hello?foo#world',
889 'https://example.org/hello',
892 'different path and query string in fetch content' => [
893 'https://example.org/hello',
895 'https://example.org/world?foo',
896 'https://example.org/world?foo',
897 'https://example.org/hello',
900 'feedproxy ignore list test' => [
901 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
903 'https://example.org/hello-wallabag',
904 'https://example.org/hello-wallabag',
908 'feedproxy ignore list test with origin url already set' => [
909 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
910 'https://example.org/this-is-source',
911 'https://example.org/hello-wallabag',
912 'https://example.org/hello-wallabag',
913 'https://example.org/this-is-source',
916 'lemonde ignore pattern test' => [
917 'http://www.lemonde.fr/tiny/url',
919 'http://example.com/hello-world',
920 'http://example.com/hello-world',
928 * @dataProvider dataForChangedUrl
930 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
932 $tagger = $this->getTaggerMock();
933 $tagger->expects($this->once())
936 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
937 $entry = new Entry(new User());
938 $entry->setOriginUrl($origin_url);
945 'url' => $content_url,
946 'content_type' => '',
952 $this->assertSame($expected_entry_url, $entry->getUrl());
953 $this->assertSame($expected_domain, $entry->getDomainName());
954 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
/**
 * Returns the uppercase hexadecimal representation of every byte of a string.
 *
 * Each input byte becomes exactly two hex digits, so the raw encoding of a
 * title can be compared digit by digit in assertions (e.g. "z" gives "7A").
 *
 * Adapted from https://stackoverflow.com/a/18506801.
 *
 * @param string|null $string Raw bytes to encode; null is treated as an empty string
 *
 * @return string Uppercase hex digits, two per input byte
 */
private function strToHex($string)
{
    // bin2hex() already performs the byte-wise, zero-padded conversion that
    // a manual ord()/dechex() loop would do; only the letter case differs.
    return strtoupper(bin2hex((string) $string));
}
/**
 * Decodes a string of hexadecimal digits into the raw bytes it describes.
 *
 * Every pair of digits yields one byte, which lets tests build titles in an
 * exact encoding (valid or deliberately invalid) without relying on the
 * encoding of this source file.
 *
 * Adapted from https://stackoverflow.com/a/18506801.
 *
 * @param string $hex Even-length sequence of hex digits (either case); may be empty
 *
 * @throws \InvalidArgumentException When $hex has an odd length or contains a non-hex character
 *
 * @return string The decoded bytes
 */
private function hexToStr($hex)
{
    // ctype_xdigit() rejects the empty string, so keep the "nothing in, nothing out" case explicit.
    if ($hex === '') {
        return '';
    }

    // Fail loudly on a malformed fixture instead of silently dropping a
    // trailing digit or decoding garbage, which would make a test assert
    // against bytes nobody intended.
    if (\strlen($hex) % 2 !== 0 || !ctype_xdigit($hex)) {
        throw new \InvalidArgumentException(sprintf('Expected an even-length hexadecimal string, got "%s".', $hex));
    }

    return hex2bin($hex);
}
/**
 * Creates the RuleBasedTagger test double shared by the tests in this class.
 *
 * The real constructor is bypassed and only tag() is replaced, so each test
 * can declare its own expectation on that method.
 *
 * @return object Mock standing in for RuleBasedTagger
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
/**
 * Supplies the logger handed to ContentProxy by tests that do not inspect log output.
 *
 * @return NullLogger
 */
private function getLogger()
{
    $silentLogger = new NullLogger();

    return $silentLogger;
}
1006 private function getValidator($withDefaultMock = true)
1008 $mock = $this->getMockBuilder(RecursiveValidator
::class)
1009 ->setMethods(['validate'])
1010 ->disableOriginalConstructor()
1013 if ($withDefaultMock) {
1014 $mock->expects($this->any())
1015 ->method('validate')
1016 ->willReturn(new ConstraintViolationList());