3 namespace Tests\Wallabag\CoreBundle\Helper
;
6 use Monolog\Handler\TestHandler
;
8 use PHPUnit\Framework\TestCase
;
9 use Psr\Log\NullLogger
;
10 use Symfony\Component\Validator\ConstraintViolation
;
11 use Symfony\Component\Validator\ConstraintViolationList
;
12 use Symfony\Component\Validator\Validator\RecursiveValidator
;
13 use Wallabag\CoreBundle\Entity\Entry
;
14 use Wallabag\CoreBundle\Helper\ContentProxy
;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger
;
16 use Wallabag\UserBundle\Entity\User
;
18 class ContentProxyTest
extends TestCase
// Fallback HTML message handed to every ContentProxy built in this class; several tests assert
// that it ends up as the entry content when no usable content was fetched.
20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
// Submits 'http://user@:80' through a mocked Graby: the entry must keep the submitted URL, get the
// fetching-error message as content, leave title/picture/mimetype/language empty, report a reading
// time of 0.0 and have no domain name.
22 public function testWithBadUrl()
24 $tagger = $this->getTaggerMock();
25 $tagger->expects($this->once())
28 $graby = $this->getMockBuilder('Graby\Graby')
29 ->setMethods(['fetchContent'])
30 ->disableOriginalConstructor()
33 $graby->expects($this->any())
34 ->method('fetchContent')
45 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
46 $entry = new Entry(new User());
47 $proxy->updateEntry($entry, 'http://user@:80');
49 $this->assertSame('http://user@:80', $entry->getUrl());
50 $this->assertEmpty($entry->getTitle());
51 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
52 $this->assertEmpty($entry->getPreviewPicture());
53 $this->assertEmpty($entry->getMimetype());
54 $this->assertEmpty($entry->getLanguage());
55 $this->assertSame(0.0, $entry->getReadingTime());
56 $this->assertNull($entry->getDomainName());
// Submits 'http://0.0.0.0' through a mocked Graby: the entry keeps that URL, gets the fetching-error
// message as content, has empty title/picture/mimetype/language, a reading time of 0.0, and the
// domain name '0.0.0.0'.
59 public function testWithEmptyContent()
61 $tagger = $this->getTaggerMock();
62 $tagger->expects($this->once())
65 $graby = $this->getMockBuilder('Graby\Graby')
66 ->setMethods(['fetchContent'])
67 ->disableOriginalConstructor()
70 $graby->expects($this->any())
71 ->method('fetchContent')
82 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
83 $entry = new Entry(new User());
84 $proxy->updateEntry($entry, 'http://0.0.0.0');
86 $this->assertSame('http://0.0.0.0', $entry->getUrl());
87 $this->assertEmpty($entry->getTitle());
88 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
89 $this->assertEmpty($entry->getPreviewPicture());
90 $this->assertEmpty($entry->getMimetype());
91 $this->assertEmpty($entry->getLanguage());
92 $this->assertSame(0.0, $entry->getReadingTime());
93 $this->assertSame('0.0.0.0', $entry->getDomainName());
// Fetched data carries a title ('my title') and a description ('desc') but an empty content-type:
// the entry keeps the title and its content is the fetching-error message followed by the
// "But we found a short description" paragraph and the description text.
96 public function testWithEmptyContentButOG()
98 $tagger = $this->getTaggerMock();
99 $tagger->expects($this->once())
102 $graby = $this->getMockBuilder('Graby\Graby')
103 ->setMethods(['fetchContent'])
104 ->disableOriginalConstructor()
107 $graby->expects($this->any())
108 ->method('fetchContent')
111 'title' => 'my title',
114 'content-type' => '',
118 'description' => 'desc',
121 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
122 $entry = new Entry(new User());
123 $proxy->updateEntry($entry, 'http://domain.io');
125 $this->assertSame('http://domain.io', $entry->getUrl());
126 $this->assertSame('my title', $entry->getTitle());
127 $this->assertSame($this->fetchingErrorMessage
. '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
128 $this->assertEmpty($entry->getPreviewPicture());
129 $this->assertEmpty($entry->getLanguage());
130 $this->assertEmpty($entry->getHttpStatus());
131 $this->assertEmpty($entry->getMimetype());
132 $this->assertSame(0.0, $entry->getReadingTime());
133 $this->assertSame('domain.io', $entry->getDomainName());
// Nominal case: the mocked Graby returns HTML, a title, a final URL ('http://1.1.1.1'), an image and
// a 'text/html' content-type. The entry must take the fetched URL (not the submitted
// 'http://0.0.0.0'), the title, the image as preview picture, and report a 4.0 reading time.
136 public function testWithContent()
138 $tagger = $this->getTaggerMock();
139 $tagger->expects($this->once())
142 $graby = $this->getMockBuilder('Graby\Graby')
143 ->setMethods(['fetchContent'])
144 ->disableOriginalConstructor()
147 $graby->expects($this->any())
148 ->method('fetchContent')
150 'html' => str_repeat('this is my content', 325),
151 'title' => 'this is my title',
152 'url' => 'http://1.1.1.1',
155 'description' => 'OG desc',
156 'image' => 'http://3.3.3.3/cover.jpg',
158 'content-type' => 'text/html',
162 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
163 $entry = new Entry(new User());
164 $proxy->updateEntry($entry, 'http://0.0.0.0');
166 $this->assertSame('http://1.1.1.1', $entry->getUrl());
167 $this->assertSame('this is my title', $entry->getTitle());
168 $this->assertContains('content', $entry->getContent());
169 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
170 $this->assertSame('text/html', $entry->getMimetype());
171 $this->assertSame('fr', $entry->getLanguage());
172 $this->assertSame('200', $entry->getHttpStatus());
173 $this->assertSame(4.0, $entry->getReadingTime());
174 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Same fixture as the nominal content test but without an 'image' value: everything else is
// populated as usual and the preview picture must stay null.
177 public function testWithContentAndNoOgImage()
179 $tagger = $this->getTaggerMock();
180 $tagger->expects($this->once())
183 $graby = $this->getMockBuilder('Graby\Graby')
184 ->setMethods(['fetchContent'])
185 ->disableOriginalConstructor()
188 $graby->expects($this->any())
189 ->method('fetchContent')
191 'html' => str_repeat('this is my content', 325),
192 'title' => 'this is my title',
193 'url' => 'http://1.1.1.1',
196 'description' => 'OG desc',
199 'content-type' => 'text/html',
203 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
204 $entry = new Entry(new User());
205 $proxy->updateEntry($entry, 'http://0.0.0.0');
207 $this->assertSame('http://1.1.1.1', $entry->getUrl());
208 $this->assertSame('this is my title', $entry->getTitle());
209 $this->assertContains('content', $entry->getContent());
210 $this->assertNull($entry->getPreviewPicture());
211 $this->assertSame('text/html', $entry->getMimetype());
212 $this->assertSame('fr', $entry->getLanguage());
213 $this->assertSame('200', $entry->getHttpStatus());
214 $this->assertSame(4.0, $entry->getReadingTime());
215 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched HTML embeds an <img> and no separate image value is provided: the content is stored
// unchanged and the embedded image URL becomes the preview picture. The short HTML yields a 0.0
// reading time.
// NOTE(review): this fixture uses 'content_type' / 'og_title' / 'og_description' keys while sibling
// tests use 'content-type' / 'description' / 'image' — confirm which shape ContentProxy reads.
218 public function testWithContentAndContentImage()
220 $tagger = $this->getTaggerMock();
221 $tagger->expects($this->once())
224 $graby = $this->getMockBuilder('Graby\Graby')
225 ->setMethods(['fetchContent'])
226 ->disableOriginalConstructor()
229 $graby->expects($this->any())
230 ->method('fetchContent')
232 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
233 'title' => 'this is my title',
234 'url' => 'http://1.1.1.1',
235 'content_type' => 'text/html',
239 'og_title' => 'my OG title',
240 'og_description' => 'OG desc',
245 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
246 $entry = new Entry(new User());
247 $proxy->updateEntry($entry, 'http://0.0.0.0');
249 $this->assertSame('http://1.1.1.1', $entry->getUrl());
250 $this->assertSame('this is my title', $entry->getTitle());
251 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
252 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
253 $this->assertSame('text/html', $entry->getMimetype());
254 $this->assertSame('fr', $entry->getLanguage());
255 $this->assertSame('200', $entry->getHttpStatus());
256 $this->assertSame(0.0, $entry->getReadingTime());
257 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched HTML embeds one image ('nevermind.jpg') and the fixture also supplies a dedicated
// image ('cover.jpg'): the dedicated image must win as preview picture while the content is stored
// unchanged.
// NOTE(review): this fixture uses 'content_type' / 'og_*' keys while sibling tests use
// 'content-type' / 'description' / 'image' — confirm which shape ContentProxy reads.
260 public function testWithContentImageAndOgImage()
262 $tagger = $this->getTaggerMock();
263 $tagger->expects($this->once())
266 $graby = $this->getMockBuilder('Graby\Graby')
267 ->setMethods(['fetchContent'])
268 ->disableOriginalConstructor()
271 $graby->expects($this->any())
272 ->method('fetchContent')
274 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
275 'title' => 'this is my title',
276 'url' => 'http://1.1.1.1',
277 'content_type' => 'text/html',
281 'og_title' => 'my OG title',
282 'og_description' => 'OG desc',
283 'og_image' => 'http://3.3.3.3/cover.jpg',
287 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
288 $entry = new Entry(new User());
289 $proxy->updateEntry($entry, 'http://0.0.0.0');
291 $this->assertSame('http://1.1.1.1', $entry->getUrl());
292 $this->assertSame('this is my title', $entry->getTitle());
293 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
294 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
295 $this->assertSame('text/html', $entry->getMimetype());
296 $this->assertSame('fr', $entry->getLanguage());
297 $this->assertSame('200', $entry->getHttpStatus());
298 $this->assertSame(0.0, $entry->getReadingTime());
299 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched language is 'dontexist' and the validator mock (built without its default stub) is
// expected once and returns a violation for it: the entry language must stay null while the other
// fields are populated normally.
302 public function testWithContentAndBadLanguage()
304 $tagger = $this->getTaggerMock();
305 $tagger->expects($this->once())
308 $validator = $this->getValidator(false);
309 $validator->expects($this->once())
311 ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
313 $graby = $this->getMockBuilder('Graby\Graby')
314 ->setMethods(['fetchContent'])
315 ->disableOriginalConstructor()
318 $graby->expects($this->any())
319 ->method('fetchContent')
321 'html' => str_repeat('this is my content', 325),
322 'title' => 'this is my title',
323 'url' => 'http://1.1.1.1',
324 'language' => 'dontexist',
327 'content-type' => 'text/html',
331 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
332 $entry = new Entry(new User());
333 $proxy->updateEntry($entry, 'http://0.0.0.0');
335 $this->assertSame('http://1.1.1.1', $entry->getUrl());
336 $this->assertSame('this is my title', $entry->getTitle());
337 $this->assertContains('content', $entry->getContent());
338 $this->assertSame('text/html', $entry->getMimetype());
339 $this->assertNull($entry->getLanguage());
340 $this->assertSame('200', $entry->getHttpStatus());
341 $this->assertSame(4.0, $entry->getReadingTime());
342 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched image is the unusable URL 'https://'. The validator mock is expected exactly twice:
// the first call returns no violation, the second returns a violation for that URL. The preview
// picture must therefore stay null while language and the other fields are set.
345 public function testWithContentAndBadOgImage()
347 $tagger = $this->getTaggerMock();
348 $tagger->expects($this->once())
351 $validator = $this->getValidator(false);
352 $validator->expects($this->exactly(2))
354 ->will($this->onConsecutiveCalls(
355 new ConstraintViolationList(),
356 new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
359 $graby = $this->getMockBuilder('Graby\Graby')
360 ->setMethods(['fetchContent'])
361 ->disableOriginalConstructor()
364 $graby->expects($this->any())
365 ->method('fetchContent')
367 'html' => str_repeat('this is my content', 325),
368 'title' => 'this is my title',
369 'url' => 'http://1.1.1.1',
371 'content-type' => 'text/html',
375 'description' => 'OG desc',
376 'image' => 'https://',
379 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
380 $entry = new Entry(new User());
381 $proxy->updateEntry($entry, 'http://0.0.0.0');
383 $this->assertSame('http://1.1.1.1', $entry->getUrl());
384 $this->assertSame('this is my title', $entry->getTitle());
385 $this->assertContains('content', $entry->getContent());
386 $this->assertNull($entry->getPreviewPicture());
387 $this->assertSame('text/html', $entry->getMimetype());
388 $this->assertSame('fr', $entry->getLanguage());
389 $this->assertSame('200', $entry->getHttpStatus());
390 $this->assertSame(4.0, $entry->getReadingTime());
391 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Uses a real Graby instance (no fetchContent mock) and a ContentProxy built with an extra `true`
// sixth argument. With a Unix-timestamp date ('1395635872'), three authors and a 'cache-control'
// header in the fixture, the entry must expose the published date 24/03/2014, all three authors,
// and stored headers containing 'no-cache'.
394 public function testWithForcedContent()
396 $tagger = $this->getTaggerMock();
397 $tagger->expects($this->once())
400 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
401 $entry = new Entry(new User());
406 'html' => str_repeat('this is my content', 325),
407 'title' => 'this is my title',
408 'url' => 'http://1.1.1.1',
410 'date' => '1395635872',
411 'authors' => ['Jeremy', 'Nico', 'Thomas'],
413 'cache-control' => 'no-cache',
414 'content-type' => 'text/html',
419 $this->assertSame('http://1.1.1.1', $entry->getUrl());
420 $this->assertSame('this is my title', $entry->getTitle());
421 $this->assertContains('content', $entry->getContent());
422 $this->assertSame('text/html', $entry->getMimetype());
423 $this->assertSame('fr', $entry->getLanguage());
424 $this->assertSame(4.0, $entry->getReadingTime());
425 $this->assertSame('1.1.1.1', $entry->getDomainName());
426 $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
427 $this->assertContains('Jeremy', $entry->getPublishedBy());
428 $this->assertContains('Nico', $entry->getPublishedBy());
429 $this->assertContains('Thomas', $entry->getPublishedBy());
430 $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
431 $this->assertContains('no-cache', $entry->getHeaders());
// Same forced-content setup with a date written as '2016-09-08T11:55:58+0200': the entry must
// report 08/09/2016 as its published date.
// NOTE(review): $logHandler is attached to the logger but never inspected in the lines visible
// here — confirm whether it is still needed.
434 public function testWithForcedContentAndDatetime()
436 $tagger = $this->getTaggerMock();
437 $tagger->expects($this->once())
440 $logHandler = new TestHandler();
441 $logger = new Logger('test', [$logHandler]);
443 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
444 $entry = new Entry(new User());
449 'html' => str_repeat('this is my content', 325),
450 'title' => 'this is my title',
451 'url' => 'http://1.1.1.1',
453 'date' => '2016-09-08T11:55:58+0200',
455 'content-type' => 'text/html',
460 $this->assertSame('http://1.1.1.1', $entry->getUrl());
461 $this->assertSame('this is my title', $entry->getTitle());
462 $this->assertContains('content', $entry->getContent());
463 $this->assertSame('text/html', $entry->getMimetype());
464 $this->assertSame('fr', $entry->getLanguage());
465 $this->assertSame(4.0, $entry->getReadingTime());
466 $this->assertSame('1.1.1.1', $entry->getDomainName());
467 $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
// Forced content whose date ('01 02 2012') cannot be used: the published date must stay null, and
// the TestHandler attached to the logger must have captured three records, the first of which
// mentions 'Error while defining date'.
470 public function testWithForcedContentAndBadDate()
472 $tagger = $this->getTaggerMock();
473 $tagger->expects($this->once())
476 $logger = new Logger('foo');
477 $handler = new TestHandler();
478 $logger->pushHandler($handler);
480 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
481 $entry = new Entry(new User());
486 'html' => str_repeat('this is my content', 325),
487 'title' => 'this is my title',
488 'url' => 'http://1.1.1.1',
490 'date' => '01 02 2012',
492 'content-type' => 'text/html',
497 $this->assertSame('http://1.1.1.1', $entry->getUrl());
498 $this->assertSame('this is my title', $entry->getTitle());
499 $this->assertContains('content', $entry->getContent());
500 $this->assertSame('text/html', $entry->getMimetype());
501 $this->assertSame('fr', $entry->getLanguage());
502 $this->assertSame(4.0, $entry->getReadingTime());
503 $this->assertSame('1.1.1.1', $entry->getDomainName());
504 $this->assertNull($entry->getPublishedAt());
506 $records = $handler->getRecords();
508 $this->assertCount(3, $records);
509 $this->assertContains('Error while defining date', $records[0]['message']);
// The tagger mock is configured to throw an \Exception on its single expected call: the entry must
// end up with zero tags.
512 public function testTaggerThrowException()
514 $tagger = $this->getTaggerMock();
515 $tagger->expects($this->once())
517 ->will($this->throwException(new \
Exception()));
519 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
520 $entry = new Entry(new User());
525 'html' => str_repeat('this is my content', 325),
526 'title' => 'this is my title',
527 'url' => 'http://1.1.1.1',
530 'content-type' => 'text/html',
535 $this->assertCount(0, $entry->getTags());
// Data provider for testWithCrazyHtmlContent. Each named dataset pairs an HTML input with a string
// that the test asserts is absent from the stored content (here: a <script> hidden inside an IE
// conditional comment).
538 public function dataForCrazyHtml()
541 'script and comment' => [
542 '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
546 '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
// Runs once per dataForCrazyHtml dataset with a real Graby instance: the stored content must not
// contain $escapedString, while URL, title, preview picture, mimetype, language, HTTP status and
// domain are populated as in the nominal case.
553 * @dataProvider dataForCrazyHtml
555 public function testWithCrazyHtmlContent($html, $escapedString)
557 $tagger = $this->getTaggerMock();
558 $tagger->expects($this->once())
561 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
562 $entry = new Entry(new User());
568 'title' => 'this is my title',
569 'url' => 'http://1.1.1.1',
572 //'og_title' => 'my OG title',
573 'description' => 'OG desc',
574 'image' => 'http://3.3.3.3/cover.jpg',
576 'content-type' => 'text/html',
581 $this->assertSame('http://1.1.1.1', $entry->getUrl());
582 $this->assertSame('this is my title', $entry->getTitle());
583 $this->assertNotContains($escapedString, $entry->getContent());
584 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
585 $this->assertSame('text/html', $entry->getMimetype());
586 $this->assertSame('fr', $entry->getLanguage());
587 $this->assertSame('200', $entry->getHttpStatus());
588 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched resource is itself an image ('image/jpeg' content-type, URL ending in image.jpg):
// the entry must use that URL both as its own URL and as its preview picture, and the content
// must reference the image.
591 public function testWithImageAsContent()
593 $tagger = $this->getTaggerMock();
594 $tagger->expects($this->once())
597 $graby = $this->getMockBuilder('Graby\Graby')
598 ->setMethods(['fetchContent'])
599 ->disableOriginalConstructor()
602 $graby->expects($this->any())
603 ->method('fetchContent')
605 'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
606 'title' => 'this is my title',
607 'url' => 'http://1.1.1.1/image.jpg',
610 'content-type' => 'image/jpeg',
614 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
615 $entry = new Entry(new User());
616 $proxy->updateEntry($entry, 'http://0.0.0.0');
618 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
619 $this->assertSame('this is my title', $entry->getTitle());
620 $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
621 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
622 $this->assertSame('image/jpeg', $entry->getMimetype());
623 $this->assertSame('200', $entry->getHttpStatus());
624 $this->assertSame('1.1.1.1', $entry->getDomainName());
// A 'text/html' page whose title is already valid UTF-8 must keep that title byte-for-byte; the
// comparison is done on the hexadecimal dump of the stored title.
627 public function testWebsiteWithValidUTF8Title_doNothing()
629 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
630 // See http://graphemica.com for more info about the characters
631 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
632 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
634 $tagger = $this->getTaggerMock();
635 $tagger->expects($this->once())
638 $graby = $this->getMockBuilder('Graby\Graby')
639 ->setMethods(['fetchContent'])
640 ->disableOriginalConstructor()
643 $graby->expects($this->any())
644 ->method('fetchContent')
647 'title' => $actualTitle,
650 'content-type' => 'text/html',
655 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
656 $entry = new Entry(new User());
657 $proxy->updateEntry($entry, 'http://0.0.0.0');
659 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
660 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
661 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// A 'text/html' page whose title contains the byte 0x80 (not valid UTF-8) must have that byte
// stripped: 'a' 0x80 'b' is stored as 'ab'.
664 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
666 // See http://graphemica.com for more info about the characters
667 // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
668 // The correct UTF-8 € character (U+20AC) is E282AC
669 $actualTitle = $this->hexToStr('61' . '80' . '62');
671 $tagger = $this->getTaggerMock();
672 $tagger->expects($this->once())
675 $graby = $this->getMockBuilder('Graby\Graby')
676 ->setMethods(['fetchContent'])
677 ->disableOriginalConstructor()
680 $graby->expects($this->any())
681 ->method('fetchContent')
684 'title' => $actualTitle,
687 'content-type' => 'text/html',
692 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
693 $entry = new Entry(new User());
694 $proxy->updateEntry($entry, 'http://0.0.0.0');
696 // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
697 $expectedTitle = '61' . '62';
698 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose title is encoded as UTF-16BE (D83DDE3B) must be stored as
// the UTF-8 encoding of the same character (F09F98BB).
701 public function testPdfWithUTF16BETitle_convertToUTF8()
703 // See http://graphemica.com for more info about the characters
704 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
705 $actualTitle = $this->hexToStr('D83DDE3B');
707 $tagger = $this->getTaggerMock();
708 $tagger->expects($this->once())
711 $graby = $this->getMockBuilder('Graby\Graby')
712 ->setMethods(['fetchContent'])
713 ->disableOriginalConstructor()
716 $graby->expects($this->any())
717 ->method('fetchContent')
720 'title' => $actualTitle,
723 'content-type' => 'application/pdf',
728 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
729 $entry = new Entry(new User());
730 $proxy->updateEntry($entry, 'http://0.0.0.0');
732 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
733 $expectedTitle = 'F09F98BB';
734 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose title is already UTF-8 (F09F98BB) must keep that title
// byte-for-byte.
737 public function testPdfWithUTF8Title_doNothing()
739 // See http://graphemica.com for more info about the characters
740 // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
741 $actualTitle = $this->hexToStr('F09F98BB');
743 $tagger = $this->getTaggerMock();
744 $tagger->expects($this->once())
747 $graby = $this->getMockBuilder('Graby\Graby')
748 ->setMethods(['fetchContent'])
749 ->disableOriginalConstructor()
752 $graby->expects($this->any())
753 ->method('fetchContent')
756 'title' => $actualTitle,
759 'content-type' => 'application/pdf',
764 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
765 $entry = new Entry(new User());
766 $proxy->updateEntry($entry, 'http://0.0.0.0');
768 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
769 $expectedTitle = 'F09F98BB';
770 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose title is the single WINDOWS-1252 byte 0x80 ('€') must be
// stored as the UTF-8 encoding of the euro sign (E282AC).
773 public function testPdfWithWINDOWS1252Title_convertToUTF8()
775 // See http://graphemica.com for more info about the characters
776 // '€' (80) in hexadecimal and WINDOWS-1252
777 $actualTitle = $this->hexToStr('80');
779 $tagger = $this->getTaggerMock();
780 $tagger->expects($this->once())
783 $graby = $this->getMockBuilder('Graby\Graby')
784 ->setMethods(['fetchContent'])
785 ->disableOriginalConstructor()
788 $graby->expects($this->any())
789 ->method('fetchContent')
792 'title' => $actualTitle,
795 'content-type' => 'application/pdf',
800 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
801 $entry = new Entry(new User());
802 $proxy->updateEntry($entry, 'http://0.0.0.0');
804 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
805 $expectedTitle = 'E282AC';
806 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose otherwise-UTF-8 title contains the stray byte 0x81 must be
// stored with that byte removed and the remaining characters untouched.
809 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
811 // See http://graphemica.com for more info about the characters
812 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
813 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
814 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
816 $tagger = $this->getTaggerMock();
817 $tagger->expects($this->once())
820 $graby = $this->getMockBuilder('Graby\Graby')
821 ->setMethods(['fetchContent'])
822 ->disableOriginalConstructor()
825 $graby->expects($this->any())
826 ->method('fetchContent')
829 'title' => $actualTitle,
832 'content-type' => 'application/pdf',
837 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
838 $entry = new Entry(new User());
839 $proxy->updateEntry($entry, 'http://0.0.0.0');
841 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
842 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
843 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
844 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// Named datasets consumed by testWithChangedUrl. Each dataset name states the scenario it covers
// (origin already set, trailing slash, query string / fragment in the fetched URL, ignore-list
// hosts such as feedproxy and lemonde).
848 * Data provider for testWithChangedUrl.
850 * Arrays contain the following values:
854 * $expected_entry_url
855 * $expected_origin_url
858 public function dataForChangedUrl()
869 'origin already set' => [
877 'trailing slash' => [
878 'https://example.com/hello-world',
880 'https://example.com/hello-world/',
881 'https://example.com/hello-world/',
885 'query string in fetched content' => [
886 'https://example.org/hello',
888 'https://example.org/hello?world=1',
889 'https://example.org/hello?world=1',
890 'https://example.org/hello',
893 'fragment in fetched content' => [
894 'https://example.org/hello',
896 'https://example.org/hello#world',
897 'https://example.org/hello',
901 'fragment and query string in fetched content' => [
902 'https://example.org/hello',
904 'https://example.org/hello?foo#world',
905 'https://example.org/hello?foo#world',
906 'https://example.org/hello',
909 'different path and query string in fetch content' => [
910 'https://example.org/hello',
912 'https://example.org/world?foo',
913 'https://example.org/world?foo',
914 'https://example.org/hello',
917 'feedproxy ignore list test' => [
918 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
920 'https://example.org/hello-wallabag',
921 'https://example.org/hello-wallabag',
925 'feedproxy ignore list test with origin url already set' => [
926 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
927 'https://example.org/this-is-source',
928 'https://example.org/hello-wallabag',
929 'https://example.org/hello-wallabag',
930 'https://example.org/this-is-source',
933 'lemonde ignore pattern test' => [
934 'http://www.lemonde.fr/tiny/url',
936 'http://example.com/hello-world',
937 'http://example.com/hello-world',
// Runs once per dataForChangedUrl dataset: pre-sets the entry's origin URL, feeds $content_url as
// the fetched 'url', then checks the resulting entry URL, domain name and origin URL against the
// expected values. The proxy is built with a real Graby and an extra `true` sixth argument.
945 * @dataProvider dataForChangedUrl
947 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
949 $tagger = $this->getTaggerMock();
950 $tagger->expects($this->once())
953 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
954 $entry = new Entry(new User());
955 $entry->setOriginUrl($origin_url);
962 'url' => $content_url,
964 'content-type' => '',
971 $this->assertSame($expected_entry_url, $entry->getUrl());
972 $this->assertSame($expected_domain, $entry->getDomainName());
973 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
/**
 * Convert a string to its uppercase hexadecimal dump (two digits per byte).
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $string Raw bytes to encode
 *
 * @return string Uppercase hex digits; empty string for empty input
 */
private function strToHex($string)
{
    // bin2hex() emits exactly two lowercase hex digits per byte, which is what
    // the previous ord()/dechex()/zero-pad loop assembled by hand.
    return strtoupper(bin2hex($string));
}
/**
 * Convert hex to string.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $hex Hexadecimal digits, two per output byte
 *
 * @return string The decoded bytes; a trailing unpaired digit is ignored
 */
private function hexToStr($hex)
{
    $string = '';
    // Length is loop-invariant, so compute it once instead of on every iteration.
    $length = \strlen($hex);

    // Consume the input two digits at a time; stopping at $length - 1 guarantees
    // that $hex[$i + 1] always exists.
    for ($i = 0; $i < $length - 1; $i += 2) {
        $string .= \chr(hexdec($hex[$i] . $hex[$i + 1]));
    }

    return $string;
}
// Starts a mock builder for RuleBasedTagger that stubs only tag() and skips the real constructor;
// callers set expectations on the returned object.
// NOTE(review): the call that terminates this builder chain is not visible here — confirm it
// ends with getMock().
1014 private function getTaggerMock()
1016 return $this->getMockBuilder(RuleBasedTagger
::class)
1017 ->setMethods(['tag'])
1018 ->disableOriginalConstructor()
/**
 * Build the logger given to ContentProxy by tests that do not inspect log output.
 *
 * Tests that need to assert on log records build their own logger with a
 * TestHandler instead of calling this helper.
 *
 * @return NullLogger
 */
private function getLogger()
{
    return new NullLogger();
}
1027 private function getValidator($withDefaultMock = true)
1029 $mock = $this->getMockBuilder(RecursiveValidator
::class)
1030 ->setMethods(['validate'])
1031 ->disableOriginalConstructor()
1034 if ($withDefaultMock) {
1035 $mock->expects($this->any())
1036 ->method('validate')
1037 ->willReturn(new ConstraintViolationList());