3 namespace Tests\Wallabag\CoreBundle\Helper
;
6 use Monolog\Handler\TestHandler
;
8 use PHPUnit\Framework\TestCase
;
9 use Psr\Log\NullLogger
;
10 use Symfony\Component\Validator\ConstraintViolation
;
11 use Symfony\Component\Validator\ConstraintViolationList
;
12 use Symfony\Component\Validator\Validator\RecursiveValidator
;
13 use Wallabag\CoreBundle\Entity\Entry
;
14 use Wallabag\CoreBundle\Helper\ContentProxy
;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger
;
16 use Wallabag\UserBundle\Entity\User
;
18 class ContentProxyTest
extends TestCase
20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
// Updates an entry from the malformed URL 'http://user@:80' using a mocked Graby.
// Expected outcome: the URL is kept as given, the content is the fetching error
// message, title/preview picture/mimetype/language are empty, the reading time
// is 0.0 and no domain name can be derived (null).
22 public function testWithBadUrl()
24 $tagger = $this->getTaggerMock();
25 $tagger->expects($this->once())
28 $graby = $this->getMockBuilder('Graby\Graby')
29 ->setMethods(['fetchContent'])
30 ->disableOriginalConstructor()
33 $graby->expects($this->any())
34 ->method('fetchContent')
43 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
44 $entry = new Entry(new User());
45 $proxy->updateEntry($entry, 'http://user@:80');
47 $this->assertSame('http://user@:80', $entry->getUrl());
48 $this->assertEmpty($entry->getTitle());
49 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
50 $this->assertEmpty($entry->getPreviewPicture());
51 $this->assertEmpty($entry->getMimetype());
52 $this->assertEmpty($entry->getLanguage());
53 $this->assertSame(0.0, $entry->getReadingTime());
54 $this->assertNull($entry->getDomainName());
// Updates an entry from 'http://0.0.0.0' using a mocked Graby.
// Expected outcome: the content is the fetching error message, the other
// metadata stay empty, the reading time is 0.0, and the domain name is still
// derived from the requested URL ('0.0.0.0').
57 public function testWithEmptyContent()
59 $tagger = $this->getTaggerMock();
60 $tagger->expects($this->once())
63 $graby = $this->getMockBuilder('Graby\Graby')
64 ->setMethods(['fetchContent'])
65 ->disableOriginalConstructor()
68 $graby->expects($this->any())
69 ->method('fetchContent')
78 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
79 $entry = new Entry(new User());
80 $proxy->updateEntry($entry, 'http://0.0.0.0');
82 $this->assertSame('http://0.0.0.0', $entry->getUrl());
83 $this->assertEmpty($entry->getTitle());
84 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
85 $this->assertEmpty($entry->getPreviewPicture());
86 $this->assertEmpty($entry->getMimetype());
87 $this->assertEmpty($entry->getLanguage());
88 $this->assertSame(0.0, $entry->getReadingTime());
89 $this->assertSame('0.0.0.0', $entry->getDomainName());
// The fetched fixture carries OpenGraph data ('og_title', 'og_description') and
// an empty content type. Expected outcome: the title becomes the OG title and
// the content is the fetching error message followed by the
// "But we found a short description" paragraph and the OG description; preview
// picture, language, HTTP status and mimetype stay empty.
92 public function testWithEmptyContentButOG()
94 $tagger = $this->getTaggerMock();
95 $tagger->expects($this->once())
98 $graby = $this->getMockBuilder('Graby\Graby')
99 ->setMethods(['fetchContent'])
100 ->disableOriginalConstructor()
103 $graby->expects($this->any())
104 ->method('fetchContent')
109 'content_type' => '',
113 'og_title' => 'my title',
114 'og_description' => 'desc',
118 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
119 $entry = new Entry(new User());
120 $proxy->updateEntry($entry, 'http://domain.io');
122 $this->assertSame('http://domain.io', $entry->getUrl());
123 $this->assertSame('my title', $entry->getTitle());
124 $this->assertSame($this->fetchingErrorMessage
. '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
125 $this->assertEmpty($entry->getPreviewPicture());
126 $this->assertEmpty($entry->getLanguage());
127 $this->assertEmpty($entry->getHttpStatus());
128 $this->assertEmpty($entry->getMimetype());
129 $this->assertSame(0.0, $entry->getReadingTime());
130 $this->assertSame('domain.io', $entry->getDomainName());
// Nominal case: the mocked Graby fixture provides html, title, url,
// content type and OpenGraph data (including 'og_image').
// Expected outcome: the entry takes the fetched URL ('http://1.1.1.1') instead
// of the requested one, keeps the page title rather than the OG title, uses the
// OG image as preview picture, and gets a 4.0 reading time.
// NOTE(review): the fixture rows supplying language 'fr' and status '200' are
// not visible in this excerpt.
133 public function testWithContent()
135 $tagger = $this->getTaggerMock();
136 $tagger->expects($this->once())
139 $graby = $this->getMockBuilder('Graby\Graby')
140 ->setMethods(['fetchContent'])
141 ->disableOriginalConstructor()
144 $graby->expects($this->any())
145 ->method('fetchContent')
147 'html' => str_repeat('this is my content', 325),
148 'title' => 'this is my title',
149 'url' => 'http://1.1.1.1',
150 'content_type' => 'text/html',
154 'og_title' => 'my OG title',
155 'og_description' => 'OG desc',
156 'og_image' => 'http://3.3.3.3/cover.jpg',
160 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
161 $entry = new Entry(new User());
162 $proxy->updateEntry($entry, 'http://0.0.0.0');
164 $this->assertSame('http://1.1.1.1', $entry->getUrl());
165 $this->assertSame('this is my title', $entry->getTitle());
166 $this->assertContains('content', $entry->getContent());
167 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
168 $this->assertSame('text/html', $entry->getMimetype());
169 $this->assertSame('fr', $entry->getLanguage());
170 $this->assertSame('200', $entry->getHttpStatus());
171 $this->assertSame(4.0, $entry->getReadingTime());
172 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Same text fixture as the nominal content test, but no 'og_image' row is
// visible in the fixture and the html holds no image.
// Expected outcome: the preview picture is null while the other metadata are
// filled as usual.
175 public function testWithContentAndNoOgImage()
177 $tagger = $this->getTaggerMock();
178 $tagger->expects($this->once())
181 $graby = $this->getMockBuilder('Graby\Graby')
182 ->setMethods(['fetchContent'])
183 ->disableOriginalConstructor()
186 $graby->expects($this->any())
187 ->method('fetchContent')
189 'html' => str_repeat('this is my content', 325),
190 'title' => 'this is my title',
191 'url' => 'http://1.1.1.1',
192 'content_type' => 'text/html',
196 'og_title' => 'my OG title',
197 'og_description' => 'OG desc',
202 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
203 $entry = new Entry(new User());
204 $proxy->updateEntry($entry, 'http://0.0.0.0');
206 $this->assertSame('http://1.1.1.1', $entry->getUrl());
207 $this->assertSame('this is my title', $entry->getTitle());
208 $this->assertContains('content', $entry->getContent());
209 $this->assertNull($entry->getPreviewPicture());
210 $this->assertSame('text/html', $entry->getMimetype());
211 $this->assertSame('fr', $entry->getLanguage());
212 $this->assertSame('200', $entry->getHttpStatus());
213 $this->assertSame(4.0, $entry->getReadingTime());
214 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched html contains an <img> and no 'og_image' row is visible in the
// fixture. Expected outcome: the content is stored unchanged and the image
// found inside the content ('http://3.3.3.3/cover.jpg') becomes the preview
// picture; the short content yields a 0.0 reading time.
217 public function testWithContentAndContentImage()
219 $tagger = $this->getTaggerMock();
220 $tagger->expects($this->once())
223 $graby = $this->getMockBuilder('Graby\Graby')
224 ->setMethods(['fetchContent'])
225 ->disableOriginalConstructor()
228 $graby->expects($this->any())
229 ->method('fetchContent')
231 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
232 'title' => 'this is my title',
233 'url' => 'http://1.1.1.1',
234 'content_type' => 'text/html',
238 'og_title' => 'my OG title',
239 'og_description' => 'OG desc',
244 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
245 $entry = new Entry(new User());
246 $proxy->updateEntry($entry, 'http://0.0.0.0');
248 $this->assertSame('http://1.1.1.1', $entry->getUrl());
249 $this->assertSame('this is my title', $entry->getTitle());
250 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
251 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
252 $this->assertSame('text/html', $entry->getMimetype());
253 $this->assertSame('fr', $entry->getLanguage());
254 $this->assertSame('200', $entry->getHttpStatus());
255 $this->assertSame(0.0, $entry->getReadingTime());
256 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched html contains an <img> ('nevermind.jpg') and the fixture also
// declares an 'og_image' ('cover.jpg'). Expected outcome: the OG image wins as
// preview picture, and the content is stored unchanged.
259 public function testWithContentImageAndOgImage()
261 $tagger = $this->getTaggerMock();
262 $tagger->expects($this->once())
265 $graby = $this->getMockBuilder('Graby\Graby')
266 ->setMethods(['fetchContent'])
267 ->disableOriginalConstructor()
270 $graby->expects($this->any())
271 ->method('fetchContent')
273 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
274 'title' => 'this is my title',
275 'url' => 'http://1.1.1.1',
276 'content_type' => 'text/html',
280 'og_title' => 'my OG title',
281 'og_description' => 'OG desc',
282 'og_image' => 'http://3.3.3.3/cover.jpg',
286 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
287 $entry = new Entry(new User());
288 $proxy->updateEntry($entry, 'http://0.0.0.0');
290 $this->assertSame('http://1.1.1.1', $entry->getUrl());
291 $this->assertSame('this is my title', $entry->getTitle());
292 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
293 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
294 $this->assertSame('text/html', $entry->getMimetype());
295 $this->assertSame('fr', $entry->getLanguage());
296 $this->assertSame('200', $entry->getHttpStatus());
297 $this->assertSame(0.0, $entry->getReadingTime());
298 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fixture declares the language 'dontexist'. The validator is built without
// its default stub and is expected to be called exactly once, returning a
// violation list holding one violation on 'language'.
// Expected outcome: the rejected language is not stored (getLanguage() is null)
// while the other metadata are still filled.
301 public function testWithContentAndBadLanguage()
303 $tagger = $this->getTaggerMock();
304 $tagger->expects($this->once())
307 $validator = $this->getValidator(false);
308 $validator->expects($this->once())
310 ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
312 $graby = $this->getMockBuilder('Graby\Graby')
313 ->setMethods(['fetchContent'])
314 ->disableOriginalConstructor()
317 $graby->expects($this->any())
318 ->method('fetchContent')
320 'html' => str_repeat('this is my content', 325),
321 'title' => 'this is my title',
322 'url' => 'http://1.1.1.1',
323 'content_type' => 'text/html',
324 'language' => 'dontexist',
328 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
329 $entry = new Entry(new User());
330 $proxy->updateEntry($entry, 'http://0.0.0.0');
332 $this->assertSame('http://1.1.1.1', $entry->getUrl());
333 $this->assertSame('this is my title', $entry->getTitle());
334 $this->assertContains('content', $entry->getContent());
335 $this->assertSame('text/html', $entry->getMimetype());
336 $this->assertNull($entry->getLanguage());
337 $this->assertSame('200', $entry->getHttpStatus());
338 $this->assertSame(4.0, $entry->getReadingTime());
339 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fixture declares the unusable OG image 'https://'. The validator is
// expected to be called exactly twice: the first call returns an empty
// violation list, the second returns one violation on 'url'.
// Expected outcome: the language is kept ('fr') but the preview picture is
// null.
342 public function testWithContentAndBadOgImage()
344 $tagger = $this->getTaggerMock();
345 $tagger->expects($this->once())
348 $validator = $this->getValidator(false);
349 $validator->expects($this->exactly(2))
351 ->will($this->onConsecutiveCalls(
352 new ConstraintViolationList(),
353 new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
356 $graby = $this->getMockBuilder('Graby\Graby')
357 ->setMethods(['fetchContent'])
358 ->disableOriginalConstructor()
361 $graby->expects($this->any())
362 ->method('fetchContent')
364 'html' => str_repeat('this is my content', 325),
365 'title' => 'this is my title',
366 'url' => 'http://1.1.1.1',
367 'content_type' => 'text/html',
371 'og_title' => 'my OG title',
372 'og_description' => 'OG desc',
373 'og_image' => 'https://',
377 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
378 $entry = new Entry(new User());
379 $proxy->updateEntry($entry, 'http://0.0.0.0');
381 $this->assertSame('http://1.1.1.1', $entry->getUrl());
382 $this->assertSame('this is my title', $entry->getTitle());
383 $this->assertContains('content', $entry->getContent());
384 $this->assertNull($entry->getPreviewPicture());
385 $this->assertSame('text/html', $entry->getMimetype());
386 $this->assertSame('fr', $entry->getLanguage());
387 $this->assertSame('200', $entry->getHttpStatus());
388 $this->assertSame(4.0, $entry->getReadingTime());
389 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Uses a real Graby instance and builds the proxy with `true` as its final
// constructor argument. The fixture visible here carries a Unix-timestamp
// 'date', three 'authors' and a 'Cache-Control' header.
// NOTE(review): the call handing this fixture to the proxy is not visible in
// this excerpt; presumably it is passed to updateEntry() as pre-fetched content.
// Expected outcome: the timestamp is stored as the publication date
// (24/03/2014), every author appears in getPublishedBy(), and the headers are
// stored (non-null, containing 'no-cache').
392 public function testWithForcedContent()
394 $tagger = $this->getTaggerMock();
395 $tagger->expects($this->once())
398 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
399 $entry = new Entry(new User());
404 'html' => str_repeat('this is my content', 325),
405 'title' => 'this is my title',
406 'url' => 'http://1.1.1.1',
407 'content_type' => 'text/html',
409 'date' => '1395635872',
410 'authors' => ['Jeremy', 'Nico', 'Thomas'],
412 'Cache-Control' => 'no-cache',
417 $this->assertSame('http://1.1.1.1', $entry->getUrl());
418 $this->assertSame('this is my title', $entry->getTitle());
419 $this->assertContains('content', $entry->getContent());
420 $this->assertSame('text/html', $entry->getMimetype());
421 $this->assertSame('fr', $entry->getLanguage());
422 $this->assertSame(4.0, $entry->getReadingTime());
423 $this->assertSame('1.1.1.1', $entry->getDomainName());
424 $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
425 $this->assertContains('Jeremy', $entry->getPublishedBy());
426 $this->assertContains('Nico', $entry->getPublishedBy());
427 $this->assertContains('Thomas', $entry->getPublishedBy());
428 $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
429 $this->assertContains('no-cache', $entry->getHeaders());
// Same forced-content fixture, but the 'date' is an ISO-8601 style datetime
// string ('2016-09-08T11:55:58+0200') instead of a timestamp; the proxy logs
// to a Logger backed by a TestHandler.
// Expected outcome: the datetime string is parsed and stored as the
// publication date (08/09/2016).
432 public function testWithForcedContentAndDatetime()
434 $tagger = $this->getTaggerMock();
435 $tagger->expects($this->once())
438 $logHandler = new TestHandler();
439 $logger = new Logger('test', [$logHandler]);
441 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
442 $entry = new Entry(new User());
447 'html' => str_repeat('this is my content', 325),
448 'title' => 'this is my title',
449 'url' => 'http://1.1.1.1',
450 'content_type' => 'text/html',
452 'date' => '2016-09-08T11:55:58+0200',
456 $this->assertSame('http://1.1.1.1', $entry->getUrl());
457 $this->assertSame('this is my title', $entry->getTitle());
458 $this->assertContains('content', $entry->getContent());
459 $this->assertSame('text/html', $entry->getMimetype());
460 $this->assertSame('fr', $entry->getLanguage());
461 $this->assertSame(4.0, $entry->getReadingTime());
462 $this->assertSame('1.1.1.1', $entry->getDomainName());
463 $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
// Same forced-content fixture, but the 'date' ('01 02 2012') is not parseable.
// Expected outcome: no publication date is stored (null), and the TestHandler
// attached to the logger captured three records, the first of which contains
// 'Error while defining date'.
466 public function testWithForcedContentAndBadDate()
468 $tagger = $this->getTaggerMock();
469 $tagger->expects($this->once())
472 $logger = new Logger('foo');
473 $handler = new TestHandler();
474 $logger->pushHandler($handler);
476 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
477 $entry = new Entry(new User());
482 'html' => str_repeat('this is my content', 325),
483 'title' => 'this is my title',
484 'url' => 'http://1.1.1.1',
485 'content_type' => 'text/html',
487 'date' => '01 02 2012',
491 $this->assertSame('http://1.1.1.1', $entry->getUrl());
492 $this->assertSame('this is my title', $entry->getTitle());
493 $this->assertContains('content', $entry->getContent());
494 $this->assertSame('text/html', $entry->getMimetype());
495 $this->assertSame('fr', $entry->getLanguage());
496 $this->assertSame(4.0, $entry->getReadingTime());
497 $this->assertSame('1.1.1.1', $entry->getDomainName());
498 $this->assertNull($entry->getPublishedAt());
500 $records = $handler->getRecords();
502 $this->assertCount(3, $records);
503 $this->assertContains('Error while defining date', $records[0]['message']);
// The tagger mock is configured to throw a plain \Exception on its single
// expected call. Expected outcome: the entry ends up with zero tags.
// NOTE(review): the call that triggers the update is not visible in this
// excerpt, so how the exception is absorbed cannot be confirmed from here.
506 public function testTaggerThrowException()
508 $tagger = $this->getTaggerMock();
509 $tagger->expects($this->once())
511 ->will($this->throwException(new \
Exception()));
513 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
514 $entry = new Entry(new User());
519 'html' => str_repeat('this is my content', 325),
520 'title' => 'this is my title',
521 'url' => 'http://1.1.1.1',
522 'content_type' => 'text/html',
527 $this->assertCount(0, $entry->getTags());
// Data provider referenced by the @dataProvider annotation of
// testWithCrazyHtmlContent. The entries visible here are HTML snippets that
// embed a <script> element, one of them wrapped in an IE conditional comment.
530 public function dataForCrazyHtml()
533 'script and comment' => [
534 '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
538 '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
545 * @dataProvider dataForCrazyHtml
547 public function testWithCrazyHtmlContent($html, $escapedString)
// Runs once per dataset of dataForCrazyHtml.
// Expected outcome: $escapedString no longer appears in the stored content,
// while URL, title, OG preview picture, mimetype, language, HTTP status and
// domain name are filled as in the nominal case.
// NOTE(review): the fixture row that injects $html is not visible in this
// excerpt.
549 $tagger = $this->getTaggerMock();
550 $tagger->expects($this->once())
553 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
554 $entry = new Entry(new User());
560 'title' => 'this is my title',
561 'url' => 'http://1.1.1.1',
562 'content_type' => 'text/html',
566 'og_title' => 'my OG title',
567 'og_description' => 'OG desc',
568 'og_image' => 'http://3.3.3.3/cover.jpg',
573 $this->assertSame('http://1.1.1.1', $entry->getUrl());
574 $this->assertSame('this is my title', $entry->getTitle());
575 $this->assertNotContains($escapedString, $entry->getContent());
576 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
577 $this->assertSame('text/html', $entry->getMimetype());
578 $this->assertSame('fr', $entry->getLanguage());
579 $this->assertSame('200', $entry->getHttpStatus());
580 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The fetched resource is itself an image (content type 'image/jpeg', url
// ending in image.jpg). Expected outcome: the image URL is used as the entry
// URL and as its preview picture, and it appears in the stored content.
583 public function testWithImageAsContent()
585 $tagger = $this->getTaggerMock();
586 $tagger->expects($this->once())
589 $graby = $this->getMockBuilder('Graby\Graby')
590 ->setMethods(['fetchContent'])
591 ->disableOriginalConstructor()
594 $graby->expects($this->any())
595 ->method('fetchContent')
597 'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
598 'title' => 'this is my title',
599 'url' => 'http://1.1.1.1/image.jpg',
600 'content_type' => 'image/jpeg',
605 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
606 $entry = new Entry(new User());
607 $proxy->updateEntry($entry, 'http://0.0.0.0');
609 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
610 $this->assertSame('this is my title', $entry->getTitle());
611 $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
612 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
613 $this->assertSame('image/jpeg', $entry->getMimetype());
614 $this->assertSame('200', $entry->getHttpStatus());
615 $this->assertSame('1.1.1.1', $entry->getDomainName());
// A 'text/html' page whose title is already valid UTF-8 must keep that title
// byte-for-byte. Titles are built from and compared as hex strings so the exact
// byte sequence is checked.
618 public function testWebsiteWithValidUTF8Title_doNothing()
620 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
621 // See http://graphemica.com for more info about the characters
622 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
623 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
625 $tagger = $this->getTaggerMock();
626 $tagger->expects($this->once())
629 $graby = $this->getMockBuilder('Graby\Graby')
630 ->setMethods(['fetchContent'])
631 ->disableOriginalConstructor()
634 $graby->expects($this->any())
635 ->method('fetchContent')
638 'title' => $actualTitle,
640 'content_type' => 'text/html',
644 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
645 $entry = new Entry(new User());
646 $proxy->updateEntry($entry, 'http://0.0.0.0');
648 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
649 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
650 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// A 'text/html' page whose title contains a byte that is not valid UTF-8 (0x80)
// must have that byte stripped, leaving only the valid characters.
653 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
655 // See http://graphemica.com for more info about the characters
656 // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
657 // The correct UTF-8 € character (U+20AC) is E282AC
658 $actualTitle = $this->hexToStr('61' . '80' . '62');
660 $tagger = $this->getTaggerMock();
661 $tagger->expects($this->once())
664 $graby = $this->getMockBuilder('Graby\Graby')
665 ->setMethods(['fetchContent'])
666 ->disableOriginalConstructor()
669 $graby->expects($this->any())
670 ->method('fetchContent')
673 'title' => $actualTitle,
675 'content_type' => 'text/html',
679 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
680 $entry = new Entry(new User());
681 $proxy->updateEntry($entry, 'http://0.0.0.0');
683 // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
684 $expectedTitle = '61' . '62';
685 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose title arrives as UTF-16BE bytes
// (D83DDE3B) must be stored as the equivalent UTF-8 bytes (F09F98BB).
688 public function testPdfWithUTF16BETitle_convertToUTF8()
690 // See http://graphemica.com for more info about the characters
691 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
692 $actualTitle = $this->hexToStr('D83DDE3B');
694 $tagger = $this->getTaggerMock();
695 $tagger->expects($this->once())
698 $graby = $this->getMockBuilder('Graby\Graby')
699 ->setMethods(['fetchContent'])
700 ->disableOriginalConstructor()
703 $graby->expects($this->any())
704 ->method('fetchContent')
707 'title' => $actualTitle,
709 'content_type' => 'application/pdf',
713 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
714 $entry = new Entry(new User());
715 $proxy->updateEntry($entry, 'http://0.0.0.0');
717 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
718 $expectedTitle = 'F09F98BB';
719 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose title is already UTF-8 (F09F98BB) must
// keep that title byte-for-byte.
722 public function testPdfWithUTF8Title_doNothing()
724 // See http://graphemica.com for more info about the characters
725 // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
726 $actualTitle = $this->hexToStr('F09F98BB');
728 $tagger = $this->getTaggerMock();
729 $tagger->expects($this->once())
732 $graby = $this->getMockBuilder('Graby\Graby')
733 ->setMethods(['fetchContent'])
734 ->disableOriginalConstructor()
737 $graby->expects($this->any())
738 ->method('fetchContent')
741 'title' => $actualTitle,
743 'content_type' => 'application/pdf',
747 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
748 $entry = new Entry(new User());
749 $proxy->updateEntry($entry, 'http://0.0.0.0');
751 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
752 $expectedTitle = 'F09F98BB';
753 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose title is the single WINDOWS-1252 byte
// 0x80 (the euro sign) must be stored as the UTF-8 euro sign (E282AC).
756 public function testPdfWithWINDOWS1252Title_convertToUTF8()
758 // See http://graphemica.com for more info about the characters
759 // '€' (80) in hexadecimal and WINDOWS-1252
760 $actualTitle = $this->hexToStr('80');
762 $tagger = $this->getTaggerMock();
763 $tagger->expects($this->once())
766 $graby = $this->getMockBuilder('Graby\Graby')
767 ->setMethods(['fetchContent'])
768 ->disableOriginalConstructor()
771 $graby->expects($this->any())
772 ->method('fetchContent')
775 'title' => $actualTitle,
777 'content_type' => 'application/pdf',
781 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
782 $entry = new Entry(new User());
783 $proxy->updateEntry($entry, 'http://0.0.0.0');
785 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
786 $expectedTitle = 'E282AC';
787 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An 'application/pdf' document whose otherwise valid UTF-8 title contains the
// stray byte 0x81 must have that byte removed while the surrounding characters
// are kept unchanged.
790 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
792 // See http://graphemica.com for more info about the characters
793 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
794 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
795 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
797 $tagger = $this->getTaggerMock();
798 $tagger->expects($this->once())
801 $graby = $this->getMockBuilder('Graby\Graby')
802 ->setMethods(['fetchContent'])
803 ->disableOriginalConstructor()
806 $graby->expects($this->any())
807 ->method('fetchContent')
810 'title' => $actualTitle,
812 'content_type' => 'application/pdf',
816 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
817 $entry = new Entry(new User());
818 $proxy->updateEntry($entry, 'http://0.0.0.0');
820 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
821 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
822 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
823 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
827 * Data provider for testWithChangedUrl.
829 * Arrays contain the following values:
833 * $expected_entry_url
834 * $expected_origin_url
837 public function dataForChangedUrl()
// Each named dataset feeds the parameters of testWithChangedUrl in order:
// $entry_url, $origin_url, $content_url, $expected_entry_url,
// $expected_origin_url, $expected_domain.
// NOTE(review): several rows of each dataset are not visible in this excerpt,
// so positions cannot be read off the visible strings alone.
848 'origin already set' => [
856 'trailing slash' => [
857 'https://example.com/hello-world',
859 'https://example.com/hello-world/',
860 'https://example.com/hello-world/',
864 'query string in fetched content' => [
865 'https://example.org/hello',
867 'https://example.org/hello?world=1',
868 'https://example.org/hello?world=1',
869 'https://example.org/hello',
872 'fragment in fetched content' => [
873 'https://example.org/hello',
875 'https://example.org/hello#world',
876 'https://example.org/hello',
880 'fragment and query string in fetched content' => [
881 'https://example.org/hello',
883 'https://example.org/hello?foo#world',
884 'https://example.org/hello?foo#world',
885 'https://example.org/hello',
888 'different path and query string in fetch content' => [
889 'https://example.org/hello',
891 'https://example.org/world?foo',
892 'https://example.org/world?foo',
893 'https://example.org/hello',
896 'feedproxy ignore list test' => [
897 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
899 'https://example.org/hello-wallabag',
900 'https://example.org/hello-wallabag',
904 'feedproxy ignore list test with origin url already set' => [
905 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
906 'https://example.org/this-is-source',
907 'https://example.org/hello-wallabag',
908 'https://example.org/hello-wallabag',
909 'https://example.org/this-is-source',
912 'lemonde ignore pattern test' => [
913 'http://www.lemonde.fr/tiny/url',
915 'http://example.com/hello-world',
916 'http://example.com/hello-world',
924 * @dataProvider dataForChangedUrl
926 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
// Runs once per dataset of dataForChangedUrl. The entry's origin URL is preset
// to $origin_url and the fetched fixture reports $content_url as its 'url'.
// Expected outcome: the entry URL, domain name and origin URL match the three
// $expected_* values.
// NOTE(review): the statements that use $entry_url and trigger the update are
// not visible in this excerpt.
928 $tagger = $this->getTaggerMock();
929 $tagger->expects($this->once())
932 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
933 $entry = new Entry(new User());
934 $entry->setOriginUrl($origin_url);
941 'url' => $content_url,
942 'content_type' => '',
948 $this->assertSame($expected_entry_url, $entry->getUrl());
949 $this->assertSame($expected_domain, $entry->getDomainName());
950 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
/**
 * Convert a string into its uppercase hexadecimal byte representation.
 *
 * Every byte becomes exactly two hex digits (e.g. "az" => "617A"), which lets
 * the tests compare titles byte-for-byte regardless of their encoding.
 *
 * This replaces the hand-rolled ord()/dechex() loop adapted from
 * https://stackoverflow.com/a/18506801 with the native bin2hex(), which
 * produces the same two-digits-per-byte output.
 *
 * @param string|null $string raw bytes to encode; null is treated as an empty string
 *
 * @return string uppercase hex digits, two per input byte ('' for empty input)
 */
private function strToHex($string)
{
    // The cast keeps a null title (entry without title) from reaching a
    // string function as null.
    return strtoupper(bin2hex((string) $string));
}
/**
 * Convert a hexadecimal string into the raw bytes it describes.
 *
 * The input is consumed two hex digits at a time; a trailing unpaired digit is
 * ignored. Adapted from https://stackoverflow.com/a/18506801.
 *
 * @param string $hex hex digits, two per output byte
 *
 * @return string the decoded bytes
 */
private function hexToStr($hex)
{
    $decoded = '';
    // Index of the last position at which a complete two-digit pair can start.
    $lastPairStart = \strlen($hex) - 1;
    $offset = 0;

    while ($offset < $lastPairStart) {
        $pair = $hex[$offset] . $hex[$offset + 1];
        $decoded .= \chr(hexdec($pair));
        $offset += 2;
    }

    return $decoded;
}
/**
 * Build a RuleBasedTagger mock on which only tag() is replaced.
 *
 * The original constructor is skipped, so no collaborators are needed; each
 * test attaches its own expectation to the returned mock.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
/**
 * Provide a logger that discards everything, for tests that do not inspect
 * log records.
 */
private function getLogger()
{
    $silentLogger = new NullLogger();

    return $silentLogger;
}
1002 private function getValidator($withDefaultMock = true)
1004 $mock = $this->getMockBuilder(RecursiveValidator
::class)
1005 ->setMethods(['validate'])
1006 ->disableOriginalConstructor()
1009 if ($withDefaultMock) {
1010 $mock->expects($this->any())
1011 ->method('validate')
1012 ->willReturn(new ConstraintViolationList());