3 namespace Tests\Wallabag\CoreBundle\Helper
;
6 use Monolog\Handler\TestHandler
;
8 use PHPUnit\Framework\TestCase
;
9 use Psr\Log\NullLogger
;
10 use Symfony\Component\Validator\ConstraintViolation
;
11 use Symfony\Component\Validator\ConstraintViolationList
;
12 use Symfony\Component\Validator\Validator\RecursiveValidator
;
13 use Wallabag\CoreBundle\Entity\Entry
;
14 use Wallabag\CoreBundle\Helper\ContentProxy
;
15 use Wallabag\CoreBundle\Helper\RuleBasedIgnoreOriginProcessor
;
16 use Wallabag\CoreBundle\Helper\RuleBasedTagger
;
17 use Wallabag\UserBundle\Entity\User
;
19 class ContentProxyTest
extends TestCase
// Error message handed to every ContentProxy built in this class; several tests assert
// that it ends up as the entry content.
21 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
// Calls updateEntry() with the malformed URL 'http://user@:80' and checks the resulting entry:
// the URL is kept as given, the content is the fetching error message, title, preview picture,
// mimetype and language are empty, the reading time is 0.0 and the domain name is null.
23 public function testWithBadUrl()
25 $tagger = $this->getTaggerMock();
26 $tagger->expects($this->once())
29 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
31 $graby = $this->getMockBuilder('Graby\Graby')
32 ->setMethods(['fetchContent'])
33 ->disableOriginalConstructor()
36 $graby->expects($this->any())
37 ->method('fetchContent')
48 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
49 $entry = new Entry(new User());
50 $proxy->updateEntry($entry, 'http://user@:80');
52 $this->assertSame('http://user@:80', $entry->getUrl());
53 $this->assertEmpty($entry->getTitle());
54 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
55 $this->assertEmpty($entry->getPreviewPicture());
56 $this->assertEmpty($entry->getMimetype());
57 $this->assertEmpty($entry->getLanguage());
58 $this->assertSame(0.0, $entry->getReadingTime());
59 $this->assertNull($entry->getDomainName());
// Calls updateEntry() for 'http://0.0.0.0' with a mocked Graby and checks the resulting entry:
// the content is the fetching error message, title, preview picture, mimetype and language
// are empty, the reading time is 0.0 and the domain name is '0.0.0.0'.
62 public function testWithEmptyContent()
64 $tagger = $this->getTaggerMock();
65 $tagger->expects($this->once())
68 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
70 $graby = $this->getMockBuilder('Graby\Graby')
71 ->setMethods(['fetchContent'])
72 ->disableOriginalConstructor()
75 $graby->expects($this->any())
76 ->method('fetchContent')
87 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
88 $entry = new Entry(new User());
89 $proxy->updateEntry($entry, 'http://0.0.0.0');
91 $this->assertSame('http://0.0.0.0', $entry->getUrl());
92 $this->assertEmpty($entry->getTitle());
93 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
94 $this->assertEmpty($entry->getPreviewPicture());
95 $this->assertEmpty($entry->getMimetype());
96 $this->assertEmpty($entry->getLanguage());
97 $this->assertSame(0.0, $entry->getReadingTime());
98 $this->assertSame('0.0.0.0', $entry->getDomainName());
// The mocked fetch result carries a title ('my title') and a description ('desc') with an
// empty content-type. The test asserts that the title is stored and that the content is the
// fetching error message followed by the "short description" paragraph and the description.
101 public function testWithEmptyContentButOG()
103 $tagger = $this->getTaggerMock();
104 $tagger->expects($this->once())
107 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
109 $graby = $this->getMockBuilder('Graby\Graby')
110 ->setMethods(['fetchContent'])
111 ->disableOriginalConstructor()
114 $graby->expects($this->any())
115 ->method('fetchContent')
118 'title' => 'my title',
121 'content-type' => '',
125 'description' => 'desc',
128 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
129 $entry = new Entry(new User());
130 $proxy->updateEntry($entry, 'http://domain.io');
132 $this->assertSame('http://domain.io', $entry->getUrl());
133 $this->assertSame('my title', $entry->getTitle());
134 $this->assertSame($this->fetchingErrorMessage
. '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
135 $this->assertEmpty($entry->getPreviewPicture());
136 $this->assertEmpty($entry->getLanguage());
137 $this->assertEmpty($entry->getHttpStatus());
138 $this->assertEmpty($entry->getMimetype());
139 $this->assertSame(0.0, $entry->getReadingTime());
140 $this->assertSame('domain.io', $entry->getDomainName());
// The mocked fetch result carries html, a title, the url 'http://1.1.1.1', a description,
// an image and a text/html content-type. The test asserts that the entry takes the fetched
// url, title, image (as preview picture) and mimetype, and that language is 'fr', the HTTP
// status is '200', the reading time is 4.0 and the domain name is '1.1.1.1'.
143 public function testWithContent()
145 $tagger = $this->getTaggerMock();
146 $tagger->expects($this->once())
149 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
150 $ruleBasedIgnoreOriginProcessor->expects($this->once())
153 $graby = $this->getMockBuilder('Graby\Graby')
154 ->setMethods(['fetchContent'])
155 ->disableOriginalConstructor()
158 $graby->expects($this->any())
159 ->method('fetchContent')
161 'html' => str_repeat('this is my content', 325),
162 'title' => 'this is my title',
163 'url' => 'http://1.1.1.1',
166 'description' => 'OG desc',
167 'image' => 'http://3.3.3.3/cover.jpg',
169 'content-type' => 'text/html',
173 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
174 $entry = new Entry(new User());
175 $proxy->updateEntry($entry, 'http://0.0.0.0');
177 $this->assertSame('http://1.1.1.1', $entry->getUrl());
178 $this->assertSame('this is my title', $entry->getTitle());
179 $this->assertContains('content', $entry->getContent());
180 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
181 $this->assertSame('text/html', $entry->getMimetype());
182 $this->assertSame('fr', $entry->getLanguage());
183 $this->assertSame('200', $entry->getHttpStatus());
184 $this->assertSame(4.0, $entry->getReadingTime());
185 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Same scenario as the plain content test (html, title, url, description, text/html), but
// here the test asserts that the preview picture stays null while the other fields
// (url, title, mimetype, language, HTTP status, reading time, domain name) are filled in.
188 public function testWithContentAndNoOgImage()
190 $tagger = $this->getTaggerMock();
191 $tagger->expects($this->once())
194 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
195 $ruleBasedIgnoreOriginProcessor->expects($this->once())
198 $graby = $this->getMockBuilder('Graby\Graby')
199 ->setMethods(['fetchContent'])
200 ->disableOriginalConstructor()
203 $graby->expects($this->any())
204 ->method('fetchContent')
206 'html' => str_repeat('this is my content', 325),
207 'title' => 'this is my title',
208 'url' => 'http://1.1.1.1',
211 'description' => 'OG desc',
214 'content-type' => 'text/html',
218 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
219 $entry = new Entry(new User());
220 $proxy->updateEntry($entry, 'http://0.0.0.0');
222 $this->assertSame('http://1.1.1.1', $entry->getUrl());
223 $this->assertSame('this is my title', $entry->getTitle());
224 $this->assertContains('content', $entry->getContent());
225 $this->assertNull($entry->getPreviewPicture());
226 $this->assertSame('text/html', $entry->getMimetype());
227 $this->assertSame('fr', $entry->getLanguage());
228 $this->assertSame('200', $entry->getHttpStatus());
229 $this->assertSame(4.0, $entry->getReadingTime());
230 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The mocked html contains an <img> pointing at 'http://3.3.3.3/cover.jpg'. The test asserts
// that the html is stored unchanged as content and that this image URL becomes the preview
// picture; the reading time for this short content is expected to be 0.0.
233 public function testWithContentAndContentImage()
235 $tagger = $this->getTaggerMock();
236 $tagger->expects($this->once())
239 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
240 $ruleBasedIgnoreOriginProcessor->expects($this->once())
243 $graby = $this->getMockBuilder('Graby\Graby')
244 ->setMethods(['fetchContent'])
245 ->disableOriginalConstructor()
248 $graby->expects($this->any())
249 ->method('fetchContent')
251 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
252 'title' => 'this is my title',
253 'url' => 'http://1.1.1.1',
255 'content-type' => 'text/html',
262 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
263 $entry = new Entry(new User());
264 $proxy->updateEntry($entry, 'http://0.0.0.0');
266 $this->assertSame('http://1.1.1.1', $entry->getUrl());
267 $this->assertSame('this is my title', $entry->getTitle());
268 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
269 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
270 $this->assertSame('text/html', $entry->getMimetype());
271 $this->assertSame('fr', $entry->getLanguage());
272 $this->assertSame('200', $entry->getHttpStatus());
273 $this->assertSame(0.0, $entry->getReadingTime());
274 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The mocked html embeds 'nevermind.jpg' while the fetch result also provides an 'image'
// value of 'http://3.3.3.3/cover.jpg'. The test asserts that the preview picture is the
// 'image' value (cover.jpg), not the image found inside the html.
277 public function testWithContentImageAndOgImage()
279 $tagger = $this->getTaggerMock();
280 $tagger->expects($this->once())
283 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
284 $ruleBasedIgnoreOriginProcessor->expects($this->once())
287 $graby = $this->getMockBuilder('Graby\Graby')
288 ->setMethods(['fetchContent'])
289 ->disableOriginalConstructor()
292 $graby->expects($this->any())
293 ->method('fetchContent')
295 'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
296 'title' => 'this is my title',
297 'url' => 'http://1.1.1.1',
299 'content-type' => 'text/html',
303 'image' => 'http://3.3.3.3/cover.jpg',
306 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
307 $entry = new Entry(new User());
308 $proxy->updateEntry($entry, 'http://0.0.0.0');
310 $this->assertSame('http://1.1.1.1', $entry->getUrl());
311 $this->assertSame('this is my title', $entry->getTitle());
312 $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
313 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
314 $this->assertSame('text/html', $entry->getMimetype());
315 $this->assertSame('fr', $entry->getLanguage());
316 $this->assertSame('200', $entry->getHttpStatus());
317 $this->assertSame(0.0, $entry->getReadingTime());
318 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Uses a validator mock (built without the default stub) that is expected to be called once
// and returns a violation on 'language' for the value 'dontexist', which is also the
// language in the mocked fetch result. The test asserts that the entry language stays null
// while the other fetched fields are applied.
321 public function testWithContentAndBadLanguage()
323 $tagger = $this->getTaggerMock();
324 $tagger->expects($this->once())
327 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
328 $ruleBasedIgnoreOriginProcessor->expects($this->once())
331 $validator = $this->getValidator(false);
332 $validator->expects($this->once())
334 ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
336 $graby = $this->getMockBuilder('Graby\Graby')
337 ->setMethods(['fetchContent'])
338 ->disableOriginalConstructor()
341 $graby->expects($this->any())
342 ->method('fetchContent')
344 'html' => str_repeat('this is my content', 325),
345 'title' => 'this is my title',
346 'url' => 'http://1.1.1.1',
347 'language' => 'dontexist',
350 'content-type' => 'text/html',
354 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
355 $entry = new Entry(new User());
356 $proxy->updateEntry($entry, 'http://0.0.0.0');
358 $this->assertSame('http://1.1.1.1', $entry->getUrl());
359 $this->assertSame('this is my title', $entry->getTitle());
360 $this->assertContains('content', $entry->getContent());
361 $this->assertSame('text/html', $entry->getMimetype());
362 $this->assertNull($entry->getLanguage());
363 $this->assertSame('200', $entry->getHttpStatus());
364 $this->assertSame(4.0, $entry->getReadingTime());
365 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Uses a validator mock expected to be called exactly twice: the first call returns an empty
// violation list, the second returns a violation on 'url' for the value 'https://', which is
// the 'image' value of the mocked fetch result. The test asserts that the preview picture
// stays null while language ('fr') and the other fetched fields are applied.
368 public function testWithContentAndBadOgImage()
370 $tagger = $this->getTaggerMock();
371 $tagger->expects($this->once())
374 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
375 $ruleBasedIgnoreOriginProcessor->expects($this->once())
378 $validator = $this->getValidator(false);
379 $validator->expects($this->exactly(2))
381 ->will($this->onConsecutiveCalls(
382 new ConstraintViolationList(),
383 new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
386 $graby = $this->getMockBuilder('Graby\Graby')
387 ->setMethods(['fetchContent'])
388 ->disableOriginalConstructor()
391 $graby->expects($this->any())
392 ->method('fetchContent')
394 'html' => str_repeat('this is my content', 325),
395 'title' => 'this is my title',
396 'url' => 'http://1.1.1.1',
398 'content-type' => 'text/html',
402 'description' => 'OG desc',
403 'image' => 'https://',
406 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
407 $entry = new Entry(new User());
408 $proxy->updateEntry($entry, 'http://0.0.0.0');
410 $this->assertSame('http://1.1.1.1', $entry->getUrl());
411 $this->assertSame('this is my title', $entry->getTitle());
412 $this->assertContains('content', $entry->getContent());
413 $this->assertNull($entry->getPreviewPicture());
414 $this->assertSame('text/html', $entry->getMimetype());
415 $this->assertSame('fr', $entry->getLanguage());
416 $this->assertSame('200', $entry->getHttpStatus());
417 $this->assertSame(4.0, $entry->getReadingTime());
418 $this->assertSame('1.1.1.1', $entry->getDomainName());
// Builds the proxy around a real Graby instance (no fetchContent mock) and passes an extra
// `true` constructor argument. The supplied content includes a date of '1395635872', three
// authors and a 'cache-control' header; the test asserts a published date of 24/03/2014,
// that each author appears in getPublishedBy() and that the headers contain 'no-cache'.
421 public function testWithForcedContent()
423 $tagger = $this->getTaggerMock();
424 $tagger->expects($this->once())
427 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
428 $ruleBasedIgnoreOriginProcessor->expects($this->once())
431 $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
432 $entry = new Entry(new User());
437 'html' => str_repeat('this is my content', 325),
438 'title' => 'this is my title',
439 'url' => 'http://1.1.1.1',
441 'date' => '1395635872',
442 'authors' => ['Jeremy', 'Nico', 'Thomas'],
444 'cache-control' => 'no-cache',
445 'content-type' => 'text/html',
450 $this->assertSame('http://1.1.1.1', $entry->getUrl());
451 $this->assertSame('this is my title', $entry->getTitle());
452 $this->assertContains('content', $entry->getContent());
453 $this->assertSame('text/html', $entry->getMimetype());
454 $this->assertSame('fr', $entry->getLanguage());
455 $this->assertSame(4.0, $entry->getReadingTime());
456 $this->assertSame('1.1.1.1', $entry->getDomainName());
457 $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
458 $this->assertContains('Jeremy', $entry->getPublishedBy());
459 $this->assertContains('Nico', $entry->getPublishedBy());
460 $this->assertContains('Thomas', $entry->getPublishedBy());
461 $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
462 $this->assertContains('no-cache', $entry->getHeaders());
// Supplies content whose date is the datetime string '2016-09-08T11:55:58+0200' and asserts
// that the published date formats as 08/09/2016. The proxy is given a Logger wired to a
// TestHandler instead of the NullLogger used by most tests.
465 public function testWithForcedContentAndDatetime()
467 $tagger = $this->getTaggerMock();
468 $tagger->expects($this->once())
471 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
473 $logHandler = new TestHandler();
474 $logger = new Logger('test', [$logHandler]);
476 $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
477 $entry = new Entry(new User());
482 'html' => str_repeat('this is my content', 325),
483 'title' => 'this is my title',
484 'url' => 'http://1.1.1.1',
486 'date' => '2016-09-08T11:55:58+0200',
488 'content-type' => 'text/html',
493 $this->assertSame('http://1.1.1.1', $entry->getUrl());
494 $this->assertSame('this is my title', $entry->getTitle());
495 $this->assertContains('content', $entry->getContent());
496 $this->assertSame('text/html', $entry->getMimetype());
497 $this->assertSame('fr', $entry->getLanguage());
498 $this->assertSame(4.0, $entry->getReadingTime());
499 $this->assertSame('1.1.1.1', $entry->getDomainName());
500 $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
// Supplies content whose date is '01 02 2012' and asserts that no published date is set.
// The logger records are captured through a TestHandler: the test expects three records and
// that the first message contains 'Error while defining date'.
503 public function testWithForcedContentAndBadDate()
505 $tagger = $this->getTaggerMock();
506 $tagger->expects($this->once())
509 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
511 $logger = new Logger('foo');
512 $handler = new TestHandler();
513 $logger->pushHandler($handler);
515 $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
516 $entry = new Entry(new User());
521 'html' => str_repeat('this is my content', 325),
522 'title' => 'this is my title',
523 'url' => 'http://1.1.1.1',
525 'date' => '01 02 2012',
527 'content-type' => 'text/html',
532 $this->assertSame('http://1.1.1.1', $entry->getUrl());
533 $this->assertSame('this is my title', $entry->getTitle());
534 $this->assertContains('content', $entry->getContent());
535 $this->assertSame('text/html', $entry->getMimetype());
536 $this->assertSame('fr', $entry->getLanguage());
537 $this->assertSame(4.0, $entry->getReadingTime());
538 $this->assertSame('1.1.1.1', $entry->getDomainName());
539 $this->assertNull($entry->getPublishedAt());
541 $records = $handler->getRecords();
543 $this->assertCount(3, $records);
544 $this->assertContains('Error while defining date', $records[0]['message']);
// Configures the tagger mock to throw an \Exception and asserts that the entry ends up
// with zero tags.
547 public function testTaggerThrowException()
549 $tagger = $this->getTaggerMock();
550 $tagger->expects($this->once())
552 ->will($this->throwException(new \
Exception()));
554 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
556 $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
557 $entry = new Entry(new User());
562 'html' => str_repeat('this is my content', 325),
563 'title' => 'this is my title',
564 'url' => 'http://1.1.1.1',
567 'content-type' => 'text/html',
572 $this->assertCount(0, $entry->getTags());
// Data provider referenced by the @dataProvider annotation of testWithCrazyHtmlContent.
// The visible 'script and comment' dataset contains html with a <script> inside a
// conditional comment, and a string with the bare <script> tag.
575 public function dataForCrazyHtml()
578 'script and comment' => [
579 '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
583 '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
590 * @dataProvider dataForCrazyHtml
592 public function testWithCrazyHtmlContent($html, $escapedString)
// Receives one dataset ($html, $escapedString) and asserts that the stored entry content
// does NOT contain $escapedString, while url, title, preview picture, mimetype, language,
// HTTP status and domain name match the supplied content.
594 $tagger = $this->getTaggerMock();
595 $tagger->expects($this->once())
598 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
600 $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
601 $entry = new Entry(new User());
607 'title' => 'this is my title',
608 'url' => 'http://1.1.1.1',
611 //'og_title' => 'my OG title',
612 'description' => 'OG desc',
613 'image' => 'http://3.3.3.3/cover.jpg',
615 'content-type' => 'text/html',
620 $this->assertSame('http://1.1.1.1', $entry->getUrl());
621 $this->assertSame('this is my title', $entry->getTitle());
622 $this->assertNotContains($escapedString, $entry->getContent());
623 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
624 $this->assertSame('text/html', $entry->getMimetype());
625 $this->assertSame('fr', $entry->getLanguage());
626 $this->assertSame('200', $entry->getHttpStatus());
627 $this->assertSame('1.1.1.1', $entry->getDomainName());
// The mocked fetch result describes an image resource: url 'http://1.1.1.1/image.jpg',
// content-type 'image/jpeg' and html wrapping that image in an <img> tag. The test asserts
// that the image URL is used as entry url and preview picture and appears in the content.
630 public function testWithImageAsContent()
632 $tagger = $this->getTaggerMock();
633 $tagger->expects($this->once())
636 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
638 $graby = $this->getMockBuilder('Graby\Graby')
639 ->setMethods(['fetchContent'])
640 ->disableOriginalConstructor()
643 $graby->expects($this->any())
644 ->method('fetchContent')
646 'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
647 'title' => 'this is my title',
648 'url' => 'http://1.1.1.1/image.jpg',
651 'content-type' => 'image/jpeg',
655 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
656 $entry = new Entry(new User());
657 $proxy->updateEntry($entry, 'http://0.0.0.0');
659 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
660 $this->assertSame('this is my title', $entry->getTitle());
661 $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
662 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
663 $this->assertSame('image/jpeg', $entry->getMimetype());
664 $this->assertSame('200', $entry->getHttpStatus());
665 $this->assertSame('1.1.1.1', $entry->getDomainName());
// A text/html fetch result with a valid UTF-8 title must keep that title byte-for-byte:
// the hex dump of the stored title is compared with the hex used to build the input.
668 public function testWebsiteWithValidUTF8Title_doNothing()
670 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
671 // See http://graphemica.com for more info about the characters
672 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
673 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
675 $tagger = $this->getTaggerMock();
676 $tagger->expects($this->once())
679 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
681 $graby = $this->getMockBuilder('Graby\Graby')
682 ->setMethods(['fetchContent'])
683 ->disableOriginalConstructor()
686 $graby->expects($this->any())
687 ->method('fetchContent')
690 'title' => $actualTitle,
693 'content-type' => 'text/html',
698 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
699 $entry = new Entry(new User());
700 $proxy->updateEntry($entry, 'http://0.0.0.0');
702 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
703 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
704 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// A text/html fetch result whose title contains the byte 0x80 (not valid UTF-8 on its own):
// the test expects the stored title to contain only the two surrounding bytes (61 and 62).
707 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
709 // See http://graphemica.com for more info about the characters
710 // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
711 // The correct UTF-8 € character (U+20AC) is E282AC
712 $actualTitle = $this->hexToStr('61' . '80' . '62');
714 $tagger = $this->getTaggerMock();
715 $tagger->expects($this->once())
718 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
720 $graby = $this->getMockBuilder('Graby\Graby')
721 ->setMethods(['fetchContent'])
722 ->disableOriginalConstructor()
725 $graby->expects($this->any())
726 ->method('fetchContent')
729 'title' => $actualTitle,
732 'content-type' => 'text/html',
737 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
738 $entry = new Entry(new User());
739 $proxy->updateEntry($entry, 'http://0.0.0.0');
741 // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
742 $expectedTitle = '61' . '62';
743 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An application/pdf fetch result whose title bytes are D83DDE3B (UTF-16BE): the test
// expects the stored title to be the UTF-8 bytes F09F98BB for the same character.
746 public function testPdfWithUTF16BETitle_convertToUTF8()
748 // See http://graphemica.com for more info about the characters
749 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
750 $actualTitle = $this->hexToStr('D83DDE3B');
752 $tagger = $this->getTaggerMock();
753 $tagger->expects($this->once())
756 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
758 $graby = $this->getMockBuilder('Graby\Graby')
759 ->setMethods(['fetchContent'])
760 ->disableOriginalConstructor()
763 $graby->expects($this->any())
764 ->method('fetchContent')
767 'title' => $actualTitle,
770 'content-type' => 'application/pdf',
775 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
776 $entry = new Entry(new User());
777 $proxy->updateEntry($entry, 'http://0.0.0.0');
779 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
780 $expectedTitle = 'F09F98BB';
781 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An application/pdf fetch result whose title is already UTF-8 (bytes F09F98BB): the test
// expects the stored title bytes to be unchanged.
784 public function testPdfWithUTF8Title_doNothing()
786 // See http://graphemica.com for more info about the characters
787 // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
788 $actualTitle = $this->hexToStr('F09F98BB');
790 $tagger = $this->getTaggerMock();
791 $tagger->expects($this->once())
794 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
796 $graby = $this->getMockBuilder('Graby\Graby')
797 ->setMethods(['fetchContent'])
798 ->disableOriginalConstructor()
801 $graby->expects($this->any())
802 ->method('fetchContent')
805 'title' => $actualTitle,
808 'content-type' => 'application/pdf',
813 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
814 $entry = new Entry(new User());
815 $proxy->updateEntry($entry, 'http://0.0.0.0');
817 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
818 $expectedTitle = 'F09F98BB';
819 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An application/pdf fetch result whose title is the single byte 0x80 (the euro sign in
// WINDOWS-1252): the test expects the stored title to be the UTF-8 bytes E282AC.
822 public function testPdfWithWINDOWS1252Title_convertToUTF8()
824 // See http://graphemica.com for more info about the characters
825 // '€' (80) in hexadecimal and WINDOWS-1252
826 $actualTitle = $this->hexToStr('80');
828 $tagger = $this->getTaggerMock();
829 $tagger->expects($this->once())
832 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
834 $graby = $this->getMockBuilder('Graby\Graby')
835 ->setMethods(['fetchContent'])
836 ->disableOriginalConstructor()
839 $graby->expects($this->any())
840 ->method('fetchContent')
843 'title' => $actualTitle,
846 'content-type' => 'application/pdf',
851 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
852 $entry = new Entry(new User());
853 $proxy->updateEntry($entry, 'http://0.0.0.0');
855 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
856 $expectedTitle = 'E282AC';
857 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// An application/pdf fetch result whose otherwise valid UTF-8 title contains the stray byte
// 0x81: the test expects the stored title to be the same bytes with 0x81 dropped.
860 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
862 // See http://graphemica.com for more info about the characters
863 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
864 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
865 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
867 $tagger = $this->getTaggerMock();
868 $tagger->expects($this->once())
871 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
873 $graby = $this->getMockBuilder('Graby\Graby')
874 ->setMethods(['fetchContent'])
875 ->disableOriginalConstructor()
878 $graby->expects($this->any())
879 ->method('fetchContent')
882 'title' => $actualTitle,
885 'content-type' => 'application/pdf',
890 $proxy = new ContentProxy($graby, $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
891 $entry = new Entry(new User());
892 $proxy->updateEntry($entry, 'http://0.0.0.0');
894 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
895 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
896 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
897 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
901 * Data provider for testWithChangedUrl.
903 * Arrays contain the following values:
907 * $expected_entry_url
908 * $expected_origin_url
912 public function dataForChangedUrl()
// Named datasets consumed by testWithChangedUrl. Each dataset label describes the URL
// situation being exercised (trailing slash, query string, fragment, ignore rules, ...).
924 'origin already set' => [
933 'trailing slash' => [
934 'https://example.com/hello-world',
936 'https://example.com/hello-world/',
937 'https://example.com/hello-world/',
942 'query string in fetched content' => [
943 'https://example.org/hello',
945 'https://example.org/hello?world=1',
946 'https://example.org/hello?world=1',
947 'https://example.org/hello',
951 'fragment in fetched content' => [
952 'https://example.org/hello',
954 'https://example.org/hello#world',
955 'https://example.org/hello',
960 'fragment and query string in fetched content' => [
961 'https://example.org/hello',
963 'https://example.org/hello?foo#world',
964 'https://example.org/hello?foo#world',
965 'https://example.org/hello',
969 'different path and query string in fetch content' => [
970 'https://example.org/hello',
972 'https://example.org/world?foo',
973 'https://example.org/world?foo',
974 'https://example.org/hello',
978 'feedproxy ignore list test' => [
979 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
981 'https://example.org/hello-wallabag',
982 'https://example.org/hello-wallabag',
987 'feedproxy ignore list test with origin url already set' => [
988 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
989 'https://example.org/this-is-source',
990 'https://example.org/hello-wallabag',
991 'https://example.org/hello-wallabag',
992 'https://example.org/this-is-source',
996 'lemonde ignore pattern test' => [
997 'http://www.lemonde.fr/tiny/url',
999 'http://example.com/hello-world',
1000 'http://example.com/hello-world',
1009 * @dataProvider dataForChangedUrl
1011 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain, $processor_result)
// Sets $origin_url on a fresh entry, stubs the ignore-origin processor to return
// $processor_result, updates the entry with content whose url is $content_url, and then
// asserts the resulting entry url, domain name and origin url against the expected values.
1013 $tagger = $this->getTaggerMock();
1014 $tagger->expects($this->once())
1017 $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
1018 $ruleBasedIgnoreOriginProcessor->expects($this->once())
1020 ->willReturn($processor_result);
1022 $proxy = new ContentProxy((new Graby()), $tagger, $ruleBasedIgnoreOriginProcessor, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
1023 $entry = new Entry(new User());
1024 $entry->setOriginUrl($origin_url);
1025 $proxy->updateEntry(
1031 'url' => $content_url,
1033 'content-type' => '',
1040 $this->assertSame($expected_entry_url, $entry->getUrl());
1041 $this->assertSame($expected_domain, $entry->getDomainName());
1042 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
1046 * https://stackoverflow.com/a/18506801.
1052 private function strToHex($string)
// Walks $string byte by byte and returns the upper-cased hexadecimal dump.
1055 for ($i = 0; $i < \
strlen($string); ++
$i) {
1056 $ord = \
ord($string[$i]);
1057 $hexCode = dechex($ord);
// Left-pad with '0' and keep the last two characters so every byte yields exactly two hex digits.
1058 $hex .= substr('0' . $hexCode, -2);
1061 return strtoupper($hex);
1065 * Convert hex to string.
1067 * @see https://stackoverflow.com/a/18506801
1073 private function hexToStr($hex)
// Consumes $hex two characters at a time; each pair is parsed as a hexadecimal number and
// appended to $string as the byte with that value.
1076 for ($i = 0; $i < \
strlen($hex) - 1; $i +
= 2) {
1077 $string .= \
chr(hexdec($hex[$i] . $hex[$i +
1]));
// Builds a RuleBasedTagger test double with 'tag' as the only method handed to setMethods()
// and with the original constructor disabled.
1083 private function getTaggerMock()
1085 return $this->getMockBuilder(RuleBasedTagger
::class)
1086 ->setMethods(['tag'])
1087 ->disableOriginalConstructor()
// Builds a RuleBasedIgnoreOriginProcessor test double with 'process' as the only method
// handed to setMethods() and with the original constructor disabled.
1091 private function getRuleBasedIgnoreOriginProcessorMock()
1093 return $this->getMockBuilder(RuleBasedIgnoreOriginProcessor
::class)
1094 ->setMethods(['process'])
1095 ->disableOriginalConstructor()
// Returns a fresh PSR-3 NullLogger for tests that do not inspect log output.
1099 private function getLogger()
1101 return new NullLogger();
1104 private function getValidator($withDefaultMock = true)
1106 $mock = $this->getMockBuilder(RecursiveValidator
::class)
1107 ->setMethods(['validate'])
1108 ->disableOriginalConstructor()
1111 if ($withDefaultMock) {
1112 $mock->expects($this->any())
1113 ->method('validate')
1114 ->willReturn(new ConstraintViolationList());