3 namespace Tests\Wallabag\CoreBundle\Helper
;
6 use Monolog\Handler\TestHandler
;
8 use PHPUnit\Framework\TestCase
;
9 use Psr\Log\NullLogger
;
10 use Symfony\Component\Validator\ConstraintViolation
;
11 use Symfony\Component\Validator\ConstraintViolationList
;
12 use Symfony\Component\Validator\Validator\RecursiveValidator
;
13 use Wallabag\CoreBundle\Entity\Entry
;
14 use Wallabag\CoreBundle\Helper\ContentProxy
;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger
;
16 use Wallabag\UserBundle\Entity\User
;
18 class ContentProxyTest
extends TestCase
/**
 * Error message handed to every ContentProxy built in this class.
 * The failure-path tests assert that it is (the start of) the entry content.
 */
20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
/**
 * Updates an entry from the bad URL 'http://user@:80' through a ContentProxy
 * built with a mocked Graby, and expects the tagger mock to be used once.
 *
 * Expected result: the URL is kept as given, the content equals the
 * fetching-error message, title / preview picture / mimetype / language are
 * empty, the reading time is 0.0 and the domain name is null.
 *
 * NOTE(review): the value stubbed for fetchContent is not visible in this excerpt.
 */
22 public function testWithBadUrl()
24 $tagger = $this->getTaggerMock();
25 $tagger->expects($this->once())
28 $graby = $this->getMockBuilder('Graby\Graby')
29 ->setMethods(['fetchContent'])
30 ->disableOriginalConstructor()
33 $graby->expects($this->any())
34 ->method('fetchContent')
43 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
44 $entry = new Entry(new User());
45 $proxy->updateEntry($entry, 'http://user@:80');
47 $this->assertSame('http://user@:80', $entry->getUrl());
48 $this->assertEmpty($entry->getTitle());
49 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
50 $this->assertEmpty($entry->getPreviewPicture());
51 $this->assertEmpty($entry->getMimetype());
52 $this->assertEmpty($entry->getLanguage());
53 $this->assertSame(0.0, $entry->getReadingTime());
54 $this->assertNull($entry->getDomainName());
/**
 * Updates an entry from 'http://0.0.0.0' through a ContentProxy built with a
 * mocked Graby, and expects the tagger mock to be used once.
 *
 * Expected result: the URL is kept, the content equals the fetching-error
 * message, title / preview picture / mimetype / language are empty, the
 * reading time is 0.0 and the domain name is '0.0.0.0'.
 *
 * NOTE(review): the value stubbed for fetchContent is not visible in this excerpt.
 */
57 public function testWithEmptyContent()
59 $tagger = $this->getTaggerMock();
60 $tagger->expects($this->once())
63 $graby = $this->getMockBuilder('Graby\Graby')
64 ->setMethods(['fetchContent'])
65 ->disableOriginalConstructor()
68 $graby->expects($this->any())
69 ->method('fetchContent')
78 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
79 $entry = new Entry(new User());
80 $proxy->updateEntry($entry, 'http://0.0.0.0');
82 $this->assertSame('http://0.0.0.0', $entry->getUrl());
83 $this->assertEmpty($entry->getTitle());
84 $this->assertSame($this->fetchingErrorMessage
, $entry->getContent());
85 $this->assertEmpty($entry->getPreviewPicture());
86 $this->assertEmpty($entry->getMimetype());
87 $this->assertEmpty($entry->getLanguage());
88 $this->assertSame(0.0, $entry->getReadingTime());
89 $this->assertSame('0.0.0.0', $entry->getDomainName());
/**
 * Updates an entry from 'http://domain.io' when the fetched data carries an
 * empty content type but an Open Graph title ('my title') and description ('desc').
 *
 * Expected result: the title is the OG title, the content is the
 * fetching-error message followed by the "short description" paragraph and
 * the OG description, preview picture / language / HTTP status / mimetype are
 * empty, the reading time is 0.0 and the domain name is 'domain.io'.
 *
 * NOTE(review): only part of the stubbed fetchContent data is visible in this excerpt.
 */
92 public function testWithEmptyContentButOG()
94 $tagger = $this->getTaggerMock();
95 $tagger->expects($this->once())
98 $graby = $this->getMockBuilder('Graby\Graby')
99 ->setMethods(['fetchContent'])
100 ->disableOriginalConstructor()
103 $graby->expects($this->any())
104 ->method('fetchContent')
109 'content_type' => '',
113 'og_title' => 'my title',
114 'og_description' => 'desc',
118 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
119 $entry = new Entry(new User());
120 $proxy->updateEntry($entry, 'http://domain.io');
122 $this->assertSame('http://domain.io', $entry->getUrl());
123 $this->assertSame('my title', $entry->getTitle());
124 $this->assertSame($this->fetchingErrorMessage
. '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
125 $this->assertEmpty($entry->getPreviewPicture());
126 $this->assertEmpty($entry->getLanguage());
127 $this->assertEmpty($entry->getHttpStatus());
128 $this->assertEmpty($entry->getMimetype());
129 $this->assertSame(0.0, $entry->getReadingTime());
130 $this->assertSame('domain.io', $entry->getDomainName());
/**
 * Updates an entry from 'http://0.0.0.0' when the fetched data holds real
 * content: html, title, url 'http://1.1.1.1', content type 'text/html' and
 * Open Graph data including an image.
 *
 * Expected result: URL, title, content, mimetype and domain name come from
 * the fetched data, the preview picture is the OG image, the reading time is
 * 4.0, the language is 'fr' and the HTTP status is '200'.
 *
 * NOTE(review): the stubbed language and status values are not visible in this excerpt.
 */
133 public function testWithContent()
135 $tagger = $this->getTaggerMock();
136 $tagger->expects($this->once())
139 $graby = $this->getMockBuilder('Graby\Graby')
140 ->setMethods(['fetchContent'])
141 ->disableOriginalConstructor()
144 $graby->expects($this->any())
145 ->method('fetchContent')
147 'html' => str_repeat('this is my content', 325),
148 'title' => 'this is my title',
149 'url' => 'http://1.1.1.1',
150 'content_type' => 'text/html',
154 'og_title' => 'my OG title',
155 'og_description' => 'OG desc',
156 'og_image' => 'http://3.3.3.3/cover.jpg',
160 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
161 $entry = new Entry(new User());
162 $proxy->updateEntry($entry, 'http://0.0.0.0');
164 $this->assertSame('http://1.1.1.1', $entry->getUrl());
165 $this->assertSame('this is my title', $entry->getTitle());
166 $this->assertContains('this is my content', $entry->getContent());
167 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
168 $this->assertSame('text/html', $entry->getMimetype());
169 $this->assertSame('fr', $entry->getLanguage());
170 $this->assertSame('200', $entry->getHttpStatus());
171 $this->assertSame(4.0, $entry->getReadingTime());
172 $this->assertSame('1.1.1.1', $entry->getDomainName());
/**
 * Same scenario as a regular content fetch, but no 'og_image' entry is
 * visible in the fetched data.
 *
 * Expected result: the preview picture is null, while URL, title, content,
 * mimetype, language ('fr'), HTTP status ('200'), reading time (4.0) and
 * domain name are set from the fetched data.
 *
 * NOTE(review): parts of the stubbed fetchContent data are not visible in this excerpt.
 */
175 public function testWithContentAndNoOgImage()
177 $tagger = $this->getTaggerMock();
178 $tagger->expects($this->once())
181 $graby = $this->getMockBuilder('Graby\Graby')
182 ->setMethods(['fetchContent'])
183 ->disableOriginalConstructor()
186 $graby->expects($this->any())
187 ->method('fetchContent')
189 'html' => str_repeat('this is my content', 325),
190 'title' => 'this is my title',
191 'url' => 'http://1.1.1.1',
192 'content_type' => 'text/html',
196 'og_title' => 'my OG title',
197 'og_description' => 'OG desc',
202 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
203 $entry = new Entry(new User());
204 $proxy->updateEntry($entry, 'http://0.0.0.0');
206 $this->assertSame('http://1.1.1.1', $entry->getUrl());
207 $this->assertSame('this is my title', $entry->getTitle());
208 $this->assertContains('this is my content', $entry->getContent());
209 $this->assertNull($entry->getPreviewPicture());
210 $this->assertSame('text/html', $entry->getMimetype());
211 $this->assertSame('fr', $entry->getLanguage());
212 $this->assertSame('200', $entry->getHttpStatus());
213 $this->assertSame(4.0, $entry->getReadingTime());
214 $this->assertSame('1.1.1.1', $entry->getDomainName());
/**
 * Fetched data carries the language 'dontexist' and the validator mock is set
 * up (without the default stub) to report one violation, exactly once.
 *
 * Expected result: the entry language is null, while URL, title, content,
 * mimetype, HTTP status ('200'), reading time (4.0) and domain name are set
 * from the fetched data.
 */
217 public function testWithContentAndBadLanguage()
219 $tagger = $this->getTaggerMock();
220 $tagger->expects($this->once())
// Validator without the default stub so this test controls its single answer.
223 $validator = $this->getValidator(false);
224 $validator->expects($this->once())
226 ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));
228 $graby = $this->getMockBuilder('Graby\Graby')
229 ->setMethods(['fetchContent'])
230 ->disableOriginalConstructor()
233 $graby->expects($this->any())
234 ->method('fetchContent')
236 'html' => str_repeat('this is my content', 325),
237 'title' => 'this is my title',
238 'url' => 'http://1.1.1.1',
239 'content_type' => 'text/html',
240 'language' => 'dontexist',
244 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
245 $entry = new Entry(new User());
246 $proxy->updateEntry($entry, 'http://0.0.0.0');
248 $this->assertSame('http://1.1.1.1', $entry->getUrl());
249 $this->assertSame('this is my title', $entry->getTitle());
250 $this->assertContains('this is my content', $entry->getContent());
251 $this->assertSame('text/html', $entry->getMimetype());
252 $this->assertNull($entry->getLanguage());
253 $this->assertSame('200', $entry->getHttpStatus());
254 $this->assertSame(4.0, $entry->getReadingTime());
255 $this->assertSame('1.1.1.1', $entry->getDomainName());
/**
 * Fetched data carries the Open Graph image 'https://' and the validator mock
 * is expected to be called exactly twice: the first call returns no
 * violation, the second returns one violation for the value 'https://'.
 *
 * Expected result: the preview picture is null, while URL, title, content,
 * mimetype, language ('fr'), HTTP status ('200'), reading time (4.0) and
 * domain name are set from the fetched data.
 */
258 public function testWithContentAndBadOgImage()
260 $tagger = $this->getTaggerMock();
261 $tagger->expects($this->once())
// Validator without the default stub so the two consecutive answers below apply.
264 $validator = $this->getValidator(false);
265 $validator->expects($this->exactly(2))
267 ->will($this->onConsecutiveCalls(
268 new ConstraintViolationList(),
269 new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
272 $graby = $this->getMockBuilder('Graby\Graby')
273 ->setMethods(['fetchContent'])
274 ->disableOriginalConstructor()
277 $graby->expects($this->any())
278 ->method('fetchContent')
280 'html' => str_repeat('this is my content', 325),
281 'title' => 'this is my title',
282 'url' => 'http://1.1.1.1',
283 'content_type' => 'text/html',
287 'og_title' => 'my OG title',
288 'og_description' => 'OG desc',
289 'og_image' => 'https://',
293 $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage
);
294 $entry = new Entry(new User());
295 $proxy->updateEntry($entry, 'http://0.0.0.0');
297 $this->assertSame('http://1.1.1.1', $entry->getUrl());
298 $this->assertSame('this is my title', $entry->getTitle());
299 $this->assertContains('this is my content', $entry->getContent());
300 $this->assertNull($entry->getPreviewPicture());
301 $this->assertSame('text/html', $entry->getMimetype());
302 $this->assertSame('fr', $entry->getLanguage());
303 $this->assertSame('200', $entry->getHttpStatus());
304 $this->assertSame(4.0, $entry->getReadingTime());
305 $this->assertSame('1.1.1.1', $entry->getDomainName());
/**
 * Builds a ContentProxy around a real Graby instance (extra constructor
 * argument `true`) and checks an entry populated from the content array
 * listed below, which includes a numeric 'date', an 'authors' list and a
 * 'Cache-Control' header value.
 *
 * Expected result: URL, title, content, mimetype, reading time (4.0) and
 * domain name match the array; the publication date formats as '24/03/2014';
 * the three authors appear in getPublishedBy(); the headers are not null and
 * contain 'no-cache'; the language is 'fr'.
 *
 * NOTE(review): the updateEntry call that receives this array is not visible
 * in this excerpt; presumably the array is passed as forced content.
 */
308 public function testWithForcedContent()
310 $tagger = $this->getTaggerMock();
311 $tagger->expects($this->once())
314 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
315 $entry = new Entry(new User());
320 'html' => str_repeat('this is my content', 325),
321 'title' => 'this is my title',
322 'url' => 'http://1.1.1.1',
323 'content_type' => 'text/html',
325 'date' => '1395635872',
326 'authors' => ['Jeremy', 'Nico', 'Thomas'],
328 'Cache-Control' => 'no-cache',
333 $this->assertSame('http://1.1.1.1', $entry->getUrl());
334 $this->assertSame('this is my title', $entry->getTitle());
335 $this->assertContains('this is my content', $entry->getContent());
336 $this->assertSame('text/html', $entry->getMimetype());
337 $this->assertSame('fr', $entry->getLanguage());
338 $this->assertSame(4.0, $entry->getReadingTime());
339 $this->assertSame('1.1.1.1', $entry->getDomainName());
340 $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
341 $this->assertContains('Jeremy', $entry->getPublishedBy());
342 $this->assertContains('Nico', $entry->getPublishedBy());
343 $this->assertContains('Thomas', $entry->getPublishedBy());
344 $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
345 $this->assertContains('no-cache', $entry->getHeaders());
/**
 * Builds a ContentProxy around a real Graby instance and a Logger named
 * 'test' backed by a TestHandler, then checks an entry populated from the
 * content array listed below, whose 'date' is the datetime string
 * '2016-09-08T11:55:58+0200'.
 *
 * Expected result: URL, title, content, mimetype, reading time (4.0) and
 * domain name match the array, the language is 'fr' and the publication date
 * formats as '08/09/2016'.
 *
 * NOTE(review): the updateEntry call that receives this array is not visible
 * in this excerpt; presumably the array is passed as forced content.
 */
348 public function testWithForcedContentAndDatetime()
350 $tagger = $this->getTaggerMock();
351 $tagger->expects($this->once())
354 $logHandler = new TestHandler();
355 $logger = new Logger('test', [$logHandler]);
357 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
358 $entry = new Entry(new User());
363 'html' => str_repeat('this is my content', 325),
364 'title' => 'this is my title',
365 'url' => 'http://1.1.1.1',
366 'content_type' => 'text/html',
368 'date' => '2016-09-08T11:55:58+0200',
372 $this->assertSame('http://1.1.1.1', $entry->getUrl());
373 $this->assertSame('this is my title', $entry->getTitle());
374 $this->assertContains('this is my content', $entry->getContent());
375 $this->assertSame('text/html', $entry->getMimetype());
376 $this->assertSame('fr', $entry->getLanguage());
377 $this->assertSame(4.0, $entry->getReadingTime());
378 $this->assertSame('1.1.1.1', $entry->getDomainName());
379 $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
/**
 * Builds a ContentProxy around a real Graby instance and a Logger named 'foo'
 * with a TestHandler pushed onto it, then checks an entry populated from the
 * content array listed below, whose 'date' is the string '01 02 2012'.
 *
 * Expected result: the regular fields are set from the array, the
 * publication date is null, and the handler captured exactly one record
 * whose message contains 'Error while defining date'.
 *
 * NOTE(review): the updateEntry call that receives this array is not visible
 * in this excerpt; presumably the array is passed as forced content.
 */
382 public function testWithForcedContentAndBadDate()
384 $tagger = $this->getTaggerMock();
385 $tagger->expects($this->once())
388 $logger = new Logger('foo');
389 $handler = new TestHandler();
390 $logger->pushHandler($handler);
392 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage
);
393 $entry = new Entry(new User());
398 'html' => str_repeat('this is my content', 325),
399 'title' => 'this is my title',
400 'url' => 'http://1.1.1.1',
401 'content_type' => 'text/html',
403 'date' => '01 02 2012',
407 $this->assertSame('http://1.1.1.1', $entry->getUrl());
408 $this->assertSame('this is my title', $entry->getTitle());
409 $this->assertContains('this is my content', $entry->getContent());
410 $this->assertSame('text/html', $entry->getMimetype());
411 $this->assertSame('fr', $entry->getLanguage());
412 $this->assertSame(4.0, $entry->getReadingTime());
413 $this->assertSame('1.1.1.1', $entry->getDomainName());
414 $this->assertNull($entry->getPublishedAt());
// The records captured by the TestHandler are inspected to check the logged error.
416 $records = $handler->getRecords();
418 $this->assertCount(1, $records);
419 $this->assertContains('Error while defining date', $records[0]['message']);
/**
 * Configures the tagger mock to throw a plain \Exception on its single
 * expected call and checks that the entry ends up with zero tags.
 *
 * NOTE(review): the updateEntry call is not visible in this excerpt; the
 * array entries below are presumably the content handed to it.
 */
422 public function testTaggerThrowException()
424 $tagger = $this->getTaggerMock();
425 $tagger->expects($this->once())
427 ->will($this->throwException(new \
Exception()));
429 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
430 $entry = new Entry(new User());
435 'html' => str_repeat('this is my content', 325),
436 'title' => 'this is my title',
437 'url' => 'http://1.1.1.1',
438 'content_type' => 'text/html',
443 $this->assertCount(0, $entry->getTags());
/**
 * Data provider referenced by the @dataProvider annotation of
 * testWithCrazyHtmlContent($html, $escapedString).
 *
 * The visible dataset 'script and comment' pairs HTML that hides a script tag
 * inside a conditional comment with a second string holding the bare script tag.
 *
 * NOTE(review): further datasets may exist on lines not visible in this excerpt.
 */
446 public function dataForCrazyHtml()
449 'script and comment' => [
450 '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
454 '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
/**
 * Runs once per dataset of dataForCrazyHtml and checks that the stored entry
 * content does NOT contain $escapedString, while URL, title, preview
 * picture, mimetype, language ('fr'), HTTP status ('200') and domain name are
 * set as expected.
 *
 * NOTE(review): the updateEntry call and the place where $html is used are
 * not visible in this excerpt.
 */
461 * @dataProvider dataForCrazyHtml
463 public function testWithCrazyHtmlContent($html, $escapedString)
465 $tagger = $this->getTaggerMock();
466 $tagger->expects($this->once())
469 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
470 $entry = new Entry(new User());
476 'title' => 'this is my title',
477 'url' => 'http://1.1.1.1',
478 'content_type' => 'text/html',
482 'og_title' => 'my OG title',
483 'og_description' => 'OG desc',
484 'og_image' => 'http://3.3.3.3/cover.jpg',
489 $this->assertSame('http://1.1.1.1', $entry->getUrl());
490 $this->assertSame('this is my title', $entry->getTitle());
491 $this->assertNotContains($escapedString, $entry->getContent());
492 $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
493 $this->assertSame('text/html', $entry->getMimetype());
494 $this->assertSame('fr', $entry->getLanguage());
495 $this->assertSame('200', $entry->getHttpStatus());
496 $this->assertSame('1.1.1.1', $entry->getDomainName());
/**
 * Updates an entry from 'http://0.0.0.0' when the fetched data describes an
 * image: content type 'image/jpeg', url 'http://1.1.1.1/image.jpg' and html
 * that wraps that image in an <img> tag.
 *
 * Expected result: the entry URL and the preview picture are both the image
 * URL, the content contains the image URL, the mimetype is 'image/jpeg', the
 * HTTP status is '200' and the domain name is '1.1.1.1'.
 *
 * NOTE(review): parts of the stubbed fetchContent data are not visible in this excerpt.
 */
499 public function testWithImageAsContent()
501 $tagger = $this->getTaggerMock();
502 $tagger->expects($this->once())
505 $graby = $this->getMockBuilder('Graby\Graby')
506 ->setMethods(['fetchContent'])
507 ->disableOriginalConstructor()
510 $graby->expects($this->any())
511 ->method('fetchContent')
513 'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
514 'title' => 'this is my title',
515 'url' => 'http://1.1.1.1/image.jpg',
516 'content_type' => 'image/jpeg',
521 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
522 $entry = new Entry(new User());
523 $proxy->updateEntry($entry, 'http://0.0.0.0');
525 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
526 $this->assertSame('this is my title', $entry->getTitle());
527 $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
528 $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
529 $this->assertSame('image/jpeg', $entry->getMimetype());
530 $this->assertSame('200', $entry->getHttpStatus());
531 $this->assertSame('1.1.1.1', $entry->getDomainName());
/**
 * A 'text/html' page whose title is already valid UTF-8 must keep that title
 * untouched: the hex dump of the stored title equals the hex of the input bytes.
 */
534 public function testWebsiteWithValidUTF8Title_doNothing()
536 // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
537 // See http://graphemica.com for more info about the characters
538 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
539 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');
541 $tagger = $this->getTaggerMock();
542 $tagger->expects($this->once())
545 $graby = $this->getMockBuilder('Graby\Graby')
546 ->setMethods(['fetchContent'])
547 ->disableOriginalConstructor()
550 $graby->expects($this->any())
551 ->method('fetchContent')
554 'title' => $actualTitle,
556 'content_type' => 'text/html',
560 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
561 $entry = new Entry(new User());
562 $proxy->updateEntry($entry, 'http://0.0.0.0');
564 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
565 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
566 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
/**
 * A 'text/html' page whose title holds the byte 0x80 between 'a' and 'b'
 * must end up with the title 'ab': the hex dump of the stored title is '6162'.
 */
569 public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
571 // See http://graphemica.com for more info about the characters
572 // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 character.
573 // The correct UTF-8 € character (U+20AC) is E282AC
574 $actualTitle = $this->hexToStr('61' . '80' . '62');
576 $tagger = $this->getTaggerMock();
577 $tagger->expects($this->once())
580 $graby = $this->getMockBuilder('Graby\Graby')
581 ->setMethods(['fetchContent'])
582 ->disableOriginalConstructor()
585 $graby->expects($this->any())
586 ->method('fetchContent')
589 'title' => $actualTitle,
591 'content_type' => 'text/html',
595 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
596 $entry = new Entry(new User());
597 $proxy->updateEntry($entry, 'http://0.0.0.0');
599 // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
600 $expectedTitle = '61' . '62';
601 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
/**
 * An 'application/pdf' document whose title bytes are D83DDE3B (UTF-16BE)
 * must be stored with the UTF-8 bytes F09F98BB.
 */
604 public function testPdfWithUTF16BETitle_convertToUTF8()
606 // See http://graphemica.com for more info about the characters
607 // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
608 $actualTitle = $this->hexToStr('D83DDE3B');
610 $tagger = $this->getTaggerMock();
611 $tagger->expects($this->once())
614 $graby = $this->getMockBuilder('Graby\Graby')
615 ->setMethods(['fetchContent'])
616 ->disableOriginalConstructor()
619 $graby->expects($this->any())
620 ->method('fetchContent')
623 'title' => $actualTitle,
625 'content_type' => 'application/pdf',
629 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
630 $entry = new Entry(new User());
631 $proxy->updateEntry($entry, 'http://0.0.0.0');
633 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
634 $expectedTitle = 'F09F98BB';
635 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
/**
 * An 'application/pdf' document whose title is already the UTF-8 bytes
 * F09F98BB must be stored with exactly those bytes.
 */
638 public function testPdfWithUTF8Title_doNothing()
640 // See http://graphemica.com for more info about the characters
641 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
642 $actualTitle = $this->hexToStr('F09F98BB');
644 $tagger = $this->getTaggerMock();
645 $tagger->expects($this->once())
648 $graby = $this->getMockBuilder('Graby\Graby')
649 ->setMethods(['fetchContent'])
650 ->disableOriginalConstructor()
653 $graby->expects($this->any())
654 ->method('fetchContent')
657 'title' => $actualTitle,
659 'content_type' => 'application/pdf',
663 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
664 $entry = new Entry(new User());
665 $proxy->updateEntry($entry, 'http://0.0.0.0');
667 // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
668 $expectedTitle = 'F09F98BB';
669 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
/**
 * An 'application/pdf' document whose title is the single byte 0x80
 * (the euro sign in WINDOWS-1252) must be stored with the UTF-8 bytes E282AC.
 */
672 public function testPdfWithWINDOWS1252Title_convertToUTF8()
674 // See http://graphemica.com for more info about the characters
675 // '€' (80) in hexadecimal and WINDOWS-1252
676 $actualTitle = $this->hexToStr('80');
678 $tagger = $this->getTaggerMock();
679 $tagger->expects($this->once())
682 $graby = $this->getMockBuilder('Graby\Graby')
683 ->setMethods(['fetchContent'])
684 ->disableOriginalConstructor()
687 $graby->expects($this->any())
688 ->method('fetchContent')
691 'title' => $actualTitle,
693 'content_type' => 'application/pdf',
697 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
698 $entry = new Entry(new User());
699 $proxy->updateEntry($entry, 'http://0.0.0.0');
701 // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
702 $expectedTitle = 'E282AC';
703 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
/**
 * An 'application/pdf' document whose UTF-8 title contains the stray byte
 * 0x81 must be stored without that byte: the hex dump of the stored title is
 * the input hex minus '81'.
 */
706 public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
708 // See http://graphemica.com for more info about the characters
709 // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
710 // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
711 $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');
713 $tagger = $this->getTaggerMock();
714 $tagger->expects($this->once())
717 $graby = $this->getMockBuilder('Graby\Graby')
718 ->setMethods(['fetchContent'])
719 ->disableOriginalConstructor()
722 $graby->expects($this->any())
723 ->method('fetchContent')
726 'title' => $actualTitle,
728 'content_type' => 'application/pdf',
732 $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
);
733 $entry = new Entry(new User());
734 $proxy->updateEntry($entry, 'http://0.0.0.0');
736 // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
737 // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
738 $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
739 $this->assertSame($expectedTitle, $this->strToHex($entry->getTitle()));
// Named datasets consumed by testWithChangedUrl, whose parameters are, in order:
// $entry_url, $origin_url, $content_url, $expected_entry_url,
// $expected_origin_url, $expected_domain.
// NOTE(review): several rows of each dataset are not visible in this excerpt,
// so the position of a visible value within its dataset cannot be confirmed here.
743 * Data provider for testWithChangedUrl.
745 * Arrays contain the following values:
749 * $expected_entry_url
750 * $expected_origin_url
753 public function dataForChangedUrl()
764 'origin already set' => [
772 'trailing slash' => [
773 'https://example.com/hello-world',
775 'https://example.com/hello-world/',
776 'https://example.com/hello-world/',
780 'query string in fetched content' => [
781 'https://example.org/hello',
783 'https://example.org/hello?world=1',
784 'https://example.org/hello?world=1',
785 'https://example.org/hello',
788 'fragment in fetched content' => [
789 'https://example.org/hello',
791 'https://example.org/hello#world',
792 'https://example.org/hello',
796 'fragment and query string in fetched content' => [
797 'https://example.org/hello',
799 'https://example.org/hello?foo#world',
800 'https://example.org/hello?foo#world',
801 'https://example.org/hello',
804 'different path and query string in fetch content' => [
805 'https://example.org/hello',
807 'https://example.org/world?foo',
808 'https://example.org/world?foo',
809 'https://example.org/hello',
812 'feedproxy ignore list test' => [
813 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
815 'https://example.org/hello-wallabag',
816 'https://example.org/hello-wallabag',
820 'feedproxy ignore list test with origin url already set' => [
821 'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
822 'https://example.org/this-is-source',
823 'https://example.org/hello-wallabag',
824 'https://example.org/hello-wallabag',
825 'https://example.org/this-is-source',
828 'lemonde ignore pattern test' => [
829 'http://www.lemonde.fr/tiny/url',
831 'http://example.com/hello-world',
832 'http://example.com/hello-world',
// Runs once per dataset of dataForChangedUrl: sets the entry's origin URL to
// $origin_url, then asserts the resulting entry URL, domain name and origin
// URL against the three $expected_* values.
// NOTE(review): the updateEntry call and the use of $entry_url are not visible
// in this excerpt; the 'url' => $content_url entry is presumably part of the
// content handed to the proxy.
840 * @dataProvider dataForChangedUrl
842 public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
844 $tagger = $this->getTaggerMock();
845 $tagger->expects($this->once())
848 $proxy = new ContentProxy((new Graby()), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage
, true);
849 $entry = new Entry(new User());
850 $entry->setOriginUrl($origin_url);
857 'url' => $content_url,
858 'content_type' => '',
864 $this->assertSame($expected_entry_url, $entry->getUrl());
865 $this->assertSame($expected_domain, $entry->getDomainName());
866 $this->assertSame($expected_origin_url, $entry->getOriginUrl());
// Walks $string one byte at a time, turns each byte into a zero-padded
// two-digit hex code and returns the concatenation in upper case.
// NOTE(review): the initialisation of $hex is not visible in this excerpt.
870 * https://stackoverflow.com/a/18506801.
876 private function strToHex($string)
879 for ($i = 0; $i < \
strlen($string); ++
$i) {
880 $ord = \
ord($string[$i]);
881 $hexCode = dechex($ord);
// Prefix a '0' and keep the last two digits so single-digit codes are padded.
882 $hex .= substr('0' . $hexCode, -2);
885 return strtoupper($hex);
// Reads $hex two characters at a time and appends the byte whose value is
// that hex pair to $string.
// NOTE(review): the initialisation of $string and the return statement are
// not visible in this excerpt.
889 * https://stackoverflow.com/a/18506801.
895 private function hexToStr($hex)
898 for ($i = 0; $i < \
strlen($hex) - 1; $i +
= 2) {
899 $string .= \
chr(hexdec($hex[$i] . $hex[$i +
1]));
/**
 * Starts a mock builder for RuleBasedTagger that only replaces the 'tag'
 * method and skips the original constructor, and returns the result.
 *
 * NOTE(review): the end of the builder chain is not visible in this excerpt.
 */
905 private function getTaggerMock()
907 return $this->getMockBuilder(RuleBasedTagger
::class)
908 ->setMethods(['tag'])
909 ->disableOriginalConstructor()
/**
 * Returns a fresh PSR NullLogger, used by the tests that do not inspect log output.
 */
913 private function getLogger()
915 return new NullLogger();
/**
 * Builds a RecursiveValidator mock that only replaces the 'validate' method
 * and skips the original constructor.
 *
 * When $withDefaultMock is true, the mock is stubbed to return an empty
 * ConstraintViolationList for any number of calls; tests pass false to set
 * their own expectations on the returned mock.
 *
 * NOTE(review): the stubbed method name and the return statement are not
 * visible in this excerpt.
 */
918 private function getValidator($withDefaultMock = true)
920 $mock = $this->getMockBuilder(RecursiveValidator
::class)
921 ->setMethods(['validate'])
922 ->disableOriginalConstructor()
925 if ($withDefaultMock) {
926 $mock->expects($this->any())
928 ->willReturn(new ConstraintViolationList());