]> git.immae.eu Git - github/wallabag/wallabag.git/blob - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Merge pull request #4438 from wallabag/dependabot/composer/scheb/two-factor-bundle...
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
1 <?php
2
3 namespace Tests\Wallabag\CoreBundle\Helper;
4
5 use Graby\Graby;
6 use Monolog\Handler\TestHandler;
7 use Monolog\Logger;
8 use PHPUnit\Framework\TestCase;
9 use Psr\Log\NullLogger;
10 use Symfony\Component\Validator\ConstraintViolation;
11 use Symfony\Component\Validator\ConstraintViolationList;
12 use Symfony\Component\Validator\Validator\RecursiveValidator;
13 use Wallabag\CoreBundle\Entity\Entry;
14 use Wallabag\CoreBundle\Helper\ContentProxy;
15 use Wallabag\CoreBundle\Helper\RuleBasedIgnoreOriginProcessor;
16 use Wallabag\CoreBundle\Helper\RuleBasedTagger;
17 use Wallabag\UserBundle\Entity\User;
18
19 class ContentProxyTest extends TestCase
20 {
21 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
22
/**
 * With an unparsable URL and nothing fetched, the entry keeps the given URL,
 * receives the fetching error message as content and ends up without a domain name.
 */
public function testWithBadUrl()
{
    $givenUrl = 'http://user@:80';
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    // Graby is stubbed so that no network call happens.
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $givenUrl);

    $this->assertSame($givenUrl, $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertNull($entry->getDomainName());
}
61
/**
 * With a valid URL but nothing fetched, the entry receives the fetching error
 * message as content and its domain name is taken from the given URL.
 */
public function testWithEmptyContent()
{
    $givenUrl = 'http://0.0.0.0';
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $givenUrl);

    $this->assertSame($givenUrl, $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('0.0.0.0', $entry->getDomainName());
}
100
/**
 * No HTML is fetched but a title and a description are: the title is kept and
 * the description is appended to the fetching error message.
 */
public function testWithEmptyContentButOG()
{
    $givenUrl = 'http://domain.io';
    $fetched = [
        'html' => false,
        'title' => 'my title',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
        'status' => '',
        'description' => 'desc',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $givenUrl);

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame($givenUrl, $entry->getUrl());
    $this->assertSame('my title', $entry->getTitle());
    $this->assertSame($expectedContent, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getLanguage());
    $this->assertEmpty($entry->getHttpStatus());
    $this->assertEmpty($entry->getMimetype());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('domain.io', $entry->getDomainName());
}
142
/**
 * Fully fetched content: every fetched field (url, title, image, mimetype,
 * language, status) is copied to the entry and the origin processor runs once.
 */
public function testWithContent()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
187
/**
 * Fetched content whose 'image' is null and whose HTML has no image:
 * the entry must end up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => null,
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
232
/**
 * Fetched 'image' is null but the HTML embeds an image: that embedded image
 * is expected as the entry's preview picture.
 */
public function testWithContentAndContentImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";
    $fetched = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'image' => null,
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
276
/**
 * Both a fetched 'image' and an image embedded in the HTML are present:
 * the fetched 'image' is expected as the preview picture, not the embedded one.
 */
public function testWithContentImageAndOgImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";
    $fetched = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'image' => 'http://3.3.3.3/cover.jpg',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
320
/**
 * The validator reports a violation for the fetched language:
 * the entry's language is expected to stay null.
 */
public function testWithContentAndBadLanguage()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'dontexist',
        'status' => '200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    // The single validation call yields one violation on the language value.
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $validatorMock,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertNull($entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
367
/**
 * The validator accepts the first validated value but reports a violation on
 * the second one (the 'https://' image): no preview picture is expected.
 */
public function testWithContentAndBadOgImage()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'https://',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    // First call: no violation. Second call: a violation on the url value.
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->willReturnOnConsecutiveCalls($noViolation, $urlViolation);

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $validatorMock,
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
420
/**
 * Content is handed directly to updateEntry(): besides the usual fields, the
 * publication date, the authors and the headers are expected on the entry.
 * The proxy is built with its seventh constructor argument set to true.
 */
public function testWithForcedContent()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();
    $originProcessorMock->expects($this->once())->method('process');

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $entry->getPublishedBy());
    $this->assertContains('Nico', $entry->getPublishedBy());
    $this->assertContains('Thomas', $entry->getPublishedBy());
    $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $entry->getHeaders());
}
464
/**
 * Forced content whose 'date' is an ISO-like datetime string:
 * the entry's publication date is expected to be 08/09/2016.
 */
public function testWithForcedContentAndDatetime()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    // A real logger backed by an in-memory handler.
    $testLogger = new Logger('test');
    $testLogger->pushHandler(new TestHandler());

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $testLogger,
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
}
502
/**
 * Forced content with the date '01 02 2012': no publication date is expected
 * on the entry, and the logger must hold three records, the first one
 * mentioning 'Error while defining date'.
 */
public function testWithForcedContentAndBadDate()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    // The in-memory handler lets the test inspect what was logged.
    $recordingHandler = new TestHandler();
    $testLogger = new Logger('foo', [$recordingHandler]);

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $testLogger,
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertNull($entry->getPublishedAt());

    $loggedRecords = $recordingHandler->getRecords();

    $this->assertCount(3, $loggedRecords);
    $this->assertContains('Error while defining date', $loggedRecords[0]['message']);
}
546
/**
 * The tagger throws while tagging: updateEntry() is still expected to
 * complete, leaving the entry without any tag.
 */
public function testTaggerThrowException()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $entry->getTags());
}
574
/**
 * Data sets for testWithCrazyHtmlContent.
 *
 * Each set is [html given as content, string that must not appear in the stored content].
 */
public function dataForCrazyHtml()
{
    $dataSets = [];

    // Script wrapped in a conditional comment.
    $dataSets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    // Plain script tag.
    $dataSets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $dataSets;
}
588
589 /**
590 * @dataProvider dataForCrazyHtml
591 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    // The provided HTML is handed directly to updateEntry() as forced content.
    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $contentProxy = new ContentProxy(
        new Graby(),
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    // The provider's second value must be absent from the stored content.
    $this->assertNotContains($escapedString, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
629
/**
 * The fetched resource is itself an image (content-type image/jpeg):
 * its URL is expected both in the content and as the preview picture.
 */
public function testWithImageAsContent()
{
    $imageUrl = 'http://1.1.1.1/image.jpg';
    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => $imageUrl,
        'status' => '200',
        'headers' => [
            'content-type' => 'image/jpeg',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame($imageUrl, $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains($imageUrl, $entry->getContent());
    $this->assertSame($imageUrl, $entry->getPreviewPicture());
    $this->assertSame('image/jpeg', $entry->getMimetype());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
667
/**
 * An HTML page whose title is already valid UTF-8: the title is expected
 * to be stored unchanged.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // https://www.online-toolz.com/tools/text-hex-convertor.php converts UTF-8 text <=> hex;
    // http://graphemica.com gives details about the characters.
    // '😻ℤz' as UTF-8 bytes: U+1F63B = F09F98BB, U+2124 = E284A4, U+007A = 7A
    $titleHex = 'F09F98BB' . 'E284A4' . '7A';
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same bytes out as in: the expected hex equals the input hex.
    $this->assertSame($titleHex, $this->strToHex($entry->getTitle()));
}
706
/**
 * An HTML page whose title contains a byte that is invalid in UTF-8:
 * that byte is expected to be dropped from the stored title.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // See http://graphemica.com for details about the characters.
    // 'a€b' in WINDOWS-1252 is 61 80 62, but a lone 80 is not valid UTF-8
    // (the UTF-8 encoding of €, U+20AC, is E282AC).
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('61' . '80' . '62'),
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Only 'ab' (61 62) remains once the invalid 80 byte has been removed.
    $this->assertSame('61' . '62', $this->strToHex($entry->getTitle()));
}
745
/**
 * A PDF whose title is encoded as UTF-16BE: the stored title is expected
 * to be the same character encoded as UTF-8.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // See http://graphemica.com for details about the characters.
    // '😻' (U+1F63B) as UTF-16BE bytes: D83DDE3B
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('D83DDE3B'),
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻' (U+1F63B) as UTF-8 bytes: F09F98BB
    $this->assertSame('F09F98BB', $this->strToHex($entry->getTitle()));
}
783
/**
 * A PDF whose title is already UTF-8: the stored title is expected to be unchanged.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // See http://graphemica.com for details about the characters.
    // '😻' (U+1F63B) as UTF-8 bytes: F09F98BB
    $titleHex = 'F09F98BB';
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same UTF-8 bytes out as in.
    $this->assertSame($titleHex, $this->strToHex($entry->getTitle()));
}
821
/**
 * A PDF whose title is encoded as WINDOWS-1252: the stored title is expected
 * to be the same character encoded as UTF-8.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // See http://graphemica.com for details about the characters.
    // '€' as a WINDOWS-1252 byte: 80
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('80'),
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '€' (U+20AC) as UTF-8 bytes: E282AC
    $this->assertSame('E282AC', $this->strToHex($entry->getTitle()));
}
859
/**
 * A PDF whose otherwise valid UTF-8 title contains a stray 0x81 byte:
 * that byte is expected to be removed from the stored title.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // See http://graphemica.com for details about the characters.
    // '😻ℤ' + 0x81 + 'z' as bytes: F09F98BB, E284A4, 81, 7A.
    // 0x81 is not a valid character in UTF-16, UTF-8 or WINDOWS-1252.
    $fetched = [
        'html' => false,
        'title' => $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A'),
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $originProcessorMock = $this->getRuleBasedIgnoreOriginProcessorMock();

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy(
        $grabyMock,
        $taggerMock,
        $originProcessorMock,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage
    );

    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻ℤz' as UTF-8 bytes (F09F98BB, E284A4, 7A): the 0x81 byte is gone.
    $this->assertSame('F09F98BB' . 'E284A4' . '7A', $this->strToHex($entry->getTitle()));
}
899
/**
 * Data provider for testWithChangedUrl.
 *
 * Every named dataset is a positional list of seven values, in this order:
 *   0. $entry_url            URL passed to updateEntry()
 *   1. $origin_url           origin URL set on the entry beforehand (or null)
 *   2. $content_url          'url' value of the pre-fetched content
 *   3. $expected_entry_url   URL expected on the entry afterwards
 *   4. $expected_origin_url  origin URL expected on the entry afterwards
 *   5. $expected_domain      domain name expected on the entry afterwards
 *   6. $processor_result     value the mocked ignore-origin processor returns
 *
 * Below, each dataset is laid out on three rows: inputs (0-2), expectations (3-5),
 * processor result (6).
 */
public function dataForChangedUrl()
{
    $datasets = [];

    $datasets['normal'] = [
        'http://0.0.0.0', null, 'http://1.1.1.1',
        'http://1.1.1.1', 'http://0.0.0.0', '1.1.1.1',
        false,
    ];

    $datasets['origin already set'] = [
        'http://0.0.0.0', 'http://hello', 'http://1.1.1.1',
        'http://1.1.1.1', 'http://hello', '1.1.1.1',
        false,
    ];

    $datasets['trailing slash'] = [
        'https://example.com/hello-world', null, 'https://example.com/hello-world/',
        'https://example.com/hello-world/', null, 'example.com',
        false,
    ];

    $datasets['query string in fetched content'] = [
        'https://example.org/hello', null, 'https://example.org/hello?world=1',
        'https://example.org/hello?world=1', 'https://example.org/hello', 'example.org',
        false,
    ];

    $datasets['fragment in fetched content'] = [
        'https://example.org/hello', null, 'https://example.org/hello#world',
        'https://example.org/hello', null, 'example.org',
        false,
    ];

    $datasets['fragment and query string in fetched content'] = [
        'https://example.org/hello', null, 'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world', 'https://example.org/hello', 'example.org',
        false,
    ];

    $datasets['different path and query string in fetch content'] = [
        'https://example.org/hello', null, 'https://example.org/world?foo',
        'https://example.org/world?foo', 'https://example.org/hello', 'example.org',
        false,
    ];

    $datasets['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', null, 'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag', null, 'example.org',
        true,
    ];

    $datasets['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld', 'https://example.org/this-is-source', 'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag', 'https://example.org/this-is-source', 'example.org',
        true,
    ];

    $datasets['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url', null, 'http://example.com/hello-world',
        'http://example.com/hello-world', null, 'example.com',
        true,
    ];

    return $datasets;
}
1007
/**
 * Feeds updateEntry() pre-fetched content whose 'url' may differ from the entry URL,
 * then checks the resulting entry URL, domain name and origin URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain, $processor_result)
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())->method('tag');

    // The ignore-origin processor must be consulted exactly once; its verdict comes from the dataset.
    $ruleBasedIgnoreOriginProcessor = $this->getRuleBasedIgnoreOriginProcessorMock();
    $ruleBasedIgnoreOriginProcessor->expects($this->once())->method('process')->willReturn($processor_result);

    $proxy = new ContentProxy(
        new Graby(),
        $tagger,
        $ruleBasedIgnoreOriginProcessor,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $entry = new Entry(new User());
    $entry->setOriginUrl($origin_url);

    // Content is supplied directly to updateEntry(); only its 'url' varies between datasets.
    $prefetchedContent = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'headers' => ['content-type' => ''],
        'language' => '',
    ];
    $proxy->updateEntry($entry, $entry_url, $prefetchedContent, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
1044
/**
 * Render a byte string as uppercase hexadecimal, two digits per byte.
 *
 * Works on raw bytes, not characters: a multi-byte UTF-8 character yields
 * one pair of hex digits per byte.
 *
 * @param string|null $string raw bytes to encode; null is treated as an empty string
 *
 * @return string uppercase hex digits, e.g. "E282AC" for the three bytes E2 82 AC
 */
private function strToHex($string)
{
    // bin2hex() already emits two zero-padded lowercase digits per byte, so only the
    // case needs adjusting. The cast makes a null argument produce '' without a notice.
    return strtoupper(bin2hex((string) $string));
}
1063
/**
 * Decode a string of hexadecimal digit pairs into the bytes they represent.
 *
 * Digits are consumed two at a time from the left; a trailing unpaired digit
 * is ignored.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param $hex
 *
 * @return string
 */
private function hexToStr($hex)
{
    $length = \strlen($hex);
    $bytes = [];

    // Stop as soon as fewer than two digits remain.
    for ($offset = 0; $offset + 1 < $length; $offset += 2) {
        $pair = substr($hex, $offset, 2);
        $bytes[] = \chr(hexdec($pair));
    }

    return implode('', $bytes);
}
1082
/**
 * Build a RuleBasedTagger mock whose constructor is skipped and whose only
 * replaced method is tag().
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1090
/**
 * Build a RuleBasedIgnoreOriginProcessor mock whose constructor is skipped and
 * whose only replaced method is process().
 */
private function getRuleBasedIgnoreOriginProcessorMock()
{
    $builder = $this->getMockBuilder(RuleBasedIgnoreOriginProcessor::class);
    $builder->setMethods(['process']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1098
/**
 * Provide a fresh NullLogger instance for ContentProxy instances built in these tests.
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
1103
/**
 * Build a RecursiveValidator mock with validate() replaced.
 *
 * @param bool $withDefaultMock when true (the default), validate() is stubbed to return
 *                              an empty ConstraintViolationList; when false the mock is
 *                              returned without any expectation so the caller can set its own
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    if (!$withDefaultMock) {
        return $validator;
    }

    $noViolations = new ConstraintViolationList();
    $validator->expects($this->any())->method('validate')->willReturn($noViolations);

    return $validator;
}
1119 }