]> git.immae.eu Git - github/wallabag/wallabag.git/blob - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
c7caac1d326a3e60c1ebcbd2532f8bbd599c94dc
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
1 <?php
2
3 namespace Tests\Wallabag\CoreBundle\Helper;
4
5 use Graby\Graby;
6 use Monolog\Handler\TestHandler;
7 use Monolog\Logger;
8 use PHPUnit\Framework\TestCase;
9 use Psr\Log\NullLogger;
10 use Symfony\Component\Validator\ConstraintViolation;
11 use Symfony\Component\Validator\ConstraintViolationList;
12 use Symfony\Component\Validator\Validator\RecursiveValidator;
13 use Wallabag\CoreBundle\Entity\Entry;
14 use Wallabag\CoreBundle\Helper\ContentProxy;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger;
16 use Wallabag\UserBundle\Entity\User;
17
/**
 * Unit tests for ContentProxy::updateEntry().
 *
 * Most tests replace Graby by a mock returning a canned fetch result, or pass
 * the content directly to updateEntry(), and then assert on the fields of the
 * resulting Entry.
 */
class ContentProxyTest extends TestCase
{
    /**
     * Message handed to every ContentProxy built here; the tests expect it to
     * become the entry content when no HTML could be fetched.
     */
    private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';

    public function testWithBadUrl()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => '',
            'url' => '',
            'content_type' => '',
            'language' => '',
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://user@:80');

        $this->assertSame('http://user@:80', $entry->getUrl());
        $this->assertEmpty($entry->getTitle());
        $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getMimetype());
        $this->assertEmpty($entry->getLanguage());
        $this->assertSame(0.0, $entry->getReadingTime());
        $this->assertNull($entry->getDomainName());
    }

    public function testWithEmptyContent()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => '',
            'url' => '',
            'content_type' => '',
            'language' => '',
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://0.0.0.0', $entry->getUrl());
        $this->assertEmpty($entry->getTitle());
        $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getMimetype());
        $this->assertEmpty($entry->getLanguage());
        $this->assertSame(0.0, $entry->getReadingTime());
        $this->assertSame('0.0.0.0', $entry->getDomainName());
    }

    public function testWithEmptyContentButOG()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => '',
            'url' => '',
            'content_type' => '',
            'language' => '',
            'status' => '',
            'open_graph' => [
                'og_title' => 'my title',
                'og_description' => 'desc',
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://domain.io');

        $this->assertSame('http://domain.io', $entry->getUrl());
        $this->assertSame('my title', $entry->getTitle());
        $this->assertSame($this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc', $entry->getContent());
        $this->assertEmpty($entry->getPreviewPicture());
        $this->assertEmpty($entry->getLanguage());
        $this->assertEmpty($entry->getHttpStatus());
        $this->assertEmpty($entry->getMimetype());
        $this->assertSame(0.0, $entry->getReadingTime());
        $this->assertSame('domain.io', $entry->getDomainName());
    }

    public function testWithContent()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => 'http://3.3.3.3/cover.jpg',
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithContentAndNoOgImage()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => null,
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertNull($entry->getPreviewPicture());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithContentAndContentImage()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => null,
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
        $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame(0.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithContentImageAndOgImage()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => 'http://3.3.3.3/cover.jpg',
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
        $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame(0.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithContentAndBadLanguage()
    {
        $tagger = $this->getTaggerExpectingOneCall();

        // The validator reports a violation for the fetched language.
        $validator = $this->getValidator(false);
        $validator->expects($this->once())
            ->method('validate')
            ->willReturn(new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist')]));

        $graby = $this->getGrabyMock([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'dontexist',
            'status' => '200',
        ]);

        $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertNull($entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithContentAndBadOgImage()
    {
        $tagger = $this->getTaggerExpectingOneCall();

        // First validate() call passes, the second one reports a violation on the url.
        $validator = $this->getValidator(false);
        $validator->expects($this->exactly(2))
            ->method('validate')
            ->will($this->onConsecutiveCalls(
                new ConstraintViolationList(),
                new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')])
            ));

        $graby = $this->getGrabyMock([
            'html' => str_repeat('this is my content', 325),
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'content_type' => 'text/html',
            'language' => 'fr',
            'status' => '200',
            'open_graph' => [
                'og_title' => 'my OG title',
                'og_description' => 'OG desc',
                'og_image' => 'https://',
            ],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $validator, $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertNull($entry->getPreviewPicture());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithForcedContent()
    {
        $tagger = $this->getTaggerExpectingOneCall();

        $proxy = new ContentProxy(new Graby(), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
        $entry = new Entry(new User());
        $proxy->updateEntry(
            $entry,
            'http://0.0.0.0',
            [
                'html' => str_repeat('this is my content', 325),
                'title' => 'this is my title',
                'url' => 'http://1.1.1.1',
                'content_type' => 'text/html',
                'language' => 'fr',
                'date' => '1395635872',
                'authors' => ['Jeremy', 'Nico', 'Thomas'],
                'all_headers' => [
                    'Cache-Control' => 'no-cache',
                ],
            ]
        );

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
        $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
        $this->assertContains('Jeremy', $entry->getPublishedBy());
        $this->assertContains('Nico', $entry->getPublishedBy());
        $this->assertContains('Thomas', $entry->getPublishedBy());
        $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
        $this->assertContains('no-cache', $entry->getHeaders());
    }

    public function testWithForcedContentAndDatetime()
    {
        $tagger = $this->getTaggerExpectingOneCall();

        $logHandler = new TestHandler();
        $logger = new Logger('test', [$logHandler]);

        $proxy = new ContentProxy(new Graby(), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry(
            $entry,
            'http://1.1.1.1',
            [
                'html' => str_repeat('this is my content', 325),
                'title' => 'this is my title',
                'url' => 'http://1.1.1.1',
                'content_type' => 'text/html',
                'language' => 'fr',
                'date' => '2016-09-08T11:55:58+0200',
            ]
        );

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
        $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
    }

    public function testWithForcedContentAndBadDate()
    {
        $tagger = $this->getTaggerExpectingOneCall();

        // A recording handler is needed here because the log records are asserted below.
        $logger = new Logger('foo');
        $handler = new TestHandler();
        $logger->pushHandler($handler);

        $proxy = new ContentProxy(new Graby(), $tagger, $this->getValidator(), $logger, $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry(
            $entry,
            'http://1.1.1.1',
            [
                'html' => str_repeat('this is my content', 325),
                'title' => 'this is my title',
                'url' => 'http://1.1.1.1',
                'content_type' => 'text/html',
                'language' => 'fr',
                'date' => '01 02 2012',
            ]
        );

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('content', $entry->getContent());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame(4.0, $entry->getReadingTime());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
        $this->assertNull($entry->getPublishedAt());

        $records = $handler->getRecords();

        $this->assertCount(3, $records);
        $this->assertContains('Error while defining date', $records[0]['message']);
    }

    public function testTaggerThrowException()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag')
            ->will($this->throwException(new \Exception()));

        $proxy = new ContentProxy(new Graby(), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry(
            $entry,
            'http://1.1.1.1',
            [
                'html' => str_repeat('this is my content', 325),
                'title' => 'this is my title',
                'url' => 'http://1.1.1.1',
                'content_type' => 'text/html',
                'language' => 'fr',
            ]
        );

        $this->assertCount(0, $entry->getTags());
    }

    /**
     * Data provider for testWithCrazyHtmlContent.
     *
     * Arrays contain the following values:
     *  $html
     *  $escapedString (string that must not appear in the stored content)
     */
    public function dataForCrazyHtml()
    {
        return [
            'script and comment' => [
                '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
                'lol',
            ],
            'script' => [
                '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
                'script',
            ],
        ];
    }

    /**
     * @dataProvider dataForCrazyHtml
     */
    public function testWithCrazyHtmlContent($html, $escapedString)
    {
        $tagger = $this->getTaggerExpectingOneCall();

        $proxy = new ContentProxy(new Graby(), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry(
            $entry,
            'http://1.1.1.1',
            [
                'html' => $html,
                'title' => 'this is my title',
                'url' => 'http://1.1.1.1',
                'content_type' => 'text/html',
                'language' => 'fr',
                'status' => '200',
                'open_graph' => [
                    'og_title' => 'my OG title',
                    'og_description' => 'OG desc',
                    'og_image' => 'http://3.3.3.3/cover.jpg',
                ],
            ]
        );

        $this->assertSame('http://1.1.1.1', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertNotContains($escapedString, $entry->getContent());
        $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
        $this->assertSame('text/html', $entry->getMimetype());
        $this->assertSame('fr', $entry->getLanguage());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWithImageAsContent()
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1/image.jpg',
            'content_type' => 'image/jpeg',
            'status' => '200',
            'open_graph' => [],
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
        $this->assertSame('this is my title', $entry->getTitle());
        $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
        $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
        $this->assertSame('image/jpeg', $entry->getMimetype());
        $this->assertSame('200', $entry->getHttpStatus());
        $this->assertSame('1.1.1.1', $entry->getDomainName());
    }

    public function testWebsiteWithValidUTF8Title_doNothing()
    {
        // You can use https://www.online-toolz.com/tools/text-hex-convertor.php to convert UTF-8 text <=> hex
        // See http://graphemica.com for more info about the characters
        // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
        $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');

        // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
        $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
        $this->assertSame($expectedTitle, $this->fetchTitleAsHex($actualTitle, 'text/html'));
    }

    public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
    {
        // See http://graphemica.com for more info about the characters
        // 'a€b' (61;80;62) in hexadecimal and WINDOWS-1252 - but 80 is an invalid UTF-8 byte.
        // The correct UTF-8 € character (U+20AC) is E282AC
        $actualTitle = $this->hexToStr('61' . '80' . '62');

        // 'ab' (61;62) because all invalid UTF-8 characters (like 80) are removed
        $expectedTitle = '61' . '62';
        $this->assertSame($expectedTitle, $this->fetchTitleAsHex($actualTitle, 'text/html'));
    }

    public function testPdfWithUTF16BETitle_convertToUTF8()
    {
        // See http://graphemica.com for more info about the characters
        // '😻' (U+1F63B;D83DDE3B) in hexadecimal and as UTF16BE
        $actualTitle = $this->hexToStr('D83DDE3B');

        // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
        $expectedTitle = 'F09F98BB';
        $this->assertSame($expectedTitle, $this->fetchTitleAsHex($actualTitle, 'application/pdf'));
    }

    public function testPdfWithUTF8Title_doNothing()
    {
        // See http://graphemica.com for more info about the characters
        // '😻' (U+1F63B or F09F98BB) in hexadecimal and as UTF-8
        $actualTitle = $this->hexToStr('F09F98BB');

        // '😻' (U+1F63B or F09F98BB) in hexadecimal and UTF-8
        $expectedTitle = 'F09F98BB';
        $this->assertSame($expectedTitle, $this->fetchTitleAsHex($actualTitle, 'application/pdf'));
    }

    public function testPdfWithWINDOWS1252Title_convertToUTF8()
    {
        // See http://graphemica.com for more info about the characters
        // '€' (80) in hexadecimal and WINDOWS-1252
        $actualTitle = $this->hexToStr('80');

        // '€' (U+20AC or E282AC) in hexadecimal and UTF-8
        $expectedTitle = 'E282AC';
        $this->assertSame($expectedTitle, $this->fetchTitleAsHex($actualTitle, 'application/pdf'));
    }

    public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
    {
        // See http://graphemica.com for more info about the characters
        // '😻ℤ�z' (U+1F63B or F09F98BB; U+2124 or E284A4; invalid character 81; U+007A or 7A) in hexadecimal and UTF-8
        // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252
        $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

        // '😻ℤz' (U+1F63B or F09F98BB; U+2124 or E284A4; U+007A or 7A) in hexadecimal and UTF-8
        // the 0x81 (represented by �) is invalid for UTF16, UTF8 and WINDOWS-1252 and is removed
        $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
        $this->assertSame($expectedTitle, $this->fetchTitleAsHex($actualTitle, 'application/pdf'));
    }

    /**
     * Data provider for testWithChangedUrl.
     *
     * Arrays contain the following values:
     *  $entry_url
     *  $origin_url
     *  $content_url
     *  $expected_entry_url
     *  $expected_origin_url
     *  $expected_domain
     */
    public function dataForChangedUrl()
    {
        return [
            'normal' => [
                'http://0.0.0.0',
                null,
                'http://1.1.1.1',
                'http://1.1.1.1',
                'http://0.0.0.0',
                '1.1.1.1',
            ],
            'origin already set' => [
                'http://0.0.0.0',
                'http://hello',
                'http://1.1.1.1',
                'http://1.1.1.1',
                'http://hello',
                '1.1.1.1',
            ],
            'trailing slash' => [
                'https://example.com/hello-world',
                null,
                'https://example.com/hello-world/',
                'https://example.com/hello-world/',
                null,
                'example.com',
            ],
            'query string in fetched content' => [
                'https://example.org/hello',
                null,
                'https://example.org/hello?world=1',
                'https://example.org/hello?world=1',
                'https://example.org/hello',
                'example.org',
            ],
            'fragment in fetched content' => [
                'https://example.org/hello',
                null,
                'https://example.org/hello#world',
                'https://example.org/hello',
                null,
                'example.org',
            ],
            'fragment and query string in fetched content' => [
                'https://example.org/hello',
                null,
                'https://example.org/hello?foo#world',
                'https://example.org/hello?foo#world',
                'https://example.org/hello',
                'example.org',
            ],
            'different path and query string in fetch content' => [
                'https://example.org/hello',
                null,
                'https://example.org/world?foo',
                'https://example.org/world?foo',
                'https://example.org/hello',
                'example.org',
            ],
            'feedproxy ignore list test' => [
                'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
                null,
                'https://example.org/hello-wallabag',
                'https://example.org/hello-wallabag',
                null,
                'example.org',
            ],
            'feedproxy ignore list test with origin url already set' => [
                'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
                'https://example.org/this-is-source',
                'https://example.org/hello-wallabag',
                'https://example.org/hello-wallabag',
                'https://example.org/this-is-source',
                'example.org',
            ],
            'lemonde ignore pattern test' => [
                'http://www.lemonde.fr/tiny/url',
                null,
                'http://example.com/hello-world',
                'http://example.com/hello-world',
                null,
                'example.com',
            ],
        ];
    }

    /**
     * @dataProvider dataForChangedUrl
     */
    public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
    {
        $tagger = $this->getTaggerExpectingOneCall();

        $proxy = new ContentProxy(new Graby(), $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
        $entry = new Entry(new User());
        $entry->setOriginUrl($origin_url);
        $proxy->updateEntry(
            $entry,
            $entry_url,
            [
                'html' => false,
                'title' => '',
                'url' => $content_url,
                'content_type' => '',
                'language' => '',
            ],
            true
        );

        $this->assertSame($expected_entry_url, $entry->getUrl());
        $this->assertSame($expected_domain, $entry->getDomainName());
        $this->assertSame($expected_origin_url, $entry->getOriginUrl());
    }

    /**
     * Runs updateEntry() against a mocked fetch result that carries only a
     * title and a content type, and returns the resulting entry title as
     * uppercase hexadecimal.
     *
     * The fetch result has no HTML, an empty url and an empty language; the
     * entry is updated for 'http://0.0.0.0'.
     *
     * @param string $actualTitle Raw title bytes returned by the mocked Graby
     * @param string $contentType Value returned as 'content_type'
     *
     * @return string
     */
    private function fetchTitleAsHex($actualTitle, $contentType)
    {
        $tagger = $this->getTaggerExpectingOneCall();
        $graby = $this->getGrabyMock([
            'html' => false,
            'title' => $actualTitle,
            'url' => '',
            'content_type' => $contentType,
            'language' => '',
        ]);

        $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
        $entry = new Entry(new User());
        $proxy->updateEntry($entry, 'http://0.0.0.0');

        return $this->strToHex($entry->getTitle());
    }

    /**
     * Converts a byte string to its uppercase hexadecimal representation
     * (two hex digits per byte).
     *
     * @param string $string
     *
     * @return string
     */
    private function strToHex($string)
    {
        return strtoupper(bin2hex((string) $string));
    }

    /**
     * Converts an even-length hexadecimal string to the byte string it encodes.
     *
     * @param string $hex
     *
     * @return string
     */
    private function hexToStr($hex)
    {
        return hex2bin($hex);
    }

    /**
     * Builds a Graby mock whose fetchContent() returns the given array, however
     * many times (including zero) it is called. The real constructor is not run.
     *
     * @param array $content Value returned by fetchContent()
     */
    private function getGrabyMock(array $content)
    {
        $graby = $this->getMockBuilder(Graby::class)
            ->setMethods(['fetchContent'])
            ->disableOriginalConstructor()
            ->getMock();

        $graby->expects($this->any())
            ->method('fetchContent')
            ->willReturn($content);

        return $graby;
    }

    /**
     * Builds a RuleBasedTagger mock with only tag() replaced and no expectation set.
     */
    private function getTaggerMock()
    {
        return $this->getMockBuilder(RuleBasedTagger::class)
            ->setMethods(['tag'])
            ->disableOriginalConstructor()
            ->getMock();
    }

    /**
     * Builds a RuleBasedTagger mock that expects tag() to be called exactly once.
     */
    private function getTaggerExpectingOneCall()
    {
        $tagger = $this->getTaggerMock();
        $tagger->expects($this->once())
            ->method('tag');

        return $tagger;
    }

    /**
     * Returns a logger that discards every record.
     */
    private function getLogger()
    {
        return new NullLogger();
    }

    /**
     * Builds a RecursiveValidator mock with only validate() replaced.
     *
     * @param bool $withDefaultMock When true, validate() returns an empty violation
     *                              list; when false, the caller sets its own expectation
     */
    private function getValidator($withDefaultMock = true)
    {
        $mock = $this->getMockBuilder(RecursiveValidator::class)
            ->setMethods(['validate'])
            ->disableOriginalConstructor()
            ->getMock();

        if ($withDefaultMock) {
            $mock->expects($this->any())
                ->method('validate')
                ->willReturn(new ConstraintViolationList());
        }

        return $mock;
    }
}