]> git.immae.eu Git - github/wallabag/wallabag.git/blob - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
CS & fix tests
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
1 <?php
2
3 namespace Tests\Wallabag\CoreBundle\Helper;
4
5 use Graby\Graby;
6 use Monolog\Handler\TestHandler;
7 use Monolog\Logger;
8 use PHPUnit\Framework\TestCase;
9 use Psr\Log\NullLogger;
10 use Symfony\Component\Validator\ConstraintViolation;
11 use Symfony\Component\Validator\ConstraintViolationList;
12 use Symfony\Component\Validator\Validator\RecursiveValidator;
13 use Wallabag\CoreBundle\Entity\Entry;
14 use Wallabag\CoreBundle\Helper\ContentProxy;
15 use Wallabag\CoreBundle\Helper\RuleBasedTagger;
16 use Wallabag\UserBundle\Entity\User;
17
18 class ContentProxyTest extends TestCase
19 {
20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
21
/**
 * Graby is mocked to return no HTML for the URL "http://user@:80".
 *
 * The entry must keep the submitted URL, receive the fetching error message
 * as content, and end up without a domain name.
 */
public function testWithBadUrl()
{
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    // The tagger must be invoked exactly once, even when fetching fails.
    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://user@:80');

    self::assertSame('http://user@:80', $savedEntry->getUrl());
    self::assertEmpty($savedEntry->getTitle());
    self::assertSame($this->fetchingErrorMessage, $savedEntry->getContent());
    self::assertEmpty($savedEntry->getPreviewPicture());
    self::assertEmpty($savedEntry->getMimetype());
    self::assertEmpty($savedEntry->getLanguage());
    self::assertSame(0.0, $savedEntry->getReadingTime());
    self::assertNull($savedEntry->getDomainName());
}
58
/**
 * Graby is mocked to return no HTML for "http://0.0.0.0".
 *
 * The entry must keep the submitted URL, carry the fetching error message
 * as content, and expose "0.0.0.0" as its domain name.
 */
public function testWithEmptyContent()
{
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    self::assertSame('http://0.0.0.0', $savedEntry->getUrl());
    self::assertEmpty($savedEntry->getTitle());
    self::assertSame($this->fetchingErrorMessage, $savedEntry->getContent());
    self::assertEmpty($savedEntry->getPreviewPicture());
    self::assertEmpty($savedEntry->getMimetype());
    self::assertEmpty($savedEntry->getLanguage());
    self::assertSame(0.0, $savedEntry->getReadingTime());
    self::assertSame('0.0.0.0', $savedEntry->getDomainName());
}
95
/**
 * Graby is mocked to return no HTML but a title and a description.
 *
 * The entry content must be the fetching error message followed by the
 * "short description" fragment, and the title must be kept.
 */
public function testWithEmptyContentButOG()
{
    $fetched = [
        'html' => false,
        'title' => 'my title',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
        'status' => '',
        'description' => 'desc',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://domain.io');

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    self::assertSame('http://domain.io', $savedEntry->getUrl());
    self::assertSame('my title', $savedEntry->getTitle());
    self::assertSame($expectedContent, $savedEntry->getContent());
    self::assertEmpty($savedEntry->getPreviewPicture());
    self::assertEmpty($savedEntry->getLanguage());
    self::assertEmpty($savedEntry->getHttpStatus());
    self::assertEmpty($savedEntry->getMimetype());
    self::assertSame(0.0, $savedEntry->getReadingTime());
    self::assertSame('domain.io', $savedEntry->getDomainName());
}
135
/**
 * Graby is mocked to return a full result (HTML, title, final URL,
 * language, status, description, image and content-type header).
 *
 * Every returned field must be copied onto the entry; the URL and the domain
 * come from the fetched result, not from the submitted URL.
 */
public function testWithContent()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertSame('http://3.3.3.3/cover.jpg', $savedEntry->getPreviewPicture());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame('200', $savedEntry->getHttpStatus());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
}
176
/**
 * Same as the full-content case, but the fetched result has a null image
 * and the HTML contains no picture: the preview picture must stay null.
 */
public function testWithContentAndNoOgImage()
{
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => null,
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertNull($savedEntry->getPreviewPicture());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame('200', $savedEntry->getHttpStatus());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
}
217
/**
 * The fetched result carries no image, but its HTML contains an <img> tag:
 * the test expects that picture to become the entry's preview picture.
 *
 * NOTE(review): the fixture previously used the `content_type` and
 * `open_graph` keys. It now uses `headers['content-type']`, `description`
 * and `image`, like every other fixture in this class. Confirm against
 * ContentProxy that these are the keys it reads.
 */
public function testWithContentAndContentImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => null,
        ]);

    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
    // No image in the fetched result, so the picture is expected to come from the HTML.
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
259
/**
 * The fetched result carries both an image and an <img> tag in its HTML:
 * the test expects the fetched image to win over the one in the content.
 *
 * NOTE(review): the fixture previously used the `content_type` and
 * `open_graph` keys. It now uses `headers['content-type']`, `description`
 * and `image`, like every other fixture in this class. Confirm against
 * ContentProxy that these are the keys it reads.
 */
public function testWithContentImageAndOgImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => 'http://3.3.3.3/cover.jpg',
        ]);

    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
    // The fetched image is expected here, not the "nevermind.jpg" picture from the HTML.
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
301
/**
 * The validator mock reports a violation for the fetched language
 * ("dontexist"): the entry language must stay null while the other fields
 * are still stored.
 */
public function testWithContentAndBadLanguage()
{
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');

    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'dontexist',
        'status' => '200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertNull($savedEntry->getLanguage());
    self::assertSame('200', $savedEntry->getHttpStatus());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
}
344
/**
 * The validator mock is called twice: the first call reports no violation,
 * the second reports one for the value "https://", which is the fetched
 * image. The preview picture must therefore stay null.
 */
public function testWithContentAndBadOgImage()
{
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);

    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->willReturnOnConsecutiveCalls($noViolation, $urlViolation);

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'https://',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertNull($savedEntry->getPreviewPicture());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame('200', $savedEntry->getHttpStatus());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
}
393
/**
 * Content is handed directly to updateEntry() instead of being returned by
 * a mocked fetcher. Besides the usual fields, the date (a Unix timestamp
 * string), the authors and the headers must be stored on the entry.
 */
public function testWithForcedContent()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy(new Graby(), $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0', $forcedContent);

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
    self::assertSame('24/03/2014', $savedEntry->getPublishedAt()->format('d/m/Y'));
    self::assertContains('Jeremy', $savedEntry->getPublishedBy());
    self::assertContains('Nico', $savedEntry->getPublishedBy());
    self::assertContains('Thomas', $savedEntry->getPublishedBy());
    self::assertNotNull($savedEntry->getHeaders(), 'Headers are stored, so value is not null');
    self::assertContains('no-cache', $savedEntry->getHeaders());
}
433
/**
 * Forced content whose date is an ISO-8601 style datetime string
 * ("2016-09-08T11:55:58+0200"): the published date must be parsed.
 */
public function testWithForcedContentAndDatetime()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    // A real Monolog logger backed by an in-memory handler is used here.
    $testLogger = new Logger('test');
    $testLogger->pushHandler(new TestHandler());

    $contentProxy = new ContentProxy(new Graby(), $ruleTagger, $this->getValidator(), $testLogger, $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
    self::assertSame('08/09/2016', $savedEntry->getPublishedAt()->format('d/m/Y'));
}
469
/**
 * Forced content whose date ("01 02 2012") cannot be used: the published
 * date must stay null and the failure must be logged.
 */
public function testWithForcedContentAndBadDate()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    // Capture log records in memory so they can be inspected below.
    $recordCollector = new TestHandler();
    $testLogger = new Logger('foo', [$recordCollector]);

    $contentProxy = new ContentProxy(new Graby(), $ruleTagger, $this->getValidator(), $testLogger, $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('content', $savedEntry->getContent());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame(4.0, $savedEntry->getReadingTime());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
    self::assertNull($savedEntry->getPublishedAt());

    $loggedRecords = $recordCollector->getRecords();

    self::assertCount(3, $loggedRecords);
    self::assertContains('Error while defining date', $loggedRecords[0]['message']);
}
511
/**
 * The tagger mock throws when invoked: updateEntry() must not propagate
 * the exception and the entry must end up with no tags.
 */
public function testTaggerThrowException()
{
    $failingTagger = $this->getTaggerMock();
    $failingTagger->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $contentProxy = new ContentProxy(new Graby(), $failingTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    self::assertCount(0, $savedEntry->getTags());
}
537
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset is [raw HTML, string that must NOT appear in the stored content].
 *
 * @return array
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    $datasets['script and comment'] = [
        "<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert('lol');</script><![endif]--><br />",
        'lol',
    ];

    $datasets['script'] = [
        "<strong>Script inside:</strong><script>alert('lol');</script>",
        'script',
    ];

    return $datasets;
}
551
/**
 * Forced content containing script tags or conditional comments: the
 * given string must not appear in the stored entry content.
 *
 * @dataProvider dataForCrazyHtml
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy(new Graby(), $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    self::assertSame('http://1.1.1.1', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertNotContains($escapedString, $savedEntry->getContent());
    self::assertSame('http://3.3.3.3/cover.jpg', $savedEntry->getPreviewPicture());
    self::assertSame('text/html', $savedEntry->getMimetype());
    self::assertSame('fr', $savedEntry->getLanguage());
    self::assertSame('200', $savedEntry->getHttpStatus());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
}
590
/**
 * Graby is mocked to return an "image/jpeg" content type for a URL that
 * points at a picture: that URL must also become the preview picture.
 */
public function testWithImageAsContent()
{
    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'status' => '200',
        'headers' => [
            'content-type' => 'image/jpeg',
        ],
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    self::assertSame('http://1.1.1.1/image.jpg', $savedEntry->getUrl());
    self::assertSame('this is my title', $savedEntry->getTitle());
    self::assertContains('http://1.1.1.1/image.jpg', $savedEntry->getContent());
    self::assertSame('http://1.1.1.1/image.jpg', $savedEntry->getPreviewPicture());
    self::assertSame('image/jpeg', $savedEntry->getMimetype());
    self::assertSame('200', $savedEntry->getHttpStatus());
    self::assertSame('1.1.1.1', $savedEntry->getDomainName());
}
626
/**
 * A text/html result whose title is already valid UTF-8 must be stored
 * byte-for-byte.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // '😻ℤz' as UTF-8 bytes: U+1F63B => F09F98BB, U+2124 => E284A4, U+007A => 7A.
    // https://www.online-toolz.com/tools/text-hex-convertor.php converts UTF-8 text <=> hex,
    // and http://graphemica.com gives details about each character.
    $titleHex = implode('', ['F09F98BB', 'E284A4', '7A']);
    $fetchedTitle = $this->hexToStr($titleHex);

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $fetchedTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => '',
        ]);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The stored title must be the very same bytes that were fetched.
    self::assertSame($titleHex, $this->strToHex($savedEntry->getTitle()));
}
663
/**
 * A text/html result whose title contains a byte that is not valid UTF-8
 * must be stored with that byte removed.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // Bytes 61 80 62 read as 'a€b' in WINDOWS-1252, but 0x80 is an invalid UTF-8 byte.
    // The correct UTF-8 encoding of '€' (U+20AC) would be E282AC.
    // See http://graphemica.com for more info about the characters.
    $fetchedTitle = $this->hexToStr(implode('', ['61', '80', '62']));

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $fetchedTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'text/html',
            ],
            'language' => '',
        ]);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Only 'ab' (61 62) is expected: the invalid 0x80 byte must be gone.
    $remainingHex = implode('', ['61', '62']);
    self::assertSame($remainingHex, $this->strToHex($savedEntry->getTitle()));
}
700
/**
 * An application/pdf result whose title is encoded as UTF-16BE must be
 * stored converted to UTF-8.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // '😻' (U+1F63B) encoded as UTF-16BE is the surrogate pair D83D DE3B.
    // See http://graphemica.com for more info about the characters.
    $fetchedTitle = $this->hexToStr('D83DDE3B');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $fetchedTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'application/pdf',
            ],
            'language' => '',
        ]);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    $utf8Hex = 'F09F98BB';
    self::assertSame($utf8Hex, $this->strToHex($savedEntry->getTitle()));
}
736
/**
 * An application/pdf result whose title is already UTF-8 must be stored
 * byte-for-byte.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    // See http://graphemica.com for more info about the characters.
    $utf8Hex = 'F09F98BB';
    $fetchedTitle = $this->hexToStr($utf8Hex);

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $fetchedTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'application/pdf',
            ],
            'language' => '',
        ]);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The stored title must be the very same UTF-8 bytes that were fetched.
    self::assertSame($utf8Hex, $this->strToHex($savedEntry->getTitle()));
}
772
/**
 * An application/pdf result whose title is encoded as WINDOWS-1252 must
 * be stored converted to UTF-8.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // '€' is the single byte 80 in WINDOWS-1252.
    // See http://graphemica.com for more info about the characters.
    $fetchedTitle = $this->hexToStr('80');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $fetchedTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'application/pdf',
            ],
            'language' => '',
        ]);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // '€' (U+20AC) encoded as UTF-8 is E282AC.
    $utf8Hex = 'E282AC';
    self::assertSame($utf8Hex, $this->strToHex($savedEntry->getTitle()));
}
808
/**
 * An application/pdf result whose otherwise valid UTF-8 title contains a
 * stray 0x81 byte must be stored with that byte removed.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // '😻ℤ' + invalid byte + 'z': F09F98BB (U+1F63B), E284A4 (U+2124), 81 (invalid), 7A (U+007A).
    // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252.
    // See http://graphemica.com for more info about the characters.
    $fetchedTitle = $this->hexToStr(implode('', ['F09F98BB', 'E284A4', '81', '7A']));

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => false,
            'title' => $fetchedTitle,
            'url' => '',
            'headers' => [
                'content-type' => 'application/pdf',
            ],
            'language' => '',
        ]);

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // '😻ℤz' is expected: the same bytes as the input minus the invalid 0x81.
    $cleanedHex = implode('', ['F09F98BB', 'E284A4', '7A']);
    self::assertSame($cleanedHex, $this->strToHex($savedEntry->getTitle()));
}
846
/**
 * Data provider for testWithChangedUrl.
 *
 * Every dataset lists, in this order:
 *   - $entry_url            URL given to updateEntry()
 *   - $origin_url           origin URL set on the entry beforehand
 *   - $content_url          URL reported by the fetched content
 *   - $expected_entry_url   URL expected on the entry afterwards
 *   - $expected_origin_url  origin URL expected on the entry afterwards
 *   - $expected_domain      domain name expected on the entry afterwards
 *
 * @return array
 */
public function dataForChangedUrl()
{
    $cases = [];

    $cases['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    $cases['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    $cases['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $cases['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    $cases['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $cases['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $cases['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    $cases['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $cases['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    $cases['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $cases;
}
943
/**
 * Forced content reports a URL that may differ from the submitted one:
 * checks the resulting entry URL, domain name and origin URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    $forcedContent = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy(new Graby(), $ruleTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);

    $savedEntry = new Entry(new User());
    $savedEntry->setOriginUrl($origin_url);

    $contentProxy->updateEntry($savedEntry, $entry_url, $forcedContent, true);

    self::assertSame($expected_entry_url, $savedEntry->getUrl());
    self::assertSame($expected_domain, $savedEntry->getDomainName());
    self::assertSame($expected_origin_url, $savedEntry->getOriginUrl());
}
975
/**
 * Convert a string to the uppercase hexadecimal representation of its bytes.
 *
 * Uses the built-in bin2hex() instead of a hand-written byte loop; the
 * result is two uppercase hex digits per input byte.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $string Raw bytes to encode
 *
 * @return string Uppercase hexadecimal digits, empty for an empty input
 */
private function strToHex($string)
{
    return strtoupper(bin2hex($string));
}
994
/**
 * Convert a hexadecimal string to the bytes it represents.
 *
 * The input must be made of complete byte pairs. The previous hand-written
 * loop silently dropped a trailing lone digit and relied on hexdec()
 * ignoring invalid characters; such input is now rejected explicitly.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $hex Even-length string of hexadecimal digits (case-insensitive)
 *
 * @throws \InvalidArgumentException When $hex has an odd length or contains a non-hex character
 *
 * @return string Decoded bytes, empty for an empty input
 */
private function hexToStr($hex)
{
    // Validate up front: hex2bin() would only emit a warning and return false.
    if (1 !== preg_match('/\A(?:[0-9A-Fa-f]{2})*\z/', $hex)) {
        throw new \InvalidArgumentException(sprintf('Expected an even-length hexadecimal string, got "%s".', $hex));
    }

    return hex2bin($hex);
}
1013
/**
 * Build a RuleBasedTagger mock whose constructor is skipped and whose
 * only mocked method is tag(). Callers set their own expectations on it.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1021
/**
 * Provide a fresh PSR-3 NullLogger for tests that do not inspect log output.
 */
private function getLogger()
{
    $silentLogger = new NullLogger();

    return $silentLogger;
}
1026
/**
 * Build a RecursiveValidator mock whose constructor is skipped and whose
 * only mocked method is validate().
 *
 * @param bool $withDefaultMock When true, validate() is stubbed to return an
 *                              empty violation list for any number of calls;
 *                              when false, the caller sets the expectations
 */
private function getValidator($withDefaultMock = true)
{
    $validatorMock = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    if (!$withDefaultMock) {
        return $validatorMock;
    }

    $validatorMock->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validatorMock;
}
1042 }