]> git.immae.eu Git - github/wallabag/wallabag.git/blame - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Fix tests after rebase
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
CommitLineData
558d9aab
JB
1<?php
2
a2c1b94e 3namespace Tests\Wallabag\CoreBundle\Helper;
558d9aab 4
f808b016 5use Graby\Graby;
d5c2cc54 6use Monolog\Handler\TestHandler;
f808b016 7use Monolog\Logger;
bd91bd5c 8use PHPUnit\Framework\TestCase;
f808b016
JB
9use Psr\Log\NullLogger;
10use Symfony\Component\Validator\ConstraintViolation;
11use Symfony\Component\Validator\ConstraintViolationList;
12use Symfony\Component\Validator\Validator\RecursiveValidator;
c2656f96 13use Wallabag\CoreBundle\Entity\Entry;
f808b016 14use Wallabag\CoreBundle\Helper\ContentProxy;
6bc6fb1f 15use Wallabag\CoreBundle\Helper\RuleBasedTagger;
f808b016 16use Wallabag\UserBundle\Entity\User;
558d9aab 17
bd91bd5c 18class ContentProxyTest extends TestCase
558d9aab 19{
ac1509a6 20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
a2c1b94e 21
4d0ec0e7
JB
/**
 * Graby returns nothing usable for a malformed URL: the entry keeps the
 * requested URL, receives the fetching error message as content and ends
 * up without any domain name.
 */
public function testWithBadUrl()
{
    $fetchResult = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    // The tagger must be invoked exactly once, even when fetching fails.
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://user@:80');

    $this->assertSame('http://user@:80', $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertNull($entry->getDomainName());
}
58
558d9aab
JB
/**
 * Graby returns an empty result for a valid URL: the entry keeps the
 * requested URL and its domain, and the content is the fetching error
 * message.
 */
public function testWithEmptyContent()
{
    $fetchResult = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://0.0.0.0', $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('0.0.0.0', $entry->getDomainName());
}
95
/**
 * No HTML could be fetched but a title and a description are available:
 * the title is kept and the description is appended to the fetching
 * error message.
 */
public function testWithEmptyContentButOG()
{
    $fetchResult = [
        'html' => false,
        'title' => 'my title',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
        'status' => '',
        'description' => 'desc',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://domain.io');

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame('http://domain.io', $entry->getUrl());
    $this->assertSame('my title', $entry->getTitle());
    $this->assertSame($expectedContent, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getLanguage());
    $this->assertEmpty($entry->getHttpStatus());
    $this->assertEmpty($entry->getMimetype());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('domain.io', $entry->getDomainName());
}
135
/**
 * Nominal case: Graby returns content, metadata and a preview image, and
 * every field is copied onto the entry (URL and domain come from the
 * fetched result, not from the requested URL).
 */
public function testWithContent()
{
    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
f530f7f5 176
3d71d403
JB
/**
 * Content is fetched but the result carries a null image and the HTML
 * contains no picture: the entry ends up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => null,
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
217
/**
 * The fetched result has a null image but the HTML embeds a picture:
 * that picture is expected as the entry preview.
 */
public function testWithContentAndContentImage()
{
    // Same markup is fed to Graby and expected back, untouched, as content.
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";

    $fetchResult = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'image' => null,
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
257
/**
 * Both an image in the fetched result and a picture inside the HTML are
 * available: the image from the fetched result is expected as preview.
 */
public function testWithContentImageAndOgImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";

    $fetchResult = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'image' => 'http://3.3.3.3/cover.jpg',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame($html, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
297
0d349ea6
JB
/**
 * The validator reports a violation for the fetched language: the entry
 * language stays null while the other fields are still populated.
 */
public function testWithContentAndBadLanguage()
{
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');

    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'dontexist',
        'status' => '200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertNull($entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
340
/**
 * The validator is called twice: the first call passes, the second one
 * reports a violation on the image URL. The preview picture is then
 * expected to stay null while the language is kept.
 */
public function testWithContentAndBadOgImage()
{
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);

    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($noViolation, $urlViolation));

    $fetchResult = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'https://',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
389
4d0ec0e7
JB
/**
 * Content is handed directly to updateEntry() instead of being fetched
 * through a mocked Graby: the date (a timestamp here), authors and
 * headers must all be stored on the entry.
 */
public function testWithForcedContent()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $entry->getPublishedBy());
    $this->assertContains('Nico', $entry->getPublishedBy());
    $this->assertContains('Thomas', $entry->getPublishedBy());
    $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $entry->getHeaders());
}
429
/**
 * Forced content whose date is an ISO-like datetime string rather than a
 * timestamp: the publication date must still be parsed.
 */
public function testWithForcedContentAndDatetime()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // A real logger backed by an in-memory handler; its records are not inspected here.
    $logger = new Logger('test', [new TestHandler()]);

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $logger, $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
}
465
/**
 * Forced content with an unparsable date: the publication date stays
 * null and the failure is reported through the logger.
 */
public function testWithForcedContentAndBadDate()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // In-memory handler so the emitted log records can be checked below.
    $logRecorder = new TestHandler();
    $logger = new Logger('foo');
    $logger->pushHandler($logRecorder);

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $logger, $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertNull($entry->getPublishedAt());

    $logRecords = $logRecorder->getRecords();

    $this->assertCount(3, $logRecords);
    $this->assertContains('Error while defining date', $logRecords[0]['message']);
}
507
/**
 * An exception thrown by the tagger must not escape updateEntry(); the
 * entry simply ends up without tags.
 */
public function testTaggerThrowException()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $entry->getTags());
}
533
74a75f7d
JB
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset holds the raw HTML given to the proxy and a string that
 * must no longer appear in the stored content.
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
547
/**
 * Forced content carrying script-laden HTML: the given string must be
 * absent from the stored content, while the metadata is kept.
 *
 * @dataProvider dataForCrazyHtml
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertNotContains($escapedString, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
586
d0ec2ddd
JB
/**
 * The fetched resource is itself an image (image/jpeg content type): its
 * URL is expected both inside the content and as preview picture.
 */
public function testWithImageAsContent()
{
    $fetchResult = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'status' => '200',
        'headers' => [
            'content-type' => 'image/jpeg',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1/image.jpg', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('http://1.1.1.1/image.jpg', $entry->getContent());
    $this->assertSame('http://1.1.1.1/image.jpg', $entry->getPreviewPicture());
    $this->assertSame('image/jpeg', $entry->getMimetype());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
622
c01d9532
T
/**
 * A text/html page whose title is already valid UTF-8 must keep its
 * title byte-for-byte.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // UTF-8 bytes, in hexadecimal, of '😻ℤz':
    // U+1F63B => F09F98BB, U+2124 => E284A4, U+007A => 7A.
    // See http://graphemica.com for details about these characters.
    $titleHex = 'F09F98BB' . 'E284A4' . '7A';

    $fetchResult = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // The stored title must be the very same byte sequence.
    $this->assertSame($titleHex, $this->strToHex($entry->getTitle()));
}
659
/**
 * A text/html page whose title contains a byte that is invalid in UTF-8
 * must have that byte stripped from the stored title.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // 'a€b' encoded as WINDOWS-1252 is 61 80 62, but a lone 0x80 byte is
    // not valid UTF-8 (the UTF-8 form of '€', U+20AC, is E282AC).
    // See http://graphemica.com for details about these characters.
    $rawTitle = $this->hexToStr('61' . '80' . '62');

    $fetchResult = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Only 'ab' (61 62) remains once the invalid 0x80 byte is dropped.
    $expectedHex = '61' . '62';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
696
/**
 * A PDF whose title is encoded as UTF-16BE must end up with a UTF-8
 * title.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // '😻' (U+1F63B) encoded as UTF-16BE is the surrogate pair D83D DE3B.
    // See http://graphemica.com for details about this character.
    $rawTitle = $this->hexToStr('D83DDE3B');

    $fetchResult = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // The same character encoded as UTF-8 is F09F98BB.
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
732
/**
 * A PDF whose title is already UTF-8 must keep its title unchanged.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    // See http://graphemica.com for details about this character.
    $titleHex = 'F09F98BB';

    $fetchResult = [
        'html' => false,
        'title' => $this->hexToStr($titleHex),
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same bytes in, same bytes out.
    $this->assertSame($titleHex, $this->strToHex($entry->getTitle()));
}
768
/**
 * A PDF whose title is encoded as WINDOWS-1252 must end up with a UTF-8
 * title.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // '€' encoded as WINDOWS-1252 is the single byte 80.
    // See http://graphemica.com for details about this character.
    $rawTitle = $this->hexToStr('80');

    $fetchResult = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '€' (U+20AC) encoded as UTF-8 is E282AC.
    $expectedHex = 'E282AC';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
804
/**
 * A PDF whose otherwise valid UTF-8 title contains one invalid byte must
 * have that byte stripped from the stored title.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // '😻ℤ' followed by the stray byte 81 and then 'z':
    // U+1F63B => F09F98BB, U+2124 => E284A4, invalid byte 81, U+007A => 7A.
    // 0x81 is not a valid character in UTF16, UTF8 or WINDOWS-1252.
    // See http://graphemica.com for details about these characters.
    $rawTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $fetchResult = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchResult);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same sequence with the 0x81 byte removed, i.e. '😻ℤz' in UTF-8.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
842
e07fadea
KD
/**
 * Data provider for testWithChangedUrl.
 *
 * Every dataset is a positional list made of, in this order:
 *   $entry_url, $origin_url, $content_url,
 *   $expected_entry_url, $expected_origin_url, $expected_domain
 */
public function dataForChangedUrl()
{
    $datasets = [];

    $datasets['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    $datasets['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    $datasets['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $datasets['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $datasets['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $datasets['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    $datasets['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $datasets;
}
939
/**
 * Updates an entry from prefetched content whose 'url' differs from the
 * entry URL, then checks the resulting URL, domain name and origin URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    // The tagger has to be triggered exactly once by the update.
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())->method('tag');

    $graby = new Graby();
    $proxy = new ContentProxy(
        $graby,
        $tagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $user = new User();
    $entry = new Entry($user);
    $entry->setOriginUrl($origin_url);

    // Content is supplied up front; only its 'url' varies between datasets.
    $prefetchedContent = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $proxy->updateEntry($entry, $entry_url, $prefetchedContent, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
971
/**
 * Convert a string to its uppercase hexadecimal representation.
 *
 * Each byte of the input becomes exactly two hexadecimal digits, so the
 * result is twice as long as the input (in bytes).
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $string raw bytes to encode
 *
 * @return string uppercase hexadecimal digits, empty for an empty input
 */
private function strToHex($string)
{
    // bin2hex() emits two lowercase digits per byte, which is what the
    // previous manual ord()/dechex() loop produced before upper-casing.
    return strtoupper(bin2hex($string));
}
990
/**
 * Convert a string of hexadecimal digits to the bytes it encodes.
 *
 * Digits are consumed two at a time; a trailing unpaired digit is ignored.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $hex hexadecimal digits, two per output byte
 *
 * @return string the decoded bytes
 */
private function hexToStr($hex)
{
    $length = \strlen($hex);
    // Only complete pairs are decoded, so an odd trailing digit is dropped.
    $pairedLength = $length - ($length % 2);

    $bytes = [];
    for ($offset = 0; $offset < $pairedLength; $offset += 2) {
        $bytes[] = \chr(hexdec(substr($hex, $offset, 2)));
    }

    return implode('', $bytes);
}
1009
f530f7f5
KG
/**
 * Build a RuleBasedTagger mock whose `tag` method is mocked and whose
 * original constructor is not invoked.
 *
 * @return RuleBasedTagger mock on which callers can set expectations for `tag`
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1c9cd2a7 1017
/**
 * Provide the logger handed to ContentProxy in these tests.
 *
 * @return NullLogger a new instance on every call
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
0d349ea6 1022
/**
 * Build a RecursiveValidator mock whose `validate` method is mocked and
 * whose original constructor is not invoked.
 *
 * @param bool $withDefaultMock when truthy, `validate` is stubbed to return a
 *                              ConstraintViolationList constructed without any
 *                              violation; otherwise no expectation is set and
 *                              the caller configures `validate` itself
 *
 * @return RecursiveValidator the validator mock
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->disableOriginalConstructor()
        ->setMethods(['validate'])
        ->getMock();

    if (!$withDefaultMock) {
        return $validator;
    }

    $noViolations = new ConstraintViolationList();
    $validator->expects($this->any())
        ->method('validate')
        ->willReturn($noViolations);

    return $validator;
}
558d9aab 1038}