]> git.immae.eu Git - github/wallabag/wallabag.git/blame - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
CS & fix tests
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
CommitLineData
558d9aab
JB
1<?php
2
a2c1b94e 3namespace Tests\Wallabag\CoreBundle\Helper;
558d9aab 4
f808b016 5use Graby\Graby;
d5c2cc54 6use Monolog\Handler\TestHandler;
f808b016 7use Monolog\Logger;
bd91bd5c 8use PHPUnit\Framework\TestCase;
f808b016
JB
9use Psr\Log\NullLogger;
10use Symfony\Component\Validator\ConstraintViolation;
11use Symfony\Component\Validator\ConstraintViolationList;
12use Symfony\Component\Validator\Validator\RecursiveValidator;
c2656f96 13use Wallabag\CoreBundle\Entity\Entry;
f808b016 14use Wallabag\CoreBundle\Helper\ContentProxy;
6bc6fb1f 15use Wallabag\CoreBundle\Helper\RuleBasedTagger;
f808b016 16use Wallabag\UserBundle\Entity\User;
558d9aab 17
bd91bd5c 18class ContentProxyTest extends TestCase
558d9aab 19{
ac1509a6 20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
a2c1b94e 21
4d0ec0e7
JB
/**
 * Bad URL plus an empty Graby result: the entry keeps the URL it was given,
 * gets the fetching error message as content and ends up without a domain name.
 */
public function testWithBadUrl()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // What the stubbed Graby hands back: nothing usable at all.
    $emptyFetch = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($emptyFetch);

    $badUrl = 'http://user@:80';

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $badUrl);

    $this->assertSame($badUrl, $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertNull($entry->getDomainName());
}
58
558d9aab
JB
/**
 * Graby returns no content for a valid URL: the entry keeps the given URL,
 * its content is the fetching error message and the domain is taken from that URL.
 */
public function testWithEmptyContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Stubbed fetch result with every field empty.
    $emptyFetch = [
        'html' => false,
        'title' => '',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($emptyFetch);

    $givenUrl = 'http://0.0.0.0';

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $givenUrl);

    $this->assertSame($givenUrl, $entry->getUrl());
    $this->assertEmpty($entry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getMimetype());
    $this->assertEmpty($entry->getLanguage());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('0.0.0.0', $entry->getDomainName());
}
95
/**
 * No HTML could be fetched, but a title and a description are available:
 * the title is kept and the description is appended to the fetching error message.
 */
public function testWithEmptyContentButOG()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Stubbed fetch result: no HTML, only a title and a description.
    $fetched = [
        'html' => false,
        'title' => 'my title',
        'url' => '',
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
        'status' => '',
        'description' => 'desc',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $givenUrl = 'http://domain.io';
    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, $givenUrl);

    $this->assertSame($givenUrl, $entry->getUrl());
    $this->assertSame('my title', $entry->getTitle());
    $this->assertSame($expectedContent, $entry->getContent());
    $this->assertEmpty($entry->getPreviewPicture());
    $this->assertEmpty($entry->getLanguage());
    $this->assertEmpty($entry->getHttpStatus());
    $this->assertEmpty($entry->getMimetype());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('domain.io', $entry->getDomainName());
}
135
/**
 * Nominal case: Graby returns a full result and every field of it
 * (url, title, content, picture, mimetype, language, status) lands on the entry.
 */
public function testWithContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Complete fetch result, including an image and a content-type header.
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // The URL reported by the fetch replaces the one the entry was created with.
    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
f530f7f5 176
3d71d403
JB
/**
 * Same as the nominal case but the fetch result carries no image:
 * the entry must end up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Complete fetch result whose 'image' is explicitly null.
    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => null,
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
217
/**
 * The fetch result has no image of its own but the HTML contains an <img>:
 * that in-content image is expected to become the preview picture.
 *
 * The stubbed result uses the same flat keys as the other fixtures of this
 * class (`headers`, `description`, `image`) rather than the former
 * `content_type` / `open_graph` layout.
 */
public function testWithContentAndContentImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            // No dedicated image: the picture has to come from the HTML above.
            'image' => null,
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
259
/**
 * Both a dedicated image and an in-content <img> are available:
 * the dedicated image is expected to win as preview picture.
 *
 * The stubbed result uses the same flat keys as the other fixtures of this
 * class (`headers`, `description`, `image`) rather than the former
 * `content_type` / `open_graph` layout.
 */
public function testWithContentImageAndOgImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            // Dedicated image: must take precedence over nevermind.jpg in the HTML.
            'image' => 'http://3.3.3.3/cover.jpg',
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
301
0d349ea6
JB
/**
 * The validator rejects the fetched language: everything else is stored
 * on the entry but its language stays null.
 */
public function testWithContentAndBadLanguage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Single validation call, answering with one violation on the language.
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'dontexist',
        'status' => '200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertNull($entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
344
/**
 * The validator accepts the first value it is given and rejects the second
 * (the image URL): the entry keeps its language but gets no preview picture.
 */
public function testWithContentAndBadOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // First validation passes, second one reports a violation on the url.
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($noViolation, $urlViolation));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'https://',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertNull($entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
393
4d0ec0e7
JB
/**
 * Content is handed directly to updateEntry() instead of being fetched:
 * url, title, language, timestamp date, authors and headers must all be stored.
 */
public function testWithForcedContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Pre-fetched content, with the publication date given as a unix timestamp.
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('24/03/2014', $entry->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $entry->getPublishedBy());
    $this->assertContains('Nico', $entry->getPublishedBy());
    $this->assertContains('Thomas', $entry->getPublishedBy());
    $this->assertNotNull($entry->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $entry->getHeaders());
}
433
/**
 * Forced content whose date is a full datetime string with a timezone offset:
 * the publication date must be parsed from it.
 */
public function testWithForcedContentAndDatetime()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // Real logger writing into an in-memory handler.
    $logHandler = new TestHandler();
    $logger = new Logger('test', [$logHandler]);

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $logger, $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertSame('08/09/2016', $entry->getPublishedAt()->format('d/m/Y'));
}
469
/**
 * Forced content with a date that cannot be used: the publication date stays
 * null and the problem is reported through the logger.
 */
public function testWithForcedContentAndBadDate()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // In-memory handler so the emitted log records can be inspected afterwards.
    $logHandler = new TestHandler();
    $logger = new Logger('foo');
    $logger->pushHandler($logHandler);

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $logger, $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains('content', $entry->getContent());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame(4.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
    $this->assertNull($entry->getPublishedAt());

    // The date error is expected to be the first of the three logged records.
    $logRecords = $logHandler->getRecords();

    $this->assertCount(3, $logRecords);
    $this->assertContains('Error while defining date', $logRecords[0]['message']);
}
511
/**
 * An exception thrown by the tagger must not escape updateEntry();
 * the entry simply ends up with no tags.
 */
public function testTaggerThrowException()
{
    $failingTagger = $this->getTaggerMock();
    $failingTagger->expects($this->once())
        ->method('tag')
        ->will($this->throwException(new \Exception()));

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $failingTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $entry->getTags());
}
537
74a75f7d
JB
/**
 * Data sets for testWithCrazyHtmlContent.
 *
 * Each set holds the raw HTML to store, followed by a string that must
 * no longer appear in the stored content.
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    // Script hidden inside an IE conditional comment.
    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    // Plain inline script.
    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
551
/**
 * Forced HTML containing scripts: the given string must be absent from the
 * stored content while the other fields are stored as usual.
 *
 * @dataProvider dataForCrazyHtml
 *
 * @param string $html          raw HTML handed to updateEntry()
 * @param string $escapedString string that must not be found in the stored content
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertNotContains($escapedString, $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
590
d0ec2ddd
JB
/**
 * The fetched resource is an image (content-type image/jpeg): its URL is
 * expected both inside the content and as preview picture.
 */
public function testWithImageAsContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $imageUrl = 'http://1.1.1.1/image.jpg';

    // Fetch result describing a bare image wrapped in minimal HTML.
    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'status' => '200',
        'headers' => [
            'content-type' => 'image/jpeg',
        ],
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame($imageUrl, $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertContains($imageUrl, $entry->getContent());
    $this->assertSame($imageUrl, $entry->getPreviewPicture());
    $this->assertSame('image/jpeg', $entry->getMimetype());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
626
c01d9532
T
/**
 * A text/html page whose title is already valid UTF-8: the title must be
 * stored byte for byte.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // Hex <=> text conversion: https://www.online-toolz.com/tools/text-hex-convertor.php
    // Character reference: http://graphemica.com
    // Title bytes are UTF-8 for '😻ℤz': U+1F63B (F09F98BB), U+2124 (E284A4), U+007A (7A)
    $fetchedTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same three UTF-8 characters expected back, unchanged.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
663
/**
 * A text/html page whose title contains a byte that is not valid UTF-8:
 * the invalid byte must be dropped from the stored title.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // Character reference: http://graphemica.com
    // Bytes 61 80 62 read 'a€b' in WINDOWS-1252, but 80 is not valid UTF-8
    // (the UTF-8 encoding of €, U+20AC, is E282AC).
    $fetchedTitle = $this->hexToStr('61' . '80' . '62');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'text/html',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Only 'ab' (61 62) is left once the invalid 80 byte has been removed.
    $expectedHex = '61' . '62';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
700
/**
 * An application/pdf document whose title is encoded as UTF-16BE:
 * the stored title must be its UTF-8 equivalent.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // Character reference: http://graphemica.com
    // D83DDE3B is '😻' (U+1F63B) encoded as UTF-16BE
    $fetchedTitle = $this->hexToStr('D83DDE3B');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // F09F98BB is the same '😻' (U+1F63B) encoded as UTF-8
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
736
/**
 * An application/pdf document whose title is already UTF-8:
 * the title must be stored byte for byte.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // Character reference: http://graphemica.com
    // F09F98BB is '😻' (U+1F63B) encoded as UTF-8
    $fetchedTitle = $this->hexToStr('F09F98BB');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // Same UTF-8 bytes expected back, unchanged.
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
772
/**
 * An application/pdf document whose title is encoded as WINDOWS-1252:
 * the stored title must be its UTF-8 equivalent.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // Character reference: http://graphemica.com
    // Byte 80 is '€' in WINDOWS-1252
    $fetchedTitle = $this->hexToStr('80');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // E282AC is '€' (U+20AC) encoded as UTF-8
    $expectedHex = 'E282AC';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
808
/**
 * An application/pdf document whose otherwise valid UTF-8 title contains one
 * invalid byte: that byte must be dropped and the rest kept as is.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // Character reference: http://graphemica.com
    // '😻ℤ' + invalid byte 81 + 'z': U+1F63B (F09F98BB), U+2124 (E284A4), 81, U+007A (7A)
    // 0x81 is not a valid character in UTF16, UTF8 or WINDOWS-1252
    $fetchedTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $fetchedTitle,
        'url' => '',
        'headers' => [
            'content-type' => 'application/pdf',
        ],
        'language' => '',
    ];

    $grabyStub = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyStub->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyStub, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $contentProxy->updateEntry($entry, 'http://0.0.0.0');

    // '😻ℤz' remains: F09F98BB, E284A4, 7A — the invalid 0x81 byte has been removed
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($entry->getTitle()));
}
846
e07fadea
KD
/**
 * Data provider for testWithChangedUrl.
 *
 * Every data set lists, in this order:
 *   1. $entry_url
 *   2. $origin_url
 *   3. $content_url
 *   4. $expected_entry_url
 *   5. $expected_origin_url
 *   6. $expected_domain
 */
public function dataForChangedUrl()
{
    $datasets = [];

    $datasets['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    $datasets['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    $datasets['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $datasets['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $datasets['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $datasets['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    $datasets['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $datasets;
}
943
/**
 * Calls updateEntry() with a content array whose url is $content_url, then checks
 * the url, domain name and origin url stored on the entry.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    // Content array passed straight to updateEntry(); only the url varies between datasets
    $content = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'headers' => [
            'content-type' => '',
        ],
        'language' => '',
    ];

    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = new Graby();
    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);

    $entry = new Entry(new User());
    $entry->setOriginUrl($origin_url);

    $proxy->updateEntry($entry, $entry_url, $content, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
975
/**
 * Convert a string to its uppercase hexadecimal representation, two digits per byte.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $string
 *
 * @return string
 */
private function strToHex($string)
{
    // bin2hex() emits two lowercase, zero-padded hex digits per byte;
    // the tests in this class compare against uppercase literals, hence strtoupper()
    return strtoupper(bin2hex($string));
}
994
/**
 * Turn a string of hexadecimal digit pairs into the bytes they encode.
 *
 * A dangling last digit (odd-length input) is ignored.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $hex
 *
 * @return string
 */
private function hexToStr($hex)
{
    $bytes = '';

    // Only complete pairs are decoded
    $pairCount = intdiv(\strlen($hex), 2);
    for ($pair = 0; $pair < $pairCount; ++$pair) {
        $digits = substr($hex, 2 * $pair, 2);
        $bytes .= \chr(hexdec($digits));
    }

    return $bytes;
}
1013
f530f7f5
KG
/**
 * Build a RuleBasedTagger mock: constructor disabled, `tag` is the only mocked method.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1c9cd2a7 1021
/**
 * Logger handed to ContentProxy in these tests.
 *
 * @return NullLogger
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
0d349ea6 1026
/**
 * Build a RecursiveValidator mock with only `validate` mocked.
 *
 * @param bool $withDefaultMock When true, `validate` is stubbed to return an empty
 *                              ConstraintViolationList; when false the mock is returned
 *                              without any expectation so the caller can set its own
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    if (!$withDefaultMock) {
        return $validator;
    }

    $validator->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validator;
}
558d9aab 1042}