]> git.immae.eu Git - github/wallabag/wallabag.git/blame - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Merge remote-tracking branch 'origin/master' into 2.4
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
CommitLineData
558d9aab
JB
1<?php
2
a2c1b94e 3namespace Tests\Wallabag\CoreBundle\Helper;
558d9aab 4
f808b016 5use Graby\Graby;
d5c2cc54 6use Monolog\Handler\TestHandler;
f808b016 7use Monolog\Logger;
bd91bd5c 8use PHPUnit\Framework\TestCase;
f808b016
JB
9use Psr\Log\NullLogger;
10use Symfony\Component\Validator\ConstraintViolation;
11use Symfony\Component\Validator\ConstraintViolationList;
12use Symfony\Component\Validator\Validator\RecursiveValidator;
c2656f96 13use Wallabag\CoreBundle\Entity\Entry;
f808b016 14use Wallabag\CoreBundle\Helper\ContentProxy;
6bc6fb1f 15use Wallabag\CoreBundle\Helper\RuleBasedTagger;
f808b016 16use Wallabag\UserBundle\Entity\User;
558d9aab 17
bd91bd5c 18class ContentProxyTest extends TestCase
558d9aab 19{
ac1509a6 20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
a2c1b94e 21
4d0ec0e7
JB
/**
 * An URL without a usable host, combined with an empty Graby result,
 * must keep the submitted URL, store the fetching-error message as
 * content and leave the domain name unset.
 */
public function testWithBadUrl()
{
    $submittedUrl = 'http://user@:80';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // fetchContent is stubbed to hand back an empty result.
    $emptyFetch = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($emptyFetch);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, $submittedUrl);

    $this->assertSame($submittedUrl, $article->getUrl());
    $this->assertEmpty($article->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getMimetype());
    $this->assertEmpty($article->getLanguage());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertNull($article->getDomainName());
}
56
558d9aab
JB
/**
 * When Graby returns nothing for a valid URL, the entry keeps the
 * submitted URL, receives the fetching-error message as content and
 * derives its domain name from the submitted URL.
 */
public function testWithEmptyContent()
{
    $submittedUrl = 'http://0.0.0.0';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // fetchContent is stubbed to hand back an empty result.
    $emptyFetch = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($emptyFetch);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, $submittedUrl);

    $this->assertSame($submittedUrl, $article->getUrl());
    $this->assertEmpty($article->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getMimetype());
    $this->assertEmpty($article->getLanguage());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('0.0.0.0', $article->getDomainName());
}
91
/**
 * With no HTML but some Open Graph data, the OG title becomes the entry
 * title and the OG description is appended to the fetching-error message.
 */
public function testWithEmptyContentButOG()
{
    $submittedUrl = 'http://domain.io';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Empty fetch result that still carries Open Graph metadata.
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
        'status' => '',
        'open_graph' => [
            'og_title' => 'my title',
            'og_description' => 'desc',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, $submittedUrl);

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame($submittedUrl, $article->getUrl());
    $this->assertSame('my title', $article->getTitle());
    $this->assertSame($expectedContent, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getLanguage());
    $this->assertEmpty($article->getHttpStatus());
    $this->assertEmpty($article->getMimetype());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('domain.io', $article->getDomainName());
}
132
/**
 * A complete fetch result (content, metadata and OG image) must be
 * copied onto the entry, the fetched URL replacing the submitted one.
 */
public function testWithContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
f530f7f5 174
3d71d403
JB
/**
 * Same as a full fetch, but with a null OG image and plain-text content:
 * the entry must end up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertNull($article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
216
/**
 * Without an OG image, the image found inside the fetched HTML is
 * expected to be used as the preview picture.
 */
public function testWithContentAndContentImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>";

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertSame($html, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
258
/**
 * When both an OG image and an image inside the HTML are available,
 * the OG image is expected to win as preview picture.
 */
public function testWithContentImageAndOgImage()
{
    $html = "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>";

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertSame($html, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
300
0d349ea6
JB
/**
 * If the validator reports a violation for the fetched language, the
 * entry language must stay null while the other fields are still set.
 */
public function testWithContentAndBadLanguage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // The validator is called exactly once and rejects the language.
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'dontexist',
        'status' => '200',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertNull($article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
341
/**
 * If the validator reports a violation on its second call (the one
 * carrying the 'https://' OG image value), no preview picture must be set.
 */
public function testWithContentAndBadOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // First validate() call passes, second one returns a violation.
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([
        new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://'),
    ]);
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->willReturnOnConsecutiveCalls($noViolation, $urlViolation);

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'https://',
        ],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertNull($article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
391
4d0ec0e7
JB
/**
 * Content passed directly to updateEntry() (instead of being fetched)
 * must populate the entry, including publication date, authors and
 * headers.
 */
public function testWithForcedContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'all_headers' => [
            'Cache-Control' => 'no-cache',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertSame('24/03/2014', $article->getPublishedAt()->format('d/m/Y'));

    // Every forced author must show up in the "published by" value.
    foreach (['Jeremy', 'Nico', 'Thomas'] as $author) {
        $this->assertContains($author, $article->getPublishedBy());
    }

    $this->assertNotNull($article->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $article->getHeaders());
}
431
/**
 * A forced 'date' given as a datetime string with timezone offset must
 * be turned into the entry's publication date.
 */
public function testWithForcedContentAndDatetime()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // A real Monolog logger backed by an in-memory handler.
    $memoryHandler = new TestHandler();
    $monolog = new Logger('test', [$memoryHandler]);

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $monolog, $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertSame('08/09/2016', $article->getPublishedAt()->format('d/m/Y'));
}
465
/**
 * An unparsable forced 'date' must leave the publication date null and
 * produce an 'Error while defining date' log record.
 */
public function testWithForcedContentAndBadDate()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    // Log records are captured in memory so they can be inspected below.
    $monolog = new Logger('foo');
    $memoryHandler = new TestHandler();
    $monolog->pushHandler($memoryHandler);

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '01 02 2012',
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $monolog, $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertNull($article->getPublishedAt());

    $logRecords = $memoryHandler->getRecords();

    $this->assertCount(3, $logRecords);
    $this->assertContains('Error while defining date', $logRecords[0]['message']);
}
505
/**
 * An exception thrown by the tagger must not escape updateEntry();
 * the entry simply ends up without tags.
 */
public function testTaggerThrowException()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $article->getTags());
}
529
74a75f7d
JB
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset holds the raw HTML and a string that must no longer be
 * present in the stored content.
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
543
/**
 * Forced HTML containing scripts must be stored without the given
 * string, while the other metadata is kept as provided.
 *
 * @dataProvider dataForCrazyHtml
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertNotContains($escapedString, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
582
d0ec2ddd
JB
/**
 * When the fetched resource is itself an image, its URL must be used
 * as the preview picture even though no Open Graph data is available.
 */
public function testWithImageAsContent()
{
    $imageUrl = 'http://1.1.1.1/image.jpg';

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => $imageUrl,
        'content_type' => 'image/jpeg',
        'status' => '200',
        'open_graph' => [],
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame($imageUrl, $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains($imageUrl, $article->getContent());
    $this->assertSame($imageUrl, $article->getPreviewPicture());
    $this->assertSame('image/jpeg', $article->getMimetype());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
617
c01d9532
T
/**
 * A text/html title that is already valid UTF-8 must be stored unchanged.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // Hex <=> text converter: https://www.online-toolz.com/tools/text-hex-convertor.php
    // Character reference: http://graphemica.com
    // UTF-8 bytes of '😻ℤz': U+1F63B => F09F98BB, U+2124 => E284A4, U+007A => 7A
    $titleHex = 'F09F98BB' . 'E284A4' . '7A';
    $actualTitle = $this->hexToStr($titleHex);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // The stored title must be byte-for-byte the same '😻ℤz'.
    $this->assertSame($titleHex, $this->strToHex($article->getTitle()));
}
652
/**
 * A text/html title containing a byte that is not valid UTF-8 must be
 * stored with that byte removed.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // Character reference: http://graphemica.com
    // Bytes 61 80 62 read 'a€b' in WINDOWS-1252, but a lone 80 is not valid UTF-8
    // (the UTF-8 encoding of '€', U+20AC, is E282AC).
    $actualTitle = $this->hexToStr('61' . '80' . '62');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // Only 'ab' (61 62) is expected to remain once the invalid byte 80 is dropped.
    $expectedHex = '61' . '62';
    $this->assertSame($expectedHex, $this->strToHex($article->getTitle()));
}
687
/**
 * An application/pdf title encoded as UTF-16BE must be stored as UTF-8.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // Character reference: http://graphemica.com
    // '😻' (U+1F63B) encoded as UTF-16BE is D83DDE3B.
    $actualTitle = $this->hexToStr('D83DDE3B');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($article->getTitle()));
}
721
/**
 * An application/pdf title that is already UTF-8 must be stored unchanged.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // Character reference: http://graphemica.com
    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    $titleHex = 'F09F98BB';
    $actualTitle = $this->hexToStr($titleHex);

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // The stored title must still be the same UTF-8 bytes.
    $this->assertSame($titleHex, $this->strToHex($article->getTitle()));
}
755
/**
 * An application/pdf title encoded as WINDOWS-1252 must be stored as UTF-8.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // Character reference: http://graphemica.com
    // '€' encoded as WINDOWS-1252 is the single byte 80.
    $actualTitle = $this->hexToStr('80');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // '€' (U+20AC) encoded as UTF-8 is E282AC.
    $expectedHex = 'E282AC';
    $this->assertSame($expectedHex, $this->strToHex($article->getTitle()));
}
789
/**
 * An application/pdf title holding an invalid byte among valid UTF-8
 * characters must be stored with only that byte removed.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // Character reference: http://graphemica.com
    // UTF-8 bytes of '😻ℤz' (F09F98BB, E284A4, 7A) with a stray 81 inserted before 'z'.
    // 0x81 is not a valid character in UTF-16, UTF-8 or WINDOWS-1252.
    $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];
    $grabyMock = $this->getMockBuilder(Graby::class)
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock->expects($this->any())->method('fetchContent')->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // Expected: '😻ℤz' in UTF-8 (F09F98BB, E284A4, 7A), i.e. the input minus the 0x81 byte.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($article->getTitle()));
}
825
e07fadea
KD
/**
 * Data provider for testWithChangedUrl.
 *
 * Every dataset lists, in this order:
 *   $entry_url
 *   $origin_url
 *   $content_url
 *   $expected_entry_url
 *   $expected_origin_url
 *   $expected_domain
 */
public function dataForChangedUrl()
{
    $datasets = [];

    $datasets['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    $datasets['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    $datasets['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $datasets['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $datasets['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $datasets['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    $datasets['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $datasets;
}
922
/**
 * Calls updateEntry() with a content array whose 'url' may differ from the
 * entry URL, then checks the entry's URL, domain name and origin URL.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    // The tagger must be invoked exactly once during the update.
    $ruleTagger = $this->getTaggerMock();
    $ruleTagger->expects($this->once())->method('tag');

    $contentProxy = new ContentProxy(
        new Graby(),
        $ruleTagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );

    $entry = new Entry(new User());
    $entry->setOriginUrl($origin_url);

    // Content array handed to updateEntry(); only its 'url' varies per dataset.
    $content = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'content_type' => '',
        'language' => '',
    ];

    $contentProxy->updateEntry($entry, $entry_url, $content, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
952
/**
 * Encodes a string as uppercase hexadecimal, two digits per byte.
 *
 * The conversion is byte-wise, so a multi-byte UTF-8 character produces
 * one digit pair per byte (for example "z" becomes "7A").
 *
 * @param string $string
 *
 * @return string
 */
private function strToHex($string)
{
    // bin2hex() emits two lowercase hex digits per byte, which is exactly
    // what the previous hand-written ord()/dechex() loop produced.
    return strtoupper(bin2hex($string));
}
971
/**
 * Decodes a hexadecimal string into the bytes it represents.
 *
 * Digits are consumed two at a time; a dangling final digit is ignored.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param $hex
 *
 * @return string
 */
private function hexToStr($hex)
{
    $decoded = '';
    $length = \strlen($hex);

    // Stop as soon as fewer than two digits remain.
    for ($offset = 0; $offset + 1 < $length; $offset += 2) {
        $pair = substr($hex, $offset, 2);
        $decoded .= \chr(hexdec($pair));
    }

    return $decoded;
}
988
f530f7f5
KG
/**
 * Builds a RuleBasedTagger mock with the original constructor disabled
 * and tag() as the only mocked method, so tests can set expectations on it.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1c9cd2a7 996
/**
 * Returns a fresh NullLogger for tests that do not inspect log records.
 *
 * @return NullLogger
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
0d349ea6 1001
/**
 * Builds a RecursiveValidator mock with the original constructor disabled
 * and validate() as the only mocked method.
 *
 * @param bool $withDefaultMock When truthy, validate() is stubbed to return
 *                              an empty ConstraintViolationList; otherwise
 *                              the mock is returned without any stubbing
 *
 * @return \PHPUnit\Framework\MockObject\MockObject
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    // Caller wants to configure validate() itself: return the bare mock.
    if (!$withDefaultMock) {
        return $validator;
    }

    $validator->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validator;
}
558d9aab 1017}