]> git.immae.eu Git - github/wallabag/wallabag.git/blame - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Fix because of some breaking changes of Graby 2.0
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
CommitLineData
558d9aab
JB
1<?php
2
a2c1b94e 3namespace Tests\Wallabag\CoreBundle\Helper;
558d9aab 4
f808b016 5use Graby\Graby;
d5c2cc54 6use Monolog\Handler\TestHandler;
f808b016 7use Monolog\Logger;
bd91bd5c 8use PHPUnit\Framework\TestCase;
f808b016
JB
9use Psr\Log\NullLogger;
10use Symfony\Component\Validator\ConstraintViolation;
11use Symfony\Component\Validator\ConstraintViolationList;
12use Symfony\Component\Validator\Validator\RecursiveValidator;
c2656f96 13use Wallabag\CoreBundle\Entity\Entry;
f808b016 14use Wallabag\CoreBundle\Helper\ContentProxy;
6bc6fb1f 15use Wallabag\CoreBundle\Helper\RuleBasedTagger;
f808b016 16use Wallabag\UserBundle\Entity\User;
558d9aab 17
bd91bd5c 18class ContentProxyTest extends TestCase
558d9aab 19{
ac1509a6 20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
a2c1b94e 21
4d0ec0e7
JB
/**
 * When Graby extracts nothing for a URL without a host, the entry keeps the
 * given URL, receives the fetching error message as content and has no
 * domain name.
 */
public function testWithBadUrl()
{
    $givenUrl = 'http://user@:80';

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    // Payload the stubbed Graby returns: nothing usable was extracted.
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, $givenUrl);

    $this->assertSame('http://user@:80', $article->getUrl());
    $this->assertEmpty($article->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getMimetype());
    $this->assertEmpty($article->getLanguage());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertNull($article->getDomainName());
}
56
558d9aab
JB
/**
 * When Graby extracts nothing for a valid URL, the entry receives the
 * fetching error message as content and the domain of the given URL.
 */
public function testWithEmptyContent()
{
    $givenUrl = 'http://0.0.0.0';

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    // Payload the stubbed Graby returns: nothing usable was extracted.
    $fetched = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, $givenUrl);

    $this->assertSame('http://0.0.0.0', $article->getUrl());
    $this->assertEmpty($article->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getMimetype());
    $this->assertEmpty($article->getLanguage());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('0.0.0.0', $article->getDomainName());
}
91
/**
 * When no HTML is extracted but a title and a description are, the title
 * is kept and the description is appended to the fetching error message.
 */
public function testWithEmptyContentButOG()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    // No HTML, but a title and a short description are available.
    $fetched = [
        'html' => false,
        'title' => 'my title',
        'url' => '',
        'content_type' => '',
        'language' => '',
        'status' => '',
        'description' => 'desc',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://domain.io');

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame('http://domain.io', $article->getUrl());
    $this->assertSame('my title', $article->getTitle());
    $this->assertSame($expectedContent, $article->getContent());
    $this->assertEmpty($article->getPreviewPicture());
    $this->assertEmpty($article->getLanguage());
    $this->assertEmpty($article->getHttpStatus());
    $this->assertEmpty($article->getMimetype());
    $this->assertSame(0.0, $article->getReadingTime());
    $this->assertSame('domain.io', $article->getDomainName());
}
129
/**
 * A complete fetch result (HTML, title, final URL, language, status, image
 * and content-type header) is copied onto the entry.
 */
public function testWithContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // The URL reported by the fetch result replaces the URL given to updateEntry().
    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
f530f7f5 170
3d71d403
JB
/**
 * Same as a complete fetch, except that the image is null: the entry must
 * end up without a preview picture.
 */
public function testWithContentAndNoOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => null,
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertNull($article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
211
/**
 * When the fetch result carries no image but the HTML contains an <img>,
 * that image is expected to become the preview picture.
 *
 * The stubbed fetch result uses the same shape as the other tests of this
 * class that assert on mimetype and preview picture (e.g. testWithContent):
 * content type under 'headers' => 'content-type', image under 'image' and
 * description under 'description'. The previous fixture used 'content_type'
 * and a nested 'open_graph' array instead.
 */
public function testWithContentAndContentImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            // No image in the fetch result: the picture must come from the HTML.
            'image' => null,
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/cover.jpg'/></p>", $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
253
/**
 * When both the fetch result and the HTML provide an image, the image from
 * the fetch result is expected to become the preview picture.
 *
 * The stubbed fetch result uses the same shape as the other tests of this
 * class that assert on mimetype and preview picture (e.g. testWithContent):
 * content type under 'headers' => 'content-type', image under 'image' and
 * description under 'description'. The previous fixture used 'content_type'
 * and a nested 'open_graph' array instead.
 */
public function testWithContentImageAndOgImage()
{
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())
        ->method('tag');

    $graby = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();

    $graby->expects($this->any())
        ->method('fetchContent')
        ->willReturn([
            // The <img> inside the HTML must NOT be picked as preview picture.
            'html' => "<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>",
            'title' => 'this is my title',
            'url' => 'http://1.1.1.1',
            'language' => 'fr',
            'status' => '200',
            'description' => 'OG desc',
            'image' => 'http://3.3.3.3/cover.jpg',
            'headers' => [
                'content-type' => 'text/html',
            ],
        ]);

    $proxy = new ContentProxy($graby, $tagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $entry = new Entry(new User());
    $proxy->updateEntry($entry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $entry->getUrl());
    $this->assertSame('this is my title', $entry->getTitle());
    $this->assertSame("<h1>Test</h1><p><img src='http://3.3.3.3/nevermind.jpg'/></p>", $entry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $entry->getPreviewPicture());
    $this->assertSame('text/html', $entry->getMimetype());
    $this->assertSame('fr', $entry->getLanguage());
    $this->assertSame('200', $entry->getHttpStatus());
    $this->assertSame(0.0, $entry->getReadingTime());
    $this->assertSame('1.1.1.1', $entry->getDomainName());
}
295
0d349ea6
JB
/**
 * When the validator reports a violation for the fetched language, the
 * entry language stays null while the other fields are still set.
 */
public function testWithContentAndBadLanguage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    // The single validate() call reports a violation on the language value.
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');
    $validatorMock = $this->getValidator(false);
    $validatorMock
        ->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'dontexist',
        'status' => '200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertNull($article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
338
/**
 * When the second validate() call reports a violation for the fetched image
 * URL, the entry gets no preview picture while the other fields are set.
 */
public function testWithContentAndBadOgImage()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    // First validate() call passes, the second one rejects the 'https://' URL.
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')]);

    $validatorMock = $this->getValidator(false);
    $validatorMock
        ->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($noViolation, $urlViolation));

    $fetched = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'https://',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertNull($article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
388
4d0ec0e7
JB
/**
 * Content passed directly to updateEntry() (with a real Graby instance) is
 * applied to the entry, including a Unix-timestamp date, authors and headers.
 */
public function testWithForcedContent()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $article = new Entry(new User());

    // Already-known content handed to updateEntry() as its third argument.
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'headers' => [
            'cache-control' => 'no-cache',
            'content-type' => 'text/html',
        ],
    ];
    $contentProxy->updateEntry($article, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertSame('24/03/2014', $article->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $article->getPublishedBy());
    $this->assertContains('Nico', $article->getPublishedBy());
    $this->assertContains('Thomas', $article->getPublishedBy());
    $this->assertNotNull($article->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $article->getHeaders());
}
428
/**
 * A forced content whose date is an ISO-8601 style datetime string is
 * parsed into the entry's publication date.
 */
public function testWithForcedContentAndDatetime()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $recordingHandler = new TestHandler();
    $testLogger = new Logger('test', [$recordingHandler]);

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $testLogger, $this->fetchingErrorMessage);
    $article = new Entry(new User());

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertSame('08/09/2016', $article->getPublishedAt()->format('d/m/Y'));
}
464
/**
 * A forced content with an unparsable date leaves the publication date null
 * and produces log records, the first of which mentions the date error.
 */
public function testWithForcedContentAndBadDate()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    // Real Monolog logger with an in-memory handler so records can be inspected.
    $testLogger = new Logger('foo');
    $recordingHandler = new TestHandler();
    $testLogger->pushHandler($recordingHandler);

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $testLogger, $this->fetchingErrorMessage);
    $article = new Entry(new User());

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'date' => '01 02 2012',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains('content', $article->getContent());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame(4.0, $article->getReadingTime());
    $this->assertSame('1.1.1.1', $article->getDomainName());
    $this->assertNull($article->getPublishedAt());

    $logged = $recordingHandler->getRecords();

    $this->assertCount(3, $logged);
    $this->assertContains('Error while defining date', $logged[0]['message']);
}
506
/**
 * An exception thrown by the tagger must not escape updateEntry(), and the
 * entry ends up with no tags.
 */
public function testTaggerThrowException()
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag')
        ->willThrowException(new \Exception());

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());

    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $article->getTags());
}
532
74a75f7d
JB
/**
 * Data provider for testWithCrazyHtmlContent.
 *
 * Each dataset is [html given as content, string that must not appear in
 * the stored entry content].
 */
public function dataForCrazyHtml()
{
    $datasets = [];

    // Script hidden inside an IE conditional comment.
    $datasets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    // Plain inline script tag.
    $datasets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $datasets;
}
546
/**
 * Forced HTML containing scripts is stored without the given string.
 *
 * @dataProvider dataForCrazyHtml
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());

    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'language' => 'fr',
        'status' => '200',
        'description' => 'OG desc',
        'image' => 'http://3.3.3.3/cover.jpg',
        'headers' => [
            'content-type' => 'text/html',
        ],
    ];
    $contentProxy->updateEntry($article, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertNotContains($escapedString, $article->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $article->getPreviewPicture());
    $this->assertSame('text/html', $article->getMimetype());
    $this->assertSame('fr', $article->getLanguage());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
585
d0ec2ddd
JB
/**
 * When the fetched resource is an image (image/jpeg content type), the
 * image URL is used as the entry's preview picture and appears in its content.
 */
public function testWithImageAsContent()
{
    $imageUrl = 'http://1.1.1.1/image.jpg';

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'status' => '200',
        'headers' => [
            'content-type' => 'image/jpeg',
        ],
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    $this->assertSame($imageUrl, $article->getUrl());
    $this->assertSame('this is my title', $article->getTitle());
    $this->assertContains($imageUrl, $article->getContent());
    $this->assertSame($imageUrl, $article->getPreviewPicture());
    $this->assertSame('image/jpeg', $article->getMimetype());
    $this->assertSame('200', $article->getHttpStatus());
    $this->assertSame('1.1.1.1', $article->getDomainName());
}
621
c01d9532
T
/**
 * A text/html title that is already valid UTF-8 is stored unchanged.
 *
 * Titles are expressed as hexadecimal byte strings; see http://graphemica.com
 * for details about the characters.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // '😻ℤz' in UTF-8: U+1F63B = F09F98BB, U+2124 = E284A4, U+007A = 7A
    $titleHex = 'F09F98BB' . 'E284A4' . '7A';
    $actualTitle = $this->hexToStr($titleHex);

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // The stored title must be byte-for-byte the same UTF-8 sequence.
    $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedTitle, $this->strToHex($article->getTitle()));
}
656
/**
 * A text/html title containing a byte that is invalid in UTF-8 is stored
 * with that byte removed.
 *
 * See http://graphemica.com for details about the characters.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // 'a€b' encoded as WINDOWS-1252 is 61 80 62, but 80 is an invalid UTF-8 byte.
    // (The correct UTF-8 encoding of '€', U+20AC, would be E282AC.)
    $actualTitle = $this->hexToStr('61' . '80' . '62');

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // 'ab' (61 62): every invalid UTF-8 byte (such as 80) is expected to be removed.
    $expectedTitle = '61' . '62';
    $this->assertSame($expectedTitle, $this->strToHex($article->getTitle()));
}
691
/**
 * An application/pdf title encoded as UTF-16BE is stored as UTF-8.
 *
 * See http://graphemica.com for details about the characters.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // '😻' (U+1F63B) encoded as UTF-16BE is D83DDE3B.
    $actualTitle = $this->hexToStr('D83DDE3B');

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    $expectedTitle = 'F09F98BB';
    $this->assertSame($expectedTitle, $this->strToHex($article->getTitle()));
}
725
/**
 * An application/pdf title that is already UTF-8 is stored unchanged.
 *
 * See http://graphemica.com for details about the characters.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // '😻' (U+1F63B) encoded as UTF-8 is F09F98BB.
    $actualTitle = $this->hexToStr('F09F98BB');

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // Same UTF-8 byte sequence as the input: F09F98BB.
    $expectedTitle = 'F09F98BB';
    $this->assertSame($expectedTitle, $this->strToHex($article->getTitle()));
}
759
/**
 * An application/pdf title encoded as WINDOWS-1252 is stored as UTF-8.
 *
 * See http://graphemica.com for details about the characters.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // '€' encoded as WINDOWS-1252 is the single byte 80.
    $actualTitle = $this->hexToStr('80');

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // '€' (U+20AC) encoded as UTF-8 is E282AC.
    $expectedTitle = 'E282AC';
    $this->assertSame($expectedTitle, $this->strToHex($article->getTitle()));
}
793
/**
 * An application/pdf title that is UTF-8 apart from one invalid byte is
 * stored with that byte removed.
 *
 * See http://graphemica.com for details about the characters.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // '😻ℤ' + invalid byte 81 + 'z' in UTF-8:
    // U+1F63B = F09F98BB, U+2124 = E284A4, 81 (invalid), U+007A = 7A.
    // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252.
    $actualTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock
        ->expects($this->once())
        ->method('tag');

    $fetched = [
        'html' => false,
        'title' => $actualTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder('Graby\Graby')
        ->setMethods(['fetchContent'])
        ->disableOriginalConstructor()
        ->getMock();
    $grabyMock
        ->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetched);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $article = new Entry(new User());
    $contentProxy->updateEntry($article, 'http://0.0.0.0');

    // '😻ℤz' in UTF-8 (F09F98BB E284A4 7A): the invalid 0x81 byte is expected to be removed.
    $expectedTitle = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedTitle, $this->strToHex($article->getTitle()));
}
829
e07fadea
KD
/**
 * Data provider for testWithChangedUrl.
 *
 * Every dataset lists, in this order:
 *   $entry_url
 *   $origin_url
 *   $content_url
 *   $expected_entry_url
 *   $expected_origin_url
 *   $expected_domain
 */
public function dataForChangedUrl()
{
    $datasets = [];

    $datasets['normal'] = [
        'http://0.0.0.0',
        null,
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://0.0.0.0',
        '1.1.1.1',
    ];

    $datasets['origin already set'] = [
        'http://0.0.0.0',
        'http://hello',
        'http://1.1.1.1',
        'http://1.1.1.1',
        'http://hello',
        '1.1.1.1',
    ];

    $datasets['trailing slash'] = [
        'https://example.com/hello-world',
        null,
        'https://example.com/hello-world/',
        'https://example.com/hello-world/',
        null,
        'example.com',
    ];

    $datasets['query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?world=1',
        'https://example.org/hello?world=1',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['fragment in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello#world',
        'https://example.org/hello',
        null,
        'example.org',
    ];

    $datasets['fragment and query string in fetched content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/hello?foo#world',
        'https://example.org/hello?foo#world',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['different path and query string in fetch content'] = [
        'https://example.org/hello',
        null,
        'https://example.org/world?foo',
        'https://example.org/world?foo',
        'https://example.org/hello',
        'example.org',
    ];

    $datasets['feedproxy ignore list test'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        null,
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        null,
        'example.org',
    ];

    $datasets['feedproxy ignore list test with origin url already set'] = [
        'http://feedproxy.google.com/~r/Wallabag/~3/helloworld',
        'https://example.org/this-is-source',
        'https://example.org/hello-wallabag',
        'https://example.org/hello-wallabag',
        'https://example.org/this-is-source',
        'example.org',
    ];

    $datasets['lemonde ignore pattern test'] = [
        'http://www.lemonde.fr/tiny/url',
        null,
        'http://example.com/hello-world',
        'http://example.com/hello-world',
        null,
        'example.com',
    ];

    return $datasets;
}
926
/**
 * Feeds updateEntry() pre-fetched content whose URL may differ from the
 * entry URL, then checks the URL, domain name and origin URL of the entry.
 *
 * @dataProvider dataForChangedUrl
 */
public function testWithChangedUrl($entry_url, $origin_url, $content_url, $expected_entry_url, $expected_origin_url, $expected_domain)
{
    $entry = new Entry(new User());
    $entry->setOriginUrl($origin_url);

    // Pre-fetched content handed straight to updateEntry(); only the URL varies per dataset.
    $fetchedContent = [
        'html' => false,
        'title' => '',
        'url' => $content_url,
        'content_type' => '',
        'language' => '',
    ];

    // The tagger must be invoked exactly once during the update.
    $tagger = $this->getTaggerMock();
    $tagger->expects($this->once())->method('tag');

    $proxy = new ContentProxy(
        new Graby(),
        $tagger,
        $this->getValidator(),
        $this->getLogger(),
        $this->fetchingErrorMessage,
        true
    );
    $proxy->updateEntry($entry, $entry_url, $fetchedContent, true);

    $this->assertSame($expected_entry_url, $entry->getUrl());
    $this->assertSame($expected_domain, $entry->getDomainName());
    $this->assertSame($expected_origin_url, $entry->getOriginUrl());
}
956
/**
 * Converts a string to its uppercase hexadecimal representation,
 * two hexadecimal digits per byte.
 *
 * Uses the built-in bin2hex() instead of converting each byte by hand.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param string $string
 *
 * @return string
 */
private function strToHex($string)
{
    // bin2hex() yields lowercase digits; callers compare against uppercase hex.
    return strtoupper(bin2hex($string));
}
975
/**
 * Converts a string of hexadecimal digit pairs into the byte string it encodes.
 *
 * Digits are consumed two at a time; a dangling final digit (odd-length
 * input) is ignored.
 *
 * @see https://stackoverflow.com/a/18506801
 *
 * @param $hex
 *
 * @return string
 */
private function hexToStr($hex)
{
    $length = \strlen($hex);
    $bytes = '';

    // Stop as soon as fewer than two digits remain.
    for ($offset = 0; $offset + 1 < $length; $offset += 2) {
        $pair = substr($hex, $offset, 2);
        $bytes .= \chr(hexdec($pair));
    }

    return $bytes;
}
992
f530f7f5
KG
/**
 * Builds a RuleBasedTagger mock whose constructor is skipped and whose
 * tag() method is the only one replaced, so tests can set expectations on it.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1c9cd2a7 1000
/**
 * Provides the logger passed to ContentProxy in these tests.
 *
 * @return NullLogger
 */
private function getLogger()
{
    $logger = new NullLogger();

    return $logger;
}
0d349ea6 1005
/**
 * Builds a RecursiveValidator mock whose constructor is skipped and whose
 * validate() method is the only one replaced.
 *
 * @param bool $withDefaultMock when truthy, validate() is stubbed to return
 *                              an empty ConstraintViolationList; otherwise the
 *                              mock is returned without any stubbing
 */
private function getValidator($withDefaultMock = true)
{
    $validator = $this->getMockBuilder(RecursiveValidator::class)
        ->setMethods(['validate'])
        ->disableOriginalConstructor()
        ->getMock();

    // Caller wants to configure validate() itself.
    if (!$withDefaultMock) {
        return $validator;
    }

    $validator->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validator;
}
558d9aab 1021}