]> git.immae.eu Git - github/wallabag/wallabag.git/blame - tests/Wallabag/CoreBundle/Helper/ContentProxyTest.php
Run php-cs-fixer for fixing coding standard issues (on ContentProxyTest)
[github/wallabag/wallabag.git] / tests / Wallabag / CoreBundle / Helper / ContentProxyTest.php
CommitLineData
558d9aab
JB
1<?php
2
a2c1b94e 3namespace Tests\Wallabag\CoreBundle\Helper;
558d9aab 4
f808b016 5use Graby\Graby;
d5c2cc54 6use Monolog\Handler\TestHandler;
f808b016 7use Monolog\Logger;
bd91bd5c 8use PHPUnit\Framework\TestCase;
f808b016
JB
9use Psr\Log\NullLogger;
10use Symfony\Component\Validator\ConstraintViolation;
11use Symfony\Component\Validator\ConstraintViolationList;
12use Symfony\Component\Validator\Validator\RecursiveValidator;
c2656f96 13use Wallabag\CoreBundle\Entity\Entry;
f808b016 14use Wallabag\CoreBundle\Helper\ContentProxy;
6bc6fb1f 15use Wallabag\CoreBundle\Helper\RuleBasedTagger;
f808b016 16use Wallabag\UserBundle\Entity\User;
558d9aab 17
bd91bd5c 18class ContentProxyTest extends TestCase
558d9aab 19{
ac1509a6 20 private $fetchingErrorMessage = 'wallabag can\'t retrieve contents for this article. Please <a href="http://doc.wallabag.org/en/user/errors_during_fetching.html#how-can-i-help-to-fix-that">troubleshoot this issue</a>.';
a2c1b94e 21
4d0ec0e7
JB
/**
 * Bad URL case: the mocked Graby result has no HTML and only empty metadata.
 * The entry must keep the submitted URL, receive the fetching error message
 * as content and end up without a domain name.
 */
public function testWithBadUrl()
{
    $submittedUrl = 'http://user@:80';

    // What the mocked Graby hands back: nothing usable.
    $fetchedContent = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, $submittedUrl);

    $this->assertSame($submittedUrl, $savedEntry->getUrl());
    $this->assertEmpty($savedEntry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $savedEntry->getContent());
    $this->assertEmpty($savedEntry->getPreviewPicture());
    $this->assertEmpty($savedEntry->getMimetype());
    $this->assertEmpty($savedEntry->getLanguage());
    $this->assertSame(0.0, $savedEntry->getReadingTime());
    $this->assertNull($savedEntry->getDomainName());
}
56
558d9aab
JB
/**
 * Empty content case: the mocked Graby result has no HTML and only empty
 * metadata. The entry keeps the submitted URL, gets the fetching error
 * message as content and its domain name is taken from the submitted URL.
 */
public function testWithEmptyContent()
{
    $submittedUrl = 'http://0.0.0.0';

    $fetchedContent = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, $submittedUrl);

    $this->assertSame($submittedUrl, $savedEntry->getUrl());
    $this->assertEmpty($savedEntry->getTitle());
    $this->assertSame($this->fetchingErrorMessage, $savedEntry->getContent());
    $this->assertEmpty($savedEntry->getPreviewPicture());
    $this->assertEmpty($savedEntry->getMimetype());
    $this->assertEmpty($savedEntry->getLanguage());
    $this->assertSame(0.0, $savedEntry->getReadingTime());
    $this->assertSame('0.0.0.0', $savedEntry->getDomainName());
}
91
/**
 * No HTML was fetched but Open Graph data is present: the entry title comes
 * from og_title and the og_description is appended to the fetching error
 * message.
 */
public function testWithEmptyContentButOG()
{
    $submittedUrl = 'http://domain.io';

    $openGraph = [
        'og_title' => 'my title',
        'og_description' => 'desc',
    ];
    $fetchedContent = [
        'html' => false,
        'title' => '',
        'url' => '',
        'content_type' => '',
        'language' => '',
        'status' => '',
        'open_graph' => $openGraph,
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, $submittedUrl);

    $expectedContent = $this->fetchingErrorMessage . '<p><i>But we found a short description: </i></p>desc';

    $this->assertSame($submittedUrl, $savedEntry->getUrl());
    $this->assertSame('my title', $savedEntry->getTitle());
    $this->assertSame($expectedContent, $savedEntry->getContent());
    $this->assertEmpty($savedEntry->getPreviewPicture());
    $this->assertEmpty($savedEntry->getLanguage());
    $this->assertEmpty($savedEntry->getHttpStatus());
    $this->assertEmpty($savedEntry->getMimetype());
    $this->assertSame(0.0, $savedEntry->getReadingTime());
    $this->assertSame('domain.io', $savedEntry->getDomainName());
}
132
/**
 * Full content case: every field returned by the mocked Graby (URL, title,
 * content, mimetype, language, status, og_image) must end up on the entry.
 */
public function testWithContent()
{
    $fetchedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The URL reported by Graby wins over the submitted one.
    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $savedEntry->getPreviewPicture());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame('200', $savedEntry->getHttpStatus());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
}
f530f7f5 174
3d71d403
JB
/**
 * Same as the full content case, except og_image is null: the entry must
 * have no preview picture while the other fields are still filled in.
 */
public function testWithContentAndNoOgImage()
{
    $fetchedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => null,
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertNull($savedEntry->getPreviewPicture());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame('200', $savedEntry->getHttpStatus());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
}
216
/**
 * The validator reports a violation for the fetched language: the entry
 * language must stay null while the other fields are stored.
 */
public function testWithContentAndBadLanguage()
{
    $fetchedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'dontexist',
        'status' => '200',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // No default stub here: the single validate() call must return a violation.
    $languageViolation = new ConstraintViolation('oops', 'oops', [], 'oops', 'language', 'dontexist');
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->once())
        ->method('validate')
        ->willReturn(new ConstraintViolationList([$languageViolation]));

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertNull($savedEntry->getLanguage());
    $this->assertSame('200', $savedEntry->getHttpStatus());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
}
257
/**
 * The validator accepts the first validated value and rejects the second one
 * (a violation on 'url' for the og_image 'https://'): the entry must have no
 * preview picture while the language is kept.
 */
public function testWithContentAndBadOgImage()
{
    $fetchedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'https://',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // First validate() call passes, second one fails.
    $noViolation = new ConstraintViolationList();
    $urlViolation = new ConstraintViolationList([new ConstraintViolation('oops', 'oops', [], 'oops', 'url', 'https://')]);
    $validatorMock = $this->getValidator(false);
    $validatorMock->expects($this->exactly(2))
        ->method('validate')
        ->will($this->onConsecutiveCalls($noViolation, $urlViolation));

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $validatorMock, $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertNull($savedEntry->getPreviewPicture());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame('200', $savedEntry->getHttpStatus());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
}
307
4d0ec0e7
JB
/**
 * Content is handed directly to updateEntry() together with a real Graby
 * instance. Besides the usual fields, the publication date (given as a
 * timestamp string), the authors and the headers must be stored on the entry.
 */
public function testWithForcedContent()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '1395635872',
        'authors' => ['Jeremy', 'Nico', 'Thomas'],
        'all_headers' => [
            'Cache-Control' => 'no-cache',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage, true);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
    $this->assertSame('24/03/2014', $savedEntry->getPublishedAt()->format('d/m/Y'));
    $this->assertContains('Jeremy', $savedEntry->getPublishedBy());
    $this->assertContains('Nico', $savedEntry->getPublishedBy());
    $this->assertContains('Thomas', $savedEntry->getPublishedBy());
    $this->assertNotNull($savedEntry->getHeaders(), 'Headers are stored, so value is not null');
    $this->assertContains('no-cache', $savedEntry->getHeaders());
}
347
/**
 * Forced content whose 'date' is a full datetime string with a timezone
 * offset: the entry publication date must be set accordingly.
 */
public function testWithForcedContentAndDatetime()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '2016-09-08T11:55:58+0200',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // A real Monolog logger backed by an in-memory handler is used here.
    $recordingHandler = new TestHandler();
    $testLogger = new Logger('test', [$recordingHandler]);

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $testLogger, $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
    $this->assertSame('08/09/2016', $savedEntry->getPublishedAt()->format('d/m/Y'));
}
381
/**
 * Forced content whose 'date' cannot be understood: the publication date
 * must stay null and exactly one log record mentioning the date error must
 * have been written.
 */
public function testWithForcedContentAndBadDate()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'date' => '01 02 2012',
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    // In-memory handler so the emitted log records can be inspected below.
    $recordingHandler = new TestHandler();
    $testLogger = new Logger('foo');
    $testLogger->pushHandler($recordingHandler);

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $testLogger, $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains('this is my content', $savedEntry->getContent());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame(4.0, $savedEntry->getReadingTime());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
    $this->assertNull($savedEntry->getPublishedAt());

    $logRecords = $recordingHandler->getRecords();

    $this->assertCount(1, $logRecords);
    $this->assertContains('Error while defining date', $logRecords[0]['message']);
}
421
/**
 * The tagger throws while the entry is updated: updateEntry() must not let
 * the exception escape and the entry ends up with no tags.
 */
public function testTaggerThrowException()
{
    $forcedContent = [
        'html' => str_repeat('this is my content', 325),
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
    ];

    $failingTagger = $this->getTaggerMock();
    $failingTagger->expects($this->once())
        ->method('tag')
        ->will($this->throwException(new \Exception()));

    $contentProxy = new ContentProxy((new Graby()), $failingTagger, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertCount(0, $savedEntry->getTags());
}
445
74a75f7d
JB
/**
 * Data sets for testWithCrazyHtmlContent().
 *
 * Each set is [raw HTML given as content, string that must NOT appear in
 * the stored entry content].
 *
 * @return array
 */
public function dataForCrazyHtml()
{
    $dataSets = [];

    $dataSets['script and comment'] = [
        '<strong>Script inside:</strong> <!--[if gte IE 4]><script>alert(\'lol\');</script><![endif]--><br />',
        'lol',
    ];

    $dataSets['script'] = [
        '<strong>Script inside:</strong><script>alert(\'lol\');</script>',
        'script',
    ];

    return $dataSets;
}
459
/**
 * Forced HTML containing script tags: the given string must not be found in
 * the stored entry content, while the other fields are stored as provided.
 *
 * @dataProvider dataForCrazyHtml
 *
 * @param string $html          Raw HTML handed to updateEntry() as content
 * @param string $escapedString String that must be absent from the stored content
 */
public function testWithCrazyHtmlContent($html, $escapedString)
{
    $forcedContent = [
        'html' => $html,
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1',
        'content_type' => 'text/html',
        'language' => 'fr',
        'status' => '200',
        'open_graph' => [
            'og_title' => 'my OG title',
            'og_description' => 'OG desc',
            'og_image' => 'http://3.3.3.3/cover.jpg',
        ],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $contentProxy = new ContentProxy((new Graby()), $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://1.1.1.1', $forcedContent);

    $this->assertSame('http://1.1.1.1', $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertNotContains($escapedString, $savedEntry->getContent());
    $this->assertSame('http://3.3.3.3/cover.jpg', $savedEntry->getPreviewPicture());
    $this->assertSame('text/html', $savedEntry->getMimetype());
    $this->assertSame('fr', $savedEntry->getLanguage());
    $this->assertSame('200', $savedEntry->getHttpStatus());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
}
498
d0ec2ddd
JB
/**
 * The fetched resource is an image (content type image/jpeg) and no Open
 * Graph data is available: the image URL must become the preview picture.
 */
public function testWithImageAsContent()
{
    $imageUrl = 'http://1.1.1.1/image.jpg';

    $fetchedContent = [
        'html' => '<p><img src="http://1.1.1.1/image.jpg" /></p>',
        'title' => 'this is my title',
        'url' => 'http://1.1.1.1/image.jpg',
        'content_type' => 'image/jpeg',
        'status' => '200',
        'open_graph' => [],
    ];

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    $this->assertSame($imageUrl, $savedEntry->getUrl());
    $this->assertSame('this is my title', $savedEntry->getTitle());
    $this->assertContains($imageUrl, $savedEntry->getContent());
    $this->assertSame($imageUrl, $savedEntry->getPreviewPicture());
    $this->assertSame('image/jpeg', $savedEntry->getMimetype());
    $this->assertSame('200', $savedEntry->getHttpStatus());
    $this->assertSame('1.1.1.1', $savedEntry->getDomainName());
}
533
c01d9532
T
/**
 * A text/html page whose title is already valid UTF-8: the title must be
 * stored byte-for-byte unchanged.
 */
public function testWebsiteWithValidUTF8Title_doNothing()
{
    // Title bytes, written in hexadecimal: U+1F63B (F09F98BB), U+2124 (E284A4)
    // and U+007A (7A), all encoded as UTF-8.
    // See http://graphemica.com for more info about the characters.
    $rawTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetchedContent = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Same three UTF-8 characters as the input, compared as upper-case hex.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($savedEntry->getTitle()));
}
568
/**
 * A text/html page whose title contains a byte that is not valid UTF-8: the
 * invalid byte must be dropped from the stored title.
 */
public function testWebsiteWithInvalidUTF8Title_removeInvalidCharacter()
{
    // Bytes 61 80 62: 'a', then 0x80 (the euro sign in WINDOWS-1252 but an
    // invalid byte in UTF-8, where the euro sign U+20AC is E282AC), then 'b'.
    // See http://graphemica.com for more info about the characters.
    $rawTitle = $this->hexToStr('61' . '80' . '62');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetchedContent = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'content_type' => 'text/html',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Only 'a' (61) and 'b' (62) are expected to remain.
    $expectedHex = '61' . '62';
    $this->assertSame($expectedHex, $this->strToHex($savedEntry->getTitle()));
}
603
/**
 * An application/pdf document whose title is encoded as UTF-16BE: the
 * stored title must be the UTF-8 encoding of the same character.
 */
public function testPdfWithUTF16BETitle_convertToUTF8()
{
    // U+1F63B encoded as UTF-16BE is D83DDE3B.
    // See http://graphemica.com for more info about the characters.
    $rawTitle = $this->hexToStr('D83DDE3B');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetchedContent = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // U+1F63B encoded as UTF-8 is F09F98BB.
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($savedEntry->getTitle()));
}
637
/**
 * An application/pdf document whose title is already UTF-8: the stored
 * title must be byte-for-byte unchanged.
 */
public function testPdfWithUTF8Title_doNothing()
{
    // U+1F63B encoded as UTF-8 is F09F98BB (D83DDE3B would be its UTF-16BE form).
    // See http://graphemica.com for more info about the characters.
    $rawTitle = $this->hexToStr('F09F98BB');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetchedContent = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // Same UTF-8 bytes as the input are expected.
    $expectedHex = 'F09F98BB';
    $this->assertSame($expectedHex, $this->strToHex($savedEntry->getTitle()));
}
671
/**
 * An application/pdf document whose title is a single WINDOWS-1252 byte:
 * the stored title must be the UTF-8 encoding of the same character.
 */
public function testPdfWithWINDOWS1252Title_convertToUTF8()
{
    // Byte 80 is the euro sign in WINDOWS-1252.
    // See http://graphemica.com for more info about the characters.
    $rawTitle = $this->hexToStr('80');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetchedContent = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The euro sign (U+20AC) encoded as UTF-8 is E282AC.
    $expectedHex = 'E282AC';
    $this->assertSame($expectedHex, $this->strToHex($savedEntry->getTitle()));
}
705
/**
 * An application/pdf document whose otherwise valid UTF-8 title contains
 * one invalid byte: that byte must be removed from the stored title.
 */
public function testPdfWithInvalidCharacterInTitle_removeInvalidCharacter()
{
    // U+1F63B (F09F98BB), U+2124 (E284A4), the stray byte 81, then U+007A (7A).
    // 0x81 is not a valid character for UTF16, UTF8 and WINDOWS-1252.
    // See http://graphemica.com for more info about the characters.
    $rawTitle = $this->hexToStr('F09F98BB' . 'E284A4' . '81' . '7A');

    $taggerMock = $this->getTaggerMock();
    $taggerMock->expects($this->once())
        ->method('tag');

    $fetchedContent = [
        'html' => false,
        'title' => $rawTitle,
        'url' => '',
        'content_type' => 'application/pdf',
        'language' => '',
    ];

    $grabyMock = $this->getMockBuilder(Graby::class)
        ->disableOriginalConstructor()
        ->setMethods(['fetchContent'])
        ->getMock();
    $grabyMock->expects($this->any())
        ->method('fetchContent')
        ->willReturn($fetchedContent);

    $contentProxy = new ContentProxy($grabyMock, $taggerMock, $this->getValidator(), $this->getLogger(), $this->fetchingErrorMessage);
    $savedEntry = new Entry(new User());
    $contentProxy->updateEntry($savedEntry, 'http://0.0.0.0');

    // The three valid characters remain; the 0x81 byte is gone.
    $expectedHex = 'F09F98BB' . 'E284A4' . '7A';
    $this->assertSame($expectedHex, $this->strToHex($savedEntry->getTitle()));
}
741
/**
 * Renders every byte of a string as two upper-case hexadecimal digits.
 *
 * Originally adapted from https://stackoverflow.com/a/18506801.
 *
 * @param string $string Raw bytes to dump
 *
 * @return string Upper-case hexadecimal representation, two digits per byte
 */
private function strToHex($string)
{
    // bin2hex() yields two lower-case digits per byte; only the case differs.
    $lowerCaseHex = bin2hex($string);

    return strtoupper($lowerCaseHex);
}
760
/**
 * Builds a byte string from a string of hexadecimal digit pairs.
 *
 * Originally adapted from https://stackoverflow.com/a/18506801.
 *
 * @param string $hex Hexadecimal digits, two per byte; a dangling last digit is ignored
 *
 * @return string The decoded bytes
 */
private function hexToStr($hex)
{
    $bytes = [];

    foreach (str_split($hex, 2) as $digitPair) {
        // Skip anything that is not a complete pair (odd-length or empty input).
        if (2 !== \strlen($digitPair)) {
            continue;
        }

        $bytes[] = \chr(hexdec($digitPair));
    }

    return implode('', $bytes);
}
777
f530f7f5
KG
/**
 * Creates a RuleBasedTagger mock whose constructor is bypassed and whose
 * only mocked method is tag(); callers set their own expectations on it.
 */
private function getTaggerMock()
{
    $builder = $this->getMockBuilder(RuleBasedTagger::class);
    $builder->setMethods(['tag']);
    $builder->disableOriginalConstructor();

    return $builder->getMock();
}
1c9cd2a7 785
/**
 * Provides a fresh PSR-3 NullLogger for tests that do not inspect logs.
 */
private function getLogger()
{
    $silentLogger = new NullLogger();

    return $silentLogger;
}
0d349ea6 790
/**
 * Creates a RecursiveValidator mock whose constructor is bypassed and whose
 * only mocked method is validate().
 *
 * @param bool $withDefaultMock When true, validate() is stubbed to return an
 *                              empty ConstraintViolationList for any number
 *                              of calls; when false, the caller is expected
 *                              to configure validate() itself
 */
private function getValidator($withDefaultMock = true)
{
    $validatorMock = $this->getMockBuilder(RecursiveValidator::class)
        ->disableOriginalConstructor()
        ->setMethods(['validate'])
        ->getMock();

    if (!$withDefaultMock) {
        return $validatorMock;
    }

    $validatorMock->expects($this->any())
        ->method('validate')
        ->willReturn(new ConstraintViolationList());

    return $validatorMock;
}
558d9aab 806}